From 57771707c5d17696fd3660ed6f10428b3c234e9a Mon Sep 17 00:00:00 2001 From: Packit Date: Oct 26 2020 17:06:21 +0000 Subject: papi-5.6.0 base --- diff --git a/ChangeLogP400.txt b/ChangeLogP400.txt new file mode 100644 index 0000000..1019d76 --- /dev/null +++ b/ChangeLogP400.txt @@ -0,0 +1,418 @@ +2010-01-14 terpstra + * src/perf_events.c 1.18: + More tweaks from Corey for event rescheduling problem. + Also a syntax fix for POWER platforms. + +2010-01-13 sbk + * src/configure.in 1.166: + + Enable the static perf events static table to be created and compiled in + again for Cray XT CLE. + +2010-01-13 terpstra + * release_procedure.txt 1.13: + Bump the date. + + * src/papi_internal.c 1.138: + Fix for rescheduling events after a failed add. This addresses the NULL pointer issue found in overflow_allcounters on i7. + Thanks to Corey Ashford of IBM for the fix. + + * papi.spec 1.4: + Final changes from Will Cohen. + + * src/libpfm-3.y/config.mk 1.3: + * src/libpfm-3.y/examples_v2.x/self_smpl_multi.c 1.3: + * src/libpfm-3.y/examples_v2.x/syst.c 1.3: + * src/libpfm-3.y/examples_v2.x/syst_multi_np.c 1.3: + * src/libpfm-3.y/examples_v2.x/syst_np.c 1.3: + * src/libpfm-3.y/lib/pfmlib_coreduo.c 1.3: + * src/libpfm-3.y/lib/power7_events.h 1.3: + *** empty log message *** + + * src/utils/event_info.c 1.11: + Change test for version number to differentiate between PAPI-C + and Classic PAPI. We were testing for versions >=3 && >= .9. + This was missing versions >= 4. + + * src/libpfm-3.y/include/perfmon/pfmlib_gen_mips64.h 1.1.1.4: + * src/libpfm-3.y/lib/intel_atom_events.h 1.1.1.5: + * src/libpfm-3.y/lib/intel_corei7_events.h 1.1.1.5: + * src/libpfm-3.y/lib/pfmlib_gen_mips64.c 1.1.1.6: + * src/libpfm-3.y/lib/pfmlib_intel_nhm.c 1.1.1.9: + Importing latest libpfm + + * src/Makefile.in 1.44: + * src/papi.h 1.193: + Update version numbers for impending release of PAPI 4.0.0. 
+ +2010-01-13 jagode + * src/Makefile.inc 1.152: + Avoid printing conditional 'if' statements while compiling (but + they are performed). + + * src/perf_events.h 1.7: + Seg fault on i7 with perf_events. This was fixed a while ago for + perfmon and perfctr but perf-events was left behind. + +2010-01-13 bsheely + * src/configure 1.165: + Generated configure to correspond to most recent change (Cray XT) + to configure.in + +2010-01-12 terpstra + * src/linux-bgp.c 1.3: + Restore prior native naming convention: PNE_BGP_... + Needed to avoid conflict with system level naming conventions. + + * src/ctests/bgp/Makefile 1.3: + Modifications to build test files for BGP. + + * INSTALL.txt 1.42: + Update description for BGP. + +2010-01-08 terpstra + * src/Rules.perfctr-pfm 1.47: + * src/Rules.pfm_pe 1.10: + Eliminate duplicate definitions of environment variable for the compile line. + These are now defined in configure. + + * src/ctests/test_utils.c 1.77: + Minor tweak to print native event codes in hex instead of + decimal -- far more useful that way. + + * src/perfctr-p4.c 1.106: + Minor tweak to get this file to compile with DEBUG turned on. + +2010-01-07 sbk + * src/Rules.pfm 1.50: + The libpfm flag CONFIG_PFMLIB_OLD_PFMV2 was correctly set for when compiling + and building libpfm, but it also needs to be set for installing. The + header file libpfm-3.y/include/perfmon/perfmon.h uses this flag to + determine if a macro is prepended to perfmon.h when installing it. + +2010-01-07 jagode + * src/linux-acpi.c 1.16: + * src/linux-mx.c 1.15: + * src/linux-net.c 1.4: + Renamed identifier 'native_name' for net, mx, and acpi + components because of conflicts on POWER machines. This variable + has also been defined in powerX_events.h. 
+ +2010-01-07 bsheely + * src/Rules.perfctr 1.57: + Added DEBUGFLAGS to OPTFLAGS since only OPTFLAGS gets used in + Makefile.inc + +2010-01-05 terpstra + * src/multiplex.c 1.76: + Modified license language for John May's LLNL portion of this code to conform with BSD as provided by LLNL. + Thanks, Bronis, for bird-dogging this. + +2009-12-20 terpstra + * src/solaris-niagara2.c 1.4: + Changes to fix overflow/profile issues in niagara2. + Thanks to Fabian Gorsler. + +2009-12-18 terpstra + * src/ctests/bgp/papi_1.c 1.2: + * src/ctests/native.c 1.61: + * src/ctests/papi_test.h 1.37: + * src/extras.c 1.159: + * src/linux-bgp-memory.c 1.2: + * src/linux-bgp-native-events.h 1.2: + * src/linux-bgp-preset-events.c 1.2: + * src/linux-bgp.h 1.2: + * src/papi.c 1.337: + * src/papiStdEventDefs.h 1.38: + * src/papi_data.c 1.35: + * src/papi_internal.h 1.181: + * src/papi_preset.h 1.17: + * src/papi_protos.h 1.69: + * src/papi_vector.c 1.22: + Committing changes for BG/P. + Utilities and basic counting works; + Not fully tested. + +2009-12-16 terpstra + * LICENSE.txt 1.6: + Minor tweaks on the header of the license text. + + * src/solaris-niagara2-memory.c 1.3: + * src/solaris-niagara2.h 1.3: + Commit initial changes for Niagara2 support for PAPI-C. + Thanks to Fabian Gorsler. + Basic counting works; some unresolved issues remain for overflow and profile. + +2009-12-11 terpstra + * src/papi_events.csv 1.3: + Add a synonym for Pentium M. + +2009-12-08 bsheely + * src/linux.c 1.69: + Fixed memory issue seen in testing on certain platforms + +2009-12-05 terpstra + * ChangeLogP372.txt 1.1: + file ChangeLogP372.txt was initially added on branch papi-3-7-0. + +2009-12-02 terpstra + * src/sys_perf_counter_open.c 1.10: + * src/syscalls.h 1.4: + Slightly cleaner syntax for redefinition of perf_event_attr in + KERNEL31. 
+ +2009-12-01 terpstra + * src/ctests/sdsc4.c 1.14: + Fix from Will Cohen to avoid round-off errors in computing small + differences between large numbers, which occasionally resulted + in sqrt of negative numbers. Originally applied to sdsc2; + modified and applied to sdsc4. + +2009-11-30 terpstra + * src/x86_cache_info.c 1.7: + Strip the Windows version of cpuid out to make this version + compatible with the 3.7.x branch. + + * src/ctests/sdsc2.c 1.13: + Fix from Will Cohen to avoid round-off errors in computing small differences between large numbers, which occasionally resulted in sqrt of negative numbers. + Thanks Will + +2009-11-25 terpstra + * src/papi_hl.c 1.77: + PAPI_stop_counters was returning PAPI_OK even if PAPI_stop + returned something other than PAPI_OK. Uncovered as part of the + BG/P merge. + +2009-11-25 bsheely + * src/hwinfo_linux.c 1.2: + added test for topology/thread_siblings and topology/ + core_siblings + +2009-11-24 terpstra + * src/papi_vector.h 1.10: + Fix a bug in assigning signals for overflow. + + Also expose a vector_find_dummy routine to allow testing for component functions. If the function pointer is a dummy, it isn't implemented in the component. + This is used in extras to test for the implementation of a name_to_code routine. + +2009-11-24 bsheely + * src/ctests/hwinfo.c 1.7: + Removed invalid code (zero can be a valid value for nnodes) + +2009-11-23 bsheely + * src/solaris-ultra.c 1.125: + resolved compile error + + * src/run_tests.sh 1.37: + * src/run_valgrind_tests.sh 1.2: + valgrind code merged into run_tests.sh and commented out by + default + +2009-11-20 bsheely + * src/genpapifdef.c 1.41: + * src/papi_events.xml 1.3: + * src/papi_fwrappers.c 1.81: + Applied patch from Steve Kaufmann at Cray. Removes the remaining + Unicos, Catamount, T3E, X1 and X2 references. Only explicit + support for XT4+/CLE remains. 
+ +2009-11-18 mucci + * src/any-null.c 1.52: + * src/linux-bgl.c 1.9: + * src/perfmon.c 1.97: + * src/windows.c 1.4: + Renamed shutdown_global to shutdown_substrate to make it more obvious that + this is per substrate. This callback will be important for freeing some memory + up and making sure locks are reset. Looks like a big patch, but only a few lines. + + * src/config.h.in 1.9: + Add support for detecting gettid and syscall(gettid) which results in + HAVE_GETTID and HAVE_SYSCALL_GETTID being defined in config.h + + This will be useful for Linux where we can remove all the special casing + for threads and locking and the errors with getpid. gettid all the time. + + * src/papi_lock.h 1.1: + Beginnings of a single function with all PAPI/Linux locking functions. + + Note to PAPI-C developers. The multiple context concept of PAPI-C has failed + to include the lock data structure. PAPI currently only has one scope of + locks that span the high-level to the low-level. This will need to be revisited + and the locks split into high-level and per-context locks. + +2009-11-13 terpstra + * ChangeLogP371.txt 1.1: + file ChangeLogP371.txt was initially added on branch papi-3-7-0. + +2009-11-12 bsheely + * src/papi_events_table.sh 1.1: + * src/papi_pfm_events.c 1.35: + * src/papi_pfm_events.h 1.4: + * src/perfmon_events.csv 1.57: + * src/perfmon_events_table.sh 1.6: + * src/pmapi-ppc64_events.c 1.8: + * src/ppc64_events.h 1.10: + renamed perfmon_events.csv perfmon_events_table.h + perfmon_events_table.sh to papi_events.csv papi_events_table.h + papi_events_table.sh and made code changes required by the + renaming + +2009-11-11 terpstra + * src/ctests/first.c 1.49: + Fix overly restrictive verification of results. In verifying + that FP_INS/FP_OPS/TOT_INS was non-zero, we were requiring it to + be near theoretical FP_OPS which caused false verification + failures in some edge cases. Now we just require count >= + iterations. 
+ +2009-11-11 bsheely + * src/ctests/inherit.c 1.13: + * src/ctests/multiplex1_pthreads.c 1.49: + * src/ctests/overflow.c 1.66: + * src/ctests/overflow2.c 1.25: + * src/ctests/overflow3_pthreads.c 1.21: + * src/ctests/overflow_allcounters.c 1.5: + * src/ctests/overflow_force_software.c 1.24: + * src/ctests/overflow_index.c 1.9: + * src/ctests/overflow_one_and_read.c 1.5: + * src/ctests/overflow_single_event.c 1.45: + * src/ctests/overflow_twoevents.c 1.26: + * src/ctests/pthrtough2.c 1.7: + * src/ctests/zero_shmem.c 1.6: + * src/ftests/cost.F 1.18: + * src/ftests/fmultiplex1.F 1.37: + * src/ftests/ftests_util.F 1.49: + * src/ftests/native.F 1.55: + * src/perfmon.h 1.20: + removed code for obsolete cray builds + + * src/ctests/do_loops.c 1.32: + * src/ctests/zero_fork.c 1.9: + * src/linux-memory.c 1.41: + * src/linux.h 1.3: + * src/perfctr-p3.c 1.91: + * src/perfctr-p3.h 1.50: + * src/run_cat_tests.sh 1.4: + removed Catamount code + +2009-11-09 bsheely + * src/linux-ia64-memory.c 1.23: + * src/linux-ia64.c 1.176: + created hwinfo_linux.c to encapsulate code to set _papi_hw_info + struct on Linux platforms + + * src/unicosmp-memory.c 1.4: + removed obsolete file + +2009-11-06 terpstra + * src/libpfm-3.y/examples_v2.x/x86/smpl_nhm_lbr.c 1.1.1.2: + libpfm nhm and atom fixes + +2009-11-05 bsheely + * src/alpha-memory.c 1.11: + * src/ckcatamount.c 1.3: + * src/dadd-alpha.c 1.43: + * src/dadd-alpha.h 1.14: + * src/irix-memory.c 1.20: + * src/irix-mips.c 1.116: + * src/irix-mips.h 1.34: + * src/irix.c 1.2: + * src/irix.h 1.3: + * src/linux-alpha.c 1.24: + * src/linux-alpha.h 1.9: + * src/power3.c 1.41: + * src/power3.h 1.19: + * src/power3_events.c 1.9: + * src/power3_events.h 1.8: + * src/power4_events.h 1.9: + * src/power4_events_map.c 1.6: + * src/t3e_events.c 1.11: + * src/tru64-alpha.c 1.66: + * src/tru64-alpha.h 1.22: + * src/unicos-ev5.c 1.69: + * src/unicos-ev5.h 1.20: + * src/unicos-memory.c 1.12: + * src/unicosmp.h 1.5: + * src/x1-native-presets.h 1.4: + * 
src/x1-native.h 1.5: + * src/x1-presets.h 1.7: + * src/x1.c 1.38: + * src/x1.h 1.11: + removed files related to obsolete builds + +2009-11-03 terpstra + * src/libpfm-3.y/examples_v2.x/x86/Makefile 1.1.1.3: + * src/libpfm-3.y/examples_v2.x/x86/smpl_core_pebs.c 1.1.1.3: + * src/libpfm-3.y/examples_v2.x/x86/smpl_pebs.c 1.1.1.1: + * src/libpfm-3.y/include/Makefile 1.1.1.9: + * src/libpfm-3.y/include/perfmon/perfmon_pebs_smpl.h 1.1.1.1: + * src/libpfm-3.y/include/perfmon/pfmlib_intel_nhm.h 1.1.1.2: + * src/libpfm-3.y/lib/amd64_events_fam10h.h 1.1.1.5: + * src/libpfm-3.y/lib/intel_corei7_unc_events.h 1.1.1.2: + * src/libpfm-3.y/lib/pfmlib_amd64.c 1.1.1.10: + * src/libpfm-3.y/lib/pfmlib_core.c 1.1.1.12: + * src/libpfm-3.y/lib/pfmlib_intel_atom.c 1.1.1.6: + * src/libpfm-3.y/lib/pfmlib_intel_nhm_priv.h 1.1.1.2: + * src/libpfm-3.y/lib/power6_events.h 1.1.1.4: + latest libpfm changes + +2009-11-02 terpstra + * src/utils/avail.c 1.49: + * src/utils/native_avail.c 1.42: + Fixes to eliminate strcpy on overlapping strings The offending + calls were replaced with memmoves and encapsulated in a single + function for better maintenance. + +2009-10-29 bsheely + * src/solaris-ultra.h 1.41: + resolved compile errors on solaris + +2009-10-23 bsheely + * src/Rules.pfm_pcl 1.13: + * src/pcl.c 1.12: + * src/pcl.h 1.5: + Naming convention change from PCL to Perf Events: renamed pcl.h + and pcl.c to perf_events.h and perf_events.c, renamed + Rules.pfm_pcl to Rules.pfm_pe, configure option --with-pcl + changed to --with-perf-events + +2009-10-20 bsheely + * src/ctests/byte_profile.c 1.18: + corrected possible logic error in setting end point of profile + buffer + +2009-10-15 bsheely + * src/perfctr-ppc32.c 1.9: + corrected possible init error + +2009-10-14 terpstra + * src/ctests/calibrate.c 1.39: + Error checking was missing undercount conditions. + +2009-10-13 terpstra + * src/run_tests_exclude.txt 1.6: + This file never existed on the PAPI-C branch. 
+ + * src/aix-memory.c 1.15: + * src/aix.c 1.84: + * src/aix.h 1.29: + * src/pmapi-ppc64.c 1.8: + * src/pmapi-ppc64.h 1.4: + * src/threads.c 1.33: + Conversion of AIX to PAPI-C. Most tests pass, except for some + overflow related stuff. Haven't examined things closely yet, but + thought I should check this stuff in. + +2009-10-12 bsheely + * src/ftests/fdmemtest.F 1.5: + * src/ftests/flops.F 1.14: + declare types explicitly + + * src/ctests/multiattach.c 1.5: + * src/ctests/zero_attach.c 1.5: + corrected logic error with pid type + +2009-10-09 terpstra + * src/power6_events.h 1.3: + * src/power6_events_map.c 1.4: + Somehow these got removed from the repository. diff --git a/ChangeLogP410.txt b/ChangeLogP410.txt new file mode 100644 index 0000000..e6d261f --- /dev/null +++ b/ChangeLogP410.txt @@ -0,0 +1,527 @@ +2010-06-21 terpstra + * src/Makefile.in 1.52: + * src/configure 1.224: + * src/configure.in 1.224: + Change version numbers in anticipation of the impending 4.1 + release. + +2010-06-18 vweaver1 + * src/components/example/example.c 1.4: + Correct a comment. + +2010-06-18 ralph + * doc/Doxyfile 1.5: + * doc/Doxyfile-everything 1.2: + Upped the version number in doxygen config files for upcoming + release. + + * INSTALL.txt 1.47: + Friday afternoon typo... the command given for generating all + documentation was wrong + + * src/components/lustre/linux-lustre.c 1.6: + * src/components/lustre/linux-lustre.h 1.5: + Fixed some of the comments to get doxygen's attention /* -> /** + + I'm still working out how to best do the papi_components group + but for now I just put the .h file for the component into the group. (@ingroup papi_components) + So that one file per component shows up listing. + + * src/papi.h 1.208: + Added a small section about components on the main doxygen + generated page. 
+ +2010-06-17 jagode + * src/components/lustre/Rules.lustre 1.3: + * src/components/lustre/host_counter.c 1.2: + * src/components/lustre/host_counter.h 1.2: + Added new component for infiniband devices. Major changes for + lustre component. + + * src/components/README 1.4: + Added documentation (Doxygen) for InfiniBand (and lustre) + component. + +2010-06-15 ralph + * src/components/acpi/linux-acpi.c 1.3: + * src/components/acpi/linux-acpi.h 1.2: + * src/components/lmsensors/linux-lmsensors.h 1.3: + * src/components/mx/linux-mx.h 1.2: + * src/components/net/linux-net.h 1.2: + * src/papi.c 1.360: + * src/papi_hl.c 1.85: + * src/utils/avail.c 1.53: + * src/utils/clockres.c 1.25: + * src/utils/command_line.c 1.15: + * src/utils/cost.c 1.40: + * src/utils/decode.c 1.9: + * src/utils/event_chooser.c 1.18: + * src/utils/mem_info.c 1.17: + * src/utils/native_avail.c 1.47: + Added documentation for the several components. + Doxygen will now search recursively under the components directory for documented *.[c|h] files ( /** @file */ somewhere in it). + + Several other files got brief descriptions of what is in the file. + +2010-06-14 terpstra + * papi.spec 1.9: + Minor tweak to make sure libpfm builds without warnings. + +2010-06-11 jagode + * src/components/lmsensors/linux-lmsensors.c 1.2: + removed compiler warnings for lm-sensors component; switched to + stderr so that papi_xml_event_info creates a clean output. + +2010-06-11 bsheely + * src/ctests/api.c 1.2: + Added first few api test cases + +2010-06-10 bsheely + * src/ctests/papi_test.h 1.39: + * src/ctests/test_utils.c 1.82: + Added test_fail_exit for use in single threaded tests + +2010-06-09 vweaver1 + * src/perfctr-2.6.x/patches/aliases 1.13: + * src/perfctr-2.6.x/usr.lib/Makefile 1.31: + Fix conflicts from import. + + * src/perfctr-2.6.x/CHANGES 1.1.1.28: + ... 
+ * src/perfctr-2.6.x/usr.lib/x86.c 1.1.1.11: + Import of perfctr 2.6.41 + +2010-06-07 bsheely + * src/any-null.c 1.60: + * src/freq.c 1.1: + * src/papi_vector.c 1.31: + Moved timer impl from any-null.c into papi_vector.c and added + generic functionality to compute frequency if unable to + determine based on platform + + * src/papi_data.c 1.40: + * src/papi_data.h 1.6: + Added new error code + + * src/Makefile.inc 1.163: + Added freq.c to build + + * src/configure 1.223: + * src/configure.in 1.223: + ctests/api (not yet implemented) added to default ctests + +2010-06-03 bsheely + * src/ctests/Makefile 1.155: + Initial commit for ctests/api which is not yet implemented + +2010-06-02 bsheely + * src/papi_lock.h 1.7: + Fixed for BG/P + +2010-06-01 vweaver1 + * README 1.10: + Fix typo in README + +2010-06-01 bsheely + * src/config.h.in 1.13: + Added code to define _rtc when Cray is compiled with gcc + + * src/cycle.h 1.4: + Rolled back previous changes + +2010-05-27 bsheely + * src/papi_internal.c 1.158: + * src/threads.h 1.15: + --with-no-cpu-component renamed --with-no-cpu-counters + + * src/components/mx/configure 1.3: + * src/components/mx/configure.in 1.3: + Rollback last change + + * src/ctests/multiattach.c 1.8: + * src/ctests/zero_attach.c 1.8: + Attempt to fix xlc compile errors + +2010-05-21 bsheely + * src/Rules.perfctr 1.66: + * src/Rules.perfctr-pfm 1.57: + * src/Rules.pfm 1.57: + * src/Rules.pfm_pe 1.18: + Use MISCHDRS from configure + +2010-05-20 bsheely + * src/components/mx/linux-mx.c 1.2: + Fixed compile error and warnings. Added option to configure + +2010-05-19 terpstra + * src/ctests/all_native_events.c 1.24: + Hard-code an exception for Nehalem OFFCORE_RESPONSE_0. This + event can't be counted because it uses a shared chip-level + register. 
+ +2010-05-19 bsheely + * src/linux-ia64-memory.c 1.25: + * src/linux-ia64.c 1.183: + * src/pfmwrap.h 1.43: + Fixed warning in ia64 + + * src/components/net/linux-net.c 1.2: + Fixed compile warnings + + * src/Makefile.in 1.51: + Extra compiler warning flags are not added until after the + libpfm build + +2010-05-14 vweaver1 + * src/linux-bgp.c 1.5: + Temporary fix to emulate cycles HW counter on BlueGeneP using + the get_cycles() call. + +2010-05-13 bsheely + * src/x86_cache_info.c 1.13: + added missing C library headers + + * src/hwinfo_linux.c 1.7: + fixed compile errors on torc0 by including missing C library + headers + + * src/ftests/Makefile 1.66: + * src/utils/Makefile 1.16: + Replaced missing MEMSUBSTR macro in configure. AC_ARG_ENABLE + macros replaced with AC_ARG_WITH macros. Continued changes for -- + with-no-cpu-component + +2010-05-07 ralph + * doc/Doxyfile-everything 1.1: + * doc/Makefile 1.1: + Added makefile in doc to generate user and developer documentation. + + from src, make doc builds the user documentation in doc/html + (do we want this?) + +2010-05-07 jagode + * src/utils/event_info.c 1.14: + papi_xml_event_info generated some invalid xml output. This bug + was introduced in Revision 1.10 + +2010-05-07 bsheely + * src/any-null-memory.c 1.11: + * src/any-null.h 1.23: + * src/extras.c 1.170: + * src/multiplex.c 1.85: + * src/papi_preset.c 1.29: + * src/papi_vector.h 1.14: + * src/threads.c 1.36: + Added --with-no-cpu-component option which has only been tested + on x86 + +2010-05-03 ralph + * src/freebsd-memory.c 1.1: + * src/freebsd.c 1.9: + * src/freebsd.h 1.6: + * src/papi_fwrappers.c 1.86: + Updated Harald Servat's freebsd work to Component Papi. + + Has had cursory testing, but should be considered alpha quality. + (there is a really nasty bug when running the overflow_pthreads test) + + * src/genpapifdef.c 1.43: + Removed a holdout from catamount support, + are there any platforms where we don't get malloc from stdlib? 
+ +2010-05-03 bsheely + * src/papi_table.c 1.5: + Removed obsolete file + +2010-04-30 terpstra + * release_procedure.txt 1.17: + Add a few more steps on testing a patch. + +2010-04-30 bsheely + * src/components/acpi/Rules.acpi 1.2: + * src/components/lmsensors/Rules.lmsensors 1.2: + * src/components/lustre/Rules.lustre 1.2: + * src/components/mx/Rules.mx 1.2: + * src/components/net/Rules.net 1.2: + Adding new components no longer requires modification of Papi + code + +2010-04-29 bsheely + * src/components/Rules.components 1.1: + * src/components/acpi/linux-acpi-memory.c 1.1: + * src/components/lmsensors/Makefile.lmsensors.in 1.1: + * src/components/lmsensors/configure 1.1: + * src/components/lmsensors/configure.in 1.1: + * src/components/lustre/host_counter.c 1.1: + * src/components/lustre/host_counter.h 1.1: + * src/components/mx/Makefile.mx.in 1.1: + * src/components/net/Makefile.net.in 1.1: + * src/components/net/configure 1.1: + * src/components/net/configure.in 1.1: + * src/host_counter.c 1.2: + * src/host_counter.h 1.2: + * src/linux-acpi-memory.c 1.4: + * src/linux-acpi.c 1.18: + * src/linux-acpi.h 1.10: + * src/linux-lmsensors.c 1.4: + * src/linux-lmsensors.h 1.4: + * src/linux-lustre.c 1.4: + * src/linux-lustre.h 1.2: + * src/linux-mx.c 1.17: + * src/linux-mx.h 1.10: + * src/linux-net.c 1.6: + * src/linux-net.h 1.4: + Created new build environment for components + +2010-04-21 bsheely + * src/perfmon.c 1.105: + removed code that was commented out (accidentally uncommented + out on last commit) + +2010-04-20 bsheely + * src/freebsd/map-i7.c 1.3: + * src/freebsd/map-i7.h 1.3: + Updated on 3.7 branch + + * src/linux-bgl-events.c 1.4: + * src/linux-bgl-memory.c 1.4: + * src/linux-bgl.c 1.11: + * src/linux-bgl.h 1.4: + * src/linux-ia64.h 1.61: + * src/linux.c 1.77: + * src/papi_events.csv 1.9: + * src/papi_pfm_events.c 1.40: + * src/perf_events.c 1.26: + * src/perf_events.h 1.11: + * src/perfctr-ppc64.c 1.19: + * src/perfctr-x86.c 1.4: + * src/perfmon.h 1.24: + 
* src/pmapi-ppc64.c 1.11: + * src/solaris-ultra.c 1.128: + Removed code for obsolete platforms + +2010-04-16 jagode + * src/ctests/native.c 1.63: + * src/papiStdEventDefs.h 1.41: + * src/papi_internal.h 1.190: + * src/papi_preset.h 1.22: + * src/papi_protos.h 1.74: + After further investigations of the stack corruption issue on + BGP, the real problem has been nailed down. The size of the + PAPI_event_info_t struct is different on BGP systems which is + due to a bigger PAPI_MAX_INFO_TERMS value. A _BGP was defined at + configure time to differentiate between BGP and other systems. + However, the problem is that a user program does not know this + macro. When PAPI_event_info_t is initialized to zero, the + beginning of the user program's stack frame is zeroed out --> + BAD. It was fun, though. + + * src/aix.c 1.87: + Fixed compilation errors for AIX which were due to missing + inclusion of new header file papi_defines.h. + +2010-04-15 bsheely + * src/freebsd/map-atom.c 1.5: + ... + * src/freebsd/memory.c 1.4: + Added files + +2010-04-09 bsheely + * src/linux-ppc64-memory.c 1.9: + * src/perfctr-ppc32.c 1.11: + * src/perfctr-ppc32.h 1.4: + * src/perfctr-ppc64.h 1.11: + * src/ppc32_events.c 1.8: + * src/ppc64_events.c 1.9: + * src/ppc64_events.h 1.12: + Removed support for ppc32 architectures. Removed support for + perfmon versions older than 2.5 except for Itanium. Removed all + code related to POWER3 and POWER4. 
+ +2010-04-08 bsheely + * src/solaris-niagara2.h 1.5: + Added new include file + + * src/solaris-niagara2.c 1.7: + Removed recently added include file since that file is now + included in the header which is included here + +2010-04-06 jagode + * src/linux-bgp.h 1.4: + Missing declaration of PAPI_MAX_LOCK (fixed for linux-bgp only) + +2010-04-05 bsheely + * src/papi_memory.c 1.23: + Resolved compile warning + + * src/ctests/profile.c 1.60: + Modified code to exit properly on test failure + +2010-04-01 bsheely + * src/ctests/clockcore.c 1.21: + Prevent output after test failure + +2010-03-30 vweaver1 + * src/libpfm-3.y/lib/pfmlib_intel_nhm.c 1.4: + Fix conflict from merge. + + * src/libpfm-3.y/lib/intel_corei7_events.h 1.1.1.6: + * src/libpfm-3.y/lib/pfmlib_itanium2.c 1.1.1.3: + * src/libpfm-3.y/lib/pfmlib_montecito.c 1.1.1.4: + import libpfm CVS + + adds additional i7 model 46 support, fixes ia64 builds + +2010-03-29 bsheely + * src/ctests/pthrtough.c 1.11: + Fixed buffer overflow debug output related to threads.c. Rolled + back change to pthrtough.c + +2010-03-19 bsheely + * src/solaris-ultra.h 1.43: + Add new include for remaining substrates + +2010-03-18 bsheely + * src/ctests/p4_lst_ins.c 1.5: + * src/ftests/native.F 1.56: + * src/p3_pfm_events.c 1.14: + * src/p4_events.c 1.56: + * src/p4_events.h 1.10: + * src/papi_defines.h 1.2: + * src/papi_memory.h 1.12: + * src/perfctr-p3.c 1.95: + * src/perfctr-p3.h 1.52: + * src/perfctr-p4.c 1.109: + * src/perfctr-p4.h 1.47: + * src/perfctr-x86.h 1.2: + Merge bsheely-temp branch by hand + +2010-03-12 vweaver1 + * src/ctests/multiplex1.c 1.53: + * src/ctests/multiplex1_pthreads.c 1.54: + * src/solaris-memory.c 1.14: + Fix PAPI support for solaris-ultra. + This code had not worked for some time. 
+ + * Derived events now work (although the events are still + hard-coded and not read from the csv file) + * Add cache size detection routines + * Fix ntv_code_to_name() + * Modify the multiplex* ctests to use proper events on + UltraSPARC + + All of the regression tests pass except for profile_pthreads. + This is because overflow handling is still partially broken. + +2010-03-05 ralph + * doc/doxygen_procedure.txt 1.1: + doc/doxygen_procedure.txt provides a quick overview of how to use doxygen + for commenting the PAPI code. + + The utilities are now commented, cloning the wiki man pages. + + The high level api is also documented, cloning the wiki again. + In the low level api, PAPI_accum - PAPI_destroy_eventset are documented. + +2010-03-05 bsheely + * src/ctests/thrspecific.c 1.6: + Test now passes while testing the same functionality without + memory leaks + +2010-03-04 vweaver1 + * src/libpfm-3.y/lib/pfmlib_priv.h 1.7: + Fix conflicts from the libpfm import. + + * src/libpfm-3.y/docs/man3/libpfm_westmere.3 1.1.1.1: + * src/libpfm-3.y/examples_v2.x/showevtinfo.c 1.1.1.3: + * src/libpfm-3.y/include/perfmon/pfmlib.h 1.1.1.13: + * src/libpfm-3.y/lib/intel_wsm_events.h 1.1.1.1: + * src/libpfm-3.y/lib/intel_wsm_unc_events.h 1.1.1.1: + * src/libpfm-3.y/lib/pfmlib_common.c 1.1.1.14: + * src/libpfm-3.y/lib/pfmlib_intel_nhm_priv.h 1.1.1.3: + Import latest libpfm, which includes Westmere support + +2010-03-04 bsheely + * src/ctests/fork.c 1.7: + * src/ctests/fork2.c 1.4: + * src/ctests/krentel_pthreads.c 1.8: + * src/ctests/kufrin.c 1.15: + * src/ctests/overflow_pthreads.c 1.43: + * src/ctests/profile_pthreads.c 1.37: + Fixed memory leaks + +2010-03-03 vweaver1 + * src/p3_ath_event_tables.h 1.4: + * src/p3_core_event_tables.h 1.5: + * src/p3_events.c 1.65: + * src/p3_opt_event_tables.h 1.4: + * src/p3_p2_event_tables.h 1.4: + * src/p3_p3_event_tables.h 1.4: + * src/p3_pm_event_tables.h 1.4: + Now that Athlon and Pentium II events use libpfm, remove the old + hard coded 
event table files. + + * src/perfctr-2.6.x/README 1.1.1.6: + * src/perfctr-2.6.x/patches/patch-kernel-2.6.18-164.el5-redhat 1.1.1.1: + * src/perfctr-2.6.x/patches/patch-kernel-2.6.31 1.1.1.1: + * src/perfctr-2.6.x/patches/patch-kernel-2.6.32 1.1.1.1: + Import of perfctr 2.6.40 + +2010-03-03 bsheely + * src/ctests/clockres_pthreads.c 1.11: + * src/ctests/fork_exec_overflow.c 1.12: + * src/ctests/zero_pthreads.c 1.29: + Fixed memory leaks + +2010-02-24 bsheely + * src/linux-memory.c 1.44: + Removed hack to compile without warnings using Wconversion + +2010-02-23 bsheely + * src/ctests/all_events.c 1.15: + * src/ctests/multiplex2.c 1.36: + * src/ctests/multiplex3_pthreads.c 1.45: + Fixed (debug) compile warnings + +2010-02-22 jagode + * src/.indent.pro 1.1: + ... + * src/utils/version.c 1.4: + Added and applied new PAPI-coding-style profile file + + * src/windows.c 1.6: + Added missing comment closer */ This misindented the rest of the + source code in windows.c + +2010-02-16 terpstra + * src/ctests/prof_utils.h 1.8: + Cleaned up a bunch of implicit type conversions. + +2010-02-15 terpstra + * src/run_tests_exclude.txt 1.7: + Remove the PAPI_set_event_info and PAPI_encode_event API calls, + since they were never supported, and generally come to be + thought of as a bad idea. + + * src/ctests/encode.c 1.7: + * src/ctests/encode2.c 1.5: + Remove the encode and encode2 tests that exercise + PAPI_set_event_info and PAPI_encode_event API calls, since they + were never supported, and generally come to be thought of as a + bad idea. 
+ +2010-01-25 bsheely + * src/examples/PAPI_flips.c 1.4: + * src/examples/PAPI_flops.c 1.4: + * src/examples/PAPI_get_opt.c 1.5: + * src/examples/PAPI_ipc.c 1.4: + * src/examples/PAPI_overflow.c 1.5: + * src/examples/PAPI_profil.c 1.7: + * src/examples/high_level.c 1.4: + * src/examples/locks_pthreads.c 1.3: + * src/examples/overflow_pthreads.c 1.5: + Fixed remaining compile warnings + + * src/examples/sprofile.c 1.5: + Fixed compile warnings + diff --git a/ChangeLogP411.txt b/ChangeLogP411.txt new file mode 100644 index 0000000..d24a5cc --- /dev/null +++ b/ChangeLogP411.txt @@ -0,0 +1,476 @@ +2010-09-30 + + * src/: configure, configure.in: When --with-OS=CLE is enabled, + check the kernel version and use perfmon2 for old kernels and + perf_events for new kernels. + + * src/: configure, configure.in: If no sources of perf counters are + available, then use the generic_platform substrate instead. + + Currently the code would always fall back on perfctr even if no + perfctr support was available. + + * src/: configure, configure.in: If you specify --with-perf-events + or --with-pe-include but the required perf_event.h header is not + available, then have configure fail with an error. + + * papi.spec: Bump version number to 4.1.1 in affected files. Also + bump requirement for kernel from 2.6.31 to 2.6.32. This in prep + for the pending release. + + * src/: configure, Makefile.in, configure.in, papi.h: Bump version + number to 4.1.1 in affected files. This in prep for the pending + release. + + * INSTALL.txt: Hope this late commit doesn't interfere with + anything. This updates the INSTALL.txt to reflect all of the + improvements we've made to perf_event support since the last + release. 
+ +2010-09-29 + + * src/Rules.pfm: The -Werror problem was still occurring on + ia64/perfmon compiles, as I hadn't updated Rules.pfm + + * src/: configure, configure.in, perf_events.c, perf_events.h, + sys_perf_counter_open.c, sys_perf_event_open.c, syscalls.h: + Remove support for the perf_counter interface in kernel 2.6.31. + Now supports only the perf_event interface in kernel 2.6.32 and + above. + +2010-09-22 + + * src/perf_events.c: Attempt to add mmtimer support to perf_events + substrate. + + * src/: multiplex.c, papi.c, papi_protos.h: The multiplex code + currently does not make a final adjustment at the time of + MPX_read(). This is to avoid the case where counts could be + decreasing if you have multiple reads returning estimated values + before the next actual counter read. + + While this code works to keep the results non-decreasing, it can + cause significant differences from expected results for final + reads, especially if many counters are being multiplexed. + + This is seen in the sdsc-mpx test. It was failing occasionally + on some machines by having error of over 20% (the cutoff for a + test error) when multiplexing 11 events. + + What this fix does is to special case the PAPI_stop() case when + multiplexing is enabled, having the PAPI_stop() do a final + adjustment. The intermediate PAPI_read() case is not changed. + + This fixes the sdsc-mpx case, while still passing the mendes-alt + case (which checks for non-decreasing values). + + There is a #define that can be set in multiplex.c to restore the + previous behavior. + + * src/ctests/mendes-alt.c: This is our only test that checks to see + if multiplexed values are non-decreasing or not. Unfortunately + the test currently doesn't fail if values do go backward. + + This change causes the test to fail if it finds multiplexed + counts that decrease. + +2010-09-17 + + * src/libpfm-3.y/: config.mk, lib/intel_wsm_events.h: Fix conflicts + from merge. 
+ +2010-09-15 + + * src/: Makefile.inc, Rules.perfctr-pfm, Rules.pfm_pe: Finally fix + the -Wextra problem. + + The issue was -Wextra was being passed to libpfm, but only in the + case where the user had a CFLAGS env variable. + + It turns out this is due to the following from section 5.7.2 of + the gmake manual: Except by explicit request, make exports a + variable only if it is either defined in the environment + initially or set on the command line, + + And the fix is also described: If you want to prevent a + variable from being exported, use the unexport directive, + + So I've added an "unexport CFLAGS" directive, which seems to be + the right thing as our Makefile explicitly passes CFLAGS to the + sub-Makefiles that need it. This seems to fix the build. + +2010-09-13 + + * src/libpfm-3.y/: docs/man3/libpfm_westmere.3, + lib/intel_wsm_events.h, lib/intel_wsm_unc_events.h, + lib/pfmlib_intel_nhm.c, lib/pfmlib_priv.h: Fix the missing files + from the import (CVS claims this as a "conflict") + +2010-09-08 + + * src/Makefile.inc: Fixed the recipes for [c|f]tests and utils. + $(LIBRARY) => $(papiLIBS) (this way we don't build libpapi.a if + we don't want it) + +2010-09-03 + + * src/ctests/sdsc.c: Had a "%d" instead of "%lld" in that last + commit. + + * src/ctests/sdsc.c: Give a more detailed error message on the + sdsc-mpx test. + + We're seeing sporadic failures (probably due to results being + close to the threshold value) but it's hard to tell on buildbot + which counter is failing because the error message didn't print + the value. + +2010-09-02 + + * src/papi.c: Remove code that reported ENOSUPP if HW multiplexing + is not available. + + PAPI can automatically perform SW multiplexing if HW is not + available.
+ + With this part of my previous multiplexing patch reverted, + multiplexing seems to work even on 2.6.32 perf_events (by + reverting to SW mode on those machines) + +2010-08-31 + + * src/perf_events.c: Explicitly set the disabled flag to zero in + perf_events for new events. + + It was possible with an event set that if you removed an event + then added a new one that the disabled flag was obtaining the + value from the previously removed event. + + This fix doesn't seem to break anything, but the code involved is + a bit tricky to follow. + + This fixes the sdsc4-mpx test on sol. + + * src/components/coretemp/: Rules.coretemp, linux-coretemp.c, + linux-coretemp.h: Initial stab at a coretemp component. + + This component exposes every thing that looks like a useful file + under /sys/class/hwmon. + +2010-08-30 + + * src/perf_events.c: F_SETOWN_EX is not available until 2.6.32, so + don't use it unless we are running on a recent enough kernel. + + * src/perf_events.c: Pentium 4 was not supported by perf_events + until version 2.6.35. Print an error if we attempt to use it on + an older kernel. + +2010-08-27 + + * src/ctests/overflow_allcounters.c: The "overflow_allcounters" + test failed on perfmon2 kernels because the behavior of a + counter on overflow differs between the various substrates. + + Therefore detect if we're running on perfmon2 and print a + warning, but still pass the test. + + * src/libpfm-3.y/lib/: intel_wsm_events.h, intel_wsm_unc_events.h, + pfmlib_intel_nhm.c, pfmlib_priv.h: updating + + * src/libpfm-3.y/docs/man3/libpfm_westmere.3: removing westmere + documentation + + * src/perf_events.c: Fix warning in compile due to missing + parameter in a debug statement. + + * src/ctests/test_utils.c: In the ctests, test_skip() was + attempting a PAPI_shutdown() before exiting. On multithreaded + tests (that had already spawned threads before the decision to + skip) this really causes the programs to end up confused and + reports spurious memory errors. 
+ + So remove the PAPI_shutdown() from test_skip(). There's a + comment in test_fail() that indicates this was already done + there for similar reasons. + +2010-08-26 + + * src/ctests/byte_profile.c: byte_profile was failing on systems + where fp_ops is a derived event. + + Modify the test so it gives a warning instead of failing and + avoids using the derived event. + + * src/perf_events.c: At PAPI_stop() time a counter with overflow + enabled is being adjusted by a value equal to the sampling + period. + + It looks like this isn't needed (and is generating an overcount + that breaks overflow_allcounters). + + I'm still checking up on this code; if it turns out to be + necessary I may have to revert this later. + + * src/ctests/overflow_allcounters.c: Add validation check to + overflow_allcounters + + It turns out perf_event kernels overcount overflows for some + reason, while perfctr doesn't. I'm investigating. + + * src/ctests/: overflow_allcounters.c, papi_test.h, test_utils.c: + On Power5 and Power6, hardware counters 5 and 6 cannot generate + interrupts. + + This means the overflow_allcounters test was failing because + overflow could not be generated for events 5 and 6. + + Add code that special cases Power5 and Power6 for this test (and + generate a warning) + + * src/perf_events.c: Change some debug messages to be warnings + instead of errors. + + * src/: papi.c, ctests/second.c: Fix ctests/second on bluegrass + (POWER6) + + The test was testing domains by trying + PAPI_DOM_ALL^PAPI_DOM_SUPERVISOR in an attempt to turn off the + SUPERVISOR bit. This fails on Power6 as it leaves the + PAPI_DOM_OTHER bit set, which isn't allowed. + + How did the test earlier measure PAPI_DOM_ALL then, which has all + bits set? Well it turns out papi.c silently corrects + PAPI_DOM_ALL to be available_domains. But if you fiddle any of + the bits this correction is lost. This is probably not the + right thing to do, but the best way to fix it is not clear.
+ + For now this modifies the "second" test to clear the DOM_OTHER + bit too if the domain setting fails with it set. + +2010-08-25 + + * src/: papi.c, papi.h, perf_events.c, ctests/kufrin.c, + ctests/mendes-alt.c, ctests/multiplex1.c, + ctests/multiplex1_pthreads.c, ctests/multiplex2.c, + ctests/multiplex3_pthreads.c, ctests/sdsc.c, ctests/sdsc2.c, + ctests/sdsc4.c, ftests/fmultiplex1.F, ftests/fmultiplex2.F: Add + support for including the OS version in the component_info_t + struct. + + Use this support under perf_events to disable multiplexing + support if the kernel is < 2.6.33 + + Modify the various multiplexing tests to "skip" if they get a + PAPI_ENOSUPP when attempting to set up multiplexing. + + * src/ctests/all_native_events.c: Update all_native_events ctest to + print warning in the case where we skip events because they + aren't implemented yet (offcore and uncore mostly). + +2010-08-24 + + * src/ctests/: papi_test.h, profile.c, test_utils.c: Adds a new + "test_warn()" function for the ctests. + + This allows you to let tests pass with a warning. + + This is useful in cases where you don't want to forget that an + option needs implementing, but that the feature being missed + isn't important enough to fail the test. + + The first user of this is the "profile" test. We warn that + PAPI_PROFIL_RANDOM is not supported on perf_events. + + * src/perf_events.c: From what I can tell, on perf_events the + overflow PAPI_OVERFLOW_FORCE_SW case was improperly falling + through in _papi_pe_dispatch_timer() to also run the HARDWARE + code. + + This meant that we were attempting to read non-existent hardware + overflow data, causing a lot of errors to be printed to the + screen. + + This shows up in the overflow_force_software test + + * src/ctests/: ipc.c, multiplex2.c, multiplex3_pthreads.c, + test_utils.c: Some minor changes to the ctests.
+ + + ipc -- fail if the reported IPC value is zero + multiplex2 -- + fail if all 32 counter values report as zero + multiplex3_pthread + -- give up sooner if each counter returns zero. otherwise + the test can take upwards of an hour to finish and makes the + fan on my laptop sound like it's going to explode in the + process + +2010-08-20 + + * src/Makefile.inc: Disable CFLAGS += $(EXTRA_CFLAGS) (-Wextra) for + now. This will get buildbot running again, and if I can manage + to figure out exactly what the Makefiles are doing I'll re-enable + it again. + + * src/perf_events.c: Add support for Pentium 4 under perf events. + This requires a 2.6.35 kernel. On p4 perf events requires a + special format for the raw event, so we modify the results from + libpfm3 to conform to what the kernel expects. + + * release_procedure.txt: release_procedure updated to reflect files + to keep under /doc + +2010-08-18 + + * src/perf_events.c: Patch from Gary Mohr that allows PAPI on perf + events to catch permissions problems at the time of + configuration, rather than only appearing once papi_start() is + called. + + Quick summary of changes: + Adds a check_permissions() routine + PERF_COUNT_HW_INSTRUCTIONS is used as the test event. + + check_permissions() is called during PAPI_ATTACH, + PAPI_CPU_ATTACH and PAPI_DOMAIN + Various "ctl" structures + renamed "pe_ctl" + Some minor debug changes + +2010-08-05 + + * src/perf_events.c: Use F_SETOWN_EX instead of F_SETOWN in + tune_up_fd() + + This fixes a multi-thread overflow bug found with the Rice + test-suite. + + F_SETOWN_EX doesn't exist until Linux 2.6.32. We really need + some infrastructure that detects the running kernel at init time + and warns that things like F_SETOWN_EX, multiplexing, etc., are + unavailable if the kernel is too old. 
+ +2010-08-04 + + * src/: Makefile.inc, cpus.c, cpus.h, genpapifdef.c, papi.c, + papi.h, papi_defines.h, papi_internal.c, papi_internal.h, + perf_events.c, perf_events.h, threads.h: This is the + PAPI_CPU_ATTACH patch from Gary Mohr that also fixes a problem + with multiple event sets on perf events. + + Changes by file: + + papi.h + + Add PAPI_CPU_ATTACHED + + Add structures needed for CPU_ATTACH + Makefile.in + + include the new cpus.c file + papi_internal.c + + add call to _papi_hwi_shutdown_cpu() in + _papi_hwi_free_EventSet() + + make remap_event_position() non-static + + add_native_events() and remove_native_events() use + _papi_hwi_get_context() + + _papi_hw_read() has some whitespace and debug message + changes, + and removes an extraneous loop index + papi_internal.h + + a new CPUS_LOCK is added + + cpuinfo struct added to various structures + + an inline call called _papi_hwi_get_context() added + perf_events.h + + a cpu_num field added to control_state_t + perf_events.c + + open_pe_events() allows per-cpu counting, + additional debug was added + + set_cpu() function added + + new debug messages in set_granularity() and + _papi_pe_read() + + _papi_pe_ctl() has PAPI_CPU_ATTACH code added + + _papi_pe_update_control_state() has the default domain + set to be PAPI_DOM_USER instead of pe_ctl->domain + genpapifdef.c + + PAPI_CPU_ATTACHED added + threads.h + + an ESI field added to ThreadInfo_t + papi.c + + many new ABIDBG() debug messages added + + PAPI_start() updated to check for CPU_ATTACH conflicts, + has whitespace fixes, gets context now, + if dirty calls update_control_state() + + PAPI_stop(), PAPI_reset(), PAPI_read(), PAPI_read_ts(), + PAPI_accum(), PAPI_write(), PAPI_cleanup_eventset(), + all use _papi_hwi_get_context() to get context + + PAPI_read() has some braces added + + PAPI_get_opt() and PAPI_set_opt() have CPU_ATTACHED code + added.
+ + PAPI_overflow() and PAPI_sprofil() now report errors if + CPU_ATTACH enabled + cpus.c, cpus.h + + New files based on threads.c and threads.h + + I made some additional changes, based on warnings given by gcc + + Added a few missing function prototypes in cpus.h + Update + PAPI_MAX_LOCK as it wasn't increased to handle the new + addition of CPUS_LOCK + Removed various variables and functions + reported as being unused. + +2010-08-03 + + * src/: papi_internal.h, papi_lock.h: The option + --with-no-cpu-counters was not supported on AIX. This has been + fixed and works now. Also the get_{real|virt}_{cycles|usec} + implementations for AIX (checked in Jul 29) have now been tested + and work correctly. + +2010-07-29 + + * src/: configure, configure.in, papi_lock.h, papi_vector.c: Added + AIX support for the get_{real|virt}_{cycles|usec} functions +++ + Fortran tests are now compiling on AIX. Wrong compiler flags were + used for the AIX compilers. + +2010-07-26 + + * src/papi_events.csv: add PAPI_L1_DCM for atom + + * src/x86_cache_info.c: Update the x86 cache_info table. + + The data from this table now comes from figure 3-17 in the + Intel Architectures Software Reference Manual 2A (cpuid + instruction section) + + This fixes an issue on my Atom N270 machine where the L2 cache + was not reported. + +2010-07-16 + + * INSTALL.txt, src/perf_events.c, src/perf_events.h: Perf Events + now support attach and detach. + + The patch for supporting this was written by Gary Mohr + + * src/papi_events.csv: Add a few missing events to Nehalem, based + on reading Intel Volume 3b. + + * src/papi_events.csv: Fix Westmere to not use L1D_ALL_REF:ANY + + I tested this on a Nehalem which has the proper behavior, + unfortunately no Westmere here to test on. + + * src/: papi_events.csv, papi_pfm_events.c, perfctr-x86.c: Enable + support for having more than one CPU block with the same name in + the .csv file. This allows easier support for sharing events + between similar architectures. 
+ + I *think* this is needed and *think* it shouldn't break anything, + but I might have to back it out. + + Also fixes event support for Pentium Pro / Pentium III/ P6 on + perfmon2 and perf events kernels. + + Also fixed some confusion where perfctr called chips "Intel Core" + meaning Core Duo whereas pfmon called "Intel Core" meaning + Core2. + + This was tested on actual Pentium Pro and PIII hardware (as well + as on a few Pentium 4 machines plus a Core2 machine) + +2010-07-02 + + * src/: papi_hl.c, ctests/api.c: Added remaining low-level api + tests diff --git a/ChangeLogP412.txt b/ChangeLogP412.txt new file mode 100644 index 0000000..bfbe4a3 --- /dev/null +++ b/ChangeLogP412.txt @@ -0,0 +1,517 @@ +2011-01-17 + + * src/configure: Ran autoconf to generate updated configure file. + +2011-01-16 + + * src/components/README: Adding a component for the FreeBSD OS that + reports the value of the thermal sensors available in the Intel + Core processors. There are as many counters as cores, and the + value reported by each counter is in Kelvin degrees. + + * src/freebsd.c: Implemented missing + _papi_freebsd_ntv_name_to_code. + + * src/: Makefile.in, Makefile.inc, configure.in, ctests/Makefile: + Fix dependency on -ldl + + Now configure checks if dl* symbols are in the base system + libraries (i.e., no -ldl needed). If so, avoid adding -ldl to + shlib example. + + If dl* symbols are not found in the base system libraries, then + check for -ldl, and if it exists, pass it to ctests/Makefile + through Makefile. If -ldl is not found, fail at configure time. + + * src/ctests/multiattach.c: Fix to compile in FreeBSD. + + * src/: freebsd-memory.c, freebsd.c: Code cleanup. + +2011-01-14 + + * src/: perf_events.c, perfmon.c: [PATCH 18/18] papi: make + _perfmon2_pfm_pmu_type variable static + + In perf_events.c and perfmon.c the variable + _perfmon2_pfm_pmu_type is used locally only, making it static.
+ + Signed-off-by: Robert Richter + + * src/: linux-bgp.c, linux-ia64.c, perf_events.c, perfctr.c, + perfmon.c: [PATCH 17/18] papi: remove inline_static macro in + Linux only code + + We better replace the macro with 'static inline'. Not sure if + this works for all compilers, so doing it for Linux only files. + + Signed-off-by: Robert Richter + + * src/x86_cache_info.c: [PATCH 16/18] papi: remove static inline + function declaration + + By moving the static inline function cpuid() to the beginning of the + file we may remove its declaration. + + Signed-off-by: Robert Richter + + * src/linux.h: [PATCH 15/18] papi: remove unused linux.h header + file + + This file is included nowhere, removing it. + + Signed-off-by: Robert Richter + + * src/linux-ia64.c: [PATCH 14/18] papi: fix array out of bounds + access + + Fixing the following warning: + + linux-ia64.c: In function '_ia64_init_substrate': + linux-ia64.c:1123:22: warning: array subscript is above array + bounds + + Signed-off-by: Robert Richter + + * src/: configure, configure.in: [PATCH 13/18] papi: remove + unnecessary checks in configure.in + + The check is obsolete and covered by default. + + Signed-off-by: Robert Richter + + * src/: papi_pfm_events.c, perf_events.c, perfmon.c, perfmon.h: + [PATCH 12/18] papi: include perfmon header files only where + necessary + + This patch includes perfmon header files only where necessary. + Declarations in perfmon/perfmon.h are never used, removing its + inclusion. Itanium header files are needed only in perfmon.c and + perf_events.c. + + Signed-off-by: Robert Richter + + * src/: papi_pfm_events.c, perfctr-x86.c: [PATCH 11/18] papi: make + some functions in papi_pfm_events.c static + + Functions _pfm_decode_native_event() and _pfm_convert_umask() are + internally used only. Remove export declaration and make it + static.
+ + Signed-off-by: Robert Richter + + * src/: Rules.pfm, linux-ia64-pfm.h, linux-ia64.c, pfmwrap.h: + [PATCH 10/18] papi: rename pfmwrap.h -> linux-ia64-pfm.h + + pfmwrap.h actually only contains IA64 code included by + linux-ia64.c. Rename it to linux-ia64-pfm.h. + + Signed-off-by: Robert Richter + + * src/: linux-ia64.c, pfmwrap.h: [PATCH 09/18] papi, linux-ia64: + make inline functions static + + Inline functions should be static. Fixing it. + + Signed-off-by: Robert Richter + + * src/: linux-ia64.c, papi_pfm_events.c: [PATCH 08/18] papi: fix + _papi_pfm_ntv_name_to_code() function interface + + The function is supposed to return a PAPI error code which is an + integer. Make the function's return code an integer too. + + Signed-off-by: Robert Richter + + * src/perfctr-ppc64.c: [PATCH 07/18] papi: fix spelling modifer -> + modifier + + Fix spelling: modifer -> modifier + + Signed-off-by: Robert Richter + + * src/: linux-ia64.c, papi_pfm_events.c, papi_pfm_events.h, + perf_events.c, perfctr-x86.c, perfmon.c: [PATCH 06/18] papi: + define function interface in papi_pfm_events.h + + The header file should define the interface that + papi_pfm_events.c provides. Declarations used internally only in + papi_pfm_events.c are moved there. Now papi_pfm_events.h only + contains the prototype functions. Remapping of definitions is + removed too. This cleanup removes duplicate code and better + defines the interface. + + Signed-off-by: Robert Richter + + * src/: Rules.perfctr, Rules.perfctr-pfm, linux.c, multiplex.c, + papi_vector.c, perfctr-x86.c, perfctr.c, ctests/test_utils.c: + [PATCH 05/18] papi: rename linux.c -> perfctr.c + + The name of linux.c is misleading, it only implements perfctr + functionality. Thus renaming it to perfctr.c. 
+ + Signed-off-by: Robert Richter + + * src/: papi_pfm_events.c, perfctr-x86.c: [PATCH 04/18] papi: make + _papi_pfm_init() static by moving it to perfctr-x86.c + + _papi_pfm_init() is only used in perfctr-x86.c but implemented in + papi_pfm_events.c. Move it to perfctr-x86.c and make it static. + + Signed-off-by: Robert Richter + + * src/perfmon.c: [PATCH 03/18] papi: make some functions static in + perfmon.c + + The functions are only used in perfmon.c, making it static. + + Signed-off-by: Robert Richter + + * src/: Rules.pfm, Rules.pfm_pe: [PATCH 02/18] papi: do not compile + libpfm examples to support cross compilation + + Signed-off-by: Robert Richter + + * src/Rules.pfm: To cross compile papi we need to pass the + architecture to libpfm. Otherwise it will be confused and try + to build the host's make targets with the cross compiler ending + up in the following error: + + pfmlib_amd64.c: In function 'cpuid': + pfmlib_amd64.c:166:3: error: impossible register constraint in + 'asm' + pfmlib_amd64.c:172:1: error: impossible register constraint in + 'asm' + make[2]: *** [pfmlib_amd64.o] Error 1 + + Signed-off-by: Robert Richter + + * src/ctests/Makefile: Temporarily back out the FreeBSD makefile + change that breaks the build so that I can properly test some + other changes. + + * src/papi_events.csv: Change the Core2 L1_TCM preset to be + LLC_REFERENCES + + The current event (L2_RQSTS:SELF:MESI) returns an event + equivalent to LLC_REFERENCES on libpfm3, but in libpfm4 + L2_RQSTS:SELF:MESI maps instead to L2_RQSTS:SELF:MESI:ALL which + counts prefetches too. + + By moving to LLC_REFERENCES both libpfm3 and libpfm4 count the + proper value. This also makes the "tenth" benchmark pass when + using PAPI/libpfm4. + + * src/configure: Update to match current configure.in + + * src/ctests/Makefile: Fix the if / fi syntax of the last change.
+ +2011-01-13 + + * src/: Makefile.inc, configure.in, freebsd-memory.c, freebsd.c, + ctests/Makefile, ctests/zero_attach.c: Changes from Harald Servat + for freebsd support. Note that configure has not been regenerated + from this version of configure.in. + + * papi.spec, doc/Doxyfile, doc/Doxyfile-everything, + src/Makefile.in, src/configure.in, src/papi.h: Change version + numbers to 4.1.2 in preparation for a release. + +2011-01-12 + + * src/ctests/code2name.c: The code2name test was assuming that the + native events start right at PAPI_NATIVE_MASK. We specifically + document elsewhere this might not be the case, and indeed for the + libpfm4 code this fails. + + This fix changes the code to properly enumerate the native + events for the test. + +2011-01-06 + + * src/: papi.c, papi_internal.c: Fix a long-standing bug where we + were walking off the end of the EventInfoArray in + remap_event_position(). + + This was noticed by Richard Strong when instrumenting some of the + PARSEC benchmarks. + + In papi_internal.c in the remap_event_position() function we have + the loop + + for ( i = 0; i <= total_events; i++ ) { + + It seems weird that we are doing a <= compare, and in fact this + is why we + walk off the end of the array sometimes. But why only sometimes? + If I + change that <= to a < then many of the regression tests fail. + + It turns out that the two calls to remap_event_position() in + papi_internal.c are called with ESI->NumberOfEvents being one + less + than it should be, as it is incremented after the + remap_event_position() + call (though the new events are added before the call). This is + why + <= is used. + + However the call in PAPI_start() happens with + ESI->NumberOfEvents + with the right value. In this case < should be used. + + The fix I've come up with has a NumberOfEvents value passed in + as a + parameter to remap_event_position(). This way the value+1 can + be passed in the former cases.
+ +2010-12-20 + + * src/aix.c: Problem on POWER6 with AIX: pm_initialize() cannot be + called multiple times with PM_CURRENT. Instead, use the actual + proc type - here PM_POWER6 - and multiple invocations are no + longer a problem. Ctests/multiplex1.c passes now. + +2010-12-15 + + * src/run_tests.sh: If we don't run any tests, get buildbot's + attention. + +2010-12-14 + + * src/aix.c: number_of_nodes var was set to zero in + _aix_get_system_info. This caused the papi utilities to report + that the number of total CPUs is zero. This also caused + ctests/hwinfo to fail on POWER6 with AIX. + +2010-12-13 + + * src/papi_internal.h: Slight re-ordering of the no_vararg_macro + debug statements. (I actually tested the changes with + --with-debug and without on aix) + +2010-12-10 + + * src/run_tests.sh: Change the syntax on our find command to be + more posix compliant. + + GNU is Not UNIX, cute acronym or massive compatibility + conspiracy. I fall back to posix, you decide! + + * src/: configure, configure.in: Update configure file to be aware + of the existence of AIX-Power7. + + PAPI still won't build, but it gets further than before. + +2010-12-09 + + * src/run_tests.sh: Make our grep invocation posix compliant. + (--invert-match == -v & --regex == -e ) + + * src/ctests/overflow_allcounters.c: Separate 'indent' check-in so + that the previous modifications are comprehensible :) + + * src/ctests/overflow_allcounters.c: The overflow_allcounters test + failed on Power6 with AIX (pmapi) but passes on Power6 with linux + (perf_events | perfctr). Therefore detect if we're running on + AIX, print a warning, but still pass the test. + + * src/run_tests.sh: Move away from echo -n to the shell builtin + printf (echo -n is not portable) + + non-argumented instances of echo are fine. + + * src/run_tests_exclude.txt: Skip the non-test ctests/burn + executable. 
+ + * src/Matlab/: PAPI_Matlab.c, PAPI_Matlab.readme: Change + documentation for matlab integration to reflect the need to link + to the libpapi.so library and not the static one. + + Also listed me and the ptools-perfapi list as points of contact + for future questions *gulp* + +2010-12-08 + + * src/: configure, configure.in, run_tests.sh: Clean up (purge) + references to libpfm-2.x in configure and run_tests.sh + + * src/Matlab/PAPI_Matlab.c: MATLAB fixups: Calls to PAPI('stop') + now stop counting even if we ignore the return values. + + * src/Matlab/PAPI_Matlab.c: Fixup for papi matlab integration. + + Calls to PAPI('stop') don't cause errors now. If you call + PAPI('stop') without capturing its return value, it does + nothing. + + * src/Matlab/PAPI_Matlab.c: mex does not like c++ style comments + (double-slash) + +2010-12-06 + + * src/solaris-ultra.c: Resolved a couple type cast warnings. Also + initialized a variable and enabled GET_OVERFLOW_ADDRESS code in + two places. The overflow test suite still has a number of + failures and is disabled in configure. + +2010-11-24 + + * src/papi_internal.h: That last commit was lacking in + creativity... By having the debug function names still a macro, + we get all the goodness of __FILE__ etc being in the right place + and still not using variadic macros. + + #define SUBDBG do{ if (_papi_hwi_debug & DEBUG_SUBSTRATE ) + print_the_label; } while (0); _SUBDBG was the clever line that + eluded me yesterday. + +2010-11-23 + + * src/papi_internal.h: Turns out that when DEBUG and + NO_VARARG_MACRO are true, we didn't correctly implement + component-level debug functions. This change uses variable + argument lists ( man stdarg) to correctly handle this case.
( + papi_internal.h defines these) + + Note that debugging information is not completely useful; due to + functions which use variable argument lists not being inlinable ( + the inline keyword is after all only a suggestion), all messages + appear to come from papi_internal.h:PAPIDEBUG:525:22619 and I am + not clever enough to get around that in general right now. + + Thanks to Maynard Johnson for reporting. + + * src/papi_events.csv: Enable the PAPI_HW_INT event on Nehalem, as + tests show the HW_INT:RCV event is the proper one to use here. + +2010-11-22 + + * src/papi_events.csv: Update the preset events for Nehalem, as + contributed by Michel Brown. + +2010-11-19 + + * src/: perf_events.h, perf_events.c: Address problem with overflow + handler continuing to count events. Add overflow status field to + determine if an event set has any events enabled for overflow. + Use IOC_REFRESH instead of IOC_ENABLE when overflowing. + Implement IOC_REFRESH at end of overflow handler. None of this + worked. Also implemented an IOC_DISABLE at top of overflow + handler. That worked, even though it's suboptimal. + +2010-11-17 + + * src/utils/command_line.c: test_fail_exit() substituted for + test_fail(). This became necessary because + PAPI_event_name_to_code now returns a PAPI_EATTR error if the + base name matches but attribute names don't. This utility was + producing an error message and then running the test. Perfctr + implementations will happily add a base name with no umasks and + then generate 0 counts. This fix prevents that behavior. + + * src/ctests/test_utils.c: Rewrite of test_fail_exit() to call + test_fail(). It should be noted that test_fail_exit() behaves the + way test_fail() used to behave, i.e. it exits after printing the + fail message. However, test_fail no longer exits as that was + causing problems with multi-threaded tests not freeing memory.
In + those cases where an exit is desired, calls to test_fail_exit() + should be substituted for calls to test_fail(). + + * src/: papi.h, papi_data.c, papi_pfm_events.c, perfmon.c: Added 3 + new error codes: PAPI_EATTR, PAPI_ECOUNT, and PAPI_ECOMBO. These + map onto equivalent errors in libpfm and are provided to give + more detail on failures in libpfm calls. A new error mapping + function has been added to papi_pfm_events.c to map libpfm errors + to PAPI errors, and this function is employed in the + compute_kernel_args function in perfmon.c. It could also be + deployed elsewhere, but so far is not. + +2010-11-09 + + * src/x86_cache_info.c: The cpuid change yesterday broke + compilation on a 32-bit Pentium 3. Fix the inline assembly to + compile properly there too. + +2010-11-08 + + * src/: configure, configure.in: Fix configure script to properly + detect Pentium M machines. + + * src/x86_cache_info.c: Add cpuid leaf4 cache detection support. + + This has been available on intel processors since Late model P4s + and all Core2 and newer. It returns cache info in a different + way than the older leaf2 method. + + Currently we only use leaf4 data if the leaf2 results tell us to + (apparently Westmere does that). Otherwise we use the old + method. + + It might be interesting to use more of the leaf4 info. It can + tell us things such as how many processors share a socket, how + many processors share a cache, and info on the inclusivity of a + cache. + + * src/: linux.c, perfctr-x86.c: Add perfctr Westmere support. + + * src/perfctr-2.6.x/: patches/aliases, usr.lib/Makefile: Fix + conflicts from perfctr merge. + +2010-11-06 + + * src/perf_events.c: Replace KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN + with the proper dynamic kernel version number checking. This + should be the last place in our perf_events code that was using a + hard-coded rather than dynamic check for a kernel-version related + bugfix. 
+ + * src/perf_events.c: This patch allows PAPI to read multiple events + at a time out of the kernel when the kernel is new enough (2.6.34 + or newer). The previous code required setting a #define by hand + to get this behavior, this new code picks the proper way to do + things based on the kernel version number. + + The patch was supplied by Gary Mohr + +2010-11-04 + + * src/: linux.c, perfctr-x86.c: Replace occurrences of + PERFCTR_X86_INTEL_COREI7 with PERFCTR_X86_INTEL_NHLM as the + former has been documented as being deprecated as of perfctr + 2.6.41. + +2010-11-03 + + * src/cycle.h: Change "unicos" to "CLE" since "unicos" no longer + exists. + +2010-10-26 + + * src/examples/locks_pthreads.c: Add a call to PAPI_thread_init(), + Thanks to Martin Schindewolf for pointing this out. + +2010-10-21 + + * src/: papi.c, components/lmsensors/linux-lmsensors.h: Fixup url's + that checkbot was finding in error. + +2010-10-05 + + * src/ctests/: multiattach.c, zero_attach.c: The zero_attach and + multiattach were forking off children before testing + that PAPI in fact is available. Then when PAPI_init() failed + the children weren't being cleaned up properly. This was + confusing build bot. This changeset moves the fork to after the + check and does a fail_exit() on failure. + + * src/: configure, configure.in: Solaris build will fail if + /usr/ccs/bin isn't in the path. Have it check there for "ar" on + Solaris systems if it can't be found by normal methods. + + * src/: configure, configure.in: Only run the EAR tests on itanium + systems. + + * src/: configure, configure.in: Pentium4-perfctr was skipping most + of the CTESTS. Make sure they are all run.
+ + diff --git a/ChangeLogP4121.txt b/ChangeLogP4121.txt new file mode 100644 index 0000000..fd1f31f --- /dev/null +++ b/ChangeLogP4121.txt @@ -0,0 +1,8 @@ +2011-01-20 + + * src/papi_events.csv: + Remove HW_INT:RCV event that was mistakenly enabled for Westmere + + + + diff --git a/ChangeLogP413.txt b/ChangeLogP413.txt new file mode 100644 index 0000000..8bf5841 --- /dev/null +++ b/ChangeLogP413.txt @@ -0,0 +1,598 @@ +2011-05-10 + + * src/Rules.pfm_pe: The --with-bitmode parameter was not being + passed along to libpfm3, so it was not possible to build + perf_event PAPI on non-default bitmodes. This change passes + along the $(BITFLAGS) value to the libpfm3 make invocation. + + * src/: papi_pfm_events.c, papi_pfm_events.h, perf_events.c: The + perf_events code was using __u64 instead of uint64_t and this was + causing a warning when compiling for 64-bit Power. + + * src/libpfm-3.y/lib/amd64_events_fam15h.h: Added Robert Richter's + patch with a few new events for AMD Family 15h. + +2011-05-06 + + * INSTALL.txt: Load the 'gcc' module not 'gnu' module for Cray. + + * INSTALL.txt: Update the install instructions for Cray XT and XE + systems. + + * src/ctests/: multiattach.c, multiattach2.c: Make the multiattach + and multiattach2 failures into warnings. + + I have a proposed fix that makes the failures go away, but it has + not been tested much and also causes some new fcntl() error + messages under perfctr. + + So temporarily make the tests only warn for the release and I'll + work on a proper fix for after. The behavior in these tests has + been broken for a long time so it is not a recent regression. + + * src/papi_memory.c: Band-aid for the leak debugging statement in + papi_memory.c on NO_VARARG_MACRO systems. (aix currently) + +2011-05-05 + + * src/ctests/multiattach.c: Had the division backwards on the + validation. + + * src/ctests/multiattach.c: Update the multiattach test to fail if + the results aren't in the proper ratio. 
This was failing on + perf_event kernels but since the results weren't checked it was + never reported as an error. + + * delete_before_release.sh: delete cvs2cl.pl before release + + * ChangeLogP413.txt: First cut change log for the 4.1.3 release. + Nothing's frozen yet... + + * cvs2cl.pl: Perl script to generate change logs. Keeping it with + the project makes life easier. + + * INSTALL.txt: Change INSTALL to reflect that we support power7. + + * src/Makefile.in, src/configure, src/configure.in, src/papi.h, + doc/Doxyfile, doc/Doxyfile-everything, papi.spec: Modfy version + number for pending release: 4.1.3.0 + +2011-05-03 + + * src/: papi_internal.c, papi_internal.h, sys_perf_event_open.c, + ctests/attach2.c: Cleanup the _papi_hwi_cleanup_eventset() + function in papi_internal.c + + This function was re-using existing functionality to remove one + event at a time before cleaning out the eventset. This is not + strictly necessary and was breaking on perf_event eventsets that + were attached to finished processes, as a call to + update_control_state() would close/reopen the perf_event fd, + failing when the finished process went away after the close. + + The new code removes all events from the eventset in one go + before calling update_control_state. + + The change here also updates code comments as necessary, as some + of the code in papi_internal.c can be a bit obscure. + + It also updates some of the comments in ctests/attach2.c to give + better debugging info. + +2011-04-28 + + * src/threads.c: Uncomment the actual signal passing functionality + in _papi_hwi_broadcast_signal + + * src/papi_debug.h: Include files added to papi_debug.h + + * src/components/README: Added detailed instructions on how to + build PAPI with the CUDA component + +2011-04-27 + + * src/threads.c: Move an escape test to the outer loop in + _papi_hwi_broadcast_signal. 
+ + This cleans up an infinite loop where before we would only break + out of the component loop, not the thread list walking loop. + + * src/: papi.c, papi_internal.c, papi_internal.h, papi_protos.h: + Clean up papi_internal.c so that functions not used outside are + marked static. + + * src/: papi_pfm_events.c, papi_preset.c, pmapi-ppc64_events.c: + papi: Fix some memory leaks + + Signed-off-by: Robert Richter + + * src/perf_events.c: papi: Make functions and variables static in + perf_events.c + + All these functions and variables are not used outside + perf_events.c. Making them static. + + Signed-off-by: Robert Richter + + * src/papi_pfm_events.c: papi: Fix crash in error handler for + pfm_get_event_code_counter() + + Signed-off-by: Robert Richter + + * src/utils/native_avail.c: papi: Fix error check in native_avail.c + + Signed-off-by: Robert Richter + +2011-04-26 + + * src/libpfm-3.y/: include/perfmon/pfmlib_amd64.h, + lib/pfmlib_amd64.c: AMD architectural PMU could not be detected + for family 15h as there was a strict check for AMD family 10h. + Enabling it now for all families from 10h. + + Signed-off-by: Robert Richter + + * src/libpfm-3.y/lib/amd64_events_fam15h.h: There is no kernel + support for AMD family 15h northbridge events, disabling them in + libpfm3 to not report them as available native events. + + Patch from Robert Richter + + * src/: configure, configure.in, linux-common.c: Add some extra + debug messages for better tracking of the --with-assumed-kernel + configure option. + +2011-04-25 + + * src/: configure, configure.in, linux-common.c: Add a new + configure option: --with-assumed-kernel= This allows you + to specify a kernel revision to use (instead of being autodetected + with uname) for perf_event workaround purposes. With this you + can force PAPI to not use workarounds on kernels with + backported versions of perf_event features. 
+ +2011-04-19 + + * src/: Makefile.inc, configure, configure.in, papi_debug.h, + papi_internal.h, sys_perf_event_open.c: Add debugging to + sys_perf_event_open.c to show exactly what values are being + passed to the perf_event_open syscall. + +2011-04-18 + + * src/: run_tests.sh, ctests/attach2.c, ctests/attach3.c: Fix for + finding attach_target with execlp to search the path. + +2011-04-14 + + * src/: Rules.pfm, configure, configure.in, linux-ia64-pfm.h, + linux-ia64.c, linux-ia64.h, perfmon-ia64-pfm.h, perfmon-ia64.c, + perfmon-ia64.h, perfmon.h: Rename the linux-ia64-* files to be + called perfmon-ia64-* + + This is a more descriptive name, and makes it more obvious what + the files are for. + + * src/libpfm-3.y/: include/perfmon/pfmlib_amd64.h, + lib/pfmlib_amd64.c, lib/pfmlib_amd64_priv.h: Patch to have + libpfm3 use 6 counters on Interlagos. + + Patch provided by Robert Richter + + * src/linux-memory.c: Fix the POWER cache detection routines to + work properly on POWER7. + + Patch provided by Corey Ashford + + * src/: configure, configure.in: Have configure check for ifort if + gfortran, etc, not found. + + Patch by Gary Mohr + + * src/ctests/johnmay2.c: Update the validation message on the + ctests/johnmay2.c test to be less confusing. Also add some + comments to the source code. + + Problem reported by Steve Kaufmann. + +2011-04-13 + + * src/ctests/: multiattach2, multiattach2.c: Remove the + accidentally added ctests/multiattach2 and add instead the proper + ctests/multiattach2.c + + * src/Makefile.inc: components_config.h is cleaned out with make + clobber, not make clean this should fix the build bot issues. + + * src/ctests/: Makefile, attach3.c, multiattach.c, multiattach2, + zero_attach.c: Minor typos in comments. Discovered another bug in + attach code demonstrated by multiattach2. You cannot have an + eventset running that is self counting as well as one that is + attached. PAPI thinks that both are running and throws an error. 
+ + * src/perf_events.c: We must update the control state after + attaching for perf_events, zero_attach now passes + + * src/ctests/: Makefile, attach2.c, attach3.c, do_loops.c: This + commit adds testing of attaching to fork/exec'd executables. + zero_attach and multiattach just test forks. This also modifies + do_loops.c to be able to generate a test driver when + -DDUMMY_DRIVER is defined so we can use it to generate flops as a + sub process. + + Attach2 and attach3 have one important difference. + + Attach3 does a 'assign component' before attaching and then + adding events. Attach2 does not assign a component and thus + should inherit the default component. + + The current bug in PAPI is that: * The default component is not + assigned until you add an event. * However, attaching an + eventset without events is perfectly valid, but we get an error. + + Possible solution is that the default component should be + assigned at create time. + +2011-04-12 + + * src/ctests/multiattach.c: Make sure the two processes compute + different numbers of flops to test attach + +2011-04-05 + + * src/power7_events.h: Turns out Maynard Johnson answered my + questions about the native_name enum back in December. ( this is + a correct version of the events file ) + + As I found out, the AIX substrates do not use the native_name + enum. But a hypothetical perfctr build would. + +2011-04-04 + + * src/Makefile.inc: Clear out the components_config.h file on make + clobber + + * src/: aix.c, power7_events.h: Initial support for power7 aix, the + events file is a copy of power6_events.h with the number of + groups changed. The native_name enum is unchanged, but unused? + +2011-04-01 + + * src/configure.in: Commited wrong configure.in + + * src/: configure, configure.in: Clean up setting bitmode flags for + non-gcc (xlc in this case) compilers. 
+ + * src/papi_events.csv: Change the Nehalem PAPI_FP_OPS event from + FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION+FP_COMP_OPS_EXE:DOUBLE_PRECISION + to FP_COMP_OPS_EXE:SSE+FP_COMP_OPS_EXE:X87 + + The new event gives the same results as the previous one, with + the added benefit of also counting 32-bit compiled x87 fp ops + properly. + + More detailed analysis can be found here: + http://web.eecs.utk.edu/~vweaver1/projects/nehalem-fp_ops/ + +2011-03-28 + + * src/utils/multiplex_cost.c: Turns out that getopt_long isn't as + standard as I had hoped. + + Convert multiplex_cost to use only getopt. -s disables software + multiplexing -k disables kernel multiplexing + +2011-03-25 + + * src/: configure, utils/Makefile, utils/multiplex_cost.c, + configure.in: Multiplex_cost utility. + + * src/utils/: Makefile, cost.c, cost_utils.c, cost_utils.h: Split + off the statistics functions from cost. + +2011-03-22 + + * src/: run_tests_exclude_cuda.txt, run_tests.sh: Exclude some + fork/thread tests from fulltest that won't run with CUDA (reason: + cannot invoke same GPU from different threads) + +2011-03-21 + + * src/utils/cost.c: Add a test for DERIVED_[ADD | SUB ] events to + papi_cost. + +2011-03-18 + + * src/components/cuda/linux-cuda.c: all_native_events ctest failed + when CUDA Component is used. Reason: removing cuda events from + the eventset is currently not supported. According to the NVIDIA + folks this is a bug in cuda 4.0rc and will be fixed in rc2. Note + also, several fork and thread tests fail since it's illegal to + invoke the same GPU device from different processes / threads. We + need a mechanism that allows us to run tests for the CPU + component only. + +2011-03-15 + + * src/utils/cost.c: Add a test case to cost util, look for a + derived-postfix event and if found, give timing information for + read calls to it. + + This is just a first run at the test, Core2 and AMD have + candidate events and the test runs, but that is the extent of my + testing so far. 
+ +2011-03-11 + + * src/components/: README, cuda/Makefile.cuda.in, cuda/Rules.cuda, + cuda/configure, cuda/configure.in, cuda/linux-cuda.c, + cuda/linux-cuda.h: Added CUDA component, a hardware performance + counter measurement technology for the NVIDIA CUDA platform which + provides access to the hardware counters inside the GPU. PAPI + CUDA is based on CUPTI support - shipped with CUDA 4.0rc - in the + NVIDIA driver library. In any environment where the CUPTI-enabled + driver is installed, the PAPI CUDA component can provide detailed + performance counter information regarding the execution of GPU + kernels. + + * src/components/: coretemp/linux-coretemp.c, + lustre/linux-lustre.c: Add some missing includes to components. + + Thanks to Will Cohen for reminding us warnings matter. :) + + * src/: configure, configure.in, perf_events.c: The SYNC_READ + workaround in perf_events.c was being handled at compile time, + rather than at run-time like all of our other workarounds. + + Change it to be like our other kernel-version related + workarounds. + +2011-03-09 + + * src/ctests/multiplex1_pthreads.c: Between 4.0.0 and 4.1.0 a + pthread_exit() call was added to ctest/multiplex1_pthreads.c that + caused the test to exit partway through the test and without + doing a proper PASS/FAIL result. + + This changeset backs out that change, though the original change + was marked as a memory leak fix so a different fix may be needed. + + Reported by Steve Kaufmann + + * src/linux-timer.c: Add missing header needed by + --with-virtualtimer=times build. + + Reported by Steve Kaufmann + +2011-03-01 + + * src/: papi_pfm_events.c, perf_events.c: Fix broken Linux/PPC + build caused by my pfm_events code movement changes. + +2011-02-25 + + * src/: papi_pfm_events.c, papi_pfm_events.h, perfctr-x86.h: My + changes yesterday broke the perfctr build. This should fix it. 
+ +2011-02-24 + + * src/ctests/inherit.c: Make the inherit test respect TESTS_QUIET + so that it does not print extra output during a run_tests.sh run + + * src/ctests/overflow.c: Fix missing newline in the overflow + output. + + Reported by Gary Mohr + + * src/: papi_pfm_events.c, papi_pfm_events.h, perf_events.c: Move + the libpfm3 specific functions from perf_events.c into + papi_pfm_events.c + + * src/perf_events.c: Separate the libpfm3-specific code from + _papi_pe_init_substrate() and _papi_pe_update_control_state() + into their own functions. This will allow eventual code sharing + and also make the libpfm4 merge easier. + + * src/perf_events.c: Some minor cleanups I found after reviewing + the inherit merge. + Add missing "static inline" to the new + kernel-version codes + Remove duplicated test for Pentium 4 + + Fix a warning only seen if --with-debug is enabled + + * src/: papi.c, papi.h, papi_internal.h, perf_events.c, + perf_events.h, ctests/Makefile, ctests/inherit.c, + ctests/test_utils.c: Merging Gary Mohr's re-implementation of + inherit into the code base. Thanks, Gary! + +2011-02-23 + + * src/: any-null.h, freebsd.h, linux-bgp.h, linux-common.c, + linux-common.h, linux-context.h, linux-ia64.c, linux-ia64.h, + linux-lock.h, linux-memory.c, linux-ppc64.h, linux-timer.c, + papi_internal.h, papi_pfm_events.c, perf_events.c, perf_events.h, + perfctr-x86.h, perfctr.c, perfmon.h, solaris-niagara2.h, + solaris-ultra.h, solaris.h, x86_cache_info.c: Move some more + duplicated OS common code (in this case the locking code and the + context accessing code) out of the various substrate include + files and into a common location. + +2011-02-22 + + * src/perf_events.c: Separate out the kernel-version dependent + checks and group them together near the beginning of the code. 
+ This not only allows us to easily see which routines are + kernel-version dependent, but it makes it easier to disable the + checks one-by-one when debugging kernel-version related issues + like those found with the inherit patches. + +2011-02-21 + + * src/papi_internal.c: Extend _papi_hwi_cleanup_eventset to free + memory and better cleanup after us. + +2011-02-18 + + * src/papi.c: PAPI_assign_eventset_component changed; refuses to + reassign components. + +2011-02-17 + + * src/: papi_events.csv, libpfm-3.y/include/perfmon/pfmlib.h, + libpfm-3.y/lib/amd64_events.h, + libpfm-3.y/lib/amd64_events_fam10h.h, + libpfm-3.y/lib/amd64_events_fam15h.h, + libpfm-3.y/lib/pfmlib_amd64.c, + libpfm-3.y/lib/pfmlib_amd64_priv.h: Add support for AMD Family + 15h processors. Also adds support for Family 10h RevE + + Patches provided by Robert Richter + + * src/utils/native_avail.c: Modify papi_native_avail to properly + handle event names with libpfm4-style "::" separators in them. + +2011-02-15 + + * src/Makefile.inc: make install-doxyman will build/install the + doxygen version of the manpages. + + Note that these pages are very rough right now, much work is + needed to get them to be a drop in replacement for the current + man pages. (mostly formatting related/use related issues, eg man + PAPI_start will not work yet; the content is there.) + + * doc/Makefile: Add install target for doxygen generated man pages. + +2011-02-11 + + * src/: perfctr-x86.c, perfctr.c: perfctr-2.6.42 introduced + PERFCTR_X86_INTEL_WSTMR PAPI added support for + PERFCTR_X86_INTEL_WSMR notice the missing T + + Fix PAPI to use the proper define. This should fix Westmere + support on perfctr kernels. + +2011-02-09 + + * src/: papi_protos.h, papi_vector.c, papi_vector.h, + papi_vector_redefine.h: Added function pointer destroy_eventset + to the PAPI vector table. Needed for the CUDA Component to + disable CUDA eventGroups, to destroy floating CUDA context, and + to free perfmon hardware on the GPU. 
(Note: the CUDA Component + cannot be released yet since we are still under NDA with NVIDIA. + Stay tuned.) + +2011-02-07 + + * src/x86_cache_info.c: The cpuid leaf2 code was printing a message + to stderr if leaf4 was needed (only happens on Westmere + currently). Change this to be a MEMDBG() debug message instead. + +2011-02-03 + + * src/: papi_events.csv, perfctr-x86.c: perfctr-x86 was reporting + "Core i7" instead of "Nehalem". i7 can mean Westmere or Sandy + Bridge too, so change the code to properly report Nehalem. + +2011-01-27 + + * src/ctests/all_native_events.c: + Fix this ctest. It failed when the package was built with several + components because the eventset was reused and failed to add + events that were not from the first component. + + In order to fix it, I recreate & destroy the eventset when the + current event does not belong to the previous component. + +2011-01-26 + + * src/: configure, configure.in, linux-timer.c, perfmon.c: Fix Cray + CLE build. + + * src/: configure, configure.in: Putting -Wall in cflags now + requires CC = gcc + + * src/: aix.c, freebsd.c, linux-bgp.c, linux-common.c, + linux-memory.c, linux-memory.h, papi.c, papi_protos.h, + papi_vector.c, papi_vector.h, solaris-niagara2.c, + solaris-ultra.c, windows-common.c, windows-memory.c: Change the + parameters passed to update_shlib_info() to match better with + those passed to get_system_info(). This only affects the + substrates, outside users of PAPI will not notice this change. + +2011-01-25 + + * src/: configure, configure.in: Make sure that aix gets -g. + + * src/: configure, configure.in: Give everyone else -g when + configuring with debug. + + To wit, we pass gcc -g3 but neglected platforms where CC!=gcc. + + * src/aix.c: First run at supporting power7. NOTE: this code is + only good for getting event listings eg papi_native_avail, + + passing PM_GET_GROUPS causes our code to segfault later on, a + buffer overflow I'm still tracking down. 
+ + * src/perfctr-x86.c: Accidentally converted a function to _perfctr_ + that should have stayed _linux_. + + * src/: perfctr-x86.c, perfctr.c: Rename the various perfctr + functions to be _perfctr_ rather than _linux_. This way _linux_ + is reserved for the common functions used by all. + + * src/: linux-common.c, linux-memory.c, linux-timer.c, + perf_events.c, windows-common.c, windows-memory.c, + windows-timer.c: Split the WIN32 specific code out from the new + linux common code. + + In most cases very little code was shared (it tended to be a big + #ifdef block) and it is confusing to have windows-specific code + in files named linux-* + +2011-01-24 + + * src/linux-timer.c: Fix a compile error that only shows up on PPC. + + * src/linux-timer.c: Fix compile warning if mmtimer is enabled. + + * src/perfctr-x86.c: Missing comma in the perfctr code. + + * src/: Makefile.inc, aix.c, configure, configure.in, + hwinfo_linux.c, linux-bgp.c, linux-common.c, linux-common.h, + linux-ia64.c, linux-timer.c, linux-timer.h, papi_vector.h, + perf_events.c, perfctr-x86.c, perfctr.c, perfmon.c, + solaris-niagara2.c, solaris-ultra.c: One last batch of + consolidation changes. + + This one moves get_system_info and get_cpu_info into + linux-common.c, plus moves some other routines from perf_events.c + there that are shared by the future libpfm4 version. + + Some non-linux substrates are touched here; these are just short + fixes to make sure the get_system_info() function pointed to by + the papi_vector has the same format on all substrates. + + * src/: Makefile.inc, configure, configure.in, linux-memory.c, + linux-memory.h, perf_events.c, perfctr-x86.c, perfctr.c, + perfmon.c: Move the various Linux update_shlib_info() functions + into a common place. 
+ + * src/: Makefile.inc, linux-timer.c, linux-timer.h, perf_events.c, + perfctr-x86.c, perfctr.c, perfmon.c: Move the various + timer-related functions to linux-timer.c This gets rid of the + duplicated code spread throughout the substrates. + +2011-01-21 + + * delete_before_release.sh, release_procedure.txt: Updated the + release docs with what I learned when making the 4.1.2.1 release. + + * src/: configure, configure.in, freebsd-memory.c, + linux-ia64-memory.c, linux-memory.c, linux-memory.h, + linux-mx-memory.c, linux-ppc64-memory.c, perf_events.c, + perfctr-x86.c, perfmon-memory.c, perfmon.c: Currently there are + at least 3 identical copies of the linux memory detection code + spread throughout the PAPI source code. + + This change puts them all in linux-memory.c, and then has all the + individual substrates use the common code. diff --git a/ChangeLogP414.txt b/ChangeLogP414.txt new file mode 100644 index 0000000..a2507c7 --- /dev/null +++ b/ChangeLogP414.txt @@ -0,0 +1,881 @@ +2011-08-29 + + * src/configure: Rebuild from configure.in with version number bump + to 4.1.4 in advance of pending internal vendor release for Cray. + +2011-08-26 + + * release_procedure.txt: Update rel procedure to mention building + the man pages before a release. 
+ + * man/: man1/avail.c.1, man1/clockres.c.1, man1/command_flags_t.1, + man1/command_line.c.1, man1/component.c.1, man1/cost.c.1, + man1/decode.c.1, man1/error_codes.c.1, man1/event_chooser.c.1, + man1/mem_info.c.1, man1/native_avail.c.1, man1/options_t.1, + man1/papi_avail.1, man1/papi_clockres.1, + man1/papi_command_line.1, man1/papi_component_avail.1, + man1/papi_cost.1, man1/papi_decode.1, man1/papi_error_codes.1, + man1/papi_event_chooser.1, man1/papi_mem_info.1, + man1/papi_multiplex_cost.1, man1/papi_native_avail.1, man3/CDI.3, + man3/HighLevelInfo.3, man3/PAPIF.3, man3/PAPIF_accum.3, + man3/PAPIF_add_event.3, man3/PAPIF_add_events.3, + man3/PAPIF_assign_eventset_component.3, + man3/PAPIF_cleanup_eventset.3, man3/PAPIF_create_eventset.3, + man3/PAPIF_destroy_eventset.3, man3/PAPIF_get_dmem_info.3, + man3/PAPIF_get_exe_info.3, man3/PAPIF_get_hardware_info.3, + man3/PAPIF_num_hwctrs.3, man3/PAPI_accum.3, + man3/PAPI_accum_counters.3, man3/PAPI_add_event.3, + man3/PAPI_add_events.3, man3/PAPI_addr_range_option_t.3, + man3/PAPI_address_map_t.3, man3/PAPI_all_thr_spec_t.3, + man3/PAPI_assign_eventset_component.3, man3/PAPI_attach.3, + man3/PAPI_attach_option_t.3, man3/PAPI_cleanup_eventset.3, + man3/PAPI_component_info_t.3, man3/PAPI_cpu_option_t.3, + man3/PAPI_create_eventset.3, man3/PAPI_debug_option_t.3, + man3/PAPI_descr_error.3, man3/PAPI_destroy_eventset.3, + man3/PAPI_detach.3, man3/PAPI_dmem_info_t.3, + man3/PAPI_domain_option_t.3, man3/PAPI_enum_event.3, + man3/PAPI_event_code_to_name.3, man3/PAPI_event_info_t.3, + man3/PAPI_event_name_to_code.3, man3/PAPI_exe_info_t.3, + man3/PAPI_flips.3, man3/PAPI_flops.3, man3/PAPI_get_cmp_opt.3, + man3/PAPI_get_component_info.3, man3/PAPI_get_dmem_info.3, + man3/PAPI_get_event_info.3, man3/PAPI_get_executable_info.3, + man3/PAPI_get_hardware_info.3, man3/PAPI_get_multiplex.3, + man3/PAPI_get_opt.3, man3/PAPI_get_overflow_event_index.3, + man3/PAPI_get_real_cyc.3, man3/PAPI_get_real_nsec.3, + 
man3/PAPI_get_real_usec.3, man3/PAPI_get_shared_lib_info.3, + man3/PAPI_get_thr_specific.3, man3/PAPI_get_virt_cyc.3, + man3/PAPI_get_virt_nsec.3, man3/PAPI_get_virt_usec.3, + man3/PAPI_granularity_option_t.3, man3/PAPI_hw_info_t.3, + man3/PAPI_inherit_option_t.3, man3/PAPI_ipc.3, + man3/PAPI_is_initialized.3, man3/PAPI_itimer_option_t.3, + man3/PAPI_library_init.3, man3/PAPI_list_events.3, + man3/PAPI_list_threads.3, man3/PAPI_lock.3, + man3/PAPI_mh_cache_info_t.3, man3/PAPI_mh_info_t.3, + man3/PAPI_mh_level_t.3, man3/PAPI_mh_tlb_info_t.3, + man3/PAPI_mpx_info_t.3, man3/PAPI_multiplex_init.3, + man3/PAPI_multiplex_option_t.3, man3/PAPI_num_cmp_hwctrs.3, + man3/PAPI_num_components.3, man3/PAPI_num_counters.3, + man3/PAPI_num_events.3, man3/PAPI_num_hwctrs.3, + man3/PAPI_option_t.3, man3/PAPI_overflow.3, man3/PAPI_perror.3, + man3/PAPI_preload_info_t.3, man3/PAPI_profil.3, + man3/PAPI_query_event.3, man3/PAPI_read.3, + man3/PAPI_read_counters.3, man3/PAPI_read_ts.3, + man3/PAPI_register_thread.3, man3/PAPI_remove_event.3, + man3/PAPI_remove_events.3, man3/PAPI_reset.3, + man3/PAPI_set_cmp_domain.3, man3/PAPI_set_cmp_granularity.3, + man3/PAPI_set_debug.3, man3/PAPI_set_domain.3, + man3/PAPI_set_granularity.3, man3/PAPI_set_multiplex.3, + man3/PAPI_set_opt.3, man3/PAPI_set_thr_specific.3, + man3/PAPI_shlib_info_t.3, man3/PAPI_shutdown.3, + man3/PAPI_sprofil.3, man3/PAPI_sprofil_t.3, man3/PAPI_start.3, + man3/PAPI_start_counters.3, man3/PAPI_state.3, man3/PAPI_stop.3, + man3/PAPI_stop_counters.3, man3/PAPI_strerror.3, + man3/PAPI_thread_id.3, man3/PAPI_thread_init.3, + man3/PAPI_unlock.3, man3/PAPI_unregister_thread.3, + man3/PAPI_write.3, man3/high_api.3, man3/low_api.3, + man3/papi_data_structures.3, man3/papi_vector_t.3, + man3/ret_codes.3: Switch over to doxygen generated man pages. 
+ + * man/: man1/papi_avail.1, man1/papi_clockres.1, + man1/papi_command_line.1, man1/papi_cost.1, man1/papi_decode.1, + man1/papi_event_chooser.1, man1/papi_mem_info.1, + man1/papi_native_avail.1, man3/PAPI.3, man3/PAPIF.3, + man3/PAPIF_get_clockrate.3, man3/PAPIF_get_domain.3, + man3/PAPIF_get_exe_info.3, man3/PAPIF_get_granularity.3, + man3/PAPIF_get_preload.3, man3/PAPIF_set_event_domain.3, + man3/PAPI_accum.3, man3/PAPI_accum_counters.3, + man3/PAPI_add_event.3, man3/PAPI_add_events.3, + man3/PAPI_assign_eventset_component.3, man3/PAPI_attach.3, + man3/PAPI_cleanup_eventset.3, man3/PAPI_create_eventset.3, + man3/PAPI_destroy_eventset.3, man3/PAPI_detach.3, + man3/PAPI_encode_events.3, man3/PAPI_enum_event.3, + man3/PAPI_event_code_to_name.3, man3/PAPI_event_name_to_code.3, + man3/PAPI_flips.3, man3/PAPI_flops.3, man3/PAPI_get_cmp_opt.3, + man3/PAPI_get_component_info.3, man3/PAPI_get_dmem_info.3, + man3/PAPI_get_event_info.3, man3/PAPI_get_executable_info.3, + man3/PAPI_get_hardware_info.3, man3/PAPI_get_multiplex.3, + man3/PAPI_get_opt.3, man3/PAPI_get_overflow_event_index.3, + man3/PAPI_get_real_cyc.3, man3/PAPI_get_real_usec.3, + man3/PAPI_get_shared_lib_info.3, man3/PAPI_get_substrate_info.3, + man3/PAPI_get_thr_specific.3, man3/PAPI_get_virt_cyc.3, + man3/PAPI_get_virt_usec.3, man3/PAPI_help.3, man3/PAPI_ipc.3, + man3/PAPI_is_initialized.3, man3/PAPI_library_init.3, + man3/PAPI_list_events.3, man3/PAPI_list_threads.3, + man3/PAPI_lock.3, man3/PAPI_multiplex_init.3, man3/PAPI_native.3, + man3/PAPI_num_cmp_hwctrs.3, man3/PAPI_num_components.3, + man3/PAPI_num_counters.3, man3/PAPI_num_events.3, + man3/PAPI_num_hwctrs.3, man3/PAPI_overflow.3, man3/PAPI_perror.3, + man3/PAPI_presets.3, man3/PAPI_profil.3, man3/PAPI_query_event.3, + man3/PAPI_read.3, man3/PAPI_read_counters.3, + man3/PAPI_register_thread.3, man3/PAPI_remove_event.3, + man3/PAPI_remove_events.3, man3/PAPI_reset.3, + man3/PAPI_set_cmp_domain.3, man3/PAPI_set_cmp_granularity.3, + 
man3/PAPI_set_debug.3, man3/PAPI_set_domain.3, + man3/PAPI_set_event_info.3, man3/PAPI_set_granularity.3, + man3/PAPI_set_multiplex.3, man3/PAPI_set_opt.3, + man3/PAPI_set_thr_specific.3, man3/PAPI_shutdown.3, + man3/PAPI_sprofil.3, man3/PAPI_start.3, + man3/PAPI_start_counters.3, man3/PAPI_state.3, man3/PAPI_stop.3, + man3/PAPI_stop_counters.3, man3/PAPI_strerror.3, + man3/PAPI_thread_id.3, man3/PAPI_thread_init.3, + man3/PAPI_unlock.3, man3/PAPI_unregister_thread.3, + man3/PAPI_write.3: Remove the old manpages in preperation for + defaulting to doxygen generated ones. + +2011-08-25 + + * src/: perf_events.c, ctests/overflow_allcounters.c, + ctests/papi_test.h, ctests/test_utils.c: Block all PERF_COUNT_SW + events from overflow_allcounters test, as overflow on software + counter can crash perf_event kernels pre 3.1 + + * src/libpfm4/: Makefile, config.mk, lib/Makefile, + lib/pfmlib_common.c, lib/pfmlib_perf_event.c, lib/pfmlib_priv.h, + perf_examples/perf_util.c, perf_examples/task_smpl.c: Fix the + "conflicts" from the import + + * papi.spec, doc/Doxyfile, doc/Doxyfile-everything, + src/Makefile.in, src/configure.in, src/papi.h: Bump version + number to 4.1.4 in advance of pending internal vendor release for + Cray. + +2011-08-23 + + * src/: papi.c, papi_hl.c: Removed all references to Fortran APIs. + These are now all in papi_fwrappers.c Also normalized syntax for + many doxygen headers. + + * src/papi_fwrappers.c: Added doxygen skeleton for all remaining + Fortran functions in this file. Also added wrappers for four + additional APIs: PAPI_get_real_nsec PAPI_read_ts PAPI_lock + PAPI_unlock + +2011-08-19 + + * src/: papi.c, papi_fwrappers.c: Stubbed out doxygen pages for + Fortran functions. About half way done! + + * src/papi_libpfm4_events.c: Finish up the documentation/cleanup + pass through the libpfm4 code. 
+ +2011-08-18 + + * src/papi_libpfm3_events.c: Fix code so we no longer get warnings + that 'setup_preset_term' and '_pfm_get_counter_info' are defined + but not used + + * src/: papi_libpfm3_events.c, papi_libpfm4_events.c, + papi_libpfm_events.h, perf_events.c, perfctr-x86.c: Consolidate + use of _papi_libpfm_init() and pass in MY_VECTOR when necessary. + + * src/papi_libpfm4_events.c: Dynamically allocate the libpfm4 + native events, rather than having a fixed array allocated at init + time. + + * src/papi_libpfm4_events.c: Some more minor cleanups and + documentation in the libpfm4 code. + + * src/components/coretemp/linux-coretemp.c: Fixup for linux + coretemp component, it pays to check cvs status once in a + while... + +2011-08-16 + + * src/papi.c: Update the PAPI_enum_event() Doxygen comments to + reflect modern values for the "modifier" parameter. + + * src/papi_libpfm4_events.c: Clean up code and add documentation + for all the functions involved in libpfm4's + _papi_libpfm_ntv_enum_events() function. + +2011-08-15 + + * src/mb.h: Update the rmb() barrier for ARM. + + * src/papi_events.csv: Update SandyBridge EP support to match that + of mainline libpfm4 + + * src/papi_libpfm4_events.c: Cleanup libpfm4 code, and add more + comments to code. + + * src/perf_events.c: Fix bug where umask support was disabled. + + * src/Rules.perfctr-pfm: Make the perfctr code use the merged + preset event code. + + * src/: Rules.pfm_pe, papi_libpfm3_events.c, papi_libpfm_presets.c: + Have libpfm3 use the merged preset code. + + * src/: Rules.pfm4_pe, papi_libpfm4_events.c, + papi_libpfm_presets.c: Move the libpfm presets code to its own + file, and modify the libpfm4 code to use it. + + * src/papi_libpfm3_events.c: Make the libpfm3 predefined events + parser identical to the libpfm4 one, in preparation for a merge. 
+ + * src/: papi_libpfm3_events.c, papi_libpfm4_events.c, + papi_libpfm_events.h, perf_events.c: Move vendor fixups into the + substrate and out of the naming library code. + + * src/: Rules.perfctr-pfm, Rules.pfm4_pe, Rules.pfm_pe, + papi_libpfm3_events.c, papi_libpfm4_events.c, + papi_libpfm_events.h, papi_pfm4_events.c, papi_pfm_events.c, + papi_pfm_events.h, perf_events.c, perfctr-x86.c, perfmon.c: + Rename papi_pfm_events.c to papi_libpfm3_events.c to make it more + clear what is in the file. + + Also rename papi_pfm4_events.c to papi_libpfm4_events.c and + papi_pfm_events.h to papi_libpfm_events.h + + * src/perfmon.c: Fixup perfmon2 case for the libpfm renaming + + * src/perfctr-x86.c: Fix perfctr breakage from the libpfm rename. + + * src/: papi_pfm4_events.c, papi_pfm_events.c, papi_pfm_events.h, + perf_events.c, perfctr-x86.c, perfmon-ia64.c, perfmon.c: The PAPI + code uses _pfm_ in function names to mean *both* perfmon2 code + and libpfm3/4 code. This can cause a lot of confusion. + + Rename libpfm specific function names to use _libpfm_ instead. + + * src/: papi_pfm_events.c, papi_pfm_events.h, perf_events.c: Fix + build error on perfmon2 due to movement of the + _papi_pfm_shutdown() + +2011-08-05 + + * src/: Makefile.in, Makefile.inc, configure, configure.in, + components/Makefile_comp_tests, + components/cuda/tests/HelloWorld.cu, + components/cuda/tests/Makefile, + components/example/tests/HelloWorld.c, + components/example/tests/Makefile, components/README: Added + generic implementation that makes it possible to add tests to + components without modifying any PAPI-specific code (other than + adding the tests and a makefile to the component directory). All + component tests will be compiled together with PAPI when typing + 'make' (as well as cleaned up when 'make clean' or 'make clobber' + is typed). +++ Also added tests to 2 components, the example and + cuda component. 
+ + * src/: papi_defines.h, papi_internal.h, papi_pfm4_events.c, + perf_events.c: Add locking to papi_pfm4_events so that + adding/looking up event names doesn't have a race condition when + multiple threads are doing it at once. + + Also fix the recently-added pfm_shutdown() to be called at + substrate_shutdown() rather than plain shutdown() as the latter + is called at thread_shutdown() time too. + + * src/: papi_pfm4_events.c, papi_pfm_events.c, papi_pfm_events.h, + perf_events.c: Add a _papi_pfm_shutdown() function and have it + clear out the native events array at PAPI_shutdown(). + + This makes sample code that exhibits the libpfm4 event race much + easier to write. + + * src/ctests/multiplex2.c: Added some PAPI_set_domain's inside of + #if 0's for testing. + +2011-08-03 + + * src/papi_pfm4_events.c: Use the new ARM vendor code to force the + proper default domain on ARM cpus. + + * src/: linux-common.c, papi.h: Add an ARM vendor string and have + it properly set. + + The hardware detection logic is a horrible mess of parsing + /proc/cpuinfo I took the easy way out and just tacked the ARM + logic on the end rather than trying to clean it up at all. + + * src/perf_events.c: Clean up some comments, add a few debug + messages. + +2011-08-02 + + * src/linux-memory.c: The ARM warning for memory hierarchy not + being implemented was in the wrong place. + + * src/: papi_pfm4_events.c, sys_perf_event_open.c: Fix some + misleading debug messages. + + * src/papi_events.csv: Update ARM Cortex A9 preset events, and add + ARM Cortex A8 events + +2011-07-28 + + * src/: cycle.h, linux-context.h, linux-lock.h, linux-memory.c, + linux-timer.c, mb.h: Add remaining changes needed for ARM + compilation. This is enough for "papi_avail" and + "papi_native_avail" to work. + + Lots of #warning statements scattered around. ARM is a + complicated architecture and things like memory barriers and + mutexes are very dependent on what version of the architecture + they are running on. 
It will take a while to figure out the + proper way to handle this in PAPI. + + Also, on Cortex-A8 and Cortex-A9 there is no way to separate + kernel events from the user ones. So all measurements contain + both. This will probably confuse our ctests. + + * src/papi_events.csv: Add ARM Cortex A9 preset events to the CSV + file. + + * src/sys_perf_event_open.c: Add the perf_event syscall number for + ARM + + * src/papi_fwrappers.c: Create PAPIF group in doxygen, for the papi + fortran interface. + +2011-07-27 + + * src/x86_cache_info.c: My changes yesterday broke on the + --with-debug case, as noticed by buildbot. + +2011-07-26 + + * src/: papi.c, papi_fwrappers.c: Implement doxygen comments for + PAPI_get_opt; Implement doxygen comments for PAPIF_accum in + papi_fwrappers.c. This is a first step in providing separate + independent Fortran documentation. + + * doc/Doxyfile: Have doxygen parse papi_fwrappers.c for comments. + + * src/papi_pfm4_events.c: The last checkin broke papi_native_avail + on libpfm4. Fix it. + + * src/papi_pfm4_events.c: Cleanup some code in papi_pfm4_events.c + to avoid gcc-4.6 warnings + + * src/x86_cache_info.c: Fix some warnings in src/x86_cache_info.c + reported by gcc-4.6 + +2011-07-21 + + * src/ctests/all_native_events.c: Change all_native_events test to + create an eventset for each native event it finds. Also becomes + a good test of the number of outstanding eventsets allowed. + +2011-07-19 + + * src/papi.c: Doxygen rewrite for PAPI_set_opt. + +2011-07-13 + + * src/: papi_events.csv, libpfm4/lib/events/intel_snb_events.h: A + few more commits that get SandyBridge mostly working. + + * src/papi.h: Include a comment to the prototype for PAPI_read_ts. + This is apparently a requirement to get doxygen to link from the + prototype to the doc block for the function (a link shows up in + the low_api group now). 
+ +2011-07-12 + + * src/libpfm4/lib/events/intel_snb_events.h: Temporarily add + missing SandyBridge FP events until support gets merged upstream. + + * src/papi.c: Some minor Doxygen fixes. This was my run through + the HTML output produced by my assigned functions. + +2011-07-11 + + * src/libpfm4/lib/pfmlib_intel_snb.c: Temporarily add model 45 + Sandy Bridge to our copy of libpfm4 until we can get this merged + upstream. + + * src/ctests/: multiattach.c, multiattach2.c, reset.c, val_omp.c, + zero_attach.c, zero_fork.c, zero_omp.c, zero_pthreads.c, + zero_smp.c: Fix all the remaining users of the ctests + add_two_events() helper + + * src/ctests/first.c: Fix first test bug due to add_two_events() + change. Clean up validation of results. + + * src/ctests/zero.c: Some cleanups I made to the testing routine + add_two_events() a while ago broke the zero test. (the cycles + result was swapped with the other counter result). + + This fixes this, plus adds a validation check to try to avoid + this happening in the future. + + * src/: configure, configure.in: Patch from William Cohen that sets + LD_LIBRARY_PATH and LIBPATH to include libpfm4/lib. + + A better fix would probably be to include only the libpfm library + we are currently configured for. I need to do more testing of + the --with-static-lib=no --with-shared-lib=yes --with-shlib + options + + * src/papi_hl.c: High level interface Doxygen comments updated to + include interface overview + +2011-07-08 + + * doc/Doxyfile, src/papi.h, src/papi_hl.c, src/papi_vector.h: Add + in the PAPI component development page. Currently not linked to + by anything yet, but can be found at file://$(html_dir)/CDI or + http://web.eecs.utk.edu/~ralph/html/CDI for an already built + page. 
+ +2011-07-07 + + * src/: papi.c, papi.h: Add doxygen comments for + PAPI_get_executable_info(), PAPI_exe_info_t and + PAPI_address_map_t + + * src/papi.c: Add doxygen comments for PAPI_event_code_to_name() + and PAPI_event_name_to_code() + + * src/papi.c: Add doxygen comments for PAPI_enum_event() + + * src/papi.c: Add doxygen comments for PAPI_create_eventset() + + * src/papi.c: Add doxygen comments for PAPI_cleanup_eventset() and + PAPI_destroy_eventset() + + * src/papi.c: Add doxygen comments for PAPI_attach() and + PAPI_detach() + + * src/papi.c: Add doxygen comments for + PAPI_assign_eventset_component() + +2011-07-05 + + * src/components/cuda/linux-cuda.c: missing parentheses added in + CUDA_Shutdown() which caused a seg fault. + +2011-07-01 + + * src/papi.c: Add doxygen comments for PAPI_add_event() + + * src/papi.c: Add doxygen comments for PAPI_add_events() +++ + Updated PAPI_accum() + + * src/papi.c: Add doxygen comments for PAPI_accum() + + * src/ctests/: data_range.c, earprofile.c: Some more ia64 ctests + fixes + + * src/papi.c: Add doxygen comments for PAPI_register_thread() + + * src/papi.c: Add doxygen comments for: PAPI_read() + PAPI_read_ts() + + * src/ctests/earprofile.c: Another attempt at fixing earprofile on + ia64. + + * src/ctests/earprofile.c: PAPI for ia64 compiles now, and now it's + some of the ia64-specific ctests that are broken. + + There was a missing #include "papi.h" in earprofile + +2011-06-30 + + * src/papi.c: Doxygen for: PAPI_set_multiplex PAPI_shutdown + PAPI_sprofil_t PAPI_start (int EventSet) PAPI_state (int + EventSet, int *status) PAPI_stop (int EventSet, long long + *values) PAPI_strerror (int) + + * src/: linux-timer.c, perfmon-ia64-pfm.h, perfmon-ia64.c: more + ia64 fixes + + * src/papi.c: doxygen comments for: PAPI_query_event() + + * src/: linux-timer.c, linux-timer.h, papi_vector.c, papi_vector.h: + Some more ia64 fixes. 
+ + * src/papi.c: add doxygen comments for PAPI_profil() + + * src/: linux-timer.c, linux-timer.h, perfmon-ia64.c: More ia64 + fixes. Getting closer. + + * src/: linux-context.h, perfmon-ia64.c, perfmon-ia64.h: One more + try at fixing ia64. + + The trick to cross compiling is ./configure --with-CPU=itanium2 + --with-arch=ia64 --with-perfmon=2.0 --with-tls=no make + __ia64__=1 + + and you still have to fiddle with some __ia64__ ifdefs scattered + in the code + +2011-06-29 + + * src/papi.c: Add doxygen comments for: * PAPI_num_events() * + PAPI_overflow() * PAPI_perror() + + * src/papi.c: Doxygen for PAPI_set_domain and PAPI + _set_granularity. Unfortunately, this seems to have raised more + issues about Fortran support... + + * src/papi.c: Add doxygen comments to * PAPI_list_threads() * + PAPI_lock() * PAPI_multiplex_init() * PAPI_num_hwctrs() * + PAPI_num_cmp_hwctrs() + + * src/papi.c: Doxygen for PAPI_set_debug and minor tweaks to other + function documentation. + +2011-06-28 + + * src/: linux-common.h, linux-timer.c, papi_pfm_events.c, + perfmon-ia64-pfm.h: some more itanium fixes. This won't be + enough to fix things but it is a start. + + * src/papi.c: Check in Kiran's doxygen work. This time hopefully + not clobbering anyone. + + * src/: linux-context.h, linux-timer.c, perfmon-ia64.h: Attempt to + fix the build for itanium systems. + + * src/papi.c: Fix comments embedded in doxygen source to be C++ + single line format. + +2011-06-27 + + * src/papi.c: Commit documentation changes for PAPI_reset, + PAPI_set_thr_specific, and PAPI_get_thr_specific. The last one + wasn't on my list, but it mirrored _set_ so I did it anyway. + + * src/papi.c: [no log message] + + * src/papi.c: Commit Kiran's updates to the code documentation. + +2011-06-24 + + * doc/Doxyfile: One got left behind...
( see previous commit about + redoing doxygen procedures ) + + * src/Makefile.inc, src/configure, src/configure.in, + doc/Doxyfile.html, doc/Doxyfile.utils, + doc/Doxyfile.utils-everything, doc/Makefile, + doc/doxygen_procedure.txt: Update install process for man-pages, + install from pre-built pages living in $(PAPI_DIR)/man and update + $(PAPI_DIR)/doc to generate doxygen pages and copy them to + $(PAPI_DIR)/man. + + This removes doxygen from the install process. And it removes + the web of doxygen configuration files, going back to just two, + lite and kitchen-sink. + + * src/papi.c: Updates to doxygen stuff for PAPI_remove_event{s} + + * src/: linux-bgp.c, perfmon-ia64.c, perfmon.c, solaris-niagara2.c, + solaris-ultra.c: When I made the multiattach change I forgot to + update _papi_hwi_lookup_thread calls on all architectures. This + should get the ones I missed. + +2011-06-23 + + * src/papi_pfm4_events.c: For libpfm4 we were setting available + counters to the number of generic counters. + + This was less than libpfm3, so update the code to set the number + of counters to be equal to generic+fixed. + + In theory whether an event can be added is determined at add + time, so the extra check for number of counters is unnecessarily + getting in the way. This should be fixed but might require a + re-write of some PAPI internals. + +2011-06-22 + + * src/ctests/test_utils.c: One more fix to the byte_profile code + + * src/ctests/byte_profile.c: Fix byte_profile ctest, as it was + breaking on libpfm4. + + * src/: extras.c, papi.c, perf_events.c, threads.c, threads.h, + ctests/multiattach.c, ctests/multiattach2.c: Add support for + handling multiattach properly. + + This adds a pid argument to the + _papi_hwi_lookup_or_create_thread() call. A pid of "0" falls + back to the old behavior of using the current tid/pid. + + If attaching to an outside pid/tid, a new thread object is + created to handle this.
This seems like the right thing to do, + though there's enough complicated code in the threads code that I + haven't fully audited that this can't fail somehow in complicated + cases where lots of attaching/detaching is done in conjunction + with having a large multi-threaded program. + +2011-06-13 + + * src/papi_pfm4_events.c: Fix the libpfm4 enumerate code. + + It was possible for papi_native_avail to get stuck in an infinite + loop if two events had the same name on different PMUs and the + "default" PMU happened later in the enumeration. + + This was the case on SandyBridge at least. + + This should be fixed now. + + * src/ctests/test_utils.c: Make "test_fail()" actually fail. + + In the comments we say we don't exit to avoid leaking memory in + threads. + + That seems suspect. The threads should exit properly too. If + they don't, then we should fix the threading code and not make + our tests never exit on fail (which can make debugging a pain). + +2011-06-10 + + * src/: papi.c, papi_hl.c: Add example code to the high level + interface docs + + * src/papi_events.csv: Add initial Sandy Bridge event support. + + This is in no way nested, so be cautious if using. + + Sandy Bridge support is libpfm4 only, so you'll have to configure + with --with-libpfm4 + + * src/papi_hl.c: Added an example of how to embed example code in + PAPI_stop_counters documentation. + +2011-06-09 + + * src/Makefile.inc: Makefile fix for fortran wrapper files on + case-insensitive filesystems. During build, it renames the + preprocessed file PAPI_FWRAPPERS.c to upper_PAPI_FWRAPPERS.c + +2011-06-08 + + * src/: configure, Makefile.inc, configure.in: Have configure check + that doxygen is installed, and have make install only attempt to + build the doxygen docs if we found doxygen. 
+ +2011-06-07 + + * src/: run_tests_exclude_cuda.txt, components/cuda/linux-cuda.c: + ctests/thrspecific works now too with the CUDA component + + * src/components/cuda/linux-cuda.c: clean up and indent + + * src/components/cuda/: linux-cuda.c, linux-cuda.h: Added + CudaRemoveEvent functionality (was broken in earlier CUDA RC + versions). ctests/all_native_events works now (at least for the + default CUDA device). +++ Minor exit/return mods in CUDA + component + + * doc/Doxyfile, doc/Doxyfile.html, doc/Doxyfile.utils, + doc/Doxyfile.utils-everything, doc/Makefile, src/Makefile.inc, + src/papi.c, src/papi.h, src/papi_hl.c: Rework doxygen to better + generate manpages from code comments. + +2011-06-03 + + * release_procedure.txt: Incorporate a note about using 2.59 + autoconf to build configure. + +2011-06-02 + + * src/utils/error_codes.c: Tweak the doxygen title text. + +2011-06-01 + + * src/: configure, configure.in: Modified configure.in to look for + a 2.59 autoconf prerequisite. Rebuilt configure with 2.59. We'll + try this out on buildbot. + +2011-05-31 + + * src/: run_tests_exclude_cuda.txt, components/cuda/linux-cuda.c, + components/cuda/linux-cuda.h: 2 things: (1) Bug in CUDA v4.0 + fixed. It caused a threaded application to hang when parent + called cuInit() before fork() and child called also cuInit(). + All fork ctests pass now if papi is configured with cuda + component. (2) If running a threaded application, we need to make + sure that a thread doesn't free the same memory location(s) more + than once. Now all pthread ctests pass, too (again, if papi is + configured with cuda component). + +2011-05-27 + + * src/perf_events.c: It turns out our FORMAT_ID workaround + detection code was identical to FORMAT_GROUP (and not really + necessary) so merge the two. + +2011-05-26 + + * src/papi_pfm_events.h: One last try at the cray compile fix, this + time using a suggestion from Steve Kaufmann. + + * src/perf_events.c: Update some comments on the workarounds. 
+ + I've been writing some validation tests for our various + workarounds. + + It turns out the "no multiplexing before 2.6.33" problem is + actually an artifact of the check_schedulability bug on x86 (and + its interaction with our event partitioning code) rather than a + distinct kernel bug. + + * src/Rules.pfm4_pe: Now fix libpfm4. I think they should all be + fixed now. Too many permutations. + + * src/: Rules.pfm_pe, papi_pfm_events.h: One last try at fixing the + perfmon2 build. + + * src/papi_pfm_events.h: Fix the perfmon2 build that broke with the + libpfm4 merge. The previous fix only fixed perfctr, not perfmon2 + + This should fix the build for cray machines. + +2011-05-24 + + * src/utils/component.c: Add doxygen comments to components.c + + * src/papi_events.csv: Fix the PAPI_TOT_INS instruction for Atom, + as well as update the floating point events. + + * src/perf_events.c: We were using some of the perf_event + functionality in an unsupported way and this broke recently + when the perf_event interface was made more strict. + + You can't use the PERF_EVENT_IOC_REFRESH ioctl on a group leader + to start all sampling siblings... use PERF_EVENT_IOC_ENABLE + + Don't pass NULL or 0 as the argument to the + PERF_EVENT_IOC_REFRESH ioctl. + + These fixes seem to work and fix the Nehalem regressions. The + above changes were made to PAPI back in November to fix the I/O + possible error, so we should check to be sure that this doesn't + reintroduce the problem. + + We should also probably back-port this fix to 4.1.2 and 4.2 + stable + +2011-05-23 + + * src/: configure, configure.in, papi.c, papi.h, papi_data.h, + utils/Makefile, utils/error_codes.c: New utility to display PAPI + error codes and description strings. There was no API to access + error descriptions, so I created PAPI_descr_error( int error_code + ) too. I also updated the error table to provide strings for all + defined codes. + + * src/aix.c: Define aix's .cmp_info.itimer_ns value to a default.
+ The multiplexing tests are happy on power7 aix now. + + * src/: sys_perf_event_open.c, ctests/overflow.c: cleanup some + debug messages + + * src/ctests/: overflow.c, test_utils.c: The overflow test depends + on the exact ordering of the flags in the add_test_event() + code. So my previous changes broke the test. This commit + fixes the test case again. + + * src/ctests/: byte_profile.c, prof_utils.c, prof_utils.h, + profile.c, profile_twoevents.c, sprofile.c: ctests: remove the + "hw_info" field from the profile setup functions, as the + field isn't used. + + * src/: configure, configure.in, utils/Makefile, utils/component.c: + Introduce a component avail utility, lists the components we were + built with, optionally with native/preset counts and version + number. + + * src/components/example/example.c: Add number of 'native' events + to the component info structure in example component. + + * src/ctests/: byte_profile.c, papi_test.h, prof_utils.c, + prof_utils.h, profile.c, profile_twoevents.c, sprofile.c, + test_utils.c, zero_smp.c: Clean up the ctest profile event + section code some more. + + This fixes a build error on AIX that I introduced on Friday. + + * src/papi_events.csv: Initial PAPI Fam14h Bobcat support. + + Only works with libpfm4 version of PAPI. + + Passes most of the tests, but still need to verify as there are a + number of subtle differences in the native events. + +2011-05-20 + + * src/ctests/: byte_profile.c, mendes-alt.c, papi_test.h, + prof_utils.c, test_utils.c: Fix byte_profile to work on Nehalem. + Still needs some more work to print the result properly.
+ + * src/ctests/: attach2.c, attach3.c, branches.c, byte_profile.c, + case1.c, case2.c, first.c, multiattach.c, multiattach2.c, + overflow.c, overflow3_pthreads.c, overflow_index.c, + overflow_one_and_read.c, overflow_pthreads.c, papi_test.h, + prof_utils.c, profile_pthreads.c, reset.c, sdsc.c, sprofile.c, + tenth.c, test_utils.c, zero.c, zero_attach.c, zero_fork.c, + zero_pthreads.c: Some cleanups to the ctests/test_utils.c code + + Remove the hw_info field from the add_two_events() and + add_two_nonderived_events() functions, as it wasn't used. + Make + the add_test_events() function loop through all the masks, + instead of having a hardcoded test for each possible mask + + * src/ctests/test_utils.c: buildbot didn't like the colored test + messages (despite the code having fancy checks for "isatty()"). + + So change the color thing to require an environment variable to + be set, TESTS_COLOR=y + +2011-05-19 + + * src/ctests/test_utils.c: Add color to the testsuite results if we + are running at a console. This makes it much easier to see + FAILED results. + + I can back this out if people don't like it, but it's made my + life a lot easier when running all the tests involved with the + libpfm4 merge. + + * src/: papi_pfm_events.c, papi_pfm_events.h: Fix the build with + perfctr introduced by libpfm4 changes. + + * src/configure.in: Documentation for the AIX heap fix. + + * src/: papi_pfm4_events.c, ctests/test_utils.c: power6 doesn't + work with libpfm4, as it reports num_cntrs=0 + + have PAPI print a better error in this case until we get a fix + upstream. + + * src/: configure, configure.in: On aix one has to ask really + nicely for a usable amount of heap space. The omp tests should + run now. + + * src/: configure, configure.in, perf_events.c, + sys_perf_event_open.c: This is the last commit needed to get + libpfm4 support going.
+ + To build with libpfm4 support enabled, run configure like this: + ./configure --with-libpfm4 + + * src/: papi_pfm_events.c, papi_pfm_events.h, perf_events.c: Pass + the actual perf_attr structure around, rather than just a 64-bit + event value. This allows support for generalized events and + eventual offcore/uncore support. + + * src/: papi_pfm_events.c, perf_events.c, perf_events.h: Clean up + some debugging #ifdefs + + * src/papi_events.csv: The papi_events.csv file requires some + additions for libpfm4 to work + The CPU family names have + changed from libpfm3 to libpfm4 It should be backward + compatible to just add the libpfm4 ones in addition to the + libpfm3 ones + libpfm4 does not provide a helper to get the + instruction and cycle event names. So we have to add them + for all supported CPUs + + * src/: Rules.pfm4_pe, papi_pfm4_events.c: New files needed for + libpfm4 support + +2011-05-16 + + * release_procedure.txt: Add note to update from cvs before + tagging. Thanks, Will Cohen :) + diff --git a/ChangeLogP420.txt b/ChangeLogP420.txt new file mode 100644 index 0000000..b14bda3 --- /dev/null +++ b/ChangeLogP420.txt @@ -0,0 +1,1140 @@ +2011-10-25 + + * doc/: Makefile, doxygen_procedure.txt: Update doxygen_procedure + to note that we need a recent version of doxygen. 
+ + * man/: man1/avail.c.1, man1/clockres.c.1, man1/command_flags_t.1, + man1/command_line.c.1, man1/component.c.1, man1/cost.c.1, + man1/decode.c.1, man1/error_codes.c.1, man1/event_chooser.c.1, + man1/mem_info.c.1, man1/native_avail.c.1, man1/options_t.1, + man1/papi_avail.1, man1/papi_clockres.1, + man1/papi_command_line.1, man1/papi_component_avail.1, + man1/papi_cost.1, man1/papi_decode.1, man1/papi_error_codes.1, + man1/papi_event_chooser.1, man1/papi_mem_info.1, + man1/papi_multiplex_cost.1, man1/papi_native_avail.1, man3/CDI.3, + man3/HighLevelInfo.3, man3/PAPIF.3, man3/PAPIF_accum.3, + man3/PAPIF_accum_counters.3, man3/PAPIF_add_event.3, + man3/PAPIF_add_events.3, man3/PAPIF_assign_eventset_component.3, + man3/PAPIF_cleanup_eventset.3, man3/PAPIF_create_eventset.3, + man3/PAPIF_destroy_eventset.3, man3/PAPIF_enum_event.3, + man3/PAPIF_event_code_to_name.3, man3/PAPIF_event_name_to_code.3, + man3/PAPIF_flips.3, man3/PAPIF_flops.3, + man3/PAPIF_get_clockrate.3, man3/PAPIF_get_dmem_info.3, + man3/PAPIF_get_domain.3, man3/PAPIF_get_event_info.3, + man3/PAPIF_get_exe_info.3, man3/PAPIF_get_granularity.3, + man3/PAPIF_get_hardware_info.3, man3/PAPIF_get_multiplex.3, + man3/PAPIF_get_preload.3, man3/PAPIF_get_real_cyc.3, + man3/PAPIF_get_real_nsec.3, man3/PAPIF_get_real_usec.3, + man3/PAPIF_get_virt_cyc.3, man3/PAPIF_get_virt_usec.3, + man3/PAPIF_ipc.3, man3/PAPIF_is_initialized.3, + man3/PAPIF_library_init.3, man3/PAPIF_lock.3, + man3/PAPIF_multiplex_init.3, man3/PAPIF_num_cmp_hwctrs.3, + man3/PAPIF_num_counters.3, man3/PAPIF_num_events.3, + man3/PAPIF_num_hwctrs.3, man3/PAPIF_perror.3, + man3/PAPIF_query_event.3, man3/PAPIF_read.3, + man3/PAPIF_read_ts.3, man3/PAPIF_register_thread.3, + man3/PAPIF_remove_event.3, man3/PAPIF_remove_events.3, + man3/PAPIF_reset.3, man3/PAPIF_set_cmp_domain.3, + man3/PAPIF_set_cmp_granularity.3, man3/PAPIF_set_debug.3, + man3/PAPIF_set_domain.3, man3/PAPIF_set_event_domain.3, + man3/PAPIF_set_granularity.3, 
man3/PAPIF_set_inherit.3, + man3/PAPIF_set_multiplex.3, man3/PAPIF_shutdown.3, + man3/PAPIF_start.3, man3/PAPIF_start_counters.3, + man3/PAPIF_state.3, man3/PAPIF_stop.3, + man3/PAPIF_stop_counters.3, man3/PAPIF_thread_id.3, + man3/PAPIF_thread_init.3, man3/PAPIF_unlock.3, + man3/PAPIF_unregister_thread.3, man3/PAPIF_write.3, + man3/PAPI_accum.3, man3/PAPI_accum_counters.3, + man3/PAPI_add_event.3, man3/PAPI_add_events.3, + man3/PAPI_addr_range_option_t.3, man3/PAPI_address_map_t.3, + man3/PAPI_all_thr_spec_t.3, + man3/PAPI_assign_eventset_component.3, man3/PAPI_attach.3, + man3/PAPI_attach_option_t.3, man3/PAPI_cleanup_eventset.3, + man3/PAPI_component_info_t.3, man3/PAPI_cpu_option_t.3, + man3/PAPI_create_eventset.3, man3/PAPI_debug_option_t.3, + man3/PAPI_descr_error.3, man3/PAPI_destroy_eventset.3, + man3/PAPI_detach.3, man3/PAPI_dmem_info_t.3, + man3/PAPI_domain_option_t.3, man3/PAPI_enum_event.3, + man3/PAPI_event_code_to_name.3, man3/PAPI_event_info_t.3, + man3/PAPI_event_name_to_code.3, man3/PAPI_exe_info_t.3, + man3/PAPI_flips.3, man3/PAPI_flops.3, man3/PAPI_get_cmp_opt.3, + man3/PAPI_get_component_info.3, man3/PAPI_get_dmem_info.3, + man3/PAPI_get_event_info.3, man3/PAPI_get_executable_info.3, + man3/PAPI_get_hardware_info.3, man3/PAPI_get_multiplex.3, + man3/PAPI_get_opt.3, man3/PAPI_get_overflow_event_index.3, + man3/PAPI_get_real_cyc.3, man3/PAPI_get_real_nsec.3, + man3/PAPI_get_real_usec.3, man3/PAPI_get_shared_lib_info.3, + man3/PAPI_get_thr_specific.3, man3/PAPI_get_virt_cyc.3, + man3/PAPI_get_virt_nsec.3, man3/PAPI_get_virt_usec.3, + man3/PAPI_granularity_option_t.3, man3/PAPI_hw_info_t.3, + man3/PAPI_inherit_option_t.3, man3/PAPI_ipc.3, + man3/PAPI_is_initialized.3, man3/PAPI_itimer_option_t.3, + man3/PAPI_library_init.3, man3/PAPI_list_events.3, + man3/PAPI_list_threads.3, man3/PAPI_lock.3, + man3/PAPI_mh_cache_info_t.3, man3/PAPI_mh_info_t.3, + man3/PAPI_mh_level_t.3, man3/PAPI_mh_tlb_info_t.3, + man3/PAPI_mpx_info_t.3, 
man3/PAPI_multiplex_init.3, + man3/PAPI_multiplex_option_t.3, man3/PAPI_num_cmp_hwctrs.3, + man3/PAPI_num_components.3, man3/PAPI_num_counters.3, + man3/PAPI_num_events.3, man3/PAPI_num_hwctrs.3, + man3/PAPI_option_t.3, man3/PAPI_overflow.3, man3/PAPI_perror.3, + man3/PAPI_preload_info_t.3, man3/PAPI_profil.3, + man3/PAPI_query_event.3, man3/PAPI_read.3, + man3/PAPI_read_counters.3, man3/PAPI_read_ts.3, + man3/PAPI_register_thread.3, man3/PAPI_remove_event.3, + man3/PAPI_remove_events.3, man3/PAPI_reset.3, + man3/PAPI_set_cmp_domain.3, man3/PAPI_set_cmp_granularity.3, + man3/PAPI_set_debug.3, man3/PAPI_set_domain.3, + man3/PAPI_set_granularity.3, man3/PAPI_set_multiplex.3, + man3/PAPI_set_opt.3, man3/PAPI_set_thr_specific.3, + man3/PAPI_shlib_info_t.3, man3/PAPI_shutdown.3, + man3/PAPI_sprofil.3, man3/PAPI_sprofil_t.3, man3/PAPI_start.3, + man3/PAPI_start_counters.3, man3/PAPI_state.3, man3/PAPI_stop.3, + man3/PAPI_stop_counters.3, man3/PAPI_strerror.3, + man3/PAPI_thread_id.3, man3/PAPI_thread_init.3, + man3/PAPI_unlock.3, man3/PAPI_unregister_thread.3, + man3/PAPI_write.3, man3/high_api.3, man3/low_api.3, + man3/papi_data_structures.3, man3/papi_vector_t.3, + man3/ret_codes.3: Update doxygen generated man-pages for the + pending release. + + In the future, we need to use a newer version of doxygen to + generate the pages (1.7 +) because locally installed versions + appear to have a bug. + + * src/ctests/nmi_watchdog.c: The nmi_watchdog test should report a + Warning if nmi_watchdog is enabled not an error. (Since we do + work around it, even if performance is likely impacted). + + * src/ctests/: Makefile, nmi_watchdog.c: I think the nmi_watchdog + stuff is going to cause us problems down the road. + + Thus add a test that will tell users about the issue. + + * src/perf_events.c: The nmi_watchdog workaround is needed for + multiplexing too. + + The kernel devs don't seem eager to fix this.
Until they do, + we'll have to fall back to software multiplexing on recent + kernels that have nmi_watchdog enabled (most vendor kernels). + + * src/multiplex.c: Yesterday's coverity fix to make sure the + cleanup and destroy return values were checked ended up + over-writing "retval" in a way that broke the sdsc4-mpx test. + Fix things so that doesn't happen. + + * src/: papi.c, perf_events.c, ctests/overflow_allcounters.c: Some + changes for perf_event MIPS support + + + Add __mips__ cases to the format_group, schedulability, and + broken multiplexing bug workarounds, as even new Linux mips + kernels have these bugs + fix overflow_allcounters to work + properly if the MHz value is zero. + Add some debugging to + PAPI_overflow() so that errors are more obvious than just + returning PAPI_EINVAL, which made the previous item a pain to + track down. + + * man/: footer.htm, header.htm, manServer_papi.pl, papiman.bat, + html/papi.html, html/papi_accum.html, + html/papi_accum_counters.html, html/papi_add_event.html, + html/papi_add_events.html, + html/papi_assign_eventset_component.html, html/papi_attach.html, + html/papi_avail.html, html/papi_cleanup_eventset.html, + html/papi_clockres.html, html/papi_command_line.html, + html/papi_cost.html, html/papi_create_eventset.html, + html/papi_decode.html, html/papi_destroy_eventset.html, + html/papi_detach.html, html/papi_encode_events.html, + html/papi_enum_event.html, html/papi_event_chooser.html, + html/papi_event_code_to_name.html, + html/papi_event_name_to_code.html, html/papi_flips.html, + html/papi_flops.html, html/papi_get_component_info.html, + html/papi_get_dmem_info.html, html/papi_get_event_info.html, + html/papi_get_executable_info.html, + html/papi_get_hardware_info.html, html/papi_get_multiplex.html, + html/papi_get_opt.html, html/papi_get_overflow_event_index.html, + html/papi_get_real_cyc.html, html/papi_get_real_usec.html, + html/papi_get_shared_lib_info.html, + html/papi_get_substrate_info.html, +
html/papi_get_thr_specific.html, html/papi_get_virt_cyc.html, + html/papi_get_virt_usec.html, html/papi_help.html, + html/papi_ipc.html, html/papi_is_initialized.html, + html/papi_library_init.html, html/papi_list_events.html, + html/papi_list_threads.html, html/papi_lock.html, + html/papi_mem_info.html, html/papi_multiplex_init.html, + html/papi_native.html, html/papi_native_avail.html, + html/papi_num_cmp_hwctrs.html, html/papi_num_components.html, + html/papi_num_counters.html, html/papi_num_events.html, + html/papi_num_hwctrs.html, html/papi_overflow.html, + html/papi_perror.html, html/papi_presets.html, + html/papi_profil.html, html/papi_query_event.html, + html/papi_read.html, html/papi_read_counters.html, + html/papi_register_thread.html, html/papi_remove_event.html, + html/papi_remove_events.html, html/papi_reset.html, + html/papi_set_cmp_domain.html, + html/papi_set_cmp_granularity.html, html/papi_set_debug.html, + html/papi_set_domain.html, html/papi_set_event_info.html, + html/papi_set_granularity.html, html/papi_set_multiplex.html, + html/papi_set_opt.html, html/papi_set_thr_specific.html, + html/papi_shutdown.html, html/papi_sprofil.html, + html/papi_start.html, html/papi_start_counters.html, + html/papi_state.html, html/papi_stop.html, + html/papi_stop_counters.html, html/papi_strerror.html, + html/papi_thread_id.html, html/papi_thread_init.html, + html/papi_unlock.html, html/papi_unregister_thread.html, + html/papi_write.html, html/papif.html, + html/papif_get_clockrate.html, html/papif_get_domain.html, + html/papif_get_exe_info.html, html/papif_get_granularity.html, + html/papif_get_preload.html, html/papif_set_event_domain.html, + images/cssigoff.gif, images/cssigon.gif, images/headertop.jpg, + images/line.gif, images/logobottom.jpg, images/logoleft.jpg, + images/menubg.jpg, images/menubg95.jpg, images/rd.jpg, + images/spinbg.jpg, images/spinlogo.gif, images/stable.gif, + images/stripes2.jpg, images/trans.gif, images/utsigoff.gif, + 
images/utsigon.gif, images/white.jpg: Remove the old html + documentation and assorted helper files. + + * src/components/coretemp/linux-coretemp.c: Fix a possible + directory stream leak in the coretemp component. + + reported by coverity checker. + + * src/ctests/calibrate.c: Properly free the arrays in calibrate, + introduced by yesterday's coverity fix. + + Patch by Will Cohen + + +2011-10-24 + + * src/components/coretemp/linux-coretemp.c: Fix coretemp to not + fail if /sys/class/hwmon doesn't exist. + + * src/components/coretemp/linux-coretemp.c: Patch coretemp to only + free the initialized data in shutdown_substrate (once per + PAPI_init) rather than shutdown (once per thread). + + This was causing double free errors. + + Patch from Will Cohen + + * src/utils/multiplex_cost.c: Fix various calls to PAPI_start() and + PAPI_stop() in multiplex_cost that didn't check the return value. + Took care to try to avoid changing timing measurements. Noticed + by coverity checker. + + * src/utils/cost.c: In one case, cost was not checking the return + of PAPI_start()/PAPI_stop(). This change makes it do so, while + being careful not to interfere with the timing that is going on. + + * src/ctests/: pthrtough.c, pthrtough2.c: pthrtough and pthrtough2 + were not checking the return value for pthread_attr_setscope(). + Reported by coverity checker. + + * src/ctests/multiplex1_pthreads.c: multiplex1_pthreads was not + checking the return from PAPI_library_init() as flagged by + coverity checker. + + * src/ctests/inherit.c: inherit.c wasn't checking the result of the + waitpid() call, as reported by coverity checker. + + * src/ctests/clockres_pthreads.c: Check the return of + pthread_create(). + + Reported by coverity checker. + + * src/papi_libpfm4_events.c: Fix an actual bug (reported as + deadcode by coverity) where _papi_hwd_ntv_code_to_descr was + appending extraneous ", masks:" strings into an event + description.
+ + None of our utils/ctests exercise this function, which is + probably why the bug wasn't noticed. + + * src/: multiplex.c, papi.c: Fix cases where PAPI_*() functions + were called without checking the return for an error. + + Reported by coverity. + + * doc/Doxyfile.utils: Update version to 4.2.0 for pending release. + + * src/multiplex.c: Fix some code that could potentially dereference + a null pointer. + + Found by the coverity checker. + + * src/papi_vector.c: Remove a dead code case as reported by + coverity. Shouldn't break anything as I can't find anywhere that + vector_print_table() is actually called. + + * release_procedure.txt: Update release_procedure to reflect + another file that needs a version number bump. (Doxyfile.utils) + + * src/ctests/calibrate.c: Fix some weird code that was sharing a + memory allocation for both double and floats. This was really + ugly and made the coverity checker sad. + + Patch provided by Will Cohen. + + * src/testlib/test_utils.c: Fix a signed/unsigned comparison bug I + introduced. + + * src/components/coretemp/tests/coretemp_basic.c: Fix the test so + it correctly iterates all of the components. + + * src/components/coretemp/: linux-coretemp.c, tests/Makefile, + tests/coretemp_basic.c: Fix a potential memory leak in coretemp + (flagged by coverity). + + Also added a test case for coretemp so I can actually test if + these changes are breaking anything. + + * src/solaris-ultra.c: Remove const declaration from get_virt_* in + solaris substrate. Vince removed this from papi_vector.h back in + June. + + * src/testlib/test_utils.c: Improve the add_two_events() code in + the test library. Before it was possible to overrun a buffer if + none of the potential predefined events were available. + + Noticed by the coverity checker. + + * papi.spec, doc/Doxyfile, doc/Doxyfile-everything, src/configure, + src/papi.h, src/Makefile.in, src/configure.in: Update version to + 4.2.0 for pending release.
+ +2011-10-21 + + * src/: Makefile.inc, configure, configure.in, papi.c, papi.h, + papi_internal.c, papi_user_events.c, papi_user_events.h: Merge in + the user events code, protected by a configure option. ( + --with-user-events ) + + * src/testlib/test_utils.c: We now ensure that test_fail() always + exits. There was some code around that tracked the number of + times test_fail() was called. Remove that, as I think it was + confusing the coverity checker and causing a huge number of false + positives for NULL pointer dereferences. + + * src/components/acpi/linux-acpi.c: Some minor cleanups to the acpi + component. It was choking a bit if ACPI didn't provide thermal + information, and also fix a few coverity bugs involving not + checking the result of a dup() call. + + * src/testlib/test_utils.c: Another problem with negative numbers, + this time one could potentially be passed to a malloc call. + + noticed by coverity + + * src/ctests/overflow_pthreads.c: We were indexing an array with a + returned value that could be negative on failure. Add a check to + avoid that. + + We're also indexing a per-thread array with an EventSet number, + which sounds suspect, should probably investigate that further. + + * src/perf_events.c: perf_events.c was setting variables to -1 and + then potentially using them to index arrays or call close() on + them. + + This adds checks to avoid that. + + Noticed by the coverity checker. + + * src/components/lustre/linux-lustre.h: Include stdint.h and + ctype.h; needed for uint64_t and isspace() respectively. + + * src/components/coretemp/linux-coretemp.c: Fix problem where we + try to manipulate a NULL directory entry. + + This fixes a segfault on a Nehalem machine we have here that has + a /sys/class/hwmon/hwmon0 directory without a "device" + subdirectory. + + * src/components/coretemp/linux-coretemp.c: We were opening a file + but not checking for failure before reading from it. + + Flagged by the coverity checker. 
+ + * src/components/coretemp/linux-coretemp.c: Both gcc and coverity + were complaining about using an uninitialized pointer. This + makes sure it's not dereferenced if not initialized. + + * src/ctests/prof_utils.c: Stop doing unnecessary pointer math in a + print statement. + + This was flagged as a problem by the coverity tool. + + * src/components/coretemp/linux-coretemp.c: Fix some wrong buffer + sizes in the coretemp component. + + Patch from Will Cohen + + * src/ctests/sdsc.c: add some extra debug info for sdsc test + failures. + + * src/papi_hl.c: Add comment to PAPI_num_counters() documentation + about use of PAPI_num_cmp_hwctrs() for component counters. + +2011-10-19 + + * src/papi.c: Correct documentation errors for PAPI_strerror. + + * src/: configure, configure.in: Under a no-cpu-counters build, + still build all of the utils. We probably want to rethink some + of the cost util details. + +2011-10-11 + + * src/run_tests.sh: Remove an unneeded call to "cat". For some + reason it was printing pointless warnings that needlessly + cluttered the buildbot logs. + + * src/ctests/: Makefile, multiplex1.c: -lpapi should never be a + dependency. -I.. is missing in makefile + + You should be able to cd ctests and do: make or make + multiplex. + + Also, added the read after start multiplex case for multiplex1. + This triggers bugs in perf_events systems. + +2011-10-10 + + * src/: papi.c, papi_internal.c, threads.c: The multiplex1_pthreads + test was reporting a memory leak. + + This is because the test was calling PAPI_unregister_thread() + without destroying its EventSets. + + This added change adds code that at unregister_thread time will + destroy any events belonging to that thread. + + This works on all the current ctests but I should check some of + the various corner cases not currently tested. 
+ +2011-10-07 + + * src/libpfm4/: config.mk, lib/pfmlib_amd64.c, lib/pfmlib_common.c, + lib/pfmlib_intel_x86.c, lib/events/intel_nhm_events.h, + lib/events/intel_wsm_events.h: Merge the "conflicts" from the + libpfm4 merge + + * src/: threads.c, threads.h: Fix the MEMORY LEAK errors involving + the attach ctests (as seen on buildbot) + + These came about when proper multiattach support was added. A + "fake" thread structure is created for each attached process. + These fake thread structures were not being cleaned up at + shutdown, hence the leak. + + This fix adds support so at thread shutdown, if we have any + "fake" threads that we created, also shut them down too. + + This was tricky, especially dealing with the circular-linked list + the thread info structs are in. This fix seems to work without + negatively affecting the pthread cases. + + ctests/multiplex1_pthreads still reports MEMORY LEAK but that + seems to be an eventset issue, not a thread issue, so will be + investigated separately. + +2011-10-06 + + * src/: papi.h, papi_fwrappers.c: Add Fortran reference to doxygen + main page. + +2011-10-05 + + * src/: papi.c, papi_internal.c, perf_events.c: There has been some + ongoing speculation about what would happen if you enabled + Multiplexing and Overflow at the same time. + + It turns out (at least on perf_events) that if you have kernel + multiplexing, the results are what you expect. You get + overflows, but less than in the non-multiplexing case because the + overflow counter isn't being run all the time. + + The results for software multiplexing involved a segfault. This + is because in the software multiplexing case the primary EventSet + is a fiction; a set of shadow EventSets are created behind the + scene, and these are the ones used. Therefore when you enable + overflow, the overflow event is attempted to be enabled on the + fictitious main EventSet. 
There are no native events mapped for + it, so overflow tries to access native event array index "-1" + which causes bad things to happen. + + This change avoids the issue by catching the "-1" case and + failing accordingly. We should probably decide if we want to + catch the oflo/mpx combination earlier and outright ban it. + + I also went through a lot of the code involved adding comments, + as it was really hard following what was going on. This involved + the infamously dense "_papi_hwi_remap_event_position()" function + too. + + * src/papi.h: Moved cpu and inherit bits to end of structure for + compat across all 4.x lines. Found by Will Cohen. + + As it turns out, I ended up reviewing the CPU_ATTACH changes; I + had not done so before. This functionality actually belongs in + PAPI_set_granularity. A CPU is a natural unit of granularity of + counting, and that value was speced in papi.h a long time ago. + Right thing to do here is leave the current attach stuff but make + it work as part of set_granularity. + + Consider that a TODO for 4.3. + +2011-10-04 + + * doc/: Doxyfile, Doxyfile-everything: Enable macro expansion in + the doxygen preprocessor step. + + Doxygen was not creating docs for the fortran functions and I + believe it is because it was silently choking on our clever + preprocessor abuse; this fixes? that. However, it's worth taking + a critical eye to the generated pages again. + + * src/: papi.c, papi_fwrappers.c, papi_hl.c: make "* #include" into + "* \#include" so doxygen doesn't treat it as a command. + + * src/papi_fwrappers.c: Added all doxygen stubs to the PAPIF group. + +2011-10-03 + + * src/ctests/ipc.c: My previous "fix" for the array bounds issue in + ipc.c had multiple embarrassing bugs. + + Thanks to Will Cohen for noticing. Things should be better now. + + * src/: Rules.perfctr-pfm, Rules.pfm_pe: Additionally remove the + now extraneous papi_libpfm_preset definition from the other Rules + files too. 
+ + * src/: Makefile.inc, Rules.pfm4_pe: The change to make the preset + code generic accidentally ended up defining the build rules for + the file in duplicate places. This fixes that. + +2011-09-30 + + * src/: linux-common.c, utils/decode.c: Fix two unused variable + warnings. + + * src/ctests/second.c: We were allocating the "values" array but + never freeing it. + + * src/ctests/: sdsc2.c, sdsc4.c: The SDSC tests could walk off the + end of an array. + + * src/ctests/overflow_twoevents.c: We could potentially access + outside an array boundary in overflow_twoevents. + + * src/ctests/ipc.c: ipc was also abusing array boundaries. + + * src/ctests/flops.c: The flops.c ctest was abusing the notion of C + arrays, by writing INDEX*INDEX values to mresult[0][i], I suppose + "knowing" that this would fill in the whole array. Fix things to + use an additional iterator. + + * src/ctests/byte_profile.c: The coverity checker rightly points + out that the last argument to strncat should be buffersize-1. + + * src/ctests/: exeinfo.c, shlib.c: Coverity flagged that there were + some tests that had no effect. In particular there are tests that + the pointers are non-null. However, they are arrays rather than + pointers. This patch makes it clear that arrays are being used in + the code. + + Patch from Will Cohen at redhat + + * src/ctests/clockcore.c: This is a relatively minor patch that + ensures that all the allocated memory is initialized to zero + before it is used. Coverity might not be smart enough to + determine whether the test actually wrote into all the locations + because of the case statement. This is to make it easier for + coverity to determine that the memory has been initialized. + + Patch from Will Cohen at redhat. + + * src/multiplex.c: Coverity scan showed that MPX_cleanup() function + was blindly accessing a value through a pointer and then checking + to see that the pointer was null. This patch makes sure that the + pointer is checked before it is used. 
+ + Patch from Will Cohen at redhat. + + * src/ctests/: pthrtough.c, pthrtough2.c: Coverity found that the + sizeof argument for pthrtough2.c and pthrtough.c was using + sizeof(pthread *) rather than sizeof(pthread). This patch fixes + that problem. + + Patch from Will Cohen at redhat + + * src/papi_internal.c: This change moves the setting for default + domain to be enforced at eventset add time, rather than eventset + creation time. + + This fixes some problems seen when multiplexing. + + The patch was provided by Phil Mucci. + + * src/pmapi-ppc64.h: One more file that is no longer needed. + + * src/: configure, configure.in, perfctr.c, pmapi-ppc64_events.c, + ppc64_events.c: Clean up the now not-needed pmapi-ppc64_events.c + file. + + * src/: Makefile.inc, aix.c, aix.h, configure, configure.in, + papi_libpfm_presets.c: Finalize the merge of the preset code. + + * src/aix.c: Fix a missing include. + + * src/: aix.c, configure, configure.in: Move more code to its + proper place. + + * src/: aix.c, configure, configure.in, pmapi-ppc64.c, + pmapi-ppc64_events.c, ppc64_events.c: Move the + ppc64_setup_native_table() routines out of the preset code. + + This is complicated, as there are two very similar routines + setup_ppc64_native_table() used by AIX/pmapi and + ppc64_setup_native_table() used by perfctr + + These could probably be merged too, but this is definitely not + the time. + + * src/: aix.c, papi_libpfm_presets.c, pmapi-ppc64_events.c: move + pmapi_find_full_event to be _aix_ntv_name_to_code() as it + probably always should have been. + + * src/: papi_libpfm_presets.c, papi_setup_presets.h, + pmapi-ppc64_events.c: Make papi_libpfm_presets more generic by + calling _papi_hwi_native_name_to_code() rather than a + substrate-specific call. 
+ + * src/: aix.c, papi_libpfm_presets.c, pmapi-ppc64_events.c: I was + mainly doing this to aid debugging, but now the + papi_libpfm_presets.c file and pmapi-ppc64_events.c file are + close enough to being identical I might try to merge them. + +2011-09-29 + + * src/: papi_libpfm_presets.c, pmapi-ppc64_events.c, + ppc64_events.h: The files are almost the same now. + + * src/: papi_libpfm_presets.c, pmapi-ppc64_events.c: More making + these files the same, including some memory leak fixes that made + it to the former but not the latter. + + * src/: papi_libpfm_presets.c, pmapi-ppc64_events.c: Tracking down + problems on AIX can be a bit of a pain because + papi_libpfm_presets.c and pmapi-ppc64_events.c are almost (but + not quite) the same. This change makes the files more similar, + mostly by cleaning up whitespace and normalizing comments and + debugging statements between the two. + + * src/pmapi-ppc64_events.c: Ugh, obvious typo in that last commit. + + * src/pmapi-ppc64_events.c: In ppc64_setup_gps() the current code + sometimes walks off the end of the group array and trashes + unrelated memory. + + Until we work out the proper fix, this prints an error message + and stops the loop before memory is corrupted. + + * src/papi_data.h: No one seems to remember the last time this file + was used, so let's remove it. + +2011-09-28 + + * src/Makefile.inc: Remove the "u" option to the "ar" command that + links libpapi.a, as it was breaking the build on MIPS. + + This *shouldn't* break anything, but messing around with "ar" + options can be potentially dangerous. I'll double-check the + non-Linux builds. + + * src/libpfm4/lib/: Makefile, pfmlib_mips_priv.h, + events/intel_nhm_events.h, events/intel_wsm_events.h: Fix up the + "collisions" from the libpfm4 import + +2011-09-26 + + * src/Makefile.inc: We would like to use parallel make on packages + to speed things up. 
However, when this was tried with papi the + "make -j4" failed + (https://bugzilla.redhat.com/show_bug.cgi?id=740909). I took a + look through the code and found that some of the dependencies were + not quite right. Turns out that $(papiLIBS) is substituted during + the configure, but it isn't available for the actual make. + Attached is the patch that ensures that the $(LIBS) are built + before utils and tests. + + Patch from Will Cohen + + * src/run_tests.sh: Modify run_tests.sh so that you can set the + VALGRIND command externally via environment variable without + having to edit run_tests.sh itself. + + Also adds Date and cpuinfo information to the beginning of + run_tests.sh results. This can help when run_tests.sh output + is passed around when debugging a problem. + + Patch from Phil Mucci + + * src/: configure, configure.in: If we have no Fortran compiler + available, then our current build system tries to build the + Fortran examples with an empty compiler string which just + generates strange errors. + + This patch changes F77 to be "echo" which at least avoids the + errors. The proper fix is probably just not to build the Fortran + samples if no compiler is available. + + Patch from Phil Mucci + + * src/papi_libpfm4_events.c: The build on power6 was warning in a + DEBUG statement because sizeof() returns an int rather than a + long. So use a cast to avoid this. + + * src/perf_events.c: The move to use pid_t for pid values caused + warnings on a --with-debug build due to the lack of a way to + print a pid_t value without a cast. + + This fix adds the proper casts. + +2011-09-23 + + * src/papi_libpfm4_events.c: Rename the "perfmon_idx" structure + field to the more evocative "libpfm4_idx" value. + + Patch from Phil Mucci + + * src/ctests/all_native_events.c: Fix problem where we were passing + a pointer to an EventSet rather than the actual EventSet number + to PAPI_cleanup_eventset(). + + Also include some of the cleanups from Phil Mucci's MIPS tree. 
+ + * src/: perf_events.c, perf_events.h: Make the perf_event ctl + structure have more explicit data types. + + Patch from Philip Mucci + + * src/: cycle.h, linux-common.c, linux-context.h, linux-lock.h, + linux-timer.c, mb.h, papi.h: Add bare minimal MIPS74k support, + enough to compile. + + Patch from Philip Mucci + + * src/papi_events.csv: Add MIPS 74k pre-defined events + + Patch by Philip Mucci + +2011-09-22 + + * src/ctests/all_native_events.c: Heike's cleanup_eventset work + allows the calling of PAPI_cleanup_eventset with cuda, so + uncomment the eventset cleanup code in all_native_events. + + * src/papi.h: Update papi.h to properly detect if being built with + a C99 compiler. + + * src/papi_events.csv: Update PAPI_FP_INS event name on amd_fam14h + as it was changed in the most recent libpfm4 merge + + * src/libpfm4/: README, config.mk, docs/Makefile, + docs/man3/pfm_get_event_info.3, examples/Makefile, + examples/showevtinfo.c, include/Makefile, + include/perfmon/perf_event.h, lib/Makefile, lib/pfmlib_common.c, + lib/pfmlib_gen_mips64_priv.h, lib/pfmlib_mips.c, + lib/pfmlib_mips_74k.c, lib/pfmlib_mips_perf_event.c, + lib/pfmlib_mips_priv.h, lib/pfmlib_perf_event_pmu.c, + lib/pfmlib_priv.h, lib/events/intel_atom_events.h, + lib/events/intel_core_events.h, lib/events/intel_nhm_events.h, + lib/events/intel_snb_events.h, lib/events/intel_wsm_events.h: Fix + the "conflicts" from the libpfm4 git import + + * src/libpfm4/: docs/man3/libpfm_mips_74k.3, tests/validate_arm.c, + tests/validate_mips.c: Initial revision + +2011-09-21 + + * src/multiplex.c: Fix problem where we were freeing a + singly-linked list in a for loop, possibly free()ing the + allocation before dereferencing ->next + + Problem reported by coverity tool, via Will Cohen + + * src/utils/cost.c: Fixed uninitialized data problem in papi_cost + + Problem reported by coverity tool, via Will Cohen + + * src/papi_internal.c: Fix problem where we were copying around + chunks of memory that were not 
initialized yet. + + Problem reported by coverity tool, via Will Cohen + + * src/multiplex.c: Fix two cases where we were dereferencing a + pointer without checking for NULL. + + Problem reported by coverity tool, via Will Cohen + + * src/linux-memory.c: We were opening files but not properly + closing them if we returned early with an error condition. + + Problem reported by coverity tool, via Will Cohen + + * src/linux-common.c: The coverity tool noticed that we allocate + and populate a cpu node info structure, but we never pass any + info on this structure outside of the cpu detection routine, in + effect leaking the allocation. + + For now just comment out this code as it is not used by anyone. + + Problem reported by coverity tool, via Will Cohen + + * src/: papi.c, papi_libpfm3_events.c, perfctr-x86.c: The coverity + checker was reporting we forgot to fclose() /proc/cpuinfo in + papi.c + + The bigger question, is why were we unconditionally trying to + open /proc/cpuinfo in generic code in papi.c anyway? + + Turns out it was to set the event masks properly for itanium and + p4. + + The platform code sets CPU vendor and family for us though, so if + we just make the event mask code use those values then we don't + have to open cpuinfo. This also means that non-Linux users with + the misfortune of running on a P4 might actually work too. + + * src/: papi_internal.c, papi_libpfm_presets.c: In various places + we were using MAX_COUNTER_TERMS (defined by substrate) rather + than PAPI_MAX_COUNTER_TERMS (a papi predefined event define). + This could cause buffer overruns. + + This fixes things, though really we shouldn't have such similar + names for different defines. 
+ + Problem reported by coverity tool, via Will Cohen + + * src/multiplex.c: Avoid case where we could have been + dereferencing a NULL pointer in MPX_stop() + + Reported by coverity tool, via Will Cohen + + * src/papi.c: Fix problem where thread and cpu could be + dereferenced as NULL in PAPI_start() + + Reported by coverity tool, via Will Cohen + + * src/papi_events.csv: Update the AMD Family 14h (Bobcat) + pre-defined events. + + It turns out they are different enough from 10h that they need + their own category. + + In going through the Fam14h BKDG it turns out that Bobcat has a + really nice set of events available, especially for + Floating-Point/SSE but also memory bandwidth. + + With this change, all of the ctests pass on a Bobcat machine. + + * src/: configure, configure.in: Recent Ubuntu versions use the ld + flag --as-needed by default. + + This breaks the PAPI configure step for the libdl check, as the + --as-needed flag enforces the rule that libraries (in this case + -ldl) must come after the object files on the command line, not + before. + + The fix for this is easy, the libdl check was wrongly sticking + -ldl in LDFLAGS rather than in LIBS. Putting it in LIBS makes + things work as expected. + + You can see here: + http://www.gentoo.org/proj/en/qa/asneeded.xml + + For more info on this issue than you probably ever want to know. + +2011-09-19 + + * src/: ctests/Makefile, ftests/Makefile, utils/Makefile: When + building testlib dependencies from ctests/ ftests/ and utils/ + call $(MAKE) and not make, this should fix aix. 
+ +2011-09-14 + + * src/: aix.c, freebsd.c, linux-bgp.c, papi_vector.c, + perf_events.c, perfctr-ppc64.c, perfctr-x86.c, perfmon-ia64.c, + perfmon.c, solaris-niagara2.c, solaris-ultra.c, + components/acpi/linux-acpi.c, + components/coretemp/linux-coretemp.c, + components/coretemp_freebsd/coretemp_freebsd.c, + components/example/example.c, + components/infiniband/linux-infiniband.c, + components/lmsensors/linux-lmsensors.c, + components/lustre/linux-lustre.c, components/mx/linux-mx.c, + components/net/linux-net.c, win2k/substrate/win32.c, + win2k/substrate/winpmc-p3.c: Change initialization of function + pointer cleanup_eventset() from vec_int_dummy to vec_int_ok_dummy + so that it returns PAPI_OK by default. Roll back initialization + for every substrate. AGAIN, keep an eye on buildbot. + + * src/libpfm4/lib/: pfmlib_mips.c, pfmlib_mips_74k.c, + pfmlib_mips_perf_event.c, pfmlib_mips_priv.h, + events/mips_74k_events.h: Merged with HEAD, still passing all + tests + +2011-09-13 + + * src/papi_libpfm4_events.c: The libpfm4 code was doing a full call + to pfm_get_os_event_encoding() during every call to + update_control_state(). + + This is unnecessary, as we can call pfm_get_os_event_encoding() + once at event creation time and cache the results. There's no + need to call it each update_control_state(), as that is called + during PAPI_start() and thus relatively time critical. + + * src/run_tests.sh: Missed a $ + + * src/: run_tests.sh, components/example/tests/HelloWorld.c: Update + run_tests.sh to run component tests, and update the example test + to act more like a ctest. + + * src/components/example/example.c: Fix warnings generated by the + example component. 
+ + * src/: Makefile.inc, components/Makefile_comp_tests, + ctests/Makefile, ctests/do_loops.c, ctests/dummy.c, + ctests/papi_test.h, ctests/test_utils.c, ctests/test_utils.h, + ftests/Makefile, testlib/Makefile, testlib/do_loops.c, + testlib/dummy.c, testlib/papi_test.h, testlib/test_utils.c, + testlib/test_utils.h, utils/Makefile: ctests, ftests, utils, and + the component tests were all using some files in ctests. + + These weren't being built when --with-no-cpu-counters was + enabled, so the PAPI build was breaking when that was enabled as + well as a component. + + Move the shared files to their own directory, testlib. Then update + all the users to look in the right place. + + After this commit you might need to do a "cvs -d update" to make + sure you get the new subdirectory. + + * src/: configure, configure.in: When compiling with + --with-no-cpu-counters configure would report the platform as + linux-perfctr-x86. This changes it to report as + linux-no-counters + +2011-09-12 + + * src/: aix.c, freebsd.c, linux-bgp.c, perf_events.c, + perfctr-ppc64.c, perfctr-x86.c, perfmon-ia64.c, perfmon.c, + solaris-niagara2.c, solaris-ultra.c, + components/acpi/linux-acpi.c, + components/coretemp/linux-coretemp.c, + components/coretemp_freebsd/coretemp_freebsd.c, + components/example/example.c, + components/infiniband/linux-infiniband.c, + components/lmsensors/linux-lmsensors.c, + components/lustre/linux-lustre.c, components/mx/linux-mx.c, + components/net/linux-net.c, win2k/substrate/win32.c, + win2k/substrate/winpmc-p3.c: Initialize new function pointer + cleanup_eventset() for every substrate. Keep an eye on buildbot. + + * src/components/cuda/: linux-cuda.c, linux-cuda.h: Cannot override + void* definitions from PAPI framework layer (e.g. + hwd_control_state_t) with typedefs to conform to PAPI Component + layer code if this technique has already been used in another + substrate (e.g. perfctr-x86). Or short: #undef and typedef can't + be done twice. 
+ + * src/perf_events.c: Fix bug caused by forgetting to drop the + stream name when converting a fprintf() into a SUBDBG() + + * src/papi_libpfm_presets.c: Patch from William Cohen fixing a + potential problem found by a static analysis tool where we could + possibly pass a NULL pointer to free_notes(). + + * src/papi_libpfm_presets.c: Some memory leak fixes made to libpfm3 + papi_pfm_events.c by Robert Richter were lost when the + libpfm4/libpfm4 presets merge was done. + + This re-applies these fixes. + +2011-09-10 + + * src/run_tests.sh: Cleaned up old comment regarding CUDA pre-4.0 + when it was not possible to access a GPU from multiple CPU + threads. + + * src/: papi.c, papi_protos.h, papi_vector.c, papi_vector.h, + components/README, components/cuda/linux-cuda.c, + components/cuda/linux-cuda.h: Deleted function pointer + destroy_eventset from the PAPI vector table, and added + cleanup_eventset instead. PAPI_destroy_eventset() requires an + empty EventSet. Hence, usually PAPI_cleanup_eventset() is called + before PAPI_destroy_eventset(); which also sets the CompIdx to + -1. This means, PAPI_destroy_eventset() won't have any knowledge + about components. However, in order to disable CUDA eventGroups + and to free perfmon hardware on the GPU, knowledge about the CUDA + component index is required. Hence, I replaced + CUDA_destroy_eventset() with CUDA_cleanup_eventset() in the CUDA + component. NOTE: Please make sure you call + PAPI_cleanup_eventset() before calling PAPI_shutdown(). + +2011-09-09 + + * src/: papi_protos.h, papi_vector.c, papi_vector.h, + components/cuda/linux-cuda.c, components/cuda/linux-cuda.h: CUDA + component is now thread-safe. Starting in CUDA 4.0, multiple CPU + threads can access the same CUDA context. This is a much easier + programming model than pre-4.0 as threads - using the same CUDA + context - can share memory, data, etc. 
Note, it's possible to + create a different CUDA context for each thread, but then we are + likely running into a limitation that only one context can be + profiled at a time. + +2011-09-07 + + * src/ctests/: do_loops.c, test_utils.c: Apply fixes to problems + noticed by a static analysis tool. + + Provided by William Cohen at RedHat + + * src/papi_events.csv: Update SandyBridge preset events. + + These were provided by Michel Brown at Bull + + * src/libpfm4/lib/: pfmlib_gen_mips64.c, pfmlib_mips.c, + pfmlib_mips_74k.c, pfmlib_mips_perf_event.c, pfmlib_mips_priv.h, + events/gen_mips64_events.h, events/mips_74k_events.h: MIPS 74K + little endian perf event support, requires 3.0.3+ kernel + +2011-09-06 + + * src/perf_events.c: The warning I had print on nmi_watchdog being + found was a bit much, make it a SUBDBG() call instead. + + I do wish there were a way to notify the user more visibly, + because losing a counter (when you might only have 4 total to + begin with) is a big deal, and most Linux vendors are starting to + ship kernels with the nmi_watchdog enabled. + + * src/: linux-common.c, linux-common.h, perf_events.c: On newer + Linux kernels (2.6.34+) the nmi_watchdog counter can steal one + of the counters, reducing by one the total available. + + There's a bug in Linux where if you try to use the full number of + counters on such a system with a group leader, the + sys_perf_open() call will succeed only to fail at read time. + (instead of the proper error code at open time). + + This patch attempts to work around this issue by detecting if a + watchdog timer is being used, and in that case re-use the + existing KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN bugfix code. 
+ + * src/papi_events.csv: We were missing a proper libpfm4 interlagos + CPU name in the papi_events.csv file + +2011-09-02 + + * src/libpfm4/: include/perfmon/perf_event.h, lib/Makefile, + lib/pfmlib_intel_nhm_unc.c, lib/pfmlib_intel_x86.c, + lib/pfmlib_intel_x86_priv.h, lib/pfmlib_priv.h, + lib/events/amd64_events_fam10h.h, lib/events/amd64_events_k7.h, + lib/events/amd64_events_k8.h, lib/events/intel_atom_events.h, + lib/events/intel_core_events.h, + lib/events/intel_coreduo_events.h, lib/events/intel_nhm_events.h, + lib/events/intel_nhm_unc_events.h, lib/events/intel_p6_events.h, + lib/events/intel_snb_events.h, lib/events/intel_wsm_events.h, + lib/events/intel_wsm_unc_events.h, + lib/events/intel_x86_arch_events.h: Fix "conflicts" from the + libpfm4 import + + * src/papi_libpfm4_events.c: Explicitly set num_native_events to + zero at init time. + + Somehow the value was surviving fork/exec and making the + fork/exec test cases fail on a recent Debian system. + + * src/perf_events.c: Set FD_CLOEXEC on the overflow signal handler + fd. + + Otherwise if we exec() with overflow enabled, the exec'd process + will quickly die due to lack of signal handler. + + This patch is needed due to a change in behavior in Linux 3.0. + + Mark Krentel first noticed this problem. + + * src/: Rules.perfctr-pfm, Rules.pfm, Rules.pfm4_pe, Rules.pfm_pe: + Remove the "unexport CFLAGS" lines from the Rules files. + + * src/: multiplex.c, papi_internal.c, utils/component.c: Fix a few + warnings reported by gcc-4.6 + + * src/: configure, configure.in: Override auto-detection of + substrate if the user specifies what they want to build with. + This allows building perfctr and perfmon2 PAPI on systems + auto-detected as having perf_event support. + + * src/: configure, configure.in: Add a "--with-libpfm3" argument to + configure that lets us specify libpfm3 for testing purposes. + + * src/solaris-niagara2.c: Fix solaris niagara2 build problems + reported by tigrage on the PAPI forum. 
+ +2011-08-30 + + * src/configure: Regen + +2011-08-29 + + * src/configure.in: Check for a requested interface to tweak build + flags + + * src/: configure, configure.in: Last bit for cross compiling... + + * src/: configure, configure.in: Better double quotes + + * src/: configure, configure.in: There can be only 1. (choice of + perfctr, perfmon or perf events) + + * src/: configure, configure.in: Further refinement of the + combinations of --with-perfctr --with-perfmon and + --with-perf-events + + True autotools cross not yet supported until we move to automake. + + I did trick it into doing a cross compile with... # ARCH=mips + CC=scgcc ./configure --with-arch=mips + --host=mips64el-gentoo-linux-gnu- --with-ffsll --with-libpfm4 --with-perf-events --with-virtualtimer=times + --with-walltimer=gettimeofday --with-tls=__thread --with-CPU=mips + # cross compiling should work differently... + + Wow, do I hate specifying mips in 3 places... + + * src/: config.h.in, configure, configure.in: Some fixes for cross + compiling and not including x86_cache_info.c when not ensured an + x86. + + * src/Makefile.inc: Surround component tests and cleanup recipes + with a conditional, the version of sh that our aix machine has + does not handle for i in {Empty set}; + + treating it as a syntax error. + + NOTE: This requires gnu make, my shell-foo couldn't make sh + happy, so for now gnu conditionals! + + * ChangeLogP414.txt, RELEASENOTES.txt: Update Release Notes and add + ChangeLog for PAPI 4.1.4. + + * src/configure: Rebuild from configure.in with version number bump + to 4.1.4 in advance of pending internal vendor release for Cray. + diff --git a/ChangeLogP421.txt b/ChangeLogP421.txt new file mode 100644 index 0000000..320429f --- /dev/null +++ b/ChangeLogP421.txt @@ -0,0 +1,1103 @@ +2012-02-13 + + * src/components/net/linux-net.c: Repairing more coverity warnings. + +2012-02-11 + + * src/windows-common.c: Missed an instance of CPUs yesterday. 
+ + * src/: papi_internal.c, threads.c: This change fixes two race + conditions that are probably the cause of the pthrtough + double-free error. + + When freeing a thread, we remove and free all eventsets belonging + to that thread. This could race with the thread itself removing + the eventset, causing some ESI fields to be freed twice. + + The problem was found by using the Valgrind 3.8 Helgrind tool + + valgrind --tool=helgrind --free-is-write=yes ctests/pthrtough + + In order for Helgrind to work, I had to temporarily modify PAPI + to use POSIX pthread mutexes for locking. Is there any reason we + don't use these all the time? + +2012-02-10 + + * src/utils/: avail.c, component.c, event_chooser.c, + native_avail.c: Fix one more case of "CPU's" in the print header + code. + + Also remove the extraneous The following correspond to fields + in the PAPI_event_info_t structure. message + + * src/: testlib/papi_test.h, testlib/test_utils.c, + ctests/all_native_events.c, ctests/calibrate.c, + ctests/code2name.c, ctests/hwinfo.c: Fix one more case of "CPU's" + in the print header code. + + Also remove the extraneous The following correspond to fields + in the PAPI_event_info_t structure. message + + * src/buildbot_configure_with_components.sh: take infiniband out of + the buildbot test. + + * src/: x86_cache_info.c, components/coretemp/linux-coretemp.c, + components/lmsensors/linux-lmsensors.c, + components/lustre/linux-lustre.c, components/net/linux-net.c, + utils/event_chooser.c: Fix coverity errors reported by Will + Cohen. + + * src/: aix.c, any-proc-null.c, linux-common.c, papi.c, papi.h, + papivi.h, solaris-niagara2.c, solaris-ultra.c, + ctests/clockres_pthreads.c: Address Redhat bug 785975. The + plural of CPU appears to be CPUs + + * src/Makefile.inc: Patch to cleanup dependencies, allowing for + parallel makes. 
Patch due to Will Cohen from redhat + +2012-02-09 + + * src/buildbot_configure_with_components.sh: Add infiniband and mx + component to buildbot component tests. + + * src/components/net/tests/: net_values_by_code.c, + net_values_by_name.c: Apply patch suggested by Will Cohen to + check for system return values. + + * src/components/lmsensors/linux-lmsensors.h: Added missing string + header + +2012-02-08 + + * man/... : update man pages one more time for 4.2.1 + release + + * release_procedure.txt: Make sure generated html has papi group + id. + +2012-02-07 + + * src/multiplex.c: Fix the @file matching multiple files warning. + + * src/components/README: Cleanup doxygen errors. + + * doc/Doxyfile-html: Typo introduced by the last commit. + + * doc/Doxyfile-html: Exclude linux-bgp.c from doxygen. + + * doc/Doxyfile-html: Make sure the component README file gets + included in doxygen. + + * src/components/coretemp_freebsd/coretemp_freebsd.c: Cleanup + doxygen warnings in freebsd coretemp component. + + * src/papi.h: Cleanup some doxygen warnings related to the + groupings. + + * src/components/example/example.c: fix doxygen warning in the + example component + + * doc/Doxyfile-html: Remove some cruft from doxygen config file. + + This addresses the warning about dot not found at /sw/bin/dot . + + * src/components/: infiniband/linux-infiniband.c, + infiniband/linux-infiniband.h, cuda/linux-cuda.c, + cuda/linux-cuda.h: Cleaned up some doxygen issues + + * src/components/lmsensors/linux-lmsensors.c: Removed long + forgotten debug outputs + + * src/papi_libpfm4_events.c: Fix minor doxygen typos. + + * src/components/vmware/vmware.c: Add params for doxygen + + * man/... : update man pages + +2012-02-06 + + * doc/Doxyfile-man1: Fix a typo in a doxygen config file. 
+ +2012-02-03 + + * release_procedure.txt, doc/Doxyfile, doc/Doxyfile-everything, + doc/Doxyfile-html, doc/Doxyfile.utils, doc/Doxyfile-man1, + doc/Doxyfile-man3, doc/Makefile, doc/doxygen_procedure.txt: + Rework the doxygen configuration files. + + * RELEASENOTES.txt: Update for the impending release. + + * ChangeLogP421.txt, RELEASENOTES.txt: Updates for the impending + release. + +2012-02-02 + + * src/: papi.c, papi.h: Minor tweaks for doxygen errors + +2012-02-01 + + * src/components/lmsensors/: Rules.lmsensors, configure.in: Fixed + configure error message and rules link error for shared object + linking. Thanks Will Cohen. + + * src/components/appio/Rules.appio: Correct pathing + + * src/ctests/api.c: One minor tiny fix to check for PAPI_ENOEVNT + when testing PAPI_flops. If PAPI_FP_OPS does not exist on the + processor (like many of em), then this test fails. + +2012-01-31 + + * src/ctests/multiattach.c: Increase acceptance criteria for + cycles. + + * src/Makefile.in, src/configure, src/configure.in, src/papi.h, + doc/Doxyfile, doc/Doxyfile-everything, doc/Doxyfile.utils, + papi.spec: Update version number to 4.2.1 in preparation for + release. + + * src/ctests/prof_utils.c: Correct a warning on 32bit builds about + casting caddr_t to (long long) + + Specifically: prof_utils.c:234: warning: cast from pointer to + integer of different size prof_utils.c:248: warning: cast from + pointer to integer of different size prof_utils.c:262: warning: + cast from pointer to integer of different size + + We first cast to unsigned long and then on to long long. ( This + may be overkill, but it's for a printf format string ) + +2012-01-30 + + * release_procedure.txt: Add the correct path for doxygen on ICL + machines. + + * src/papi_events.csv: Modify Intel Sandybridge PAPI_FP_OPS and + PAPI_FP_INS events to not count x87 fp instructions. + + The problem is that the current predefines were made by adding 5 + events. 
With the NMI watchdog stealing an event and/or + hyperthreading reducing the number of available counters by half, + we just couldn't fit. + + This now raises the potential for people using x87-compiled + floating point on Sandybridge and getting 0 FP_OPS. This is only + likely if running a 32-bit kernel and *not* compiling your code + with -msse. + + A long-term solution might be trying to find a better set of FP + predefines for sandybridge. + + * src/components/: lustre/linux-lustre.c, mx/linux-mx.c: Some + really minor cleanups to the lustre and mx components. + +2012-01-28 + + * src/components/example/: example.c, tests/example_basic.c: Update + example component + + Cleans up code, adds some more documentation, adds counter write + support. + +2012-01-27 + + * src/papi_user_events.c: Minor cleanups for user events. + + * src/libpfm4/: README, include/perfmon/pfmlib.h, lib/Makefile, + lib/pfmlib_amd64.c, lib/pfmlib_common.c, lib/pfmlib_priv.h: Fix + "conflicts" in git import of libpfm4. + + * src/libpfm4/lib/: pfmlib_amd64_fam11h.c, + events/amd64_events_fam11h.h: Initial revision + +2012-01-26 + + * src/papi_fwrappers.c: Escape the include directives in the + documentation. + + (Cleans up doxygen ) + + * src/components/README: Adding vmware to component README + + * src/components/vmware/: Makefile.vmware.in, + PAPI-VMwareComponentDocument.pdf, Rules.vmware, + VMwareComponentDocument.txt, configure, configure.in, vmware.c, + vmware.h: merge vmware branch to head + + * src/perf_events.c: Set fast_counter_read back to 0 on x86/x86_64 + perf_events, as currently rdpmc counter access is not supported. + + There are patches floating around that enable this (although + performance is still a long way from perfctr) but they will not + likely be merged for a while now, and the perf_events substrate + will require a lot of extra code to support it once it does make + it into a shipping kernel. 
+ + * src/buildbot_configure_with_components.sh: Remove acpi from the + buildbot configure script. + +2012-01-25 + + * src/components/mx/: Makefile.mx.in, Rules.mx, configure, + configure.in, linux-mx.c, linux-mx.h, tests/Makefile, + tests/mx_basic.c, tests/mx_elapsed.c, utils/fake_mx_counters.c, + utils/sample_output: Re-write of the MX component + + + Add tests + Modernize code + Remove the need to run ./configure + in the mx directory + Add fake mx_counters program that lets you + test component on machine without myrinet installed + + * src/components/: README, acpi/Rules.acpi, + acpi/linux-acpi-memory.c, acpi/linux-acpi.c, acpi/linux-acpi.h: + Remove the ACPI component. + + It was one of the oldest components and needed a lot of cleanup + work, and it turns out that the main useful event it provided + (temperature) isn't available on modern machines/kernels + (coretemp should be used instead). + +2012-01-23 + + * src/perf_events.c: Restored Phil's changes that I inadvertently + clobbered with my last commit :( + + * src/perf_events.c: Remove a warning about an uninitialized + variable. + + * src/utils/: component.c, event_info.c, native_avail.c: Update the + Doxygen comments on these utilities to have the command line + options listed in a list like the other utils. + + * src/perf_events.c: More improvements to the read path for + multiplexed counters. Now the case for bad kernel behavior is + built in, and is not required with a #define. + + Basically, there are situations when either enabled or running is + zero but not both. This could result in a divide by 0 in the + worst case, as was observed by Tushar Mohan in papiex. You could + trigger it by doing a read immediately after doing a start with + perf events and use a FORMAT_SCALE argument. + + Now the logic goes, assuming mpxing. 
+ + 1) if (running=enabled) return raw counter 2) if (running + && enabled) scale counter by ratio 3) else warn in debug mode + return raw counter + + Apparently we need a test case that does a read immediately after + a start. That's a hole. + + Tested on brutus, core2 2.6.36 + + Here's the original report. ------------------- Model string and + code : Intel(R) Pentium(R) M processor 1600MHz (9) Linux + thinkpad 2.6.38-02063808-generic #201106040910 SMP Sat Jun 4 + 10:51:30 UTC 2011 i686 GNU/Linux PAPI Version: 4.2.0.0 + + I think I ran into a bug similar to what we ran with MIPS. + + With the latest PAPI (from CVS), on an x86 (32-bit machine), when + using papiex with multiplex with anything more than two events, I + get a floating point exception in PAPI during the PAPI_read call. + On enabling debugging in the substrate, I think the problem is + the same (namely a division by zero, because some event had a + zero time of running): + + libpapiex debug: 24625,0x0,papiex_thread_init_routine Starting + counters with PAPI_start + SUBSTRATE:perf_events.c:pe_enable_counters:953:24625 + ioctl(enable): ctx: 0x96a4bc8, fd: 3 + SUBSTRATE:perf_events.c:pe_enable_counters:953:24625 + ioctl(enable): ctx: 0x96a4bc8, fd: 5 libpapiex debug: + 24625,0x0,papiex_thread_init_routine Calling PAPI_lock before + critical section libpapiex debug: + 24625,0x0,papiex_thread_init_routine Released PAPI lock libpapiex + debug: 24625,0x0,papiex_start START POINT 0 LABEL libpapiex + debug: 24625,0x0,papiex_start Reading counters (PAPI_read) to get + initial counts SUBSTRATE:perf_events.c:_papi_pe_read:1147:24625 + read: fd: 3, tid: 0, cpu: -1, ret: 56 + SUBSTRATE:perf_events.c:_papi_pe_read:1148:24625 read: 2 1341021 + 1341021 SUBSTRATE:perf_events.c:_papi_pe_read:1181:24625 + (papi_pe_buffer[3] 33405 * tot_time_enabled 1341021) / + tot_time_running 1341021 + SUBSTRATE:perf_events.c:_papi_pe_read:1181:24625 + (papi_pe_buffer[5] 44552 * tot_time_enabled 1341021) / + tot_time_running 1341021 + 
SUBSTRATE:perf_events.c:_papi_pe_read:1147:24625 read: fd: 5, + tid: 0, cpu: -1, ret: 40 + SUBSTRATE:perf_events.c:_papi_pe_read:1148:24625 read: 1 214777 0 + SUBSTRATE:perf_events.c:_papi_pe_read:1181:24625 + (papi_pe_buffer[3] 0 * tot_time_enabled 214777) / + tot_time_running 0 + + The above debug log is for three events: PAPI_TOT_CYC, + PAPI_TOT_INS and PAPI_L1_DCM. Multiplexing works with two events. + Adding the third (any event), gives this error. Basically, the + floating point exception kills the program, and PAPI_read never + returns. + + I think I know why papiex always hits this bug: It's because + right after starting the counters with PAPI_start, papiex does a + PAPI_read to store the initial values of the counters in a tmp + variable. These are then subtracted from the final counter + values. Should we put a deliberate delay? Of course, the real bug + should be fixed in PAPI. ---- + + * src/utils/event_info.c: Major re-write of the papi_xml_event_info + program. + Remove event code numbers, as they are not stable + run-to-run + Add some Doxygen comments + Remove some wrong + assumptions that could cause potential buffer overflows + Improve + usage information + +2012-01-20 + + * src/components/lustre/: Rules.lustre, linux-lustre.c, + linux-lustre.h, + fake_proc/fs/lustre/llite/hpcdata-ffff81022a732800/read_ahead_stats, + fake_proc/fs/lustre/llite/hpcdata-ffff81022a732800/stats, + tests/Makefile, tests/lustre_basic.c: Finish the re-write of the + lustre component. + + It would be nice if someone with access to a machine with a + lustre filesystem could test this for us. + + * src/: papi_internal.c, components/lustre/linux-lustre.c: Update + the component initialization code so that it can handle a PAPI + ERROR return gracefully. Previously there was no way to indicate + initialization failure besides just setting num_native_events to + 0. 
+ +2012-01-19 + + * src/components/lustre/: linux-lustre.c, linux-lustre.h: First + pass at cleaning up the lustre component. + + It should now properly report no events when no lustre + filesystems are available. + +2012-01-11 + + * src/papi_events.csv: Add AMD fam12h support to the events file. + Right now it is just an alias to the similar fam10h event list; + this can be split out if necessary once we find a tester with the + hardware. + + * src/libpfm4/: README, docs/man3/pfm_get_event_next.3, + docs/man3/pfm_get_pmu_info.3, include/perfmon/perf_event.h, + include/perfmon/pfmlib.h, lib/Makefile, lib/pfmlib_amd64.c, + lib/pfmlib_amd64_priv.h, lib/pfmlib_common.c, + lib/pfmlib_perf_event.c, lib/pfmlib_priv.h, + lib/events/intel_coreduo_events.h, lib/events/perf_events.h, + perf_examples/Makefile, perf_examples/perf_util.c, + perf_examples/perf_util.h, perf_examples/self.c, + perf_examples/task_smpl.c, perf_examples/x86/bts_smpl.c: Fix + "merge" conflicts with libpfm4 merge. + + * src/libpfm4/lib/: pfmlib_amd64_fam12h.c, + events/amd64_events_fam12h.h: Initial revision + + * src/papi_libpfm4_events.c: Properly use the pfm_get_event_next() + iterator to find next event. + + Without this, on AMD Fam10h some events are missed. + + Some events are still missed due to libpfm4 bug, this will be + fixed once I update the libpfm4 tree included with PAPI. + + Note, enumeration fixes like this often break things, so please + test if possible. + + * src/papi_events.csv: Update the coreduo (not core2) events. Most + notably the FP events were wrong. + + This, along with a forthcoming libpfm4 update, make all the + CTESTS pass on an old Yonah coreduo laptop I have. + +2012-01-05 + + * src/ctests/api.c: Make the api test actually test PAPI_flops() as + it claims to do, rather than PAPI_flips(). 
+ + Patch thanks to: Emilio De Camargo Francesquini + + * src/papi_hl.c: Fix some copy-and-paste documentation remnants in + the papi_hl.c file, mostly where it said FLIPS where it meant + FLOPS. + +2012-01-04 + + * src/utils/native_avail.c: Update papi_native_avail to *not* print + the event codes, as these are not guaranteed to be stable from + run to run. + + Also fix up the formatting and print some component info too. + + Please try and let me know if you don't like the new output. + + * src/: configure, configure.in: Respect a FORCED option in + configure. + +2011-12-22 + + * src/Rules.pfm4_pe: Remove perfmon.h from MISCHDRS. + +2011-12-20 + + * src/: Rules.perfctr, Rules.perfctr-pfm, Rules.pfm, Rules.pfm4_pe, + Rules.pfm_pe, linux-lock.h, mb.h: Merry Christmas ARM users. + + This patch fixes the SMP ARM issues reported by Harald Servat. + Also, adds proper header dependency checking in the Rules files. + People, please when you add headers, please add them to the + dependency lines so everything gets rebuilt properly. + + New implementation of SMP locks are very pedantic, that is, they + are not the fastest, but they do use atomics and avoid kernel + intervention. + + Passed on our 2 core ARM v7. All pthreads tests now pass, except + the ones that also fail in the single processor case usually due + to a missing event. 
+ + Samples: + + mucci@panda:~/papi.head/src$ uname -a Linux panda 3.0.0 #2 SMP + Fri Jul 29 16:23:54 EDT 2011 armv7l GNU/Linux + + mucci@panda:~/papi.head/src$ hostname panda + + mucci@panda:~/papi.head/src$ cat /proc/cpuinfo Processor: ARMv7 + Processor rev 2 (v7l) processor: 0 BogoMIPS: 2007.19 + + processor: 1 BogoMIPS: 1965.18 + + Features: swp half thumb fastmult vfp edsp thumbee neon vfpv3 CPU + implementer: 0x41 CPU architecture: 7 CPU variant: 0x1 CPU part: + 0xc09 CPU revision: 2 + + Hardware: OMAP4 Panda board Revision: 0020 Serial: + 0000000000000000 + + mucci@panda:~/papi.head/src$ ./ctests/locks_pthreads Creating 2 + threads 10000 iterations took 13489 us. Running 44480 iterations + Expected: 88960 Received: 88960 locks_pthreads.c + PASSED + + mucci@panda:~/papi.head/src$ ./ctests/pthrtough Creating 2 + threads for 1000 iterations each of: register create_eventset + destroy_eventset unregister pthrtough.c + PASSED + + mucci@panda:~/papi.head/src$ ./ctests/pthrtough2 Creating 2000 + threads for 1 iterations each of: register create_eventset + destroy_eventset unregister Failed to create thread: 238 + Continuing test with 237 threads. 
pthrtough2.c + PASSED + + mucci@panda:~/papi.head/src$ ./ctests/thrspecific Thread + 0x40ae1470 started, specific data is at 0xbea9c6d4 Thread + 0x40021000 started, specific data is at 0xbea9c6c4 Thread + 0x4244d470 started, specific data is at 0xbea9c6c8 Thread + 0x4138d470 started, specific data is at 0xbea9c6d0 Thread + 0x41c4d470 started, specific data is at 0xbea9c6cc Entry 0, + Thread 0x41c4d470, Data Pointer 0xbea9c6cc, Value 4000000 Entry + 1, Thread 0x40021000, Data Pointer 0xbea9c6c4, Value 500000 Entry + 2, Thread 0x40ae1470, Data Pointer 0xbea9c6d4, Value 1000000 + Entry 3, Thread 0x4244d470, Data Pointer 0xbea9c6c8, Value + 8000000 Entry 4, Thread 0x4138d470, Data Pointer 0xbea9c6d0, + Value 2000000 thrspecific.c PASSED + + mucci@panda:~/papi.head/src$ ./ctests/krentel_pthreads + program_time = 6, threshold = 20000000, num_threads = 3 + + launched timer in thread 0 launched timer in thread 1 launched + timer in thread 3 launched timer in thread 2 [1] time = 1, count + = 7, iter = 5, rate = 1400.0/Kiter [2] time = 1, count = 7, iter + = 5, rate = 1400.0/Kiter [0] time = 1, count = 7, iter = 5, rate + = 1400.0/Kiter [3] time = 1, count = 7, iter = 5, rate = + 1400.0/Kiter [1] time = 2, count = 25, iter = 16, rate = + 1562.5/Kiter [0] time = 2, count = 25, iter = 16, rate = + 1562.5/Kiter [3] time = 2, count = 25, iter = 16, rate = + 1562.5/Kiter [2] time = 2, count = 25, iter = 16, rate = + 1562.5/Kiter [1] time = 3, count = 25, iter = 16, rate = + 1562.5/Kiter [2] time = 3, count = 25, iter = 16, rate = + 1562.5/Kiter [0] time = 3, count = 25, iter = 16, rate = + 1562.5/Kiter [3] time = 3, count = 25, iter = 16, rate = + 1562.5/Kiter [1] time = 4, count = 25, iter = 16, rate = + 1562.5/Kiter [0] time = 4, count = 25, iter = 16, rate = + 1562.5/Kiter [3] time = 4, count = 25, iter = 16, rate = + 1562.5/Kiter [2] time = 4, count = 25, iter = 16, rate = + 1562.5/Kiter [3] time = 5, count = 25, iter = 16, rate = + 1562.5/Kiter [0] time = 5, count = 25, iter 
= 16, rate = + 1562.5/Kiter [2] time = 5, count = 25, iter = 16, rate = + 1562.5/Kiter [1] time = 5, count = 26, iter = 17, rate = + 1529.4/Kiter [2] time = 6, count = 25, iter = 16, rate = + 1562.5/Kiter [0] time = 6, count = 27, iter = 17, rate = + 1588.2/Kiter done krentel_pthreads.c PASSED + +2011-12-15 + + * src/papi_libpfm_presets.c: Change PAPI_PERFMON_EVENT_FILE + environment variable name to PAPI_CSV_EVENT_FILE since it's not + just for perfmon anymore. + + * src/: configure, configure.in: Open mouth, insert foot; fix + perfctr configure by not testing a library we have not built yet. + +2011-12-14 + + * src/: configure, configure.in: Missed one more place where we + tested perfctr != "no" + + * src/: configure, configure.in: Fix a typo in the perfctr section; + it was causing a machine to default to perfctr when it had no + performance interface. ( a centos vm image with a 2.6.18 kernel + ) + + Also checks that we actually have perfctr if we specify + --with-perfctr. + +2011-12-08 + + * src/components/cuda/: Makefile.cuda.in, Rules.cuda, configure, + configure.in, linux-cuda.c, linux-cuda.h: Added auto-detection of + CUDA version to PAPI CUDA Component. Reason is, the interface has + changed between CUDA/CUPTI 4.0 and 4.1. PAPI now supports both + CUDA versions without any exposure to the users. Configure step + is unchanged and no additional knowledge of which CUDA version is + installed is required. + +2011-12-03 + + * src/components/appio/: CHANGES, README, Rules.appio, appio.c, + appio.h, tests/Makefile, tests/appio_list_events.c, + tests/appio_values_by_code.c, tests/appio_values_by_name.c: [no + log message] + +2011-11-25 + + * src/linux-timer.c: Fix compilation warning if you specify + --with-walltime=gettimeofday + + * src/linux-timer.c: Fix the build on Linux systems using mmtimer + + * src/linux-common.c: Update the linux MHz detection code to use + bogoMIPS when there is no MHz field available in /proc/cpuinfo. 
+ + This gives roughly correct MHz on ARM, and the MIPS workaround + should also still work. + +2011-11-23 + + * src/components/net/linux-net.c: Fix compile errors in a debug + message. (pathname didn't exist but we are working on + NET_PROC_FILE) + +2011-11-22 + + * src/components/net/: linux-net.c, tests/net_values_by_code.c, + tests/net_values_by_name.c: Change the ping command in the net + tests to not use &> to redirect to NULL. + + This would work on a system with csh, but on systems with a bash + shell this runs ping in the background instead, so the test + finishes before ping can generate any packets. + + * src/components/net/linux-net.c: Fix slight bug in the net + component, where a memset() had the wrong arguments. This made + for weird results in the case where we start/stop quickly enough + that we return the initial data. + + * src/components/net/: CHANGES, Makefile.net.in, README, Rules.net, + configure, configure.in, linux-net.c, linux-net.h, + tests/Makefile, tests/net_list_events.c, + tests/net_values_by_code.c, tests/net_values_by_name.c: Replace + net component with updated version written by Jose Pedro + Oliveira + + * Dynamically detects the network interfaces + (i.e. the ones listed in /proc/net/dev) + + * No longer needs to fork/exec the external ifconfig command and + parse its output. It now reads the Linux kernel network + statistics directly from /proc/net/dev. + + * Each network interface now has 16 events instead of 13 + (all counters in /proc/net/dev). + + * Adds support for PAPI_event_name_to_code() + + * Adds a couple of small tests/examples + +2011-11-16 + + * doc/Doxyfile-everything: Fix the exclude libpfm/perfctr config. + +2011-11-10 + + * src/perf_events.c: Only scale when running != enabled. + + Now verified on ig, brutus and the malta + + * src/perf_events.c: Further tuneups for mpx'ing. + + Previous commit broke systems with valid return values from + perf_events for running & enabled. 
My attempt at scaling in long + long world caused an overflow which led to a negative number when + passed up the chain. + + Also consolidated types... best way to avoid this stuff is to + start as the type you are ending as. + + Now we use some better integer scaling...guaranteed within +-0.5% + of the actual scaled value of enabled / running. + + New results on brutus: multiplex1 + + case1: Does PAPI_multiplex_init() not break regular operation? + Added PAPI_TOT_CYC Added PAPI_FP_INS case1: PAPI_TOT_CYC + PAPI_FP_INS case1: 2739865106 600002876 + + case2: Does setmpx/add work? Added PAPI_TOT_CYC Added PAPI_FP_INS + case2: PAPI_TOT_CYC PAPI_FP_INS case2: 2739678237 + 600002258 + + case3: Does add/setmpx work? Added PAPI_TOT_CYC Added PAPI_FP_INS + case3: PAPI_TOT_CYC PAPI_FP_INS case3: 2739847832 + 600002298 + + case4: Does add/setmpx/add work? Added PAPI_TOT_CYC Added + PAPI_FP_INS case4: PAPI_TOT_CYC PAPI_FP_INS case4: + 2737832980 600013404 + + case5: Does setmpx/add/add/start/read work? Added PAPI_TOT_CYC + Added PAPI_FP_INS read @start counter[0]: 7106 read @stop + counter[0]: 2740387017 difference counter[0]: 2740379911 read + @start counter[1]: 0 read @stop counter[1]: 600017169 difference + counter[1]: 600017169 multiplex1.c + PASSED + +2011-11-09 + + * src/components/cuda/linux-cuda.c: For the CUDA Component, + PAPI_read() now accumulates event values. This has to be + explicitly done in PAPI because CUPTI automatically resets all + counter values to 0 after a read. (PAPI_start()/stop() continues + to reset the values to 0) + + * src/perf_events.c: Last of the multiplex fixes to perf events. + The root of all evil was this: + + counts[i] = ( uint64_t ) + ( ( double ) buffer[count_idx] * ( double ) + + buffer[get_total_time_enabled_idx( )] / + ( double ) + + buffer[get_total_time_running_idx( )] ) ; In addition to + improper casting to uints... (papi returns int64s), using + floating point arith is a no-no. Plus this resulted in divide by + zeros... 
+ + Before: + + SUBSTRATE:perf_events.c:_papi_pe_read:1155:12218 read: fd: 3, + tid: 0, cpu: -1, buffer[0-2]: 0x6cba, 0x0, 0x0, ret: 24 + SUBSTRATE:perf_events.c:_papi_pe_read:1155:12218 read: fd: 4, + tid: 0, cpu: -1, buffer[0-2]: 0x23, 0x0, 0x0, ret: 24 + SUBSTRATE:perf_events.c:_papi_pe_read:1155:12218 read: fd: 3, + tid: 0, cpu: -1, buffer[0-2]: 0x6de72b5d, 0x8ae0fa80, 0x8ae0fa80, + ret: 24 SUBSTRATE:perf_events.c:_papi_pe_read:1155:12218 read: + fd: 4, tid: 0, cpu: -1, buffer[0-2]: 0x4c4b46b, 0x8ae0fa80, + 0x8ae0fa80, ret: 24 + + So kernel is good, but errors in multiplexed scaling. + + case5: Does setmpx/add/add/start/read work? Added PAPI_TOT_CYC + Added PAPI_FP_INS read @start counter[0]: 9223372034707292159 + read @stop counter[0]: 1843791732 difference counter[0]: + -9223372032863500427 multiplex1.c + FAILED Line # 389 + + With fix: + + SUBSTRATE:perf_events.c:_papi_pe_read:1151:12821 read: fd: 3, + tid: 0, cpu: -1, buffer[0-2]: 0x6782, 0x0, 0x0, ret: 24 + SUBSTRATE:perf_events.c:_papi_pe_read:1151:12821 read: fd: 4, + tid: 0, cpu: -1, buffer[0-2]: 0x0, 0x0, 0x0, ret: 24 + SUBSTRATE:perf_events.c:_papi_pe_read:1151:12821 read: fd: 3, + tid: 0, cpu: -1, buffer[0-2]: 0x6de725dc, 0x8ae0fa80, 0x8ae0fa80, + ret: 24 SUBSTRATE:perf_events.c:_papi_pe_read:1151:12821 read: + fd: 4, tid: 0, cpu: -1, buffer[0-2]: 0x4c4b400, 0x8ae0fa80, + 0x8ae0fa80, ret: 24 read @start counter[0]: 26498 read @stop + counter[0]: 1843865052 difference counter[0]: 1843838554 read + @start counter[1]: 0 read @stop counter[1]: 80000000 difference + counter[1]: 80000000 + SUBSTRATE:perf_events.c:_papi_pe_update_control_state:1288:12821 + Called with count == 0 + SUBSTRATE:papi_libpfm4_events.c:_papi_libpfm_shutdown:1178:12821 + shutdown multiplex1.c PASSED + + New code is vastly simpler and smaller and checks for bad kernel + behavior: + + int64_t tot_time_running = + papi_pe_buffer[get_total_time_running_idx( )]; + int64_t tot_time_enabled = + papi_pe_buffer[get_total_time_enabled_idx( 
)]; + #ifdef BRAINDEAD_MULTIPLEXING if (tot_time_enabled == 0) + tot_time_enabled = 1; if (tot_time_running == 0) + tot_time_running = 1; #else /* If we are convinced this + platform's kernel is fully operational, then this stuff will + never happen. If it does, then BRAINDEAD_MULTIPLEXING + needs to be enabled. */ if ((tot_time_running == 0) && + (papi_pe_buffer[count_idx])) { PAPIERROR("This platform + has a kernel bug in multiplexing, count is %lld (not 0), but time + running is 0.\n",papi_pe_buffer[count_idx]); return + PAPI_EBUG; } if ((tot_time_enabled == 0) && + (papi_pe_buffer[count_idx])) { PAPIERROR("This platform + has a kernel bug in multiplexing, count is %lld (not 0), but time + enabled is 0.\n",papi_pe_buffer[count_idx]); return PAPI_EBUG; + } #endif pe_ctl->counts[i] = + (papi_pe_buffer[count_idx] * tot_time_enabled) / + tot_time_running; + + Also, renamed all instances of 'buffer' to papi_pe_buffer because + buffer is a global variable on MIPS/Linux/libc. Yikes! (gdb) + whatis buffer type = struct utmp * + + * src/ctests/multiplex1.c: Made sure that PAPI_TOT_CYC is the first + event added to multiplexing event set. + + This will demonstrate the bug in perf_event multiplexing + arithmetic in case5 on MIPS and other perf_event subsystems that + likely have some breakage in the kernels handling of + multiplexing. The common bug is that the perf_event subsystem + does not fill in the second and third elements of the 24 byte + read that gets returned from the kernel. These values are + time_enabled and time_running. MIPS as of 3.0.3 just fills this + in after a HZ tick has happened. Workarounds are pretty simple in + the low level layer... + + A buggy output looks like this (3.0.3 MIPS/Linux Big Endian) + + -bash-4.1$ ./ctests/multiplex1 case1: Does PAPI_multiplex_init() + not break regular operation? Added PAPI_TOT_CYC Added PAPI_FP_INS + case1: PAPI_TOT_CYC PAPI_FP_INS case1: 1843775252 + 80000000 + + case2: Does setmpx/add work? 
Added PAPI_TOT_CYC Added PAPI_FP_INS + case2: PAPI_TOT_CYC PAPI_FP_INS case2: 1843773254 + 80000037 + + case3: Does add/setmpx work? Added PAPI_TOT_CYC Added PAPI_FP_INS + case3: PAPI_TOT_CYC PAPI_FP_INS case3: 1843772919 + 80000037 + + case4: Does add/setmpx/add work? Added PAPI_TOT_CYC Added + PAPI_FP_INS case4: PAPI_TOT_CYC PAPI_FP_INS case4: + 1843773959 80000037 + + case5: Does setmpx/add/add/start/read work? Added PAPI_TOT_CYC + Added PAPI_FP_INS read @start counter[0]: 9223372034707292159 + read @stop counter[0]: 1843784577 difference counter[0]: + -9223372032863507582 multiplex1.c + FAILED Line # 389 Error: Difference in start and stop resulted in + negative value! + +2011-11-08 + + * src/components/cuda/: linux-cuda.c, linux-cuda.h: Updated CUDA + component for CUPTI 4.1 (RC1). Note, SetCudaDevice() should now + work with the latest CUDA 4.1 version. + +2011-11-07 + + * src/components/coretemp/linux-coretemp.c: Update coretemp to + better handle sparse numbering of the inputs. + + * doc/Doxyfile-everything: Exclude the libpfm* and perfctr-* + directories from consideration when generating Doxygen docs. + + * src/: papi.h, components/acpi/linux-acpi.h, + components/coretemp_freebsd/coretemp_freebsd.c, + components/cuda/linux-cuda.h, + components/infiniband/linux-infiniband.h, + components/mx/linux-mx.h, components/net/linux-net.h: Place a + space in < your name here > to cleanup doxygen warnings. + + * src/perf_events.c: Only perf event systems that have FAST counter + reads and FAST hw timer access are x86... + + * src/linux-common.c: MIPS clock and Linux fixup code + + * src/components/example/example.c: A little more documentation on + which of the component vector function pointers are relevant. + + * src/papi_vector.c: Tested the dummy get_{real,virt}_{cyc,usec} + functions on zeus, they appear to work. 
+ + * src/components/example/tests/example_multiple_components.c: + Another fix to properly skip the multiple component case if CPU + component not available. + + * src/components/example/tests/example_multiple_components.c: Skip + the test if no CPU component enabled, rather than fail. + +2011-11-04 + + * src/components/example/example.c: Free example_native_table with + papi_free, glibc didn't like it if we just called free. (we + allocate it with papi_calloc) + + * man/...: Version number bump. (since the pages are + quantifiably different from those released in 4.2.0 ) + + * doc/: Doxyfile, Doxyfile-everything, Doxyfile.utils: Bump version + number in the doxygen config files. + + * src/components/example/example.c: + _papi_example_shutdown_substrate does not have any arguments. + + * src/components/net/linux-net.c: Include ctype.h for isspace(). + + * release_procedure.txt: release_procedure now reflects the correct + version of doxygen to use. + + * src/buildbot_configure_with_components.sh: Do not always + configure with no cpu counters, allow this to be passed in. + Allows us to use one script for both types of builds we test. + + * delete_before_release.sh, + src/buildbot_configure_with_components.sh: Create a script for + buildbot to configure with several components. + + Buildbot runs all commandline arguments through a sanitization + before passing them to sh. Thus --with-configure="a b c" => + '--with-configure="a b c"' which is bad. + delete_before_release.sh has been instructed to remove this file. + + * man/...: Rebuild the manpages with doxygen 1.7.4 to + remove the 's at the end of sentences. + + The html output looks clean. + +2011-11-03 + + * src/: multiplex.c, papi.c: Fix some gcc-4.6 compile warnings + complaining that retval was being set but not used. + + * src/papi.c: Add some extra comments to the PAPI_num_cmp_hwctrs() + code that describe its limitations a bit better. 
+ +2011-11-02 + + * src/: ctests/overflow_allcounters.c, testlib/test_utils.c: Add + lots of debugging to make results of overflow_allcounters test a + bit more clear. + + * src/components/coretemp/tests/coretemp_pretty.c: coretemp_pretty + wasn't printing the description for fan inputs. + + The result on an apple MacBook Pro (running Linux) now looks like + this: + + Trying all coretemp events Found coretemp component at cid 2 + hwmon0.temp1_input value: 33.50 degrees C, applesmc + module, label TB0T hwmon0.temp2_input value: 33.50 degrees C, + applesmc module, label TB1T hwmon0.temp3_input value: 32.00 + degrees C, applesmc module, label TB2T hwmon0.temp4_input value: + 0.00 degrees C, applesmc module, label TB3T hwmon0.temp5_input + value: 62.25 degrees C, applesmc module, label TC0D + hwmon0.temp6_input value: 54.25 degrees C, applesmc module, + label TC0F hwmon0.temp7_input value: 57.25 degrees C, applesmc + module, label TC0P hwmon0.temp8_input value: 69.00 degrees C, + applesmc module, label TG0D hwmon0.temp9_input value: 58.00 + degrees C, applesmc module, label TG0F hwmon0.temp10_input + value: 51.25 degrees C, applesmc module, label TG0H + hwmon0.temp11_input value: 58.25 degrees C, applesmc + module, label TG0P hwmon0.temp12_input value: 60.75 + degrees C, applesmc module, label TG0T hwmon0.temp13_input + value: 62.25 degrees C, applesmc module, label TN0D + hwmon0.temp14_input value: 59.25 degrees C, applesmc + module, label TN0P hwmon0.temp15_input value: 49.00 + degrees C, applesmc module, label TTF0 hwmon0.temp16_input + value: 54.00 degrees C, applesmc module, label Th2H + hwmon0.temp17_input value: 58.75 degrees C, applesmc + module, label Tm0P hwmon0.temp18_input value: 31.50 + degrees C, applesmc module, label Ts0P hwmon0.temp19_input + value: 44.25 degrees C, applesmc module, label Ts0S + hwmon0.fan1_input value: 1999 RPM, applesmc module, label Left + side hwmon0.fan2_input value: 2003 RPM, applesmc module, + label Right side coretemp_pretty.c 
PASSED + + * src/components/coretemp/: linux-coretemp.c, linux-coretemp.h, + tests/coretemp_pretty.c: Make the coretemp code a bit pickier + about which events it supports. Add descriptions to the events. + Also add support for Voltage (in*) events. + + On an amd14h machine I have access to, coretemp_pretty now + prints: + + Trying all coretemp events Found coretemp component at cid 2 + hwmon0.in1_input value: 1.31 V, it8721 module, label ? + hwmon0.in2_input value: 2.22 V, it8721 module, label ? + hwmon0.in3_input value: 3.34 V, it8721 module, label +3.3V + hwmon0.in4_input value: 1.02 V, it8721 module, label ? + hwmon0.in5_input value: 1.52 V, it8721 module, label ? + hwmon0.in6_input value: 1.13 V, it8721 module, label ? + hwmon0.in7_input value: 3.26 V, it8721 module, label 3VSB + hwmon0.in8_input value: 3.17 V, it8721 module, label Vbat + hwmon0.temp1_input value: 28.00 degrees C, it8721 module, label ? + hwmon0.temp2_input value: -128.00 degrees C, it8721 module, label + ? hwmon0.temp3_input value: -128.00 degrees C, it8721 module, + label ? hwmon0.fan1_input value: 0 RPM hwmon0.fan2_input value: + 1320 RPM hwmon1.temp1_input value: 33.00 degrees C, jc42 module, + label ? hwmon2.temp1_input value: 31.75 degrees C, jc42 module, + label ? hwmon3.temp1_input value: 53.00 degrees C, radeon module, + label ? hwmon4.temp1_input value: 53.12 degrees C, k10temp + module, label ? coretemp_pretty.c PASSED + + * src/components/coretemp/: linux-coretemp.c, + tests/coretemp_pretty.c: Cut and paste error slipped in to that + last commit. Fixes a build issue. + + * src/components/coretemp/: linux-coretemp.c, tests/Makefile, + tests/coretemp_pretty.c: Clean up coretemp with same cleanups + done in example component. + + Add a new test, "coretemp_pretty" that prints coretemp results in + a more user-friendly way. + + * man/:... Rebuild the man pages with a newer version of + doxygen. ( older versions of doxygen had a nasty bug in man + output. 
) + + Also reworked the utilities documentation to remove pages for the + files. Thanks to Jose Pedro Oliveira for pointing this out. + + * src/components/example/tests/: Makefile, + example_multiple_components.c: Add a test that makes sure you can + have active EventSets on multiple components at the same time. + + * release_procedure.txt: Change PATH specification to include tcsh + syntax; other minor syntax corrections. + + * src/components/example/example.c: More cleanups and documentation + for the example component. + +2011-11-01 + + * src/components/example/example.c: Some more major overhaul of the + example component. A lot more documentation, plus make it behave + a lot more like a real component would. + + * doc/Doxyfile.utils: Turn off undocumented warnings for the utils + doxygen run. + + * src/utils/: avail.c, command_line.c, cost.c, event_chooser.c, + multiplex_cost.c: Add spaces to the comments so doxygen doesn't + think it is an xml tag. + +2011-10-31 + + * src/utils/: avail.c, clockres.c, command_line.c, component.c, + cost.c, decode.c, error_codes.c, event_chooser.c, mem_info.c, + multiplex_cost.c, native_avail.c: Remove the @file directive from + the doxygen comment blocks for the utilities. This cleans up the + generated man pages. ( we no longer build *.c.1 ) + + * src/components/example/: example.c, tests/example_basic.c: + Clarify in the example component that ->reset only gets called if + an eventset is currently running. + + Extend the example_basic test to test PAPI_reset() + + * release_procedure.txt: Fix a maketarget typo. + + * release_procedure.txt: We now have a good version of doxygen + installed on most icl run machines. ( + /mnt/scratch/sw/doxygen-1.7.5.1 ) + + * doc/doxygen_procedure.txt: [no log message] + + * release_procedure.txt: Update release_procedure to inform how to + update the website documentation link. + +2011-10-28 + + * RELEASENOTES.txt: Correct the RELEASENOTES for some things I + missed when reviewing it. 
+ + It's Offcore events that we don't support on + Nehalem/Westmere/Sandybridge. + + Also the power6 libpfm4 bug that was listed as an outstanding bug + was fixed a long time ago. + + * src/components/coretemp/linux-coretemp.c: Have coretemp set the + num_native_events field. + + * src/components/example/tests/example_basic.c: Update example test + to print num_native_events, to help debug issues with other + components not updating the value. + + * src/components/coretemp/: linux-coretemp.c, linux-coretemp.h: Fix + typo enent -> event. Also remove residual LMSENSOR mentions from + the coretemp header. + + * src/papi_libpfm4_events.c: Fix two memory leak locations. + + The attached patch reduces the number of lost memory blocks + reported by valgrind from 234 to 39. It frees the memory + allocated by the 4 strdups and the calloc functions in + papi_libpfm4_events.c:allocate_native_event(). + + Patch by: José Pedro Oliveira + + * src/components/cuda/tests/Makefile: The change to pass the PAPI + CC/CFLAGS to the component tests broke the nvidia test as it + wants CC to be nvcc. So update that Makefile to use nvcc + instead. + +2011-10-27 + + * src/components/example/tests/example_basic.c: Improve the + example_basic component test to be much more comprehensive. + + * src/components/example/: example.c, tests/HelloWorld.c, + tests/Makefile, tests/example_basic.c: Cleanup the example test. + Fix various mistakes in the comments as well as add better error + checking. + + Also rename the "HelloWorld" test to "example_basic" + + * src/components/coretemp/tests/Makefile: The coretemp_test target + was example_test due to cut-and-paste error. + + Patch from Jose Pedro Oliveira + + * src/Makefile.inc: Add a component_tests dependency so that the + component_tests are made during a make -j build + + * src/Makefile.inc: Make sure the component test makefiles get + passed the CC and CFLAGS definitions. 
+ + * src/components/coretemp/: linux-coretemp.c, tests/Makefile, + tests/coretemp_basic.c: Fix up the coretemp component some more. + Make sure the enumerate function returns PAPI_ENOEVNT if no + events are available. + + Update the Makefile so it has proper dependencies. + + Update the test so it prints the first event available. (The + latter based on a patch from Jose Pedro Oliveira) + + * src/: solaris-ultra.c, ctests/all_native_events.c: The + solaris-ultra substrate was still broken. This is because recent + changes to component bind time explictly used the ->set_domain() + call, and this vector was not set up in solaris_ultra. + + Also made the all_native_events test report the returned error + value to aid in debugging problems like this in the future. diff --git a/ChangeLogP440.txt b/ChangeLogP440.txt new file mode 100644 index 0000000..6cd8b0e --- /dev/null +++ b/ChangeLogP440.txt @@ -0,0 +1,131 @@ +2012-04-17 + + * 8782daed cvs2cl.pl delete_before_release.sh gitlog2changelog.py...: Update + the release machinery for git. gitlog2changelog.py takes the output of git + log and parses it to something like a changelog. + + * 80ff04a9 doc/Doxyfile-html: Cover up an instance of doxygen using full + paths. Doxygen ( up to 1.8.0, the most recent at this writing ) would use + full paths in directory dependencies ignoring the use relative paths config + option. + + * c556dad1 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump the version + for the PAPI 4.4.0 release. + +2012-04-14 + + * 27174c0b src/components/bgpm/CNKunit/CVS/Entries + src/components/bgpm/CNKunit/CVS/Repository + src/components/bgpm/CNKunit/CVS/Root...: Removed CVS stuff from Q code. + + * 970a2d50 src/configure src/configure.in src/linux-bgq.c...: Removed + papi_events.csv parsing from Q code. (CVS stuff still needs to be taken care + of.) 
+ +2012-04-13 + + * 853d6c74 src/libpfm-3.y/lib/intel_corei7_events.h + src/libpfm-3.y/lib/intel_wsm_events.h src/libpfm-3.y/lib/pfmlib_intel_nhm.c: + Add missing update to libpfm3 Somehow during all of the troubles we had with + importing libpfm3 into CVS, we lost some Nehalem/Westmere updates. Tested on + a Nehalem machine to make sure this doesn't break anything. + +2012-04-12 + + * 07e4fcd6 INSTALL.txt: Updated INSTALL notes for Q + + * 2a0f919e src/Makefile.in src/Makefile.inc src/components/README...: Added + missing files for Q merge. + + * 0b0f1863 src/Rules.bgpm src/components/bgpm/CNKunit/CVS/Entries + src/components/bgpm/CNKunit/CVS/Repository...: Added PAPI support for Blue + Gene/Q. + +2012-02-17 + + * 147a4969 src/perfctr-2.6.x/usr.lib/event_set_centaur.o + src/perfctr-2.6.x/usr.lib/event_set_p5.o + src/perfctr-2.6.x/usr.lib/event_set_p6.o: Remove a few binary files in + perfctr-2.6.x + +2012-02-23 + + * 955bd899 src/perfctr-2.6.x/usr.lib/event_set_centaur.os + src/perfctr-2.6.x/usr.lib/event_set_p5.os + src/perfctr-2.6.x/usr.lib/event_set_p6.os: Removes the last of the binary + files from perfctr2.6.x Some binary files were left out in the cold after a + mishap trying to configure perfctr for the build test. + +2012-02-17 + + * 5fe239c8 src/perfctr-2.6.x/CHANGES src/perfctr-2.6.x/INSTALL + src/perfctr-2.6.x/Makefile...: More cleanups from the migration, latest + version of libpfm-3.y perfctr-2.[6,7] Version numbers got really confused in + cvs and the git cvsimport didn't know that eg 1.1.1.28 > 1.1 ( see + perfctr-2.6.x/CHANGES revision 1.1.1.28.6.1 :~) + +2012-03-13 + + * e7173952 src/libpfm-3.y/examples_v2.x/multiplex.c + src/libpfm-3.y/examples_v2.x/pfmsetup.c + src/libpfm-3.y/examples_v2.x/rtop.c...: Fix some libpfm3 warnings. libpfm3 + is not maintained anymore, so applied these changes locally. libpfm3 is + compiled with -Werror so they broke the build with newer gcc even though they + are just warnings in example programs. 
+ +2012-04-09 + + * 10528517 src/libpfm-3.y/Makefile src/libpfm-3.y/README + src/libpfm-3.y/docs/Makefile...: Copy over libpfm-3.y from cvs. libpfm3 was + another one of our skeletons in CVS. Thanks to Steve Kaufmann for keeping us + honest. + +2012-02-17 + + * ec8c879e src/aix.c src/components/coretemp/linux-coretemp.c + src/components/coretemp_freebsd/coretemp_freebsd.c...: The git conversion + reset all of the CVS $Id$ lines to just $Id$ Since we depend on the $Id$ + lines for the component names, I had to go back and fix all of them to be the + component names again. + +2012-03-09 + + * 71a2ae4f src/components/lmsensors/linux-lmsensors.c: Fix buffer overrun in + lmsensors component Conflicts: src/components/lmsensors/linux-lmsensors.c + + * ec0e1e9a src/libpfm4/config.mk + src/libpfm4/docs/man3/pfm_get_os_event_encoding.3 + src/libpfm4/examples/showevtinfo.c...: Update to current git libpfm4 snapshot + +2012-02-15 + + * 1312923e src/libpfm4/debian/changelog src/libpfm4/debian/control + src/libpfm4/debian/rules...: The git cvsimport didn't get the latest version + of the libpfm4 import. This should be the versions as were in cvs now. + +2012-02-24 + + * 81847628 src/papi_events.csv: Fix broken Pentium 4 Prescott support We + were missing the netbusrt_p declaration in papi_events.csv + +2012-03-01 + + * 917afc7f src/papi_internal.c: Add some locking in + _papi_hwi_shutdown_global_internal This caused a glibc double-free warning, + and was caught by the Valgrind helgrind tool in krentel_pthreads There are + some other potential locking issues in PAPI_shutdown, especially when debug + is enabled. + + * f85c092f src/papi.c: Fix possible race in _papi_hwi_gather_all_thrspec_data + The valgrind helgrind tool noticed this with the thrspecific test + +2012-03-09 + + * 912311ed src/multiplex.c src/papi_internal.c src/papi_libpfm4_events.c...: + Fix issue when using more than 31 multiplexed events on perf_event On + perf_event we were setting num_mpx_cntrs to 64. 
This broke, as the + MPX_EventSet struct only allocates room for PAPI_MPX_DEF_DEG events, which is + 32. This patch makes perf_event use a value of 32 for num_mpx_cntrs, + especially as 64 was arbitrarily chosen at some point (the actual value + perf_event can support is static, but I'm pretty sure it is higher than 64). + Conflicts: src/papi_libpfm4_events.c diff --git a/ChangeLogP500.txt b/ChangeLogP500.txt new file mode 100644 index 0000000..51a2aba --- /dev/null +++ b/ChangeLogP500.txt @@ -0,0 +1,2279 @@ +2012-08-08 + + * 4b4f87ff ChangeLogP5000.txt: Changelog for PAPI5 + + * 6f208c06 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump version + numbers in prep for a 5.0 release. + + * c6fdbd11 release_procedure.txt: Update release_procedure.txt Change the + order of when we branch git, so that the main dev branch gets some of the + release related changes. + +2012-04-17 + + * 97d4687f ChangeLogP440.txt: Pickup the changelog from papi 4.4 This was + only included in the stable-4.4 branch. + +2012-08-23 + + * 628c2b6e src/buildbot_configure_with_components.sh: Take debug out of the + with several components build test config. When built with PAPI's memory + wrapper routines, the threaded stress tests will sometimes get into poor + performing situations. See trac ticket 148 for discussion. + http://icl.utk.edu/trac/papi/ticket/148 + +2012-08-22 + + * 46faae8e src/ctests/overflow2.c src/ctests/overflow_single_event.c + src/ctests/overflow_twoevents.c...: Move find_nonderived_event() from + overflow_twoevents to test_utils and call it from overflow2 and + overflow_single_event to insure that we're not trying to overflow on a + derived event. + + * 3e7d8455 src/ctests/zero_smp.c: Fix a memory leak reported on the aix + power7 machine. zero_smp.c did not unregister at the end of its thread + function. 
+ + * 3ad5782f src/perf_events.c: perf_events: fix segfault if DEBUG is enabled + Was incorrectly using "i" as an index where it should be "0" in a debug + statement. + + +2012-08-21 + + * a3cadbdb src/ftests/accum.F src/ftests/avail.F src/ftests/case1.F...: Take + #2. Changing len_trim function in ftests to last_char. This time, I respect + 72 char line limit. + + * c9db8fbf src/ctests/overflow_force_software.c: overflow_force_software was + the only test that used a different hard_tolerance value (0.25) than the + other overflow tests (0.75). This caused trouble on Power7/AIX. Now we are + using the same hard_tolerance value in all overflow tests. + + * 70515343 src/ftests/accum.F src/ftests/avail.F src/ftests/case1.F...: + Changed name of function len_trim to last_char. + + * 95168d79 src/components/cuda/linux-cuda.c: Cleanup cuda shutdown code. + * The + shutdown_thread code cleaned out the whole component's state. This has been + split into shutdown_global for the whole component, and shutdown_thread is + left to cleanup some control state info. + + * 56284f81 src/ctests/multiplex1_pthreads.c: Fix memory leaks in pthread + multiplex tests. + + * aeead8b6 src/threads.c: Remove an outdated comment about + _papi_hwi_free_EventSet holding INTERNAL lock + + * e598647b src/perf_events.c: perf_events: fix issue where we dereference a + pointer before NULL check. Fix suggested by Will Cohen, based on a coverity + report. + + * 4e0ed976 src/ctests/calibrate.c: Modify warning message to eliminate the + word "error" Hopefully this will suppress it in buildbot outputs. + + * 50fbba18 src/ctests/api.c src/ftests/case2.F: Cleanup a few more warnings + from the PAPI_perror change. + + * 1f06bf28 src/ftests/case2.F: Missed an instance of perror in the fortran + code. 
+ + * 93e6ae2c src/ftests/ftests_util.F: Fix warning in ftest_util.F + +2012-08-20 + + * 60c6029e src/perf_events.c: perf_events: Update multiplexing code It + * turns + out the PERF_EVENT_IOC_RESET ioctl resets the count but not the multiplexing + info. This means that when we fiddle with the events then reset them in + check_scheduability(), we are not really resetting things to zero. The + effect might be small, but since the new multiplex code by definition is + always scheduable, then let's skip the test if multiplexing. + + * 9079236c src/ctests/zero.c: Change error reporting so FLOPS > 100% above + theoretical FAIL and FLOPS > 25% above theoretical WARN. + +2012-08-18 + + * 980558af src/papi_internal.c: papi_internal: fix memory leak When I made + some changes a while back I forgot to free ESI->NativeBits properly. This + was causing memory leak warnings on buildbot. + +2012-08-17 + + * 83a14612 src/perf_events.c: perf_events: more cleanups and comments We + really need to go back and figure out in more detail what the + profile/sampling/overflow code is doing. + + * 7cafb941 src/perf_events.c: perf_events: more cleanups and comments + + * e9e39a4b src/perf_events.c: perf_events: disable kernel multiplexing + * before + 2.6.34 It turns out even our simple multiplexing won't work on kernels + before 2.6.34, so fall back to sw multiplex in that case. 
+ + * 05801901 src/perf_events.c: perf_events: more cleanup and comments + + * 268e31d7 src/perf_events.c: perf_events: more cleanup and commenting + + * d62fc2bf src/perf_events.c: perf_events: more cleanup and comments + + * fb0081bc src/perf_events.c: perf_events: more cleanups and comments + + * a1142fc8 src/perf_events.c: perf_events: cleanup and comment the kernel + * bug + workarounds + + * b8560369 src/perf_events.c: perf_events: minor cleanups and new comments + + * 6c320bb2 src/perf_events.c: perf_events: fix some debug messages I forget + to test with --with-debug enabled + + * f7a3cccf src/perf_events.c: perf_events: enable new read_code This makes + the read code much simpler. It finishes the multiplexing changes. To avoid + complication, we no longer enable PERF_FORMAT_ID as reading that extra info + is unnecessary with the current implementation. This passes all the tests +on + a recent kernel, but on 2.6.32 there are still a few issues. + + * 15749cff src/ctests/all_events.c src/ctests/all_native_events.c: Fix + warning in all_events and all_native_events. In the perror semantic change, + several strings for use in the old interface were left. + +2012-08-16 + + * afdd25fa src/perf_events.c: perf_events: always enable kernel multiplexing + The new code should work on any kernel version. + + * 9f5e23ae src/perf_events.c: Rewrite multiplex support. Drop support for + the former "partitioned" multiplexing, as we could never use it. Instead +use + the simple/braindead model. This still needs more work, as sometimes reads + are failing. + + * cdd29909 src/ftests/strtest.F: Fix strtest.F ftest It was still making + some assumptions about PAPI_perror() writing to a string rather than +directly + to standard error. + + * 565f60b3 src/papi_internal.c: Missing code to set num_error_chunks to 0 + The new _papi_hwi_cleanup_errors() function was not resetting + num_error_chunks to 0, leading to a segfault in the fmultiplex1 test. 
+ +2012-08-02 + + * bb85bafd src/genpapifdef.c src/papi.c src/papi_common_strings.h...: Remove + usage of _papi_hwi_err. Move PAPI over to storing errors in a runtime list. + * Functions to add/lookup errors. * Generate the list of PAPI_E* errors at + library_init time. * genpapifdef pulled the values for the PAPI_* error + return codes from the _papi_hwi_err structure at configure time. Since this + is now built at run-time, I added the appropriate values to genpapifdef's + builting describe_t table. See : _papi_hwi_publish_error + _papi_hwi_init_errors For usage hints. + +2012-08-10 + + * e27af085 src/perf_events.c: perf_event: rename BRAINDEAD_MULTIPLEXING It + is now "simple_multiplexing" and is a variable not an #ifdef This is needed + before perf_event multiplexing can be sorted out. It's unclear if it + actually works anyway. + + * 7f8e8c58 src/perf_events.c: perf_event: remove context "cookie" field It + was a bit of overkill, we just need an initialized field. Also revamp how + context and control are initialized. + + * 8cb8ac6d src/perf_events.c: perf_event: move all event specific info to + * the + control state previously half was in the context state and half in the + control state perf_event has a strange architecture with each event being + created having its own fd, which is context wide. In PAPI though we usually + only have one eventset (control state) active at once, so there's no need to + have the context be aware of this. + +2012-08-09 + + * 8d7782cb src/perf_events.c: perf_event: rename evt_t to perf_event_info_t + This just makes the code easier to follow. 
+ + * 349de05c src/perf_events.c: perf_event: remove the superfluous + per_event_info_t structure + +2012-08-08 + + * da8ad0a2 src/ctests/all_native_events.c src/ctests/get_event_component.c + src/utils/native_avail.c: Fix warnings about PAPI_enum_cmp_event() return +not + being checked Reported by coverity checker via Will Cohen Harmless + warnings, and now the checker will likely complain about the value being + checked but ignored. + + * b4719888 src/papi_user_events.c: Fix unused value in papi_user_events.c + Reported by Coverity checker by Will Cohen + + * 6a8f255c src/utils/event_chooser.c: remove unused + * PAPI_get_component_info() + call in event_chooser Reported by Will Cohen from coverity checker + +2012-08-06 + + * 62cda478 src/genpapifdef.c src/papi_common_strings.h + src/papi_internal.c...: Remove usage of _papi_hwi_err. genpapifdef pulled + the values for the PAPI_* error return codes from the _papi_hwi_err structure + at configure time. Since this is now built at run-time, I added the + appropriate values to genpapifdef's builting describe_t table. + +2012-08-02 + + * d11259f3 src/papi.c src/papi_internal.c src/papi_internal.h...: Move over + to generating the list of PAPI errors at library_init time. + + * 097ffc44 src/papi_internal.c: Functions to add/lookup errors. + +2012-08-07 + + * 2530533f src/papi_events.csv: tests/zero fails on Power7 due to PAPI_FP_INS + Error of 50%. Preset definition has been redefined and test now passes. + + * 8e17836f src/components/appio/Rules.appio src/components/appio/appio.c + src/components/appio/appio.h...: We now intercept recv(). The support for + recv() requires dynamic linkage. For static linkage, recv is not intercepted. 
+ +2012-08-06 + + * 8b1eb84c src/perf_events.c: perf_events: some whitespace cleanup and extra + comments + + * f10edba6 src/perf_events.c: perf_events: MAX_READ was no longer being used, + remove it + + * 08c06ed1 src/perf_events.c: perf_event event_id is actually 64-bit, so make + our copy match + + * a33e8d9c src/perf_events.c: Rename context_t pe_context_t in perf_events.c + Makes the code a bit clearer and matches how other components name things. + + * 96ce9dcd src/perf_events.c: Rename control_state_t pe_control_state_t This + makes the code a bit easier to follow and matches how other components name + things. + +2012-08-03 + + * 4c5dce7f src/ctests/zero.c: Beef up error reporting. + + * 83b5d28a src/ctests/cycle_ratio.c: Have the cycle_ratio test skip if + PAPI_REF_CYC event is not defined. + +2012-08-02 + + * 25b1ba41 src/ctests/cycle_ratio.c: Removed all TESTS_QUIET blocks. They + aren't needed because tests_quiet() overloads printf. We should probably + remove TEST_QUIET blocks in ALL tests at some point for code clarity… + + * 8777d7d4 src/ctests/zero.c: Fixed error reporting. The error computation + was inside a TESTS_QUIET block and wasn't getting executed when run quietly. + Thus this test always passed on buildbot, even when it didn't. + + * 006fe8e9 src/ctests/Makefile: Fix typo in cycle_ratio make line. + + * 88e6d6a4 src/aix.c src/aix.h: Setting number of multiplex counters back to + 32 for AIX. Before it was set equal to number of max HW counters. This caused + ctests/sdsc-mpx to fail. + + * ab78deda src/papi_events.csv: ctests/calibrate on Power7/AIX failed with a + 50% error all the way through. Updated the preset FP_OPS with a more + appropriate definition. Now the calibrate errors range from 0.0002 to 0.0011% + for double and single precision + + * fadce32f src/ctests/calibrate.c: Modify calibrate test in two ways: 1. add + a -f flag to suppress failures and allow test to run to completion; 2. 
change + error detection to allow warnings above MAX_WARN and failures above MAX_FAIL. + Currently set to 10% and 80% respectively. This allows speculative over + counting to pass with warning rather than fail completely. + + * 8a39ac9d src/papi_events.csv: LST_INS for Power7 was defined from 3 native + events that cannot be counted together at the same time. Caused + ctests/all_events to fail. Updated the preset with a more appropriate + definition. + + * cdc16e5d src/papi_events.csv: L1_DCA for Power7 was defined from 3 native + events that cannot be counted together at the same time. That caused + ctests/tenth to fail. Updated the preset with a more appropriate definition. + +2012-08-01 + + * 2bf44d13 src/papi_internal.c src/perf_events.c: icc does not like + arithmetic on void pointers. Added cast to unsigned char* when arithmetic was + being performed on void pointers in papi_internal and perf_events. + + * 7825ec14 src/ctests/api.c src/ctests/attach2.c src/ctests/attach3.c...: + Modify tests that FAIL if PAPI_FP_OPS or PAPI_FP_INS not implemented. Now + they will warn and continue. This is specifically to accommodate the + brain-dead IvyBridge implementation. + + * fd70a015 src/testlib/test_utils.c: Re-writing of test_utils introduced new + bugs that caused ctests/tenth to fail. test_events struct lists the same + event twice (MASK_L1_DCW), hence PAPI_add_event() fails because it's forced + to add the same preset twice. + + * 74ece3a0 src/run_tests.sh: run_tests.sh was clobbering $EXCLUDE variable if + $CUDA was defined. Changed to add entries from run_tests_exclude_cuda.txt to + $EXCLUDE which should already contain entries from run_tests_exclude.txt + instead of replacing the entries already contained. + + * 11ed2364 src/libpfm4/config.mk: Added check in libpfm4/config.mk to check + if using icc. If so, the -Wno-unused-parameter flag will no longer be used + because icc does not provide it and provides no alternative. 
+ + * dedf73f6 src/papi_user_events.c: fget() returns an int it should be treated + as an int The coverity scan flagged that the int return by fget was stored + in a char. The main concern with this is the EOF that fget() could return is + -1. Do not want to mess up that value by typecasting to char and then back to + int. + + * c4fcbe7e src/ctests/kufrin.c: Check return values of PAPI_get_cmp_opt() and + calloc A coverity scan showed that PAPI_get_cmp_opt() could potentially + return a negative number. Also it is good form to check the return value of + calloc to ensure it is a non-null pointer. + + * e89d6ffa src/testlib/test_utils.c: Clean up test_print_event_header() + There were a couple warnings flagged by coverity on + test_print_events_header(). The function now checks for error conditions + flagged by PAPI_get_cmp_opt() and also frees memory allocated by a calloc() + function. + + * c81d8b60 src/threads.h: Eliminate deadcode from threads.h If + HAVE_THREAD_LOCAL_STORAGE is defined, a portion of the + _papi_hwi_lookup_thread() will never be executed. This patch make either one + section or the other section of code be compiled. This will eliminate a + coverity scan warning about unreachable code. + + * f70f3f56 src/ctests/all_native_events.c: Eliminate unused variable in + ctests/all_native_events.c Coverity identified a variable that was set but + never used in all_native_events.c. This patch removes the unused variable to + eliminate that warning. + + * a9f29840 src/components/appio/appio.c: A couple places in appio.c used the + FD_SET() without initializing the variable. Coverity scan pointed out this + issue. + + * 9e535ae2 src/components/rapl/linux-rapl.c: A Coverity scan pointed out that + read_msr() could potentially use an invalid value of fd for pread(). Need to + check the value of fd before using it. + + * 7b55c675 src/components/rapl/linux-rapl.c: The arrays used for + initialization were hard coded to 1024 packages. 
Want to avoid hard coding + that so the day when machines with 1025 packages are available is a + non-event. Also changed the initialization code to avoid having the + initialization be O((number of packages)^2) in time complexity. + +2012-07-27 + + * 3703995a src/papi_internal.c: Fix the component name prepending code. When + presented with a NULL component .short_name, the code did the wrong thing. + + * 5258db8b src/components/cuda/linux-cuda.c: Fix a warning in cuda. + +2012-07-26 + + * ddd6f193 src/ctests/Makefile src/ctests/cycle_ratio.c: Add a test to + compute nominal CPU MHz from real cycles and use PAPI_TOT_CYC and + PAPI_REF_CYC to compute effective MHz. Warns if PAPI_REF_CYC is zero, which + can happen on kernels < ~3.3. + + * fab5e9ef src/papiStdEventDefs.h src/papi_common_strings.h + src/papi_events.csv: Add PAPI_REF_CYC preset event. Define it as + UNHALTED_REFERENCE_CYCLES for all Intel platforms on which this native event + is supported. + +2012-07-25 + + * 8b9b6bef src/papi_events.csv: Modify SandyBridge and IvyBridge tables: + SandyBridge FP_OPS only counts scalars; SP_OPS and DP_OPS now count + correctly, including SSE and AVX. IvyBridge can't count FP at all; + adjustments made to eliminate event differences with SandyBridge. + +2012-07-26 + + * 5b11c982 src/components/cuda/linux-cuda.c: Fix the cuda component. The + cuda component prepended CUDA. to all its event names, this is no longer the + case. + +2012-07-25 + + * db5b0857 src/papi_events.csv: Added 2 new preset definitions for BGQ. Note, + these presets use the new feature where a generic event mask together with an + ORed opcode string is used. This won't work until the new Q driver is + released (currently scheduled for end of August). + +2012-07-24 + + * af7cd721 src/components/coretemp/linux-coretemp.c + src/components/coretemp/tests/coretemp_pretty.c + src/components/cuda/linux-cuda.c...: Enforce all our components to use the + same naming. 
We settled on :'s as inter-event separators. This also touches + a few of the components' tests, we changed the name field so their searches + needed help. + +2012-07-23 + + * 57aeb9d4 src/papi_internal.c: Prepend component .short_name to each event + name. Use ::: as a sep. + +2012-07-24 + + * 762e9584 src/ctests/multiplex2.c src/sw_multiplex.c: Fix multiplex2 test + It complained if it tried to add a multiplex event and PAPI properly told it + that it couldn't. + + * 531870f1 src/papi_internal.c: Add sanity check at component init time + Looks for num_cntrs being larger than num_mpx_cntrs which doesn't make much + sense. + + * 53ad0259 src/extras.c src/genpapifdef.c src/papi.c...: Rename + PAPI_MAX_HWCTRS to PAPI_EVENTS_IN_DERIVED_EVENT Hopefully this will make + things a little less confusing. + + * 700af24b src/papi_internal.c: Change EventInfoArrayLength to always return + num_mpx_cntrs Things should be consistently using num_mpx_cntrs rather than + num_cntrs now. Issue reported by Steve Kaufmann + + * d1570bec src/sw_multiplex.c: Fix sw_multiplex bug when max SW counters is + less than max HW counters this was causing kufrin and max_multiplex to fail + + * f47f5d6a src/aix.c src/components/appio/appio.c + src/components/bgpm/CNKunit/linux-CNKunit.c...: Remove PAPI_MPX_DEF_DEG It + was not well documented and being used in confused ways all over the code. + Now there is a different define PAPI_MAX_SW_MPX_EVENTS used by the software + multiplex code. All other components have had the value replaced with just + the maximum number of counters. If a component can handle its own + (non-software) multiplexing it is up to it to set .num_mpx_cntrs to a value + that's different from .num_cntrs + + * 0d83f5db src/papi_internal.c src/papi_internal.h: Split NativeBits off of + NativeInfoArray in EventSet previously we were doing some crazy thing where + we allocated both at once and then split them afterward. The new code is + easier to follow. 
+ + * 98f2ecbd src/papi_internal.c: Clean up EventSet creation Sort out which + sizes are used for allocating which structures. + + * e1024579 src/Makefile.inc src/multiplex.c src/multiplex.h...: Rename the + multiplex files to be sw_multiplex That way it's more clear the stuff + included only relates to software multiplexing, not generic multiplexing. + + * a6adc7ff src/multiplex.h src/papi_internal.c src/papi_internal.h: Move some + sw-multiplex specific terms out of papi_internal.h and into multiplex.h + +2012-07-23 + + * 1ddbe117 src/components/README: Added note that lmsensors component + requires lmsensors version >=3.0.0 + + * 94676869 src/components/appio/appio.c + src/components/appio/tests/appio_test_pthreads.c: proper checking of return + codes in response to tests using coverity + + * ea958b18 src/components/appio/tests/appio_list_events.c + src/components/appio/tests/appio_values_by_code.c: As component name in table + has been changed from appio.c to appio, we now use appio in the tests. + +2012-07-20 + + * f212cc34 src/components/appio/appio.c + src/components/coretemp/linux-coretemp.c + src/components/coretemp_freebsd/coretemp_freebsd.c...: Add .short_name + entries to each component. + + * 1e755836 src/papi_libpfm4_events.c src/perf_events.c: Fix use-after-free + bug in perf_events.c This turned up in the ctests/flops test, and Valgrind + made it easy to track down. + + * 4580ed1d src/perf_events.c: Update perf_event.c rdpmc support Use the + libpfm4 definition for mmap rather than our custom one, now that libpfm4 has + been updated + + * 47558b2c src/libpfm4/examples/showevtinfo.c + src/libpfm4/include/perfmon/perf_event.h + src/libpfm4/lib/pfmlib_intel_nhm_unc.c...: Import current libpfm4 from + libpfm4 git It has some minor uncore fixes plus the header changes needed + for rdpmc. 
+ +2012-07-17 + + * 65d4c06c src/linux-common.c: Reorder statements to ensure the fclose() are + performed Coverity pointed out that it was possible for resources to be + leaked in linux-common.c if the fscanf() encountered error. This reordering + of the statements ensures that the fclose() calls are done regardless of the + results of the fscanf() functions. + +2012-07-18 + + * 7bf071ff src/papi_user_events.c: Ensure that load_user_event_table() frees + files and memory on error A Coverity scan showed that an error condition in + load_user_event_table() function would exit the function without closing + the table file or freeing allocated memory. This patch addresses those + issues. + +2012-07-17 + + * 1ba52e35 src/components/stealtime/linux-stealtime.c: Ensure that + read_stealtime() closes the file in case of an error condition A Coverity + scan showed that an error condition could cause read_stealtime() to exit + without closing the file. This patch ensures that the file is closed + regardless of success or failure. + +2012-07-18 + + * f37f22e5 src/papi_libpfm4_events.c: Fix warning in papi_libpfm4_events.c + We were setting a value but never using it. + + * 8e8401bc src/testlib/test_utils.c: Remove unused variable in test_utils.c + Most of the machines in buildbot were complaining about this. + + * 133ce6a9 src/linux-timer.c: Add missing papi_vector.h include to + linux-timer.c This was breaking on PPC Linux + +2012-07-17 + + * 6fd3cedd src/perf_events.c: Fix perf_events.c warnings reported by ICC + + * 21c8f932 src/perfctr-x86.c: Fix perfctr-x86 build with debug enabled + + * 08f76743 src/configure src/configure.in src/linux-bgq.c: Attempt to fix + linux-bgq compilation error. It turns out BGQ uses the standard + linux-context.h header + + * 43457f4f src/linux-bgq.c: Made check for opcodes more robust. 
+ + * d58116b4 src/perf_events.c: More cleanups of perf_events.c file + + * 409438b7 src/freebsd-context.h src/freebsd.c src/freebsd.h: Fix FreeBSD + compile warnings Similar to the perfctr issues. + + * 1e6dfb02 src/aix.c src/aix.h: Fix AIX build warnings They were similar in + cause to the perfctr issues. + + * 3d0b5785 src/Rules.perfmon2 src/components/appio/appio.c + src/components/bgpm/CNKunit/linux-CNKunit.h...: Remove papi_vector.h include + from papi_internal.h There were some semi-circular dependencies that came up + with the context split changes. The easiest way to fix things for perfctr + was just move papi_vector.h out to be included explicitly. This touches a + lot of files because a lot of files include papi_internal.h This should also + fix the perfctr and perfmon2 builds that were broken yesterday. + +2012-07-16 + + * a7a14a5b src/ctests/zero.c src/testlib/test_utils.c: Modify zero test to + warm up processor before measuring events, and report timing errors as signed + deviations. Modify test_utils add_two_events code to check for errors after + adding nominally valid events. This is a more rigorous test than just + counting available registers. + + * de0860d3 src/perf_events.c: Remove perf_events.h module header It's no + longer needed, everything important is merged into the perf_events.c file. + + * 22975f14 src/perf_events.c: Remove perf_event SYNCHRONIZED_RESET code This + was never defined and never used, just remove the code. + + * 48750b8c src/perf_events.c: Remove papi_pe_allocate_registers On + perf_event this code wasn't really doing anything useful, as + update_control_state would end up re-doing any possible tests we could want + to do here. + + * 1775566f src/Makefile.in src/Makefile.inc src/Rules.pfm4_pe...: Remove + "include CPUCOMPONENT" from papi_internal.h This was the last major + dependency on CPU component in common PAPI code. 
It was mostly necessary for + the ucontext definitions when trying to get the instruction pointer when + doing sampling. This change creates new OS-specific header files that handle + the ucontext related code, and has papi_internal.h include that instead. + + * 969ce035 src/Rules.pfm4_pe src/Rules.pfm_pe src/configure...: Make + perf_event libpfm4 only perf_event libpfm3 support is not really needed + anymore and supporting it was cluttering up the perf_event component. + +2012-07-13 + + * adad1d2a src/perf_events.c: Add init time error messages to perf_event + component + + * 827ccc07 src/perf_events.c: Add perf_event rdpmc / fast_real_timer + detection Currently we need a custom copy of struct perf_event_mmap_page + because the version included with libpfm4 doesn't define the fields we need + yet. + + * 4f82fe94 src/perf_events.c: Read in paranoid info on perf_events This + indicates whether a regular user can read CPU-specific or system-wide + measurements. + + * 03080450 src/perf_events.c: Add proper perf_event detection Using the + official /proc/sys/kernel/perf_event_paranoid file + + * 6e71d3f7 src/linux-bgq.c: Updated BGQ opcode stuff; cleaned up code. + +2012-07-11 + + * 3114d3dc src/multiplex.c src/papi_internal.c src/perf_events.c: Minor + documentation improvements Plus fixes some typos + +2012-07-09 + + * b60c0f0c src/perf_events.c: Minor cleanups to perf_events.c + + * 075278a0 src/aix.c src/freebsd.c src/linux-bgp.c...: Change return value + for .allocate_registers For some reason it returned 1 on success and 0 on + failure. Change it so you return PAPI_OK on success and a PAPI error on + failure, to better match all of the other component vectors. + + * 29d9e62b src/testlib/test_utils.c: Fixed the print_header routine to report + an error message if counters are not found, instead of a negative counter + number. Tested by forcing the return value negative; not by running on a Mac, + where the error first appeared. 
+ + * 74257334 src/ctests/Makefile src/ctests/remove_events.c: Add remove_events + test This just makes sure EventSets still work after an event has been + removed. This is probably covered by other more elaborate tests, but I + needed a simple test to make sure I wasn't breaking anything. + + * 1372714f src/papi.c src/papi_internal.c src/papi_internal.h: Clean up, + rename, and comment _papi_hwi_remap_event_position I've found this section + of code to be confusing for a long time. I think I finally have it mostly + figured out. I've renamed it _papi_hwi_map_events_to_native() to better + describe what it does. The issue is that the native event list in an + EventSet can change at various times. At event add, event remove, and + somewhat unexpectedly, whenever ->update_control_state is called (a component + can re-arrange native events if it wants, to handle conflicts, etc.) Once + the native event list has been changed, _papi_hwi_map_events_to_native() has + to be called to make sure the events all map to the proper native_event + again. Currently we have the _papi_hwi_map_events_to_native() calls in odd + places. It seems to cover all possible needed locations, but analyzing that + we do takes a lot of analysis... + + * f1b837d8 src/papi.c: Remove unused variable in papi.c + + * 541bcf44 src/papi_internal.h: Update comments in papi_internal.h Some of + the EventSetInfo comments were out of date. + + * e6587847 src/papi.c src/papi_internal.c src/papi_internal.h: Remove unused + parameter from _papi_hwi_remap_event_position The mysterious + _papi_hwi_remap_event_position function had a "thisindex" field that was + ignored. This will possibly speed PAPI_start() time as it was running a loop + over num_native_events on _papi_hwi_remap_event_position even though each + call did the same thing since the value being passed was ignored. 
+ + * 3ad3d14b src/papi_internal.c: Clean up and comment add_native_events in + papi_internal.c I'm chasing some weird perf_events behavior with the + papi_event_chooser. The add_native_events code is very hard to understand, + working on commenting it more. + + * 4e5e7664 src/utils/event_chooser.c: Fix coverity warning in + papi_event_chooser + + * 666249a8 src/jni/EventSet.java src/jni/FlipInfo.java + src/jni/FlopInfo.java...: RIP Java. Java PAPI wrappers have not been + supported for years (2005?). They are being removed to declutter the source. + + * e18561fc src/papi_preset.c: Update cmpinfo->num_preset_events properly + This value wasn't being set if we were reading the presets directly from the + CSV file. + + * 290ab7c3 src/utils/component.c: Have papi_component_avail report counter + and event info + + * 7c421b9c src/testlib/test_utils.c src/utils/native_avail.c: Remove counter + number from the testlib header. The header was only reporting number of + counters for the CPU component, even though the header is printed for many + utils and the CPU component might not even be involved. This could be a bit + confusing, so remove it. + + * 26432359 src/darwin-common.c src/darwin-memory.c: Improve OSX support This + properly detects CPU information now. You can get results like this: + Available native events and hardware information. + + - PAPI Version : 4.9.0.0 Vendor string and code : GenuineIntel + (1) Model string and code : Intel(R) Core(TM) i5-2435M CPU @ 2.40GHz (42) + CPU Revision : 7.000000 CPUID Info : Family: 6 + Model: 42 Stepping: 7 CPU Max Megahertz : 2400 CPU Min Megahertz + : 2400 CPUs per Node : 0 Total CPUs : 4 Running in + a VM : no Number Hardware Counters : -4 Max Multiplex Counters : + -4 + + - + +2012-07-08 + + * 845d9ecb src/Makefile.inc src/configure src/configure.in...: Add Mac OSX + support This is enough that things compile and simple utilities run. No CPU + perf counter support. 
+ +2012-07-06 + + * ff6f9ab4 src/linux-bgq.c: missed to delete a debug output. + +2012-04-17 + + * 12e9a11a RELEASENOTES.txt: Release notes for the 4.4 release. + +2012-07-06 + + * ac2eac56 src/papi.c src/papi.h: Add a PAPI_disable_component_by_name entry + point. + + * 8c490849 src/components/coretemp_freebsd/coretemp_freebsd.c src/freebsd.c: + Fix FreeBSD to work I'm not sure how it ever worked in the past. With these + changes I can at least do a papi_component_avail and a papi_native_avail and + get sane results + + * 108b5ce6 src/freebsd.c src/freebsd.h src/freebsd/map-atom.c...: Fix FreeBSD + build some of the recent changes broke the FreeBSD build + + * 40a60f0a src/linux-bgq.c src/linux-bgq.h: Added BGQ's opcode and generic + event functionality to PAPI. For BGQ there are multiple ways to define + presets. The naive way is to derive from multiple events. This eats up + multiple counters and we lose sample capability as well as overflow + capability. On the other side, some events come with multiple InstrGroup + derived in the field. If that's the case we can use a generic event and + opcodes to filter multiple groups in a single counter. This is not working + properly yet due to a known error in BGPM. Bgpm_AddEvent() does not work + properly when multiple generic events are added to an EventSet. The BGPM + folks have been made aware of this issue, they confirmed the error, and they + are currently working on a fix. + + * 6f72b70f src/papi_events_table.sh: Make this script robust enough to handle + any line ending, including CR (Mac), CRLF (Windows), and LF (Unix). It + appears that google mail now automagically converts attached files to CRLF + format. + + * 765ed0d2 src/papi_internal.c: Fix a type warning in the UE code. 
+ + * 94bc1b15 src/MACROS: Remove the MACROS file it held out of date info and + hasn't been touched since 2004 + + * d19e73ba src/ctests/Makefile src/ctests/clockcore.c + src/testlib/Makefile...: Move the clockcore.c file from ctests to testlib + it's common code used by multiple tests, including some in the utils + directory also add a function definition to fix a build-time warning + + * 1101a6aa src/aix-lock.h src/aix.h src/configure...: Make papi_lock.h + changes for non Linux architectures + +2012-07-05 + + * 3b82b03d src/Makefile.in src/Makefile.inc src/aix.c...: Make the PAPI locks + be tied to OS, not to CPU There is now a papi_lock.h file that when included + gets the proper lock include for the OS. This fixes a lot of previous build + hacks where a CPU component was needed in order for locks to work. + + * 0632ef42 src/threads.c: Fix spurious init_thread call in threads.c + threads.c was calling init_thread() on all components, even ones that were + disabled Fix it to honor the disable bit, as well as for shutdown_thread(). + This was causing perfctr disable code to not work. + + * 19d9de7f src/Makefile.in src/Makefile.inc src/Rules.pfm4_pe...: Replace + SUBSTRATE with CPUCOMPONENT in build This was mostly a configure/build + change but it also cleaned up some cases where we were including SUBSTRATE + where we didn't have to. + + * 829780db src/solaris-common.c src/solaris-common.h + src/solaris-niagara2.c...: Move some common solaris code to solaris-common + + * 681ef027 src/configure src/configure.in src/solaris-memory.c...: Merge + solaris-memory.c and solaris-niagara2-memory.c + + * bbd41743 src/solaris-ultra/get_tick.S src/solaris.h: Remove + solaris-ultra/get_tick.S Nothing was using it. + + * dc3b6920 src/papi_sys_headers.h src/solaris.h: Remove papi_sys_headers.h + Solaris was the only thing including it, and it wasn't really using it. 
+ + * 7ccfa9df src/Makefile.in src/Makefile.inc src/configure...: Move OS + specific code into the new OSFILESSRC Linux in particular was using MISC for + this. + + * 6f16c0c5 src/configure: Re-run autoconf to pickup the substrate=>component + change. + + * cfff1ede src/Makefile.in src/Makefile.inc src/configure...: Remove + MEMSUBSTR In reality, what we want instead of a Memory Substrate is an idea + of the OS-specific common code that includes the memory substrate. This + change adds OSFILESSRC and OSFILESOBJ to handle this case in configure + + * ca4729e6 src/configure.in: Separate out MEMSUBSTR and make it per-OS + + * 3148cba5 src/Matlab/PAPI_Matlab.dsp src/ctests/calibrate.c + src/ctests/flops.c...: RIP Windows, remove the windows support code. Windows + has not been actively supported since the transition to Component PAPI (4.0) + This cleans up the code-base. + +2012-07-03 + + * a366adf7 src/papi.c src/utils/error_codes.c: Change PAPI_strerror and + PAPI_perror to behave more like their POSIX namesakes. PAPI_error_descr is + made redundant and removed as a result. + +2012-07-05 + + * 7df46f81 src/Rules.pfm src/aix.c + src/components/coretemp/linux-coretemp.c...: Move uses of PAPI_ESBSTR to + PAPI_ECMP I left PAPI_ESBSTR defined too for backward compatibility. Also + some of the changes update PAPI_ESBSTR to be a more relevant error code, if + one is available. + +2012-07-03 + + * fdb348ad src/components/coretemp_freebsd/coretemp_freebsd.c + src/components/example/example.c src/components/net/linux-net.c...: A few + more substrate removals + + * 791747c1 src/cpus.c src/papi.h src/perf_events.c...: Fix bugs introduced by + substrate -> component change Fix some stupid compile bugs that I missed. 
+ + * 79b01a47 src/aix.c src/components/appio/appio.c + src/components/bgpm/CNKunit/linux-CNKunit.c...: More substrate -> component + changes This changes the vectors .init_substrate -> .init_component + .shutdown_substrate -> .shutdown_component .init -> .init_thread .shutdown -> + .shutdown_thread hopefully this will make the code clearer. + + * 02a10d71 src/Makefile.inc src/aix.c src/cpus.c...: Rename "substrate" to + "component" This first pass only re-names things in comments. + +2012-07-02 + + * c4bbff1c src/papi.c src/papi.h: Minor documentation fixes Found when + writing up the PAPI 5.0 changes document + +2012-06-30 + + * f9cb7346 src/components/vmware/vmware.c: Fix vmware component apparently I + forgot to test the build with the vmguestlib support disabled. + +2012-06-22 + + * 599040d1 src/components/coretemp/linux-coretemp.c + src/components/rapl/linux-rapl.c + src/components/stealtime/linux-stealtime.c...: Fix libpfm4 ntv_event_to_info + event_info_t on other components This was actually a widespread problem due + to cut-and-paste. + + * 2b51b439 src/papi_libpfm4_events.c: Properly fix libpfm4 ntv_event_to_info + event_info_t event value The previous fix was subtly wrong. This is the + proper fix, which is to do nothing inside of papi_libpfm4_events.c because + papi_internal.c does the right thing for us and we were overwriting that with + the wrong value. + + * a4f576bf src/ctests/overflow_allcounters.c src/testlib/papi_test.h + src/testlib/test_utils.c: Clean up overflow_allcounters code While tracking + down a previous issue I also cleaned up the overflow_allcounters test code to + use some of the new interfaces. + + * 6903e053 src/papi_libpfm4_events.c: Fix libpfm4 ntv_event_to_info + event_info_t event value The recently added libpfm4 ntv_event_to_info + function was not properly oring PAPI_NATIVE_MASK to the event value in the + event_info_t struct. That means if you tried to use that event value to add + an event it would fail. 
The overflow_allcounters test broke because of this. + + * 420c3d11 src/ctests/Makefile src/ctests/disable_component.c src/papi.c...: + Add PAPI_get_component_index() and PAPI_disable_component() + PAPI_get_component_index() will return a component index if you give it the + name of a component to match. This saves you having to iterate the entire + component list looking. PAPI_disable_component() will manually mark a + component as disabled. It has to be run before PAPI_library_init() is called. + + * 11946525 src/aix.c src/components/cuda/linux-cuda.c + src/components/example/example.c...: Standardize component names to not end + in .c We were being inconsistent; the time to make them all be the same is + now before 5.0 gets out. + +2012-06-21 + + * 274e1ad8 src/components/vmware/tests/Makefile: Fix cut-and-paste error in + the vmware component Makefile + + * 85d6438d src/utils/event_chooser.c: Update papi_event_chooser to work with + components Now you can specify events from components and it will tell you + all the other events on that component that can run with it. Previously the + utility was limited to the CPU component (0) only. + + * 3c2fcc83 src/papi_libpfm3_events.c src/papi_libpfm4_events.c + src/perf_events.c: Hook up .ntv_code_to_info on perf_event + + * 36e864b3 src/papi_libpfm4_events.c src/papi_libpfm_events.h + src/perf_events.c: Enable support for showing extended umasks on perf_event + With this change, papi_native_avail now shows event umasks such as :u, :k, + :c, :e, and :i. (user, kernel, cmask, edge-trigger, invert) Thes are boolean + or integer events. They were supported by previous PAPI but they were never + enumerated. 
+ + * 8f3e305e src/components/coretemp/linux-coretemp.c: Fix cut-and-paste error + in linux-coretemp.c that could lead to wrong size being copied + + * 0eedd562 src/libpfm4/lib/events/intel_atom_events.h + src/libpfm4/lib/events/intel_core_events.h + src/libpfm4/lib/events/intel_coreduo_events.h...: Import most recent libpfm4 + git This fixes an issue where there can be confusion between :i and :i=1 + type events. It also has initial support for Uncore, though you need a + specially patched kernel and PAPI does not support it yet. + + * 2f86ec78 src/components/appio/tests/Makefile + src/components/appio/tests/appio_test_blocking.c + .../appio/tests/appio_test_fread_fwrite.c...: - Fixed tests verbosity by + using TESTS_QUIET macro - Fixed Makefile to only include necessary tests for + automatic builds (skip blocking tests that read from stdin) + + * 6936b955 src/components/appio/README src/components/appio/appio.c + src/components/appio/appio.h...: Added polling of read/write descriptor to + check which operations would block. + + * 48cacccf src/papi.h: Add back PAPI_COMPONENT_INDEX() for backward + compatability It turns out some people were using this for cases other than + enumeration. The proper way to do things now is to use + PAPI_get_event_component() which is what this PAPI_COMPONENT_INDEX() now maps + to. + + * d1ed12b7 src/ctests/Makefile src/ctests/get_event_component.c + src/papi.c...: Add PAPI_get_event_component() This function returns the + component an event belongs to. It also adds a test to test this + functionality. + +2012-06-20 + + * ffccf633 src/papi.h: Add component_type field to .cmp_info The idea is + we'll specify CPU, I/O, GPU, hardware, etc. + + * 9998eecc src/components/lmsensors/Rules.lmsensors: Another lmsensors build + fix + + * caa94d64 src/components/lmsensors/linux-lmsensors.c: Update lmsensors + component to actually compile. I finally found a machine with lmsensors + installed. 
+ + * fbcde325 src/components/lmsensors/linux-lmsensors.c + src/components/lmsensors/linux-lmsensors.h: Update lmsensor component Unlike + the other components it hadn't been updated to PAPI 5 standards. Also, it + was wrongly de-allocating all state in "_shutdown" rather than + "_shutdown_substrate" which was causing double-frees during tests. + + * 0d3c0ae2 src/papi_internal.c: Add some extra debugging to + _papi_hwi_get_native_event_info + + * 5961c03d src/aix.c src/components/nvml/linux-nvml.c + src/ctests/subinfo.c...: Remove cntr_groups from .cmp_info This information + is better exposed by enumeration. + + * 2b4193fd src/utils/event_chooser.c: Cleanup and comment event_chooser code + + * 7f9fab2b src/ctests/all_native_events.c: Cleanup and add comments to + all_native_events.c + + * a245b502 src/components/nvml/linux-nvml.c src/ctests/subinfo.c + src/freebsd.c...: Remove profile_ear from .cmp_info The CPU components + should handle this internally. + + * bca07f3c src/papi.c: Add comments to the PAPI_sprofil code. + + * b1e2090c src/papi.c: Minor papi.c cleanups Fix some minor cosmetic things, + including a typo in a comment. + + * 8f3aef4a src/ctests/subinfo.c src/papi.h: Remove opcode_match_width field + from .cmp_info This should be exposed via enumeration and not by a field in + the generic cmp_info structure. + + * 047af629 src/components/nvml/linux-nvml.c src/ctests/subinfo.c + src/papi.h...: Remove cntr_OPCM_events field from .cmp_info This should be + exposed via enumeration and not by a field in the generic cmp_info structure. + + * 3f1f9e10 src/components/nvml/linux-nvml.c src/ctests/subinfo.c + src/papi.h...: Remove cntr_DEAR_events field from .cmp_info This should be + exposed via enumeration and not by a field in the generic cmp_info structure. 
+ + * 962c642a src/components/nvml/linux-nvml.c src/ctests/subinfo.c + src/papi.h...: Remove cntr_IEAR_events field from .cmp_info This should be + exposed via enumeration and not by a field in the generic cmp_info structure. + + * 5aa7eac1 src/components/nvml/linux-nvml.c src/ctests/subinfo.c + src/papi.h...: Remove instr_address_range from .cmp_info This feature should + be detected via enumeration, not via a flag in the generic .cmp_info + structure. + + * 1bf68d5d src/components/nvml/linux-nvml.c src/ctests/subinfo.c + src/papi.h...: Remove data_address_range field from .cmp_info The proper way + to detect this feature is via enumeration. + +2012-06-19 + + * 90037307 src/linux-context.h: Change Linux from using "struct siginfo" to + "siginfo_t" This conforms to POSIX, and fixes newer Fedora where struct + siginfo is no longer supported. This can in theory break on older setups + (possibly kernel 2.4). If that happens, we need to somehow detect this using + autoconf. + +2012-06-18 + + * ad48b4fa src/Rules.perfctr-pfm: Fix the perfctr-pfm build; for buildbot, + mostly. Have the perfctr-pfm build only build libpfm, like the perfevents + builds. The icc build was choking on warnings (-Werror => errors) in the + example programs with libpfm, this is not something we depend upon. + +2012-06-17 + + * 358b14f9 src/papi_events.csv: Update BGQ presets + + * cf26fc87 src/components/bgpm/CNKunit/linux-CNKunit.c + src/components/bgpm/IOunit/linux-IOunit.c + src/components/bgpm/L2unit/linux-L2unit.c...: Update bgpm components + according to the papi5 changes + + * a7b08a91 src/configure src/configure.in src/linux-bgq.c: Merging the BG/Q + stuff from stable_4.2 to PAPI 5 did break it. It's corrected now; also + predefined events are now working.) + +2012-06-15 + + * 2d5a4205 src/components/bgpm/IOunit/linux-IOunit.c + src/components/bgpm/L2unit/linux-L2unit.c src/configure...: Merging the BG/Q + stuff from stable_4.2 to PAPI 5 did break it. 
It's corrected now (almost); + predefined events are not working yet.) + + * 1b034920 src/papi.c: Re-enable PAPI_event_name_to_code() optimization In + PAPI_event_name_to_code() there was a commented-out optimization where we + would check if an event name begins with "PAPI_" before searching the entire + preset list for an event name. The comment says we had to disable this due + to "user events", but a check shows that was introduced in e7bd768850ecf90 + and that the "user events" it means is not the current support, but the + now-removed PAPI_set_event_info() function where you could change the names + of presets on the fly (even to something not starting with PAPI_). Since we + don't support that anymore, we can re-enable the optimization. + +2012-06-14 + + * 9a26b43d src/papi_internal.c src/papi_internal.h src/papi_preset.c: Remove + the 16-component limit This turned out to be easier than I thought it would + be. Now determining which component an event is in is a two step process. + Before, the code shifted and masked to find the component from bits 26-30. + Now, _papi_hwi_component_index() is used. There's a new native event table + which maps all native events (which are allocated incrementally at first use + starting with 0x4000000) to two values, a component number and an "internal" + event number. 
+ +2012-06-13 + + * d5c50353 src/papi_internal.c: Fix for the PAPI_COMPONENT_MASK change I + missed two cases in papi_internal.c This was causing the overflow_allcounters + test to fail + + * 46fd84ce src/components/bgpm/CNKunit/linux-CNKunit.c + src/components/bgpm/CNKunit/linux-CNKunit.h + src/components/bgpm/IOunit/linux-IOunit.c...: Updating the Q substrate + according to the PAPI 5 changes + + * 05a8dcbf src/components/appio/appio.c + src/components/appio/tests/appio_list_events.c + src/components/appio/tests/appio_values_by_code.c...: First steps of removing + 16-component limit This change removes PAPI_COMPONENT_INDEX(), + PAPI_COMPONENT_MASK and PAPI_COMPONENT_AND_MASK. It adds the new functions + _papi_hwi_component_index() _papi_hwi_native_to_eventcode() + _papi_hwi_eventcode_to_native() By replacing all of the former macros with + the equivalent of the latter functions, it allows all of the future + 16-component limit changes to be made in the functions. Components now + receive as events a plain 32-bit value as their internal native event; the + high bits are not set. This may break some external components. This change + should not break things, but a lot of testing is needed. + + * af4cbb86 src/run_tests_exclude.txt: Exclude iozone helper scripts from + run_tests. run_tests.sh looks for executable files under components/*/tests + Some of the plotting scripts in appio/iozone were getting picked up. + +2012-06-12 + + * c10c7ccb src/configure src/configure.in: Configure does not work on BGQ due + to missing subcomp feature. It worked for stable-4.2 but got lost in current + git origin. + + * d9a58148 src/aix.c src/ctests/hwinfo.c src/ctests/overflow.c...: Update + hw_info_t CPU frequency reporting. Previously PAPI reported "float mhz" and + "int clock_mhz". In theory the first was the current CPU speed, and the + latter was the resolution of the cycle counter. 
In practice they were both + set to the same value (on Linux, read from /proc/cpuinfo) and not very useful + when DVFS was enabled, as the value reported was usually lower than the + actual frequency running once CPU started being used. This change adds two + new values "cpu_max_mhz" and "cpu_min_mhz" which are read from + /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq and + /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq if they are available, + and falls back to /proc/cpuinfo otherwise. All of the tests were updated to + use cpu_max_mhz. The old mhz and clock_mhz values are left for compatability + reasons (and set to cpu_max_mhz) but are currently unused otherwise. + +2012-06-11 + + * 0f124891 src/papi_events.csv: Initial PAPI Ivy Bridge Support for now try + to re-use the sandy bridge event presets + + * a1f46077 src/libpfm4/docs/man3/libpfm_intel_ivb.3 + src/libpfm4/include/perfmon/err.h + src/libpfm4/lib/events/intel_ivb_events.h...: Import libpfm4 git snapshot + This adds IvyBridge support + + * 3bb983cc src/libpfm-3.y/examples_v2.x/self_smpl_multi.c: Fix a libpfm3 + example program for icc, local fix because libpfm3 is deprecated. icc does + have more enjoyable warnings than gcc, : error 186: pointless comparison of + unsigned integer with zero on this: unsigned int foo; ... if ( foo < 0 ) + +2012-06-06 + + * d28adccf src/papi_user_events.c: The user events code had a call to exit, + this was bad... + +2012-06-04 + + * 6bf43022 src/testlib/test_utils.c: Further the hack for testing for perf SW + events. Events like + + - | perf::CPU-CLOCK + | | PERF_COUNT_SW_CPU_CLOCK + | + + - were passing the check, now we also check the event_info_struct.long_descr + field for PERF_COUNT_SW.... + + * fa4b1a28 src/components/nvml/linux-nvml.c: Cleanup nvml code a little. A + few print statements were left over from debugging. Also check errors from + nvml and cuda pciinfo functions, disabling the component in a few more cases. 
+ +2012-06-01 + + * da144a94 src/components/nvml/Makefile.nvml.in src/components/nvml/README + src/components/nvml/Rules.nvml...: Rewrite and merge of the nVidia Management + library component. This component attempts to expose all supported + 'performance counters' on each card cuda knows about at runtime. Much like + the cuda component reads happen on the card you're executing on at PAPI_read + time. The test included is a copy of the cuda helloworld test, but it + attempts to start/stop the event on each gpgpu. If you select an event that + is not supported on the card you're running on we should fail gracefully but + this has not been tested. + +2012-05-23 + + * b2d414dc src/components/stealtime/linux-stealtime.c: Add units to stealtime + component Added the function but forgot to add a function vector for it. + + * ce9d4500 src/components/stealtime/linux-stealtime.c: Add units to stealtime + Properly report that the units are in micro seconds. + + * 149948c8 src/components/rapl/linux-rapl.c: Minor cleanup of RAPL code + missing "void" parameter in init_substrate function + + * 6a7e22fa src/components/vmware/vmware.c: More vmware component fixes. This + makes the component thread-safe. Also makes it fail more gracefully if the + guestlib SDK is installed but does not support our hypervisor (for example, + if we are running under VM Workstation). Still need to test on ESX. + + * 072d6473 src/components/appio/tests/appio_test_select.c: added code to + intercept and time select() calls. + +2012-05-22 + + * 12b6d0d7 src/components/vmware/vmware.c: Some more minor fixes to VMware + component Try to handle things better if VMguest SDK not working + + * 6e015bc5 src/components/vmware/Rules.vmware src/components/vmware/vmware.c: + More vmware component fixups Now works with the events from the VMguest SDK + library + + * 5fc0f646 src/components/vmware/vmware.c: More cleanup of vmware component + The pseudo-performance counters work again. 
Now they behave in accumulate + mode, like all other PAPI counters. + + * f72b0967 src/components/vmware/tests/vmware_basic.c: Make vmware test a bit + more complete + + * 070e5481 src/components/vmware/tests/Makefile + src/components/vmware/tests/vmware_basic.c: Add a test for the vmware + component + + * 7cf62498 src/components/vmware/Makefile.vmware.in + src/components/vmware/Rules.vmware src/components/vmware/configure...: Clean + up the vmware component. bring it up to date with other components. make it + possible to build it without the vmguest library being installed + + * b32ae1ae src/components/stealtime/Rules.stealtime + src/components/stealtime/linux-stealtime.c + src/components/stealtime/tests/Makefile...: Add a stealtime component When + running in a VM, this provides information on how much time was "stolen" by + the hypervisor due to the VM being disabled. This info is gathered from + column 8 of /proc/stat This currently only works on KVM. + + * 9e95b480 src/components/appio/tests/appio_test_blocking.c: Use a + non-blocking select to determine which reads and writes would block + +2012-05-19 + + * f60d991f src/components/appio/README src/components/appio/appio.c + src/components/appio/tests/appio_test_read_write.c...: Interception of + close() implemented. This allows us to correctly determine the number of + currently open descriptors. + +2012-05-17 + + * 7cd8b5a3 src/libpfm4/.gitignore src/libpfm4/config.mk + src/libpfm4/lib/Makefile...: Update libpfm4 to current git tree + + * ebffdb7e src/components/rapl/tests/rapl_overflow.c: Skip rapl_overflow test + if RAPL not available + + * 98d21ef3 src/components/example/example.c src/components/rapl/linux-rapl.c: + Fix some component warnings. + + * 0447f373 src/configure src/configure.in src/linux-generic.h: Make build not + stall if PAPI_EVENTS_CSV not set This is some fallout from the FreeBSD + changes. PAPI_EVENTS_CSV could not be set, which would make the event + creation script hang forever. 
Also catch various fallthroughs in the code + where SUBSTR wasn't being set, which is how the above problem can happen. + + * ef484c00 src/linux-timer.h: Fix typo in linux-timer.h + +2012-04-14 + + * 7c3385f4 src/components/bgpm/CNKunit/CVS/Entries + src/components/bgpm/CNKunit/CVS/Repository + src/components/bgpm/CNKunit/CVS/Root...: Removed CVS stuff from Q code. + + * 2cf4aeb2 src/configure src/configure.in src/linux-bgq.c...: Removed + papi_events.csv parsing from Q code. (CVS stuff still needs to be taken care + of.) + +2012-04-12 + + * 153c2bb1 INSTALL.txt: Updated INSTALL notes for Q + +2012-05-17 + + * ff6a43fb src/Makefile.in src/Makefile.inc src/components/README...: Added + missing files for Q merge. Conflicts: src/configure src/configure.in + src/freq.c + +2012-04-12 + + * 0e142630 src/Rules.bgpm src/components/bgpm/CNKunit/CVS/Entries + src/components/bgpm/CNKunit/CVS/Repository...: Added PAPI support for Blue + Gene/Q. + +2012-05-14 + + * ad7e3fa0 src/components/rapl/linux-rapl.c: Properly accumulate RAPL results + Previously it was resetting the counts on read, instead of continuing to + count as per other PAPI events. + + * c79e3018 src/components/rapl/tests/rapl_overflow.c: Fix some warnings in + rapl_overflow test + + * 731afd1a src/components/rapl/tests/Makefile + src/components/rapl/tests/rapl_overflow.c: Add rapl_overflow test This test + sees if we can measure RAPL in conjunction with overflow CPU performance + events. + + * b0e201bb src/components/rapl/utils/Makefile + src/components/rapl/utils/rapl_plot.c: Remove derived "uncore" values from + rapl tool They weren't really measuring uncore, but were just TOTAL - PP0. + It was causing some confusion. + +2012-05-09 + + * 547e9379 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump the version + number to 4.9.0.0 Read 4.9 as pre-5.0 master was at version number 4.2.1, + this was archaic... Sorry for the confusion Tushar, master is the correct + branch for the latest development code. 
+ + * 133e3d67 src/configure src/configure.in: Fix perfctr build In the FreeBSD + changes I removed the CPU determination by reading /proc/cpuinfo as that was + prone to failure and non-portable. This broke perfctr as it was doing a huge + CPU name lookup to determine if it was on an x86 system or not. This change + fixes that. + +2012-05-08 + + * 42b21d67 src/papi_libpfm4_events.c: Fix PAPI event enumeration inside of + VMware VMware disables the availability of architectural events when + virtualized PMU is not available. libpfm4 was checking this when enumerating + events, and we would end up in the situation where ix86arch was marked active + but 0 events were available. We didn't check for this error condition and + thus end up thoroughly confused. + +2012-05-07 + + * fd79a584 src/freebsd.c: Fix event enumeration on FreeBSD It was passing + PAPI_OK in all cases, causing papi_native_avail to try to do things like + report groups even when groups weren't available. + + * 53732c2e src/freebsd.c: Add Virtual Machine detection support to FreeBSD + again, support for this on x86 is OS Neutral + + * 7b4d7c96 src/configure src/configure.in src/freebsd-memory.c...: Add + x86_cacheinfo support to FreeBSD The x86 cache and memory info is + OS-independent, so add support for it to FreeBSD. + + * 91033df6 src/Makefile.in src/Makefile.inc src/configure...: Re-enable + predefined events on FreeBSD + + * 36f6dc1b src/freebsd.c src/freebsd/map.c src/freebsd/map.h: Modify FreeBSD + to use _papi_load_preset_table + + * 45651746 src/freebsd.c: Cleanup the freebsd code a bit. + + * e1554ed8 src/configure: re-run autoconf for updated configure + + * 1deb2f5d src/Makefile.inc: Make sure a proper dependency for + papi_events_table.h exists Our Makefile code that builds a shared library is + way broken; it will fail to rebuild in many cases where the static library + properly detects things. + + * 28e28006 src/configure.in: Make papi_events_table.h build normally, not by + configure.
+ + * 9a66dfa5 src/configure.in: Another place papi_events_table.sh is called + + * 12e4a934 src/Makefile.inc src/papi_events_table.sh: Make + papi_events_table.sh take a command line argument This way we can use it on + any .csv file, not just papi_events.csv + + * 7018528f src/freebsd/memory.c: Remove unused freebsd/memory.c file + + * 819e5826 src/freebsd_events.csv: Make freebsd_events.csv a valid PAPI event + file + + * 9cc4a468 src/freebsd.c src/freebsd/map-atom.c src/freebsd/map-core.c...: + Fix FreeBSD build on head. This temporarily disables preset events. There + are also a few other minor fixes. + +2012-05-01 + + * ab36c0a2 src/Makefile.inc src/configure src/configure.in: Update build + system for FreeBSD + + * 2b61d8b7 src/freebsd.c src/freebsd.h: Fix various compiler warnings on + FreeBSD + + * 2c0bcc84 src/freebsd.c: Enable new Westmere events on FreeBSD + + * b0499663 src/freebsd/map-i7.c src/freebsd/map-i7.h + src/freebsd/map-westmere.c...: Add Westmere event support for FreeBSD + + * e54cabc6 src/ctests/inherit.c: Fix the inherit ctest to compile on FreeBSD + + * d9dbdd31 src/components/appio/appio.c: - change in appio component + (appio.c): removed reference to .ntv_bits_to_info as it doesn't exist in the + PAPI component interface. + +2012-04-27 + + * 5d661b2d src/Rules.pfm src/Rules.pfm_pe: Add the libpfm -Wno-override-init + bandaid to the other rules files. In + b33331b66137668155c02e52c98a7e389fad402e we test if gcc -Wextra complains + about some structure initialization that libpfm does. This was incorporated + into Rules.pfm4_pe only. Jim Galarowicz noticed the other Rules files didn't + have it. + + * 4349b6fd src/Rules.pfm4_pe src/Rules.pfm_pe: Cleanup the perf events Rules + files. Steve Kaufmann reported that CONFIG_PFMLIB_OLD_PFMV2 is only used for + libpfm3 builds targeting old versions of perfmon2.
+ +2012-04-26 + + * 8a7fef68 src/mb.h: Add memory barriers for ia64 + +2012-04-24 + + * 9af4dd4a src/libpfm4/README src/libpfm4/config.mk + src/libpfm4/include/perfmon/perf_event.h...: Import libpfm4 git snapshot + This brings libpfm4 up to 9ffc45e048661a29c2a8ba4bfede78d3feb828f4 The + important change is support for Intel Atom Cedarview. + +2012-04-20 + + * fac6aec0 src/linux-bgp-memory.c src/linux-bgp.c: Some BG/P cleanups. + Removed a lot of dead code, noticed when looking for any potential BG/P + issues. + + * 977709f6 src/linux-bgp-preset-events.c src/linux-bgp.c: Fix PAPI compile on + BG/P Thanks to Harald Servat + +2012-04-19 + + * 5207799e release_procedure.txt: Modified release_procedure.txt to push + tags. + +2012-04-18 + + * b248ae80 doc/Makefile: Have clean remove the doxygen error file. + + * 1d4f75a3 doc/Doxyfile-man1 doc/Doxyfile-man3: Fix an error in the Doxygen + config files. Doxygen includes things with @INCLUDE not @include. The html + file had this, the man page files did not... + +2012-04-17 + + * 979cda20 cvs2cl.pl delete_before_release.sh gitlog2changelog.py...: Update + the release machinery for git. gitlog2changelog.py takes the output of git + log and parses it to something like a changelog. + + * 67bdd45f doc/Doxyfile-html: Cover up an instance of doxygen using full + paths. Doxygen ( up to 1.8.0, the most recent at this writing ) would use + full paths in directory dependencies ignoring the use relative paths config + option. + +2012-04-13 + + * c38eb0b7 src/libpfm-3.y/lib/intel_corei7_events.h + src/libpfm-3.y/lib/intel_wsm_events.h src/libpfm-3.y/lib/pfmlib_intel_nhm.c: + Add missing update to libpfm3 Somehow during all of the troubles we had with + importing libpfm3 into CVS, we lost some Nehalem/Westmere updates. Tested on + a Nehalem machine to make sure this doesn't break anything.
+ + * 193d8d06 src/papi_libpfm3_events.c: Fix max_multiplex case on + perf_event/libpfm3 num_mpx_cntrs was being set to 512 even though the real + maximum is 32, causing a buffer overflow and segfault. + +2012-04-12 + + * f1f7fb5b src/threads.h: Fix minor typo in a comment + + * 0373957d src/linux-timer.c: Fix potential fd leak Noticed by coverity + checker. + + * 71727e38 src/ctests/max_multiplex.c: Improve max_multiplex ctest on + perfmon2, this test was failing because the maximum number of multiplexed + counters was much more than the available counters we could test with. This + change modifies the test to not fail in this case. + +2012-04-11 + + * fdbdac9f src/perfmon.c: Fix the perfmon substrate. It was missing a + _papi_libpfm_init() call, which meant the number of events was being left at + 0. + +2012-04-09 + + * 2a44df97 src/libpfm-3.y/examples_v2.x/multiplex.c + src/libpfm-3.y/examples_v2.x/pfmsetup.c + src/libpfm-3.y/examples_v2.x/rtop.c...: Catch a few libpfm-3.y files up to + libpfm-3.10. More skeletons keep falling out of the cvs closet. This is just + what diff -q -r catches. + +2012-04-04 + + * 0e05da68 src/components/rapl/utils/Makefile + src/components/rapl/utils/README src/components/rapl/utils/rapl_plot.c: Add + the rapl_plot utility to the RAPL component. This utility uses PAPI to + periodically poll the RAPL counters and generate average power results suitable + for plotting. There's been a lot of interest in this utility so it's + probably useful to include it with the RAPL component. + + * 2daa03ac src/papi_internal.c: Check if a component is disabled at init + time. This change modifies the code so that at PAPI_library_init() time we + check the component disable field, and we don't call the init routines for + components the user has disabled.
This allows code like the following to + happen _before_ PAPI_library_init(): numcmp = PAPI_num_components(); + for(cid=0; cid<numcmp; cid++) { cmpinfo = PAPI_get_component_info(cid); if (!strcmp(cmpinfo->name,"cuda")) { cmpinfo->disabled=1; + strncpy(cmpinfo->disabled_reason,"Disabled by user",PAPI_MAX_STR_LEN); } } + We might want to add a specific PAPI_disable_component(int cid) call or maybe + even a PAPI_disable_component(char *name) as the above code causes compiler + warnings since cmpinfo is returned as a const pointer. This all works + because PAPI currently statically allocates all of the components + at compile time, so we can view and modify the cmp_info structure before + PAPI_library_init() is called. + + * 3fd2b21e src/components/appio/README src/components/appio/appio.c + src/components/appio/appio.h...: Added support to count reads that are + interrupted or would block. + +2012-04-03 + + * dd3a192f release_procedure.txt: Change chmod flags for doxygen stuff from + 755 to 775 to allow group write permissions. + +2012-03-30 + + * deac54cc src/components/coretemp/linux-coretemp.c + src/components/coretemp/tests/coretemp_basic.c + src/components/coretemp/tests/coretemp_pretty.c...: Add new + PAPI_enum_cmp_event() function This will be needed when we remove the + 16-component limit. Currently in PAPI_enum_event() the component number is + gathered from bits 29-26 of the eventcode. This won't work anymore once we + remove those bits. Also update the various components to not use + PAPI_COMPONENT_MASK() as this too will go away in the transition. + + * 48331cc9 src/configure src/configure.in src/papi.c...: Place all + compiled-in components in the _papi_hwd[] array. Previously we had separate + compiled_in[] and _papi_hwd[] arrays. At init time a pointer to the + compiled_in[] was copied to _papi_hwd[] if initialization passed. This kind + of code setup makes enumerating components hard, and finding info from + non-available components would require additional function entry points.
+ This change leaves all compiled in components to _papi_hwd[]. Availability of + the component can be checked with the new "disabled" field. This will make + enumeration support a lot easier to add. It can possibly cause user confusion + if they try to access component structures directly without checking the + "disabled" field first. This change should also make any eventual support + for run-time component enabling/disabling a lot easier. + + * 66a72f44 src/papi.c: Documentation was referring to nonexistent + "PAPI_enum_events()" The actual function we have is PAPI_enum_event() + + * 0f2c2593 src/components/coretemp/linux-coretemp.c + src/components/lustre/linux-lustre.c src/components/mx/linux-mx.c...: Add + support for reporting reason for failed component initialization. This + change adds the fields "disabled" and "disabled_reason" to the + component_info_t structure. At initialization time, PAPI will set the + "disabled" field to the value returned by component init (that is PAPI_OK if + OK or an error otherwise). This can be checked later to find why component + init failed. Also provided is the "disabled_reason" string. The components + can set this at failure time, and this can be printed later. 
For example, + this is sample output of the updated papi_component_avail routine: + + - Compiled-in components: Name: perf_events.c Linux perf_event + CPU counters Name: linux-rapl Linux SandyBridge RAPL energy + measurements \-> Disabled: Not a SandyBridge processor Name: example.c + A simple example component Name: linux-coretemp Linux + hwmon temperature and other info \-> Disabled: No coretemp events found Name: + linux-net.c Linux network driver statistics Name: linux-mx.c + Myricom MX (Myrinet Express) statistics \-> Disabled: No MX + utilities found Name: linux-lustre.c Lustre filesystem statistics + \-> Disabled: No lustre filesystems found Active components: Name: + perf_events.c Linux perf_event CPU counters Name: example.c + A simple example component Name: linux-net.c Linux + network driver statistics + +2012-03-29 + + * d84b144e src/components/rapl/Rules.rapl src/components/rapl/linux-rapl.c + src/components/rapl/tests/Makefile...: Add a SandyBridge RAPL (Running + Average Power Level) Component This component allows energy measurement at + the package-level on Sandybridge machines. To run, you need the Linux + x86-msr kernel module installed and read permissions to /dev/cpu/*/msr The + output from the rapl_busy test looks like this on a SandyBridge-EP machine: + Trying all RAPL events Found rapl component at cid 2 Starting + measurements... Doing a naive 1024x1024 MMM... Matrix multiply sum: + s=1016404871450364.375000 Stopping measurements, took 3.979s, gathering + results... 
Energy measurements: PACKAGE_ENERGY:PACKAGE0 175.786011J (Average + Power 44.2W) PACKAGE_ENERGY:PACKAGE1 73.451096J (Average Power 18.5W) + DRAM_ENERGY:PACKAGE0 11.663467J (Average Power 2.9W) DRAM_ENERGY:PACKAGE1 + 8.055389J (Average Power 2.0W) PP0_ENERGY:PACKAGE0 119.215500J (Average + Power 30.0W) PP0_ENERGY:PACKAGE1 16.315216J (Average Power 4.1W) Fixed + values: THERMAL_SPEC:PACKAGE0 135.000W THERMAL_SPEC:PACKAGE1 135.000W + MINIMUM_POWER:PACKAGE0 51.000W MINIMUM_POWER:PACKAGE1 51.000W + MAXIMUM_POWER:PACKAGE0 215.000W MAXIMUM_POWER:PACKAGE1 215.000W + MAXIMUM_TIME_WINDOW:PACKAGE0 0.046s MAXIMUM_TIME_WINDOW:PACKAGE1 0.046s + rapl_basic.c PASSED + +2012-03-26 + + * b44d60ca src/components/appio/appio.c src/components/appio/appio.h + src/components/appio/tests/appio_test_read_write.c: Added support for + intercepting open calls. + +2012-03-23 + + * 9e9fac4b src/Makefile.in src/Rules.pfm4_pe src/configure...: Fix the test + case in configure at 0cea1848 Make use of the structure we're using for the + override-init test case. + + * 0cea1848 src/configure src/configure.in: Doctor CFLAGS when testing for a + gcc warning. -Wextra was not in CFLAGS when I attempted to check for the + initialized field overwritten warning. So we set -Wall -Wextra -Werror when + running the test code. + +2012-03-22 + + * b33331b6 src/Makefile.in src/Rules.pfm4_pe src/configure...: Fix + initialized field overwritten warning when building libpfm4 on some gcc + versions. In gcc 4.2 or so, -Woverride-init was added to -Wextra causing + issues with code like struct foo { int a; int b;}; struct foo bar = { .a=0, + .b=0, .b=5; }; --Wno-override-init allows us to keep -Werror for libpfm4 + compiles. + +2012-03-21 + + * ae149766 src/papi_internal.h: Delete an old comment. Yes, Dan in 2003, we + should and do use MAX_COUNTER_TERMS as the size of the event position array. 
+ +2012-03-20 + + * b937cdd8 src/papi_user_events.c: Move the user events code over to using + the new preset event data structure. + +2012-03-14 + + * 6ca599e2 src/papi_internal.c: Fix a small memory leak. We weren't freeing + _papi_hwd, causing a lot of MEM_LEAK warnings in buildbot. + +2012-03-13 + + * 473b8203 src/aix.h src/configure src/configure.in...: Remove last MY_VECTOR + usage. Have configure explicitly set the name of the perf counter substrate + vector in the components_config.h file This removes one more special case, + and gets us slightly closer to being able to have multiple CPU substrates + compiled in at once. + + * 360c3003 src/papi.c src/papi_libpfm3_events.c src/papi_libpfm_events.h...: + Clean up the papi_libpfm3_events.c code. Move code that was perfctr specific + into perfctr-x86.c + + * 03de65e3 src/libpfm-3.y/examples_v2.x/multiplex.c + src/libpfm-3.y/examples_v2.x/pfmsetup.c + src/libpfm-3.y/examples_v2.x/rtop.c...: Fix some libpfm3 warnings. libpfm3 + is not maintained anymore, so applied these changes locally. libpfm3 is + compiled with -Werror so they broke the build with newer gcc even though they + are just warnings in example programs. + + * ad490353 src/ctests/zero_named.c src/utils/multiplex_cost.c: Fix a few + compiler warnings in the tests. + + * a0fec783 src/linux-timer.c: Fix another linux-timer.c compile problem. I + hadn't tested with debug enabled, so all of buildbot failed last night. + +2012-03-12 + + * a3733ecd src/linux-timer.h: Fix typo in the linux-timer.h header + _linux_get_virt_usec_timess should have been _linux_get_virt_usec_times + Thanks to Steve Kaufmann for noticing this. + + * 785db5ae src/linux-common.c src/linux-timer.c: Fix timer compile on Power + machines Power, ARM, and MIPS have no get_cycles() call so provide a dummy + function on these architectures. 
+ + * 708090ee src/linux-common.c src/linux-timer.h: Another fix for non-POSIX + timers The recent changes had the name of the fallback usec method wrong. + + * 88e8d355 src/papi_libpfm3_events.c: Fix a warning in the libpfm3 code. + + * 8ca63705 src/configure src/configure.in src/linux-common.c...: Fix build + when not using POSIX timers The PAPI build system was being overly clever + with how it defined what kind of wall clock timers were to be used, so of + course I broke things when breaking the timer code out to make it a bit more + understandable. This patch breaks out the timer define into two pieces; one + saying it's a POSIX timer and one saying whether to use HR timers or not. + +2012-03-09 + + * b69ad727 src/linux-common.c src/linux-timer.c src/linux-timer.h: Add Linux + posix gettime() nanosecond functions + + * af2c9a49 src/papi.c src/papi_vector.c src/papi_vector.h: Add + ->get_virt_nsec() and ->get_real_nsec() OS vectors Currently PAPI was just + cheating and running the usec functions and multiplying by 1000. Make this + the default, but allow the OS code to override if they have timers capable of + returning nsec precision. + + * 24c68dbe src/aix.c src/freebsd.c src/linux-bgp.c...: Clean up + ->get_virt_usec() It no longer needs to be passed a context, so remove that + from all callers. Also, ->get_virt_cycles() was just a get_virt_usec()*MHz + on most platforms. While this is a bit dubious (especially as MHz can't be + relied on) make this a common routine that will be added at innoculate time + if ->get_virt_cycles() is set to NULL. + + * a3ef7cef src/linux-common.c src/linux-timer.c src/linux-timer.h: Cleanup + the Linux timer code. Split things up a bit to make the code more readable. + + * 50ce8ea0 src/papi_internal.c: Change a strcpy() to strncpy() just to be a + bit safer.
+ + * 0526b125 src/components/lmsensors/linux-lmsensors.c: Fix buffer overrun in + lmsensors component + + * b088db70 src/libpfm4/config.mk + src/libpfm4/docs/man3/pfm_get_os_event_encoding.3 + src/libpfm4/examples/showevtinfo.c...: Update to current git libpfm4 snapshot + + * ccb45f61 src/aix.c src/extras.c: Fix segfault on AIX During some of the + cleanups, the extras.h header was not added to aix.c This made some of the + functions (silently) use default data types for the function parameters, + leading to segfaults in some of the tests. + +2012-03-08 + + * 1cb22d0b src/components/coretemp/linux-coretemp.c src/utils/native_avail.c: + Make "native_avail -d" report units if available Add units support to the + coretemp component, have native_avail -d (detailed mode) print it to make + sure it works. + + * 9c54840e src/extras.c src/extras.h src/papi_internal.c...: Add new + ntv_code_to_info vector This will allow components to return the extended + event_info data for native events. If a component doesn't implement + ntv_code_to_info then get_event_info falls back to the old way of just + reporting symbol name and long description. + + * c4579559 src/papi.h: Add new event_info fields New fields are added to + event_info that allow passing on extended information. This includes things + such as measurement units, data type, location, timescope, etc. + + * 17533e4e src/ctests/all_events.c src/ctests/derived.c + src/ctests/kufrin.c...: Restore fields to event_info structure The changes + made were probably too ambitious, even for a 5.0 release. In the end it + looks like we can remain API compatible while just using up a little more + memory. We can still save space by shrinking preset_t behind the scenes. 
+ + * 6f13a5f6 src/aix.c src/components/coretemp/linux-coretemp.c + src/components/coretemp_freebsd/coretemp_freebsd.c...: Remove + ->ntv_bits_to_info vector from component interface We weren't using it + anymore, and many of the components were just setting it to NULL + unnecessarily. We'll be replacing the functionality soon with + ntv_code_to_info + + * 401f37bc src/components/example/example.c src/ctests/subinfo.c src/papi.h: + Remove invert and edge_detect fields from component info These fields were + there to indicate if a CPU component supported these attributes (for Intel + processors) but in the end we never used these. The proper way to export + this info is during event enumeration. + + * f32fe481 src/papi_events.csv: We had the PAPI_VEC_INS preset wrong on amd + fam12h llano + + * 38a8d8a7 src/ctests/multiplex2.c src/papi_preset.c: Fix preset adding code + to be more robust. If an invalid event is in a preset definition, we'd + currently add it with an eventcode of 0 to the preset, which would break if + you tried to use the event. This change properly prints a warning in this + case, and sets the preset to be unavailable. + + * 2591a546 src/ctests/val_omp.c src/ctests/zero_omp.c: Remove the hw_info + field from add_two_events calls. Two ctests missed the bus when Vince + reworked the add_two_events call. + + * 358a2e32 src/papi_internal.c src/papi_preset.c: Fix segfault seen on an AMD + fusion machine With the recent preset and component changes, we were not + properly resetting papi_num_components if PAPI_library_init()/PAPI_shutdown() + was called multiple times. + +2012-03-07 + + * 7751f5d8 src/ftests/zeronamed.F: Fix a compile error on aix. Dan ran over + 72 characters on a single line. xlf actually enforced that part of the Fortran + spec.
+ +2012-03-06 + + * 1c87d89c src/ftests/Makefile src/ftests/zeronamed.F src/papi_fwrappers.c: + Add support for {add, remove, query}_named to Fortran interface; add zero + named.F test case; modify ftests Makefile to support "all" tag. + + * 71bd4fdd src/configure src/configure.in: Modify configure to define the + default FTEST_TARGETS as "all" + + * 54e39855 src/components/vmware/vmware.c: Changed triggering environment + variable to PAPI_VMWARE_PSEUDOPERFORMANCE per Vince's earlier email. This + should complete all the VMware component changes. + +2012-03-05 + + * 845503fb src/Makefile.inc: Add missing MISCSRCS line to Makefile.inc This + was breaking the shared library build + +2012-02-01 + + * 11be8e4b .../appio/tests/appio_test_fread_fwrite.c + src/components/appio/tests/appio_test_pthreads.c + src/components/appio/tests/appio_test_read_write.c: updated these tests to + print timing information + + * 9ad62ab1 src/components/appio/README src/components/appio/appio.c + src/components/appio/appio.h...: Added support for timing I/O calls. Updated + tests and README. + +2012-01-31 + + * beaa5ff0 src/components/appio/tests/iozone/Changes.txt + src/components/appio/tests/iozone/Generate_Graphs + src/components/appio/tests/iozone/Gnuplot.txt...: added the latest stable + iozone to the appio tests. + + * 4af58174 src/components/appio/README src/components/appio/tests/Makefile + src/components/appio/tests/init_fini.c: added a hook to run the appio test + for iozone. + +2012-01-21 + + * 15c733cf src/components/appio/CHANGES src/components/appio/README + src/components/appio/appio.c...: Removed stray 'net' references. All + remaining references are only for the purpose of giving credit. Updated + change log.
+ +2012-01-20 + + * ca4b6785 src/components/appio/README src/components/appio/appio.c + src/components/appio/tests/appio_list_events.c...: - general cleanup - + improved tests to be quiet and be conform to other PAPI tests - replaced + hardwire constants in appio.c with symbolic ones - tests will now write to + /dev/null to avoid filling the terminal screen with useless text - more + comments added - @author added to files - updated README + +2012-01-18 + + * bb22ed9f src/components/appio/README src/components/appio/Rules.appio + src/components/appio/appio.c...: - Added support to measure + bytes/calls/eof/short calls for read/write calls. - Interception of + read/write and fread/fwrite calls. - Works for static and dynamic linkage + (without need for LD_PRELOAD) - Tested OK on 32-bit i686 Linux 2.6.38. + Tushar + +2011-12-03 + + * d58b34b6 src/components/appio/tests/Makefile + src/components/appio/tests/appio_list_events.c + src/components/appio/tests/appio_values_by_code.c...: *** empty log message + *** + + * cd7d7acc src/components/appio/tests/appio_values_by_name.c: file + appio_values_by_name.c was added on branch appio on 2011-12-03 05:22:06 +0000 + + * 425e4d09 src/components/appio/tests/appio_values_by_code.c: file + appio_values_by_code.c was added on branch appio on 2011-12-03 05:22:06 +0000 + + * 596ad9bb src/components/appio/tests/appio_list_events.c: file + appio_list_events.c was added on branch appio on 2011-12-03 05:22:06 +0000 + + * 119543dc src/components/appio/tests/Makefile: file Makefile was added on + branch appio on 2011-12-03 05:22:06 +0000 + +2012-03-05 + + * ba748a41 src/components/vmware/configure: Remove old configuration + parameters from vmware/configure + +2012-03-02 + + * 2b7e2abb src/ctests/Makefile src/ctests/max_multiplex.c: Add a new + max_multiplex test This tries to use the maximum number of multiplexed + events. This was written in response to the 32/64 perf_event multiplexed + event limit reported by Mohammad j. 
Ranji + + * a0985ff5 src/multiplex.c src/papi_internal.c src/papi_libpfm4_events.c...: + Fix issue when using more than 32 multiplexed events on perf_event On + perf_event we were setting num_mpx_cntrs to 64. This broke, as the + MPX_EventSet struct only allocates room for PAPI_MPX_DEF_DEG events, which is + 32. This patch makes perf_event use a value of 32 for num_mpx_cntrs, + especially as 64 was arbitrarily chosen at some point (the actual value + perf_event can support is static, but I'm pretty sure it is higher than 64). + + * 331c516c src/ctests/acpi.c: Remove the acpi.c file from ctests It wasn't + being built, and we removed the ACPI component a while ago. + + * 73e7d191 src/components/vmware/vmware.c: Removed all old references to + #define VMWARE_PSEUDO_PPERF and switched over to getenv + +2012-03-01 + + * 969b8aa9 src/ctests/Makefile src/ctests/zero_named.c src/papi.c: Three new + APIs: PAPI_query_named_event PAPI_add_named_event PAPI_remove_named_event and + a new test: zero_named Still to do: maybe test named native events and + support Fortran + + * 97bf9bf8 src/papi.c src/papi.h: First pass implementation of {add, remove, + query}_named_event + + * 2416af88 src/components/vmware/vmware.c: Add functionality to getenv + selectors + + * 297f9cd6 src/papi.c: Fix possible race in _papi_hwi_gather_all_thrspec_data + The valgrind helgrind tool noticed this with the thrspecific test + + * be599976 src/papi_internal.c: Add some locking in + _papi_hwi_shutdown_global_internal This caused a glibc double-free warning, + and was caught by the Valgrind helgrind tool in krentel_pthreads There are + some other potential locking issues in PAPI_shutdown, especially when debug + is enabled. + + * 8444d577 src/utils/clockres.c src/utils/command_line.c: Cleanup the doxygen + markup for the utilities. + + * 7144394f doc/Doxyfile-html: Missed a recursive tag for the html config + file.
+ + * 63b2efc4 src/papi_preset.c: Fix segfaults in tests on AMD machines The + papi_preset code was wrongly calling papi_free() on some code that was + allocated with strdup() (not with papi_malloc). We were only noticing this + on AMD machines because it was the code for freeing developer notes in + presets, and currently only AMD events have developer notes. + + * 0b1350df src/linux-common.c: Touch 'virtual_vendor_name' to cleanup a + warning on bluegrass. + +2012-02-29 + + * 1f17b571 src/Makefile.inc src/Rules.perfctr-pfm src/Rules.pfm4_pe...: Merge + the contents of papi_libpfm_presets.c into papi_preset.c The code isn't + libpfm specific at all anymore, it's the generic "read presets from a file" + code. It makes more sense to find it in papi_presets.c + + * d087d49f src/papi_fwrappers.c: Fix Fortran breakage after the preset event + changes + + * 156141ec src/papi_libpfm_presets.c src/papi_preset.c src/papi_preset.h: + Simplify papi_libpfm_presets.c Previously adding presets from + papi_events.csv was a three step process. 1. Load the presets from the + file, put in temporary structure. 2. Convert this temporary structure to a + "findem" dense structure 3. Pass this dense structure to + _papi_hwi_setup_all_presets for final assignment. This change creates the + final assignment directly without the intermediate two steps. + + * 8bc2bafd src/papi.c src/papi.h src/papi_common_strings.h...: Make the + internal preset_info match the one exported by papi.h There were a lot of + cases where the same structure fields were available, just with different + names. That was confusing. Also, this allows using a pointer to the preset + info instead of having to copy values out of the structure when gathering + event info for presets. + + * 8fda68cb src/genpapifdef.c src/papi.c src/papi_common_strings.h...: Merge + the 4 separate preset structs into one. 
_papi_hwi_presets was a structure + containing pointers to 4 other arrays of structures which held the actual + preset data. This change merges all of these into one big structure. + +2012-02-28 + + * e69815d7 src/linux-bgp.c src/papi_internal.c src/papi_internal.h...: + Removing remaining vestiges of references to bipartite routines. Now the only + references are in papi_bipartite.h, perfctr-x86.c and winpmc-p3.c. + + * 5766b641 src/papi_bipartite.h src/perfctr-x86.c + src/win2k/substrate/winpmc-p3.c: These changes implement the bipartite + allocation routine as a header file to be included in whatever cpu component + needs it. Right now, that's just perfctr-x86 and windows. Both components + have been modified and perfctr-x86 compiles cleanly. Neither has been tested + since I don't have access to a test bed. + + * 7f444b76 src/papi_libpfm_presets.c src/papi_preset.c src/papi_preset.h: + Merge the hwi_dev_notes structure into hwi_preset_data + + * 21a1d197 src/components/vmware/vmware.c: add getenv + + * 08c1b474 src/perfctr-x86.c: Merge bipartite routine into perfctr-x86 + component, since this is effectively the only place it is used. + + * 9ed9b1f5 src/papi.c: Remove a reference to PAPI_set_event_info() which was + removed for PAPI 4 + + * c626f064 src/ctests/all_events.c src/ctests/derived.c + src/ctests/kufrin.c...: Convert PAPI_event_info_t to separate preset event + info This moves the preset event info to its own separate structure, which + reduces greatly the large string overhead that is not used by the native + events. + + * 787d6822 src/perfctr-x86.c: Move bipartite stuff to perfctr_x86 since + that's really the only place it's currently used. + + * 229c8b41 src/components/vmware/vmware.h: Add env_var definition to vmware.h + + * 46aaf6ca src/components/vmware/vmware.c: Remove all unneeded cases + + * 874a5718 src/freebsd.c src/perfctr-ppc64.c: Remove more unused references + to .bpt_ routines in preparation for refactoring.
+ + * 74e5a5fd src/components/vmware/vmware.h: Remove uneeded defines from + vmware.h header + + * 58b51367 src/components/coretemp_freebsd/coretemp_freebsd.c + src/components/vmware/vmware.c src/solaris-niagara2.c...: Remove unused + references to .bpt_ routines in preparation for refactoring. + +2012-02-27 + + * 6b184158 src/Makefile.inc src/components/coretemp/linux-coretemp.c + src/configure...: Have separate concept of "compiled-in" versus "active" + components With this change, the _papi_hwd[] component info array only + contains a null-terminated list of _active_ components. The + _papi_compiled_components[] array has the original full list. At + init_substrate[] time a pointer to a component is only put in the _papi_hwd[] + list if it is successfully initialized. In addition the + PAPI_num_compiled_components() and PAPI_get_compiled_component_info() calls + have been added, but this is probably a confusing interface so they might + only be temporary additions. + + * 042bfd5b src/Makefile.inc src/papi.c src/papi_data.c...: Split the contents + of papi_data.c to various other files. The data declarations in papi_data.c + were mostly used in other files. Move these into more relevant locations. + + * 1877862c src/papiStdEventDefs.h src/papi_common_strings.h: Remove the BGL + and BGP specific pre-defined events. They can be better replaced by + user-events, and we also had already removed BGL support completely a while + back. This removes some ifdefs from the pre-defined event list and keeps + future pre-defined events from having different eventcodes on different + platforms. + + * c3986b79 src/components/coretemp/linux-coretemp.c + src/components/cuda/linux-cuda.c + src/components/infiniband/linux-infiniband.c...: Add names and descriptions + for components. Also fixes cuda and lmsensors build issues introduced by + vector.h cleanup + + * 2c84f920 src/aix.c src/freebsd.c src/perf_events.c...: Add names and + descriptions to all of the CPU substrates. 
+ + * 9f3e634a src/components/example/example.c src/papi.h src/utils/component.c: + Add new "description" and "short_name" fields to .cmp_info structure This + description field allows components to provide extra information on what they + do. The short_name field will eventually be used to pre-pend event names. + The papi_component_avail utility has been updated to print the description. + The example component was updated to fill in these values. + + * ab61c9a7 src/Makefile.inc src/genpapifdef.c src/papi_common_strings.h...: + Split papi_data.c into two parts papi_data.c was half data structure + definitions for all of PAPI and half string definitions used by both PAPI + *and* genpapifdef This splits the common string definitions to + papi_common_strings.h so that genpapifdef can still be built w/o linking + libpapi.a while making the code a lot easier to follow. + + * b8e6294c src/solaris-ultra.c: Remove unnecessary extern declarations from + solaris-ultra.c. + + * 5ddaff91 src/sys_perf_event_open.c: Remove unnecessary extern declarations + from sys_perf_event_open.c + + * a6c463b7 doc/Doxyfile-common.config: Create a common config file for + doxygen. As part of streamlining the doxygen process, this is a new template + doxygen config file. 
This is a blank template file generated by doxygen + 1.7.4 (the version currently mandated by the release procedure ) + + * dc2c11fa src/aix.c src/aix.h src/perfmon.c...: The vector pre-definition + should be in the .c file, not the .h file + + * 0b3c83c3 src/perf_events.c: Remove unnecessary extern declarations in + perf_events.c + + * b93efca0 src/perfmon.c src/perfmon.h: Remove unnecessary extern + declarations in perfmon.c + + * 7f7a2359 src/papi_preset.c: Remove unnecessary extern declarations from + papi_preset.c + + * ecec03ad src/papi_libpfm_presets.c: Remove extraneous extern declarations + from papi_libpfm_presets.c + + * 7b5f3991 src/extras.c: remove extraneous extern declarations from extras.c + + * f6470e4d src/aix-memory.c src/aix.c src/aix.h: Remove unnecessary extern + declarations from aix.c + + * f197d4ab src/papi_data.h src/papi_internal.c: Remove unnecessary extern + declarations in papi_internal.c + + * e7b39d48 src/papi.c src/papi_data.c src/papi_data.h...: remove unnecessary + extern definitions from papi.c + +2012-02-24 + + * 92689f62 src/configure src/configure.in src/linux-common.c...: Add a + --with-pthread-mutexes option to enable using pthread mutexes rather than + PAPI custom locks This is useful when running on new architectures that + don't have a custom PAPI lock coded yet, and also for running valgrind + deadlock detection utilities that only work with pthread based locking. + + * ca51ae67 src/papi_events.csv: Fix broken Pentium 4 Prescott support We + were missing the netburst_p declaration in papi_events.csv + + * f6460736 src/linux-common.c: Fix build on POWER, broken by the + virtualization change. + + * 91d32585 src/perfctr-x86.c src/perfmon.c: Fix some warnings that have + appeared due to recent changes. + + * ae0cf00f src/linux-common.c src/papi_libpfm3_events.c + src/papi_libpfm4_events.c...: Clean up the Linux lock files The locking + primitives for some reason were spread among the libpfm code and the + substrate codes. 
This change moves them into linux-common and has them part + of the OS code. This way they will get properly initialized even if the perf + counter or libpfm code isn't being used. + +2012-02-23 + + * 88847e52 src/papi.c src/papi_memory.h: Remove _papi_cleanup_all_memory + define from papi_memory.h The code in papi_memory.h said: /* define an + alternate entry point for compatibility with papi.c for 3.1.x*/ /* this line + should be deleted for the papi 4.0 head */ Since we are post papi-4.0 I + thought it was time to act on this. Of course papi.c was still using the old + name in one place. + + * 1d29dfc6 src/papi_libpfm_presets.c src/perfctr.c src/perfmon.c: Fix some + missing includes found after the header cleanup. + + * b425a9f4 src/Makefile.inc src/extras.c src/extras.h...: Header file cleanup + The papi_protos.h file contained a lot of no-longer in use exports. I split + up the ones that are still relevant to header files corresponding to the C + file that the functions are defined in. + + * 07199b41 src/extras.c src/papi_vector.c src/papi_vector.h: Clean up the + papi_vector code. Remove things no longer being used, mark static functions + as static. + + * d7496311 src/linux-common.c src/x86_cpuid_info.c src/x86_cpuid_info.h: Fix + a missing "return 1" which meant that the virtualization flag wasn't being + set right. With this fix, on saturn-vm1 we now get: Running in a VM + : yes VM Vendor: : VMwareVMware in the papi_native_avail + header + + * 8da36222 src/freebsd.c src/linux-bgp.c src/papi.c...: Remove the + ->add_prog_event function vector As far as I can tell this is a PAPI 2.0 + remnant that was never properly removed. This also removes + PAPI_add_pevent(), PAPI_save(), and PAPI_restore(), none of which were + exported in papi.h so in theory no one could have been using them. Also + removes _papi_hwi_add_pevent() + + * a5f3c8b5 src/aix.c src/freebsd.c src/linux-timer.c...: Reduce the usage of + MY_VECTOR whenever possible. 
This is an attempt to make the cpu-counter + components to be as similar as possible to external components. + + * abbcbf29 src/any-null.h: Missed removing any-null.h during the any-null + removal. + + * 665d4c5c src/linux-common.c: Somehow missed an include during the + virtualization addition. + + * 0c06147b src/perfctr-2.6.x/usr.lib/event_set_centaur.os + src/perfctr-2.6.x/usr.lib/event_set_p5.os + src/perfctr-2.6.x/usr.lib/event_set_p6.os: Removes the last of the binary + files from perfctr2.6.x Some binary files were left out in the cold after a + mishap trying to configure perfctr for the build test. + + * 3acb7d57 src/Makefile.inc src/configure src/configure.in...: Add support + for reporting if we are running in a virtualized environment to the + PAPI_hw_info_t structure. This currently only works on x86. it works by + looking at bit 31 of ecx after a cpuid (the "in a VM" bit) and then using + leaf 0x40000000 to get the name of the VM software (this works for VMware and + Xen at least) x86_cache_info.c was renamed to x86_cpuid_info.c to better + reflect what goes on in that file (it does various things based on the cpuid + instruction). the testlib header was updated to report virtualization status + in the papi header (printed for things like papi_native_avail). + +2012-02-22 + + * 9c7659b5 src/Makefile.inc src/freq.c: Remove the freq.c file as nothing + seemed to be using it. + + * d205e2d3 src/perfctr-x86.c: Made a stupid typo when converting perfctr to + call libpfm functions with the component id. + + * 25b41779 src/papi_libpfm3_events.c src/papi_libpfm4_events.c + src/papi_libpfm_events.h...: When updating the preset code to take a + component index I missed a few callers. + + * a713ffb1 src/papi_internal.c src/papi_vector.c: Remove any-null component + + * 27e1c2c5 src/any-null-memory.c src/any-null.c src/any-proc-null.c...: + Remove the any-null component. 
+ + * 25779ae0 PAPI_FAQ.html: Saving another version of the FAQ after adding a + git section, and removing several obsolete sections. These questions still + need detailed review for relevance and timeliness. + + * 449a1a61 src/ctests/overflow_allcounters.c: Fix overflow_allcounters which + was making assumptions about component 0 existing. + + * f21be742 src/ctests/hwinfo.c: Make the hwinfo test not bail out if no + counters are available. + + * ebc675e6 src/ctests/memory.c: Make sure the memory ctest runs even if no + components are available. + + * 9b3de551 src/linux-common.c src/perf_events.c src/perfmon-ia64.c...: Make + sure the system info init happens at os init time. Otherwise the system info + never gets set if a perfcounter component isn't available. + + * 59e47e12 src/papi_internal.c: Make sure that _papi_hwi_assign_eventset() + does the right thing if no components are available. + + * dd51e5d6 src/ctests/api.c: The api test would fail in the no cpu component + case. Fix it to properly check for errors before attempting to run + high-level PAPI tests. + + * 069e9d2f src/aix.c src/papi.c src/papi_internal.h...: Fix code that was + depending on _papi_hwd[0] existing. Most of this was in the presets code. + The preset code had many assumptions so that you can only code presets with + component[0]. This fixes some of them by passing the component index around. + + * 7259eaec src/papi_vector.c: Fix up papi_vector to get rid of some warnings + introduced on AIX. + + * 16fe0a61 src/aix.c src/solaris-ultra.c: Fix two last substrates where I + missed some fields in the OS structure conversion. + + * 625871ec src/perfmon.c: Missed a cmp_info field in perfmon.c + + * 680919d9 PAPI_FAQ.html: Saving the latest version of the FAQ before + undertaking major revisions. 
+ + * 3d4fa2e5 src/linux-timer.c src/perfctr-x86.h: Fix the perfctr code to + compile if configured with --with-virtualtimer=perfctr + + * bbd7871f src/perfctr.c: Missed two OS vector calls in the perfctr code + during the conversion. + + * bc6d1713 src/Makefile.inc: Removed one of the two instances of MISCOBJS + listed in Makefile.inc. + +2012-02-21 + + * 40bc4c57 src/papi_vector.c src/papi_vector.h: Remove now-unused OS vectors + from the main papi vector table. + + * 3c6a0f7b src/aix.c src/freebsd.c src/linux-bgp.c...: Convert PAPI to use + the _papi_os_vector for the operating-system specific function vectors. + + * 568abad5 src/papi_vector.h: Add new _papi_os_vector structure to hold + operating-system specific function vectors. + + * a39d2373 src/ctests/subinfo.c: Missed removing a field from the subinfo + ctest. + + * 1d930868 src/papi.h: Remove fields now in PAPI_os_info_t from the + component_info_t struct. + + * d397d74a src/components/example/example.c: Remove fields now in + PAPI_os_info_t from the example component. + + * 8cd5c8e0 src/aix.c src/freebsd.c src/linux-bgp.c...: Modify all the + substrates to use _papi_os_info. instead of _papi_hwd[0]->cmp_info for the + values moved to the OS struct + + * 58855d3a src/papi_internal.h: Add padding for future expansion to + PAPI_os_info_t Add _papi_hwi_init_os(void); definition + + * ea1930e1 src/papi_internal.h: Add new PAPI_os_info_t structure to + papi_internal.h + + * 0eac1b29 src/utils/multiplex_cost.c: Modify multiplex_cost to properly use + the API_get_opt() interface to get itimer data, rather than directly + accessing the fields from the cmp_info structure. This would have broken + after the OS split. + + * 87c2aa2f src/ctests/subinfo.c: subinfo was printing itimer data from the + cmpinfo structure. These values will not be in cmpinfo once the OS split + happens. 
+ + * f2c62d50 src/components/vmware/vmware.h: Clean up the VMware Header a bit + +2012-02-17 + + * 6f0c1230 src/aix.c src/components/coretemp/linux-coretemp.c + src/components/coretemp_freebsd/coretemp_freebsd.c...: The git conversion + reset all of the CVS $Id$ lines to just $Id$ Since we depend on the $Id$ + lines for the component names, I had to go back and fix all of them to be the + component names again. + + * 2d208d0e src/perfctr-2.6.x/usr.lib/event_set_centaur.o + src/perfctr-2.6.x/usr.lib/event_set_p5.o + src/perfctr-2.6.x/usr.lib/event_set_p6.o: Remove a few binary files in + perfctr-2.6.x + + * f78bf1af src/libpfm-3.y/Makefile src/libpfm-3.y/README + src/libpfm-3.y/docs/Makefile...: More cleanups from the migration, latest + version of libpfm-3.y perfctr-2.[6,7] Version numbers got really confused in + cvs and the git cvsimport didn't know that eg 1.1.1.28 > 1.1 ( see + perfctr-2.6.x/CHANGES revision 1.1.1.28.6.1 :~) + + * e8aa2e61 INSTALL.txt: Explicitly state that 3.7 was the last version of + PAPI with good windows support. + + * 546901fa src/components/cuda/linux-cuda.c: Modified CUDA component so that + a PAPI version - that was configured with CUDA - will successfully build on a + machine that does not have GPUs. + +2012-02-16 + + * 49d9f71c src/.gitignore: Add a .gitignore file with the files that PAPI + autogenerates. This way they won't clutter up "git status" messages + diff --git a/ChangeLogP501.txt b/ChangeLogP501.txt new file mode 100644 index 0000000..68ca1c7 --- /dev/null +++ b/ChangeLogP501.txt @@ -0,0 +1,77 @@ +2012-09-20 + + * 708d173a man/man1/papi_avail.1 man/man1/papi_clockres.1 + man/man1/papi_command_line.1...: Rebuild the manpages for a 5.0.1 release. + +2012-09-19 + + * 29cdd839 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump the version + number for a 5.0.1 release. 
+ + * bb7727f6 src/libpfm4/examples/fo src/libpfm4/examples/injectevt.c + .../bin/usr/local/include/perfmon/perf_event.h...: Cleanup a botched libpfm4 + update. As Steve Kaufmann noted, I botched an update of libpfm4. + +2012-09-18 + + * dc117410 src/configure src/configure.in: Remove a trailing slash in libpfm4 + pathing. Addresses an issue in rpmbuild when using bundled libpfm4. Reported + and patched by William Cohen + +2012-09-17 + + * e196b89b src/components/cuda/configure src/components/cuda/configure.in: + Minor changes to CUDA configure necessary to get it running smoothly on the + Kepler architecture. + +2012-09-11 + + * 866bd51c src/papi_internal.c src/papi_preset.c: Fix preset bug The preset + code was only initializing the first element of the preset code[] array. + Thus any event with more than one subevent was not terminated at all, and the + preset code would use random garbage as presets. This exposed another + problem; half our code assumed a 0 terminated code[] array, the rest was + looking for PAPI_NULL (-1). This standardizes on PAPI_NULL, with comments. + Hopefully this might fix PAPI bug #150. This is a serious bug and should be + included in the next stable release. + +2012-08-29 + + * b978a744 src/configure src/configure.in: configure: fix autodetect perfmon + case The fixes I made yesterday to libpfm include finding broke on perfmon2 + PAPI if you were letting the library be autodetected. This change should fix + things. Tested on an actual 2.6.30 perfmon2 system. + + * 4386e6e5 src/libpfm4/Makefile src/libpfm4/README src/libpfm4/config.mk...: + Update libpfm4 included with papi to 4.3 + +2012-08-28 + + * 729a8721 src/configure src/configure.in: configure: don't check for libpfm + if incdir specified When various --with-pfm values are passed, extra checks + are done against the libpfm library. This was being done even if only the + include path was specified, which probably shouldn't be necessary. 
This + broke things because a recent change I made had the libpfm include path be + always valid. + + * bc9ddffc src/configure src/configure.in: Fix compiling with separate + libpfm4 The problem was if you used any of the --with-pfm-incdir type + directives to configure, it would then assume you wanted a perfmon2 build. + This removes that assumption. I did check this with perfmon2, perfctr, and + perf_event builds so hopefully I didn't break anything. + +2012-08-27 + + * 3b737198 src/papi.c src/papi_libpfm4_events.c src/papi_preset.c...: Hack + around debugging macros. Under NO_VARARG_MACROS configs the debug printing + guys become two expression statements. This is bad for code expecting eg + SUBDBG(); to be one statement. --ie-- if ( foo ) SUBDBG("Danger Will + Robinson"); ------ In order to keep the useful file and line number + expansions without variadic macro support, we split SUBDBG into two parts; A + call to DEBUGLABEL() and friends and then a call to a function to capture the + actual informative message. So if(foo) stmt(); becomes if (foo) + print_the_debug_label(); print_your_message(...); And your message is always + printed. See papi_debug.h for what actually happens. I'm not clever enough + to work around this any other way, so I exhaustively put { }s around every case + of the above I found. 
(I only searched on 'DBG' so it's possible I missed + some) diff --git a/ChangeLogP510.txt b/ChangeLogP510.txt new file mode 100644 index 0000000..48b9c23 --- /dev/null +++ b/ChangeLogP510.txt @@ -0,0 +1,427 @@ +2013-01-15 + + * 0917f567 src/threads.c: Cleaned up compiler warning (gcc version 4.4.6) + + * 06ca3faa src/components/bgpm/CNKunit/linux-CNKunit.c + src/components/bgpm/IOunit/linux-IOunit.c + src/components/bgpm/L2unit/linux-L2unit.c...: Cleaned up compiler warnings on + BG/Q (gcc version 4.4.6 (BGQ-V1R1M2-120920)) + +2013-01-14 + + * 56400627 .../build/lib.linux-x86_64-2.7/perfmon/__init__.py + .../lib.linux-x86_64-2.7/perfmon/perfmon_int.py + .../build/lib.linux-x86_64-2.7/perfmon/pmu.py...: libpfm4: remove extraneous + build artifacts. Steve Kaufmann reported differences between the libpfm4 I + imported into PAPI and the libpfm4 that can be attained with a git clone + git://perfmon2.git.sourceforge.net/gitroot/perfmon2/libpfm4 Self: Do libpfm4 + imports from a fresh clone of libpfm4. + +2013-01-11 + + * 4ad994bc src/papi_events.csv: Clean up armv7 cortex a15 presets Clean up + armv7 cortex a15 presets and add presets for L1 and L2 cache + + * d54dabf5 ChangeLogP510.txt RELEASENOTES.txt doc/Doxyfile-common...: Prepare + the repo for a 5.1 release. * Bump the version number to 5.1 * Update the + man pages * Create a changelog for 5.1 * Update RELEASENOTES + + * 8816a3b8 INSTALL.txt: Update INSTALL.txt Add information about installing + PAPI on Intel MIC. Based upon information from Vince Weaver's PAPI MIC + support page. http://www.eece.maine.edu/~vweaver/projects/mic/ + + * 8dc1ca23 TEST.TXT: Remove TEST.TXT This was a leftover from a switch over + to git. + + * 292d6c9b src/papi_libpfm3_events.c: Fix build on ia64 When trying to build + papi 5.0.1 for IA64, my colleague got compile errors due to perfmon.h not + being included. We're not sure if this actually is a configure bug, but this + patch fixed it. 
+ + * 25424f41 src/extras.c: Fix kernel warning in _papi_hwi_stop_timer() In + _papi_hwi_stop_timer() we were calling setitimer( timer, NULL, NULL ) to + disable the itimer. Recent Linux kernels print warnings if you do this; NULL + is not a valid second argument to setitimer() and possibly this wasn't really + working before. According to the manpage the proper fix is to call + setitimer() with a valid "new_value" field but with the values all 0. That + is what this patch does. + +2012-11-30 + + * a7d70127 src/components/micpower/README + src/components/micpower/Rules.micpower + src/components/micpower/linux-micpower.c...: MIC power component The Intel + MIC (Xeon PHI) card reports power of several components of the card. These + values are reported in a sysfs file, so this component is cloned from the + coretemp component. + +2013-01-08 + + * 121cd0a6 src/Makefile.in src/Rules.pfm4_pe src/configure...: configure: Add + shortcut for mic support. * Add a --with-mic flag to enable the several + options to cross compile for mic. MIC builds are cross-compiled and Matt and + I were unable to figure out how to trigger cross compilation with just our + flag. This is short-hand for setting --with-arch=k1om --without-ffsll + --with-walltimer=clock_realtime_hr \ --with-perf-events --with-tls=__thread + --with-virtualtimer=cputime_id * Automatically cause make to pass + CONFIG_PFMLIB_ARCH_X86=y to libpfm4's make. So to build for the mic card one + has to do: {Set pathing to find the x86_64-k1om-linux-gcc cross-compiler} $ + ./configure --host=x86_64-k1om-linux --with-mic $ make Thanks to Matt + Johnson for the legwork on configure shortcuting. + +2013-01-07 + + * f65c9d9e src/papi_events.csv: Add preset events for ARM Cortex A15 + +2012-12-14 + + * 61a9c7b1 man/man3/PAPI_get_eventset_component.3 src/papi.c: Doxygen: Add a + new API entry Add the manpage for the new PAPI_get_eventset_component api + entry. 
+ +2013-01-02 + + * 38d969ab doc/Doxyfile-man1 doc/Doxyfile-man3 doc/Makefile...: Doxygen: + Cleanup generated man pages. Mark a few \page sections as \htmlonly so that + man pages are not built for them. Modify the makefile to rm some data + structures that are generated. Doxyfile-man3: * Take out papi_vector.h, this + file only defines a few data structures from which we don't need manpages. + papi.h: * PAPI_get_component_index's inline comment had the close /**> to + delimit its description, but doxygen uses /**<. papi_fwrappers.c: * Mark the + group PAPIF as internal so that a man page is not generated for it. utils/*: + * Remove some useless htmlonly directrives, doxygen will generate pages for + any data structure, htmlonly doesn't stop that. Doxyfile-man1: * Change a + flag in Doxyfile-man1 so that we don't document internal data structures in + the utilities. We don't do this in -man3 because of the \class workaround we + use to create manpages for each of the PAPI_* api entry points. Because we + call them classes, they would be caught in the no data structures flag. + + * 7b790c09 doc/Doxyfile-html src/papi.h src/papi_fwrappers.c...: Doxygen: + Cleanup some of the markup We were not using htmlonly correctly... The idea + was to use \htmlonly to not build manpages for a few things. To properly hide + \page s you want things like: /** \htmlonly \page Foo I don't want this to + generate a manpage. \endhtmlonly */ + +2012-12-07 + + * 152bac19 src/papi.c: Doxygen: Cleanup papi.c Cleanup some \ref s, \ref + PAPI_function() isn't happy, use \ref PAPI_function it'll put in the proper + links. Remove _papi_overflow_handler doc block. We had the block but no + code. + +2012-12-20 + + * 7a40c769 src/components/rapl/tests/rapl_overflow.c: RAPL test code: Add + flexibility to the test code. 
Per Will Cohen; ------------------ I was + reviewing some test results for the papi test and found that the + rapl_overflow.c test makes an assumption that there are exactly two + packages. As a result the test will fail on machines with a single package. + The following is a patch to make it a bit more flexible and allow 1-n packages in + the test. -Will ----------------- + +2012-12-19 + + * 96c9afb0 src/components/appio/README src/components/appio/appio.c + src/components/appio/appio.h...: Added events for seek statistics and support + for intercepting lseek() calls. + +2012-12-14 + + * 003abf6d src/Rules.perfctr-pfm: Rules.perfctr-pfm: pass CC in all cases. + Perfctr user library was not being passed CC when built. + +2012-12-05 + + * e2c05b29 src/papi_internal.c: papi_internal.c: Refactor duplicated code in + cleanup and free eventset. Currently the code to free runtime state is + duplicated in cleanup and free. The perf_event_uncore test exposed an issue + where free cleaned up cpu_attach state but cleanup did not, causing a leak. + Have _papi_hwi_free_EventSet call _papi_hwi_cleanup_eventset to free most of + the runtime state of the eventset and then allow free_eventset to free the + Eventset Info struct. + +2012-12-13 + + * 7d020224 src/configure src/configure.in: configure: Change fortran compiler + search order. Bandaid fix to buildbot errors. By default, configure would + find icc before gcc but gfortran would be used before ifort. The real fix is + to test that object code from the c compiler can be linked to by the fortran + compiler. + +2012-12-12 + + * 87b6e913 src/papi_events.csv: ivy_bridge: remove PAPI_HW_INT event + Apparently recent Intel Vol3B documentation removed this event, and the most + recent libpfm4 merge followed suit. I asked at Intel about this and possibly + they only removed it because they didn't think anyone was using it. 
Maybe + they'll add it back + +2012-12-10 + + * 293b26b9 src/Makefile.inc: Makefile.inc: Fix library link ordering. Per + Will Cohen ----------------------------------------------------------- I ran + across a problem when trying to build papi with the bundled libpfm and an + earlier incompatible version of libpfm was already installed on the machine. + The make would use the /usr/lib{64}/libpfm.so before trying to use the + locally built version and this would cause problems. The attached patch + changes the order of the linking and uses the local built libpfm before it + tries the installed version. -Will + ----------------------------------------------------------- + +2012-12-12 + + * 57e6aa0d src/Makefile.in: Makefile.in: export CC_COMMON_NAME In 17cfcb4a I + started using CC_COMMON_NAME in Rules.pfm4 but failed to have configure put + it in Makefile. + +2012-12-11 + + * 17cfcb4a src/Rules.pfm4_pe src/configure src/configure.in: Cleanup icc + build Start using -diag-disable to quiet down some of the remarks icc carps + about in libpfm4. Also have configure export CC_COMMON_NAME and check + against that in Rules.pfm4_pe. afec8fc9a reverted us to passing + -Wno-unused-parameter to icc, polluting buildbot. + +2012-12-10 + + * afec8fc9 src/configure src/configure.in: configure: Attempt to better + detect which C compiler we are using. This attempts to address trac bug 162. + http://icl.cs.utk.edu/trac/papi/ticket/162 Specifying full paths for CC + caused issues in our configure logic. We set several flags specific to gcc or + icc and this was breaking down EG "/usr/bin/gcc" != "gcc" Now we attempt to + execute whatever CC we are going to use and grep its version string. We set a + CC_COMMON_NAME \in {"gcc", "icc", "xlc", "unknown"} based upon the above and + later check CC_COMMON_NAME in place of CC to set compiler specific flags. + + * 14432aa0 src/linux-timer.c src/papi.c: Minor Coverity fixes. Thanks, Will + Cohen. 
+ +2012-12-07 + + * ba5e83d4 src/papi_user_events.c: papi_user_events.c: Fix memory leak. + Reported by William Cohen as detected by the coverity tool. + + * 166498a8 src/components/nvml/linux-nvml.c: nvml component: fix + detectDevices() The routine detectDevices() always returned with the error + PAPI_ESYS when there was a device available. As a result, no nvml events + were available. Fixed. + + * 11ad5894 src/components/nvml/linux-nvml.c: nvml component: add missing + variable declaration In the routine _papi_nvml_init_component(), the variable + papi_errorcode was not declared which prevented this component from building. + Added declaration of papi_errorcode as int. + +2012-12-06 + + * 9567dfef src/ftests/first.F src/ftests/second.F: Fix warning messages + issued by gfortran 4.6.x regarding loss of precision when casting REAL to + INT. Thanks to Heike for identifying the proper intrinsics. + + * 72588227 src/papi.c src/papi.h: Add PAPI_get_eventset_component() to get + the component index from an event set. This is symmetric with + PAPI_get_event_component which extracts the information from an event. In + response to a request from John Mellor-Crummey. + + * 2e055d40 src/components/rapl/linux-rapl.c: Fix a compiler warning about a + possibly uninitialized return value. + +2012-12-05 + + * 1aae2246 src/utils/command_line.c: Reformat the floating point output + string to recognize that you can't cast the *value* of a long long to a + double and expect to get the right answer; you need to cast the *pointer* to + a double, then everything works. + + * 0e834fc2 src/utils/command_line.c: Incorporated use of the new + PAPI_add_named_event API. Restructured output to support formatted printing + of built-in DATATYPEs: UINT64 prints as unsigned followed by (u); INT64 + prints as signed; FP64 prints as float (but I don't like the default format); + BIT64 prints as hex, prefixed by '0x'. Also if info.units is not empty, units + are appended to output values. 
These features can be demo'd with the RAPL + component. + + * af6abec2 src/papi.h: Rearranged DATATYPE enums so INT64 is now default (0) + value. Also added a BIT64 type for unspecified bitfields. + +2012-12-04 + + * 862033e0 src/components/bgpm/IOunit/linux-IOunit.c + src/components/bgpm/IOunit/linux-IOunit.h + src/components/bgpm/L2unit/linux-L2unit.c...: Resolved multiple components + conflict on BG/Q when overflow is enabled for multiple events from different + components at the same time. + + * 44744002 src/utils/command_line.c: Add -x and -u options to + papi_command_line to allow printing counter values in hexadecimal and + unsigned formats. + +2012-11-30 + + * 25a914c5 src/papi_user_events.c: Cleanup unused variable warnings in + user_events code. + +2012-11-28 + + * 9a75f872 src/Rules.pfm4_pe src/configure src/configure.in: Cleanup the + build under icc. libpfm4's build system uses a gcc specific flag, + -Wno-unused-parameter. It does this via a variable, DBG, in config.mk: + DBG?=-g -Wall -Werror -Wextra -Wno-unused-parameter The Intel compiler + doesn't understand -Wno-unused-parameter and complains about it. In + Rules.pfm4_pe we set DBG for icc builds. + +2012-11-27 + + * 4def827b src/configure src/configure.in: Fix the perfctr build that was + breaking due to missing CPU Mark Gates was reporting PAPI 5 wasn't running + properly on Keeneland. It looks like some CPU cleanups in the configure code + broke things. Hopefully this helps the situation. + +2012-11-21 + + * 4316f172 src/perf_events.c: perf_events: get rid of "PAPI Error: Didn't + close all events" error This was more meant as a warning; it could trigger + when closing an EventSet that had an event partially added but failed for + some reason. + + * 671e10bd src/utils/command_line.c: papi_command_line: fix error output The + error messages got a bit weird looking due to the PAPI error printing changes + a while back. 
+ + * 959afa49 src/papi_internal.c: Fix _papi_hwi_add_event to report errors back + to user. Previously _papi_hwi_add_event would report all errors returned by + add_native_events() as being PAPI_ECNFLCT even though add_native_events() + returned a wider range of errors. + + * 8ecb70ba src/perf_events.c: Have perf_event return PAPI_EPERM rather than + PAPI_ECNFLCT if the kernel itself returns EPERM + + * 9053ca1c src/perf_events.c: Work around kernel issue with + PERF_EVENT_IOC_REFRESH It's unclear exactly the best way to restart + sampling. Refreshing with 1 is the "official" way as espoused by the kernel + developers, but it doesn't work on Power. 0 works for Power and most other + machines, but the kernel developers say not to use it. This makes Power use + 0 until we can figure out exactly what is going on. + + * e85df04b src/components/appio/tests/appio_test_socket.c: - added support + distinguishing between network and file I/O. - added events to measure + statistics for sockets - updated README + +2012-11-15 + + * 248694ef src/x86_cpuid_info.c: Update x86_cpuid_info code for KNC. On + Knight's Corner the leaf2 code returns 0 for the count value. We were + printing a warning on this; better would be to just skip the cache detection + code if we get this result. + +2012-11-08 + + * 82c93156 src/linux-bgp-memory.c src/linux-bgp.c src/linux-bgp.h: There was + more cleaning up necessary in order to get PAPI compiled on BG/P. It should + work now with the recommended configure steps described in INSTALL. + +2012-11-07 + + * 77da80b3 src/Makefile.inc src/configure src/configure.in...: Make BGP use + papi_events.csv This was easier than trying to clean up the + linux-bgp-preset-events.c file to have the proper file layout. + + * fc8a4168 src/linux-bgp.c: Fix some linux-bgp build issues. No one has + tried compiling after all the PAPI 5.0 changes so many bugs slipped in. + + * c16ef312 src/ctests/perf_event_uncore.c: Fix type warnings in + perf_event_uncore test. 
+ + * 3947e9c8 src/ctests/perf_event_uncore.c: Put a bandaid on the + perf_event_uncore test. Check for an Intel family 6 model 45 processor + (sandybridge ep) before executing the test. + +2012-09-27 + + * a23d95f8 src/papi.c src/papi.h src/papi_fwrappers.c...: Mark some comments + @htmlonly. This cleans up what man pages are generated. + +2012-11-07 + + * d239c350 src/Makefile.inc src/Rules.pfm4_pe: Factor out duplicate install + code from Rules.pfm4_pe The Makefile.inc has a rule to install shared + libraries. However, Rules.pfm4_pe also has a slightly different set of rules + to install code for shared libraries. This leads to the same shared library + being installed under two different names. The duplicate code has been + removed from Rules.pfm4_pe and a symbolic link has been added to ensure that + any code that might have linked with + libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE) still runs. + +2012-10-30 + + * fcc64ff9 src/papi_events.csv: Add PAPI_HW_INT event for IvyBridge + +2012-10-26 + + * ef89fc56 src/papi_events.csv: MIC: update PAPI_FP_INS / PAPI_VEC_INS + instruction We were using VPU_INSTRUCTIONS_EXECUTED for PAPI_FP_INS but + really it's more appropriate for PAPI_VEC_INS This leaves PAPI_FP_INS + undefined, which breaks a lot of the ctests. A long term goal should probably + be modifying the tests to use another counter if PAPI_FP_INS isn't available + (this affects Ivy Bridge too). + +2012-10-25 + + * 975c03f1 src/perf_events.c: perf_event: fix granularity bug cut-and-paste + error in the last set of changes. Would have meant if you tried to + explicitly set granularity to thread you'd get system instead. + + * 3cd3a62d src/configure src/configure.in src/ctests/Makefile...: Add + perf_event_uncore ctest Also add a new type of ctest, perf_event specific + In theory we should have configure only enable this if perf_event support is + being used.
+ + * 5ee97430 src/perf_events.c: perf_event: add PAPI_DOM_SUPERVISOR to allowed + perf_event domains perf_event supports this domain but since we didn't have + it in the list PAPI wasn't letting us set/unset this. This is needed for + uncore support, as for uncore domain must be set to allow monitoring + everything. + + * c9325560 src/perf_events.c: perf_event enable granularity support Add + support for PAPI_GRAN_SYS to perf_event. This is needed for uncore support. + +2012-10-18 + + * 59d3d758 src/mb.h src/perf_events.c: Update the memory barriers It turns + out PAPI fails on older 32-bit x86 machines because it tries to use an SSE + rmb() memory barrier. (Yes, I'm trying to run PAPI on a Pentium II. Don't + ask) It looks like our memory barriers were copied out of the kernel, which + doesn't quite work because it expects some kernel infrastructure instead. + This patch uses the definitions used by the "perf" tool instead. Also + dropped the use of the mb() memory barrier on mmap tail write, as the perf + tool itself did a while ago so I'm hoping it's safe to do so as well. It + makes these definitions a lot simpler. + +2012-10-08 + + * bcdce5bc src/perf_events.c: perf_event: clarify an error message The + message was saying detecting rdpmc support broke, but the real error is that + perf_events itself is totally broken on this machine and it's just that rdpmc was + the first code that tried to access it. + +2012-10-02 + + * 3bb3558f src/mb.h: Update memory barriers for Knights Corner Despite being + x86_64 they don't support the SSE memory barrier instructions, so add a case + in mb.h to handle this properly.
+ +2012-10-01 + + * 38a5d74c src/libpfm4/README src/libpfm4/docs/Makefile + src/libpfm4/docs/man3/libpfm_intel_atom.3...: Merge libpfm4 with Knights + Corner Support + + * bf959960 src/papi_events.csv: Change "phi" to "knc" to match libpfm4 for + Xeon Phi / Knights Corner support + +2012-09-20 + + * d9249635 ChangeLogP501.txt RELEASENOTES.txt: Update releasenotes and add a + changelog for 5.0.1 + + * a1e30348 man/man1/papi_avail.1 man/man1/papi_clockres.1 + man/man1/papi_command_line.1...: Rebuild the manpages for a 5.0.1 release. diff --git a/ChangeLogP511.txt b/ChangeLogP511.txt new file mode 100644 index 0000000..ebe6a3d --- /dev/null +++ b/ChangeLogP511.txt @@ -0,0 +1,176 @@ +2013-05-21 + + * 602d8dbc man/man1/papi_avail.1 man/man1/papi_clockres.1 + man/man1/papi_command_line.1...: Rebuild man pages for a 5.1.1 release. + + * 93d9be34 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump version + number for a 5.1.1 release. + +2013-04-15 + + * 8e47838d src/components/cuda/linux-cuda.c: When creating two event sets - + one for the CUDA and one for the CPU component - the order of event set + creation appears crucial. When the CPU event set has been created before the + CUDA event set then PAPI_start() for the CUDA event set works fine. However, + if the CUDA event set has been created before the CPU event set, then + PAPI_start(CUDA_event_set) forces the CUDA control state to be updated one + more time, even if the CUDA event set has not been modified. The CUDA control + state function did not properly handle this case and hence cause PAPI_start() + to fail. This has been fixed. + +2013-05-13 + + * c93dfa68 src/perf_events.c: perf_event component: update error returns + This passes more error return values back to PAPI. Before this change a lot + of places were hardcoded to PAPI_EPERM even if sys_perf_event_open() was + reporting a different error. 
+ +2013-05-08 + + * d1db58e8 src/configure src/configure.in: Force the use of pthread_mutexes + on ARM This lets the system libraries worry about the best way to define + mutexes, rather than trying to hand-code in assembly around all of the + various issues there are with atomic instructions in the ARM architecture. + It might make sense to enable this for *all* Linux architectures, but for now + just do it for ARM. + + * 29662e3e src/linux-lock.h: Commit 59d3d7584b2925bd05b4b5d0f4fe89666eb8494a + removed the definition of mb(). mb() was defined as rmb(). This just + corrects it back. (Note from VMW -- this fixes some things, but ARM still + won't build on a Cortex A9 pandaboard due to the use of the "swp" + instruction. Proper fix is probably to enforce posix-mutexes on ARM) + +2013-04-22 + + * ff29fd12 src/run_tests.sh: The test for determining whether to run valgrind + was backwards. Correcting that allow the run_test.sh script to stay the same + and one just needs to define "VALGRIND=yes" (or any non-null string) to make + run_test.sh use valgrind. --- src/run_tests.sh | 6 ++---- 1 file changed, 2 + insertions(+), 4 deletions(-) diff --git a/src/run_tests.sh + b/src/run_tests.sh index d1ce205..9337ff2 100755 --- a/src/run_tests.sh +++ + b/src/run_tests.sh @@ -19,10 +19,8 @@ else export TESTS_QUIET fi -if [ + "x$VALGRIND" = "x" ]; then -# Uncomment the following line to run tests using + Valgrind -# VALGRIND="valgrind --leak-check=full"; - VALGRIND=""; +if [ + "x$VALGRIND" != "x" ]; then + VALGRIND="valgrind --leak-check=full"; fi + #CTESTS=`find ctests -maxdepth 1 -perm -u+x -type f`; -- + +2013-03-28 + + * 1e8101f6 src/run_tests.sh: run_tests.sh: further refine component test find + Exclude *.cu when looking for component tests. + +2013-03-25 + + * 0b600bc5 src/run_tests.sh: run_tests.sh: File mode changes. run_tests.sh + is now expected to run from the install location in addition to src. 
The + script tried to remove execute from *.[c|h], now it just excludes *.[c|h] + from the find commands. + +2013-03-18 + + * 06f9c43b src/perfctr-x86.c: perfctr: don't read in event table multiple + times papi_libpfm3_events.c now reads in the predefined events, we don't + also need to do this in perfctr setup_x86_presets() + + * 48d7330c src/perfctr.c: Fix segfault in perfctr.c The preset lookup uses + the cidx index, but in perfctr.c we weren't passing a cidx value (it was + being left off). The old perfctr code plays games with defining extern + functions so the compiler wasn't giving us a warning. + +2013-03-14 + + * eda94e50 src/components/bgpm/L2unit/linux-L2unit.c src/linux-bgq.c: If a + counter is not set to overflow (threshold==0; happens when PAPI_shutdown is + called) then we do not want to rebuild the BGPM event set, even if the event + set has been used previously and hence "applied or attached". Usually if an + event set has been applied or attached prior to setting overflow, the BGPM + event set needs to be deleted and recreated (which implies malloc() from + within BGPM). Not so, though, if threshold is 0 which is the case when + PAPI_shutdown is called. Note, this only applies to Punit and L2unit, not + IOunit since an IOunit event set in not applied or attached. + +2013-03-13 + + * 46f6123a src/components/bgpm/IOunit/linux-IOunit.c + src/components/bgpm/IOunit/linux-IOunit.h + src/components/bgpm/L2unit/linux-L2unit.c...: Overflow issue on BG/Q + resolved. Overflow with multiple components worked; overflow with multiple + components and multiple events did not work as supposed to. + +2013-03-07 + + * 6a0813f8 src/linux-common.c src/linux-memory.c: Fix the build on + Linux-SPARC I dug out an old SPARC machine and fixed the PAPI build on it. + + * 51fe7e53 src/perf_events.c: More comprehensive sys_perf_open to PAPI error + mappings This tries to cover more of the errors returned by sys_perf_open + and map them to better results. 
EINVAL is a problem because it can mean + Conflict as well as Event not found and many other things, so it's unclear + what to do with it. + + * 1479a67f src/perf_events.c src/sys_perf_event_open.c: Return proper error + codes for sys_perf_event_open For some reason on x86 and x86_64 we were + trying to set errno manually and thus over-writing the proper errno value, + causing all errors to look like PAPI_EPERM This removes that code, as well + as adds code to report ENOENT as PAPI_ENOEVENT. With this change, on IVY + this happens which looks more correct. ./utils/papi_command_line + perf::L1-ICACHE-PREFETCHES Failed adding: perf::L1-ICACHE-PREFETCHES because: + Event does not exist command_line.c PASSED + +2013-03-06 + + * 7a3e75e8 src/papi_libpfm4_events.c src/papi_user_events.c: Coverity fixes: + Coverity pointed out that there was a case where load_user_event_table() could + leak memory. The change in the location of the papi_free(foo) ensures that + the allocated memory is freed. Coverity pointed out one path through the + code in _papi_libpfm4_ntv_code_to_descr() that did not free up memory + allocated in the function. Added a free on the path to free up that memory. + Thanks Will Cohen. + +2013-03-04 + + * b19bd1a2 src/components/rapl/linux-rapl.c: Remove a stray debug statement. + Thanks to Harald Servat for catching this. + +2013-03-01 + + * 6e5be510 src/utils/command_line.c: Wrestled some horribly convoluted + indexing into shape. The -u and -x options now print as expected (I think). + +2013-01-31 + + * 02bd70ad src/components/nvml/linux-nvml.c: linux-nvml.c: Fix type warning. + CUDA and NVML have a signed vs unsigned thing going on in their returned + device counts, cast away the warning. + +2013-01-23 + + * a5bed384 src/linux-memory.c src/linux-timer.c: ia64 fixes. Thanks to Tony + Jones for patches. + +2013-01-16 + + * 021db23a src/components/nvml/linux-nvml.c: nvml component: cleanup a memory + leak We did not free a buffer at shutdown time.
+ +2013-05-17 + + * b25fc417 src/perf_events.c: perf_event: allow running with + perf_event_paranoid is 2 perf_event_paranoid set to 2 means allow user + monitoring only (no kernel domain). The code before this mistakenly disabled + all events in this case. Also set the allowed domains to exclude + PAPI_DOM_KERNEL. + +2013-05-16 + + * 12768bec src/papi_events.csv: papi_events.csv Revert a little mishap in + adding ivbep support Somehow the contents of papi_hl.c ended up in the + events file. + + * 5e97ad7f src/papi_events.csv: Add identifier for ivb_ep + +2013-01-29 + + * e201b8eb src/papi.c: General doxygen cleanup: remove all "No known bugs" + messages; correct and cleanup examples for PAPI_code_to_name and + PAPI_name_to_code diff --git a/ChangeLogP520.txt b/ChangeLogP520.txt new file mode 100644 index 0000000..5d0577f --- /dev/null +++ b/ChangeLogP520.txt @@ -0,0 +1,1222 @@ +2013-08-02 + + * 6b62d586 man/man1/papi_avail.1 man/man1/papi_clockres.1 + man/man1/papi_command_line.1...: Update the manpages for a pending 5.2 + release. New pages for PAPI[F]_epc and papi_version. + + * 1ae08835 src/linux-common.c: try to properly detect number of sockets Use + totalcpus rather than ncpu in the calculation. This change fixes things on a + Sandybridge-EP machine. We should maybe find a more robust way to detect + this. + + * 79c37fbf .../perf_event_uncore/tests/perf_event_uncore.c + .../tests/perf_event_uncore_multiple.c: perf_event_uncore: have tests skip if + component disabled rather than fail + + * 638ccf6b .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore: + change order of uncore detection logic This way it will report an error of + "no uncore found" before it reports "not enough permissions". That way a + user won't waste time getting permissions only to find out they didn't have + an uncore anyway. 
+ + * 30582773 src/components/perf_event/pe_libpfm4_events.c: perf_event: fix + papi_native_avail output A recent change of mine that added stricter error + checking for libpfm4 event lookup broke event enumeration on perf_event, + specifically papi_native_avail output. libpfm4 will return an error on some + events if no UMASK or improper UMASK is supplied, but papi_native_avail + always wants to print the root event and umasks separately. this temporary + fix just ignores libpfm4 umask errors; we might in the future want to + properly indicate which events are only valid when certain umasks are + present. + + * c7612326 src/utils/native_avail.c: papi_native_avail: fix empty component + case If a component had no events, papi_native_avail would ignore the error + returned by PAPI_enum_cmp_event( PAPI_ENUM_FIRST ); and try to print a first + event anyway. + + * e1b064eb .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore: + disable component if no events found This can happen on older (pre 3.6) + kernels with the new libpfm4 that does proper uncore detection. + +2013-08-01 + + * 9a54633a src/components/host_micpower/linux-host_micpower.c + src/components/infiniband/linux-infiniband.c + src/components/nvml/linux-nvml.c...: Components: Use the cuda dlopen fix all + cases. See 4cb76a9b for details, the short version is if you call dlopen + when you have been statically linked to libc, it gets ugly. + +2013-07-31 + + * dbc44ed1 src/components/perf_event/pe_libpfm4_events.c + .../perf_event_uncore/perf_event_uncore.c + .../perf_event_uncore/peu_libpfm4_events.c: perf_event libpfm4 events -- + correctly handle invalid events It was possible for event names to be + obtained from libpfm4 during enumeration that were not valid events. This + usually happens with uncore events, where the uncore is listed as available + based on cpuid but when libpfm4 tries to get the uncore type from the kernel + finds out it is unsupported. 
This change makes this properly fail, instead + of just returning "0" for all the event parameters (which is a valid event on + x86). Also make this change in the regular perf_event component, even though + it is less likely to happen in practice. + + * 4720890a .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore: + remove check_permissions() test It was trying to see if an EventSet was + runnable by using the current permissions and adding the PERF_HW_INSTRUCTIONS + event. That doesn't really make sense on uncore. The perf_event component + uses this test to try to give errors early, at set_opt() time rather than at + the first run time, although in practice now we can probably make intelligent + guesses based on the current permission levels. + + * 113d35f7 .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore: + remove unused kernel workarounds uncore only works on Linux 3.6 or newer so + all of the pre-2.6.35 workarounds aren't necessary. If someone has + backported the uncore support to kernels that old, hopefully they've also + backported all the other bugfixes too. + +2013-07-25 + + * 4cb76a9b src/components/cuda/linux-cuda.c: Trial fix for the cuda component + static libc linking issue. Weak link against _dl_non_dynamic_init, this + appears in my limited testing to be in gnu libc.a and not in the so. For + background, it was reported by Steve Kaufmann that statically linking tools + with a PAPI library configured with the CUDA component segfaulted. It appears + that calling any of the dynamic linker functions from a static executable is + asking for pain. See Trac bug 182 + https://icl.cs.utk.edu/trac/papi/ticket/182 + +2013-07-24 + + * ad47cfb9 src/configure src/configure.in: Add linux-pfm-ia64 to configure + I'm not sure if this is enough to fix itanium support but it's a start. + + * 098294c5 src/components/example/tests/example_basic.c + .../example/tests/example_multiple_components.c: Fixed tests for example + component.
Both tests failed due to incorrect check of the components PAPI + has been configured with. + +2013-07-23 + + * c0c4caf4 src/linux-memory.c src/papi_events.csv: Add initial support for + IBM POWER8 processor Add initial support for IBM POWER8 processor The IBM + POWER8 processor (to be publicly announced at some future date) has some + preliminary support in libpfm with a subset of native events. These + POWER8-related libpfm changes were pulled into PAPI on July 3, so further + updates in PAPI were required to support this new processor. This patch adds + that required support. NOTE: Due to the fact that only a subset of native + events have been publicised at this point (and pushed into libpfm), not all + of the usual PAPI preset events have corresponding native events. The rest of + the POWER8 native events will be pushed upstream once they are verified, and + then we can flesh out the PAPI preset events. With this initial POWER8 + support patch, 5 of the ctests and ftests fail, compared to 3 when PAPI is + run on a POWER7. At least one of the failing testcases is due to testing + being done on an early POWER8 processor with some known hardware problems. We + presume the number of failing tests will decrease once we have GA-level + hardware to test on. + +2013-07-22 + + * 6c231d1a src/configure: Rerun autoconf for f4ec143e Correct versioning of + libpapi.so + + * f4ec143e src/configure.in: Correct versioning of libpapi.so The configure + for linux always set the soname to libpapi.so. This causes problems when + /sbin/ldconfig tries to update the library information on linux. The shared + library is installed as /lib{64}/libpapi.so.$VERSION, but the shared library + has the soname of libpapi.so. ldconfig makes a symbolic link from + /lib/libpapi.so to the actual versioned shared library, + /lib/{64}/libpapi.so$VERSION. The configure should get the soname correct to + avoid creating this symbolic link. 
This patch only addresses the issues for + some of the possible platforms and similar patches may be needed for other + platforms. + +2013-07-19 + + * 92356bbd src/papi.c src/threads.c src/threads.h: Attempt to fix a memory + leak in fork2 test. Fork2 does the following: PAPI_library_init() fork(); / + \ parent child wait() PAPI_shutdown() -> + _papi_hwi_shutdown_global_threads() -> foreach(threadinfo we allocated): + _papi_hwi_shutdown_thread() PAPI_library_init() _papi_hwi_shutdown_thread + checks who allocated a ThreadInfo entry in the global list, and will only + free it if our thread did the allocation. When threading is not initialized, + we fall back to getpid(), now in the child process, the one ThreadInfo item + on the list was allocated by our parent, so at shutdown time we don't free + this, and thus leak it. Solution is to add a parameter to + _hwi_shutdown_thread to force shutdown even if we didn't allocate it. At + _papi_hwi_shutdown_global_threads() time, who cares, its closing time. + + * c04d908e src/cpus.c: Fix a deadlock in _papi_hwi_lookup_cpu(). If cpu_num + is not found by _papi_hwi_lookup_cpu(), _papi_hwi_initialize_cpu() calls + insert_cpu(), which locks CPUS_LOCK, which was already held by + _papi_hwi_lookup_cpu(). + + * efac24c4 src/components/micpower/linux-micpower.c: micpower: fix return + value check Also add a time check at stop time. + +2013-07-16 + + * b9fd9dd1 src/configure src/configure.in: configure: Fix AIX build + perfctr_ppc was not the only system that relied on ppc64_events.h, power*.h, + and friends. First run at a fix is -Icomponents/perfctr_ppc for the C and F + flags... + + * 46042e68 src/components/micpower/linux-micpower.c: micpower: update some + indexing code + +2013-07-15 + + * 5220e7d2 INSTALL.txt: INSTALL.txt: typo --with-arch=, not --arch=; Thanks + to Karl Schulz for catching this. + + * 207e0ee0 src/papi_libpfm_events.h: papi_libpfm_events: needs include files + for types. 
Include papi.h and papi_vector.h for papi_vector_t and + PAPI_component_info_t + + * d96c01c7 src/components/perfctr/perfctr.c: perfctr: cleanup a warning + Include papi_libpfm_events.h for _papi_libpfm_init() decl. + + * 367e1b38 src/components/perfctr/perfctr-x86.c + src/components/perfctr/perfctr.c: perfctr: refactor out setup_x86_presets + The setup_presets function served only to call _papi_libpfm_init, so we go + the rest of the way and completely remove the function, calling + _papi_libpfm_init directly from _perfctr_init_component. + + * 1ba38ce5 src/components/perfctr/perfctr-x86.c: perfctr: cleanup unused + parameter warning. The perfctr code was refactored to only call into the + table loading code one time. This had the side effect of removing most of + what setup_x86_presets does. + + * 02710ced src/configure src/configure.in: configure: remove debugging + message The compiler detection code had a stray AC_MSG_RESULT. + +2013-07-12 + + * 028ce29d src/components/lustre/linux-lustre.c: lustre: use whole directory + name as event Gary Mohr reported that on a trial system he was seeing many + events of the form fs3-* which were all chopped to fs3, not helpful. I've + not actually been able to figure out exactly how lustre names things, I've + seen it described as - But have no clue what uid promises. + +2013-07-15 + + * 129d4587 src/papi.c: allow more than one EventSet to attach to a CPU at a time + This is necessary for perf_event_uncore support, as multiple uncores will + want to attach to a CPU. It looks like this change won't break anything, and + the tests pass on my test machines. I am a bit concerned about + cpu->running_eventset, though no one seems to use that value... + + * bcda5ddd src/components/perf_event_uncore/tests/Makefile + .../tests/perf_event_uncore_nogran.c: perf_event_uncore: remove + perf_event_uncore_nogran test It is unnecessary after recent changes to the + uncore component.
+ + * b1b9f654 src/components/perf_event_uncore/tests/Makefile + .../tests/perf_event_uncore_cbox.c: perf_event_uncore: add + perf_event_uncore_cbox test This adds a non-trivial test of the CBOX + uncores. It turned up various bugs in the PAPI uncore implementation. + + * df1b6453 src/linux-common.c: linux: properly set hwinfo->socket value It + was being derived from hwinfo->ncpu but being calculated before hwinfo->ncpu + was set. + +2013-07-13 + + * ee537448 .../perf_event_uncore/perf_event_uncore.c + .../perf_event_uncore/peu_libpfm4_events.c + .../perf_event_uncore/peu_libpfm4_events.h: perf_event_uncore: properly + report number of total counters available + + * 7eb93917 src/components/perf_event/Rules.perf_event + src/components/perf_event/pe_libpfm4_events.c + src/components/perf_event/pe_libpfm4_events.h...: + perf_event/perf_event_uncore/libpfm4 -- rearrange files Give perf_event and + perf_event_uncore copies of papi_libpfm4_events to work with, as they will + have different needs for the code. Get rid of the perf_event_lib stuff. It + was a hack to begin with and in the end not much code will be shared. Maybe + we can re-share things once uncore support is complete. 
+ +2013-07-12 + + * 6810af2a src/components/perf_event/perf_event.c + .../perf_event_uncore/perf_event_uncore.c src/papi_libpfm4_events.c...: + papi_libpfm4: properly call pfm_terminate() in papi_libpfm4_shutdown + + * 010497f4 src/components/perf_event/perf_event.c + .../perf_event_uncore/perf_event_uncore.c src/papi_libpfm4_events.c...: split + papi_libpfm4_init() split this function because the perf_event_uncore() + component is going to want to initialize things differently than plain + perf_event + + * d9023411 src/components/perf_event/perf_event.c: perf_event: on old kernels + if SW Multiplex enabled, then report proper number of MPX counters available + it may be different than the amount HW supports + + * 7595a840 src/components/perf_event/perf_event_lib.c: perf_event: use + PERF_IOC_FLAG_GROUP when resetting events This ioctl argument specifies to + reset all events in a group, so we don't have to iterate. This argument + dates back to the introduction of perf_event and it makes the code a bit + cleaner. + + * f220fd19 src/ctests/Makefile src/ctests/reset_multiplex.c: Add + reset_multiplex.c PAPI_reset() potentially exercises different paths when + resetting normal and multiplexed eventsets, so make sure we test both. + + * f784a489 src/components/lustre/linux-lustre.c: lustre: botched a conflict + resolution properly do error checking on addCounter() + + * c1350fc8 src/components/perf_event/perf_event.c + src/components/perf_event/perf_event_lib.c + src/components/perf_event/perf_event_lib.h: perf_event: move overflow and + profile code out of common lib the perf_event_uncore component doesn't need + it + + * 8dde03fc .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore: + remove profiling and overflow code perf_event doesn't support sampling or + overflow on uncore + + * 30d23636 src/components/lustre/linux-lustre.c: lustre component: Several + fixes 1. create a dynamic native events table in pathalogical cases, lustre + can have lots of events. 
2. resolve some warnings change signature of + init_component properly error check addCounter 3. Add a preprocessor flag to + fake interface Set LIBCFLAGS="-DFAKE_LUSTRE" + + * 7ef51566 .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore: + remove dispatch timer call perf_event doesn't support sampling on uncore + events + + * 667661c6 src/components/perf_event/perf_event.c + src/components/perf_event/perf_event_lib.c + src/components/perf_event/perf_event_lib.h: perf_event: move rdpmc detection + back into perf_event.c It was in the perf_event_lib but uncore won't use the + feature. + + * d46f01e1 .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore: + check the paranoid file Disable the component if paranoid isn't 0 or lower, + and we're not running as root. + + * e4ec67d1 src/components/perf_event/perf_event.c: perf_event and paranoid + level 2 If paranoid level 2 (no kernel events) was set we were removing + PAPI_DOM_KERNEL from the allowable domains We were doing this even if the + user was root. This code checks for uid 0 and overrides the restriction. + + * c5501081 src/components/perf_event/perf_event_lib.c + src/components/perf_event/perf_event_lib.h: rename sys_perf_event_open2() + call back to sys_perf_event_open() This was changed when merging code to + avoid a conflict but wasn't renamed back whe the conflict was fixed. + +2013-07-11 + + * e263ea60 src/configure src/configure.in: configure: libpfm selection logic + rework If configure detected perfctr it would force libpfm3 to be used, even + with --with-perf_events, now force libpfm4 if perf_events is requested. + +2013-07-10 + + * 7a3ce030 .../host_micpower/Makefile.host_micpower.in + src/components/host_micpower/Rules.host_micpower + src/components/host_micpower/configure...: Component: host_micpower This is + a component that exports power information for Intel Xeon Phi cards (MIC). 
+ The component makes use of the MicAccessAPI distributed with the Intel + Manycore Platform Software Stack. + + k-mpss) + + * 9d9bd9c2 src/ctests/shlib.c: Fwd: Re: [Ptools-perfapi] ctests/shlib FAILED + Should have sent this to the papi devel list. -Will -------- Original + Message -------- Subject: Re: [Ptools-perfapi] ctests/shlib FAILED Date: Tue, + 09 Jul 2013 23:20:10 -0400 From: William Cohen To: + ptools-perfapi@eecs.utk.edu On 03/09/2012 03:40 PM, William Cohen wrote: > I + was looking through the test results and found that ctests/shlib FAILED on + all the machines I tested on because libm shared library is already linked + in. There is no difference in the number of shared libraries before and after + the dlopen. The test ctests/shlib fails as a reult of this. > > -Will > + _______________________________________________ > Ptools-perfapi mailing list + > Ptools-perfapi@eecs.utk.edu > + http://lists.eecs.utk.edu/mailman/listinfo/ptools-perfapi > I did some more + investigation of this problem today. I found that the lmsensor component + implicitly pulls in the libm. As an alternative, I wrote the attached patch + that uses setkey() and encrypt() in libcrypt.so instead. It works on various + linux machines, but I do not know whether it is going to work on other OS. + -Will >From c53c97e1de2d1c7dc0bca64d1906287ff73343c6 Mon Sep 17 00:00:00 + 2001 From: William Cohen Date: Tue, 9 Jul 2013 22:37:27 + -0400 Subject: [PATCH] Avoid using libm.so for ctests/shlib because of + implicit use in some components The lmsensors component can implicitly pull + in libm.so into the executable. Unfortunately, the ctests/shlib test expects + that libm.so is not loaded and will fail because there is no change in the + count of shared libraries. The patch uses libcrypt.so library setkey and + encrypt functions to test PAPI_get_shared_lib_info( ) instead of libm.so + library pow function. 
+ +2013-07-09 + + * bdc9b34b .../tests/perf_event_amd_northbridge.c: + Perf_event_amd_northbridge_test: Use buffer event_name instead of + uncore_event The variable uncore_event is initialized to NULL and is never + changed during execution of the test. PAPI_add_named_event fails and the + event set cannot be started. The correct event name is stored in event_name, + replacing all occurrences of uncore_event with event_name therefore fixes the + problem mentioned above. + +2013-07-08 + + * a1678388 src/components/micpower/linux-micpower.c: micpower: Fix output in + native_avail and component_avail. It uses cmp_info.name, not .short_name? + Native Events in Component: mic-power Name: mic-power + Component for reading power on Intel Xeon Phi (MIC) Should both match what + is prepended to event names, so change .name from mic-power to micpower. + + * e0582f2d src/components/micpower/linux-micpower.c: Micpower: fix a typo + subsystem, not sybsystem... + + * c7b357ec INSTALL.txt: INSTALL.txt: update instructions for MIC. + + * 34a1124e src/components/perf_event_uncore/tests/Makefile + .../tests/perf_event_amd_northbridge.c: Add perf_event_amd_northbridge test + The test should show how to write a program using AMD fam15h NB with a 3.9 + kernel. Once libpfm4 gets updated we can see if it's possible to also have + the test properly run on 3.10 kernels (in that case the regular + perf_event_uncore test should work w/o changes) + + * 41b6507c .../perf_event_uncore/tests/perf_event_uncore.c + .../tests/perf_event_uncore_multiple.c: Make perf_event_uncore tests use + PAPI_get_component_index() They were open-coding the component name search + for no good reason. + +2013-07-05 + + * abf38945 src/papi_libpfm4_events.c: avoid having a "default" PMU for the + uncore component on the main CPU component we have a "default" PMU where you + can leave out the PMU part of the event name.
This is unnecessary and + sometimes confusing on uncore, so always print the full event name if it's an + uncore PMU. + + * b9fe5c3e .../perf_event_uncore/tests/perf_event_uncore.c + .../tests/perf_event_uncore_multiple.c: Update perf_event_uncore tests to + properly fail if they don't have enough permissions + + * 32ae1686 .../perf_event_uncore/tests/perf_event_uncore.c: + perf_event_uncore_test : properly use uncore component The sample code was + still hardcoding to component "0" which shouldn't have worked. Thanks to + Claris Castillo for pointing out this problem. + + * 59e73b51 src/papi_libpfm4_events.c: have _papi_libpfm4_ntv_name_to_code + properly check pmu_type With the existing code, uncore events were being + found by the perf_event component even when that component has uncore events + disabled. + +2013-07-03 + + * a01394eb .../tests/perf_event_uncore_lib.c: perf_event_uncore: fix ivb + event in uncore test Now that libpfm4 officially supports plain ivb uncore, + make sure the test event we were using matches what libpfm4 supports. + +2013-07-01 + + * f10342a8 src/utils/cost.c: Clean up option handling in papi_cost The + papi_cost used strstr to search for the substring that matched the option. + This is pretty inexact. Made sure that the options matched exactly and the + option arguments for -b and -t were greater than 0. Also make papi_cost print + out the help if there was an option that it didn't understand. + + * b5adc561 src/utils/native_avail.c: Clean up option handling for + papi_native_avail Corrected the help to reflect the name of the option + "--noumasks". Print error message if the "-i", "-e", and "-x" option + arguments are invalid. Avoid using strstr() for "-h", use strcmp instead. + Also check for "--help" option. + + * 8933be9b src/utils/decode.c: Clean up option handling in papi_decode + papi_decode used strstr() to match options; this can lead to inexact matches. + The code should use strcmp instead.
Make sure command name is not processed + as an option. Also print help information if some argument is not understood. + + * d94ac43a src/utils/component.c: Improve option matching in papi_component + and add "--help" option + + * bb63fe5c src/utils/command_line.c: Add options to papi_command_line man + page and improve opt handling Add options mentioned in the -h to the man page. + Also improve the matching of the options. + + * 09059c82 doc/Makefile src/utils/version.c: Add information for papi_version + to be complete + + * 4f2eee8c src/configure src/configure.in: add a --disable-perf-event-uncore + option to configure + +2013-06-29 + + * 901c5cc2 src/components/perf_event/perf_event.c + src/components/perf_event/perf_event_lib.c + .../perf_event_uncore/perf_event_uncore.c...: remove syscalls.h it's no + longer needed + + * 4d7e3666 src/Rules.perfmon2 src/components/perfmon2/Rules.perfmon2 + src/components/perfmon2/perfmon.c...: move perfmon modules to their own + component directory + + * a7e9c5f1 src/Rules.perfctr src/Rules.perfctr-pfm + src/components/perfctr/Rules.perfctr...: move perfctr files to + components/perfctr directory verified that perfctr-x86 still builds and + works perfctr_ppc has all the files to build, but it doesn't work. It looks + like no one has tried to build perfctr-ppc for a very very long time. + +2013-06-27 + + * e9dec1fd src/ctests/hl_rates.c src/papi.h src/papi_fwrappers.c...: debugged + versions of these files + + * e282034e src/utils/native_avail.c: native_avail: Fix parse_unit_mask code + Reported by Steve Kaufmann -------------------------- I noticed while + developing a new component that the output from papi_native_avail was + incorrectly presented for the component. I believe this is because the ":::" + prefix is not being taken into account, so the base event name is interpreted + as a unit mask and is prepended with a : before each legitimate unit mask + associated with the event.
I think this is just now happening because mine is + the first component that has unit masks. I have include a fix below. The + output of the unit masks by papi_native_avail now appears correctly for my + component. Thanks, Steve + +2013-06-26 + + * ff096786 src/ctests/fork2.c: fork2: Return fork2 test to its old + functionality Once upon a time fork2 did: PAPI_library_init() … if ( + fork() == 0) PAPI_shutdown() PAPI_library_init() … + +2013-06-25 + + * 978d0d3d src/examples/PAPI_add_remove_event.c src/papi.c: Modify + PAPI_list_events functionality to match documentation. You can now pass in a + NULL event array and a zero count to get back the valid number of events. + This can then be used to allocate the array and retrieve the exact number of + events. Thanks to Nils Smeds and Alain Miniussi for pointing this out. + + * 13c52402 src/examples/PAPI_add_remove_event.c src/papi.c: Modify + PAPI_list_events functionality to match documentation. You can now pass in a + NULL event array and a zero count to get back the valid number of events. + This can then be used to allocate the array and retrieve the exact number of + events. Thanks to Nils Smeds and Alain Miniussi for pointing this out. 
+ + * 656e703e src/ctests/zero_fork.c: zero_fork ctest : make documentation match + code + + * 96aad0c7 src/ctests/forkexec.c: forkexec ctest : make comments match code + + * b7c70953 src/ctests/forkexec4.c: forkexec4 ctest : make comments match the + code + + * 7ffb0245 src/ctests/forkexec3.c: forkexec3 ctest : make documentation match + code + + * 55ea846c src/ctests/forkexec2.c: forkexec2 ctest: have comments match what + source does + + * 7a601e2a src/ctests/Makefile src/ctests/fork2.c: fork2 ctest: remove; was + an exact duplicate of fork + + * 9deff49b src/ctests/fork.c: fork ctest: make comments match what file + actually does + +2013-06-24 + + * 2770d2c5 src/components/perf_event/perf_event_lib.c: perf_event: fix + failure on ARM due to domain settings forgot to git add the perf_event_lib.c + file :( + + * bf7c4c50 src/components/perf_event/perf_event.c + src/components/perf_event/perf_event_lib.h: perf_event: fix failure on ARM + due to domain settings On Cortex A8 and A9 it's not possible to set + exclude_kernel (hardware does not support it). Make sure the rdpmc detection + code doesn't try to set exclude_kernel. + +2013-06-18 + + * 2b1433d8 src/ctests/all_native_events.c src/ctests/get_event_component.c: + ctests: Skip calling into disabled components. This patch fixes a problem + that was causing two test cases to abort when they were run on a system which + has disabled components. Code was added to check if the component is + disabled and just go to the next component in the list when the check is + true. This prevents calls to code in components which may abort because the + component was unable to initialize itself correctly. Thanks to Gary Mohr and + Chuck LaCasse from Bull for reporting. + +2013-06-14 + + * 1872453c src/testlib/do_loops.c: testlib: don't change the iter count The + first argument to do_misses is an iteration count, for some reason the code + was dividing this in half before doing work. 
Most places that call do_misses + call it as do_misses ( 1, ...) void do_misses( int n, int bytes ) { {...} n + = n / 2; for ( j = 0; j < n; j++ ) { 1/2 == 0; so our do_misses call was + usually not. Thanks Nils Smeds for reporting. + +2013-06-12 + + * c113e5b6 src/components/infiniband/Makefile.infiniband.in + src/components/infiniband/Rules.infiniband + src/components/infiniband/configure...: Infiniband component: switch over to + weak linking Thanks to Gary Mohr for the patch. + ---------------------------------- The infiniband component needs include + files and libraries from both the infiniband ibmad and ibumad packages. When + these packages are installed on a system, both packages normally install + their files in the same place (includes in /usr/include/infiniband and + libraries in /usr/lib64). The current component configure script allows you + to provide a single include path and a single library path which gets used to + access files from both packages. If these two packages have different + install prefixes (or you are trying to build from install images of each + package which are not located under the same directory) then the configure + script fails because it can not find all the files it needs. These changes + modify the configure script to replace the include and library dir's with an + ibmad_dir and ibumad_dir and then uses the correct packages directory when + looking for includes and libraries from that package. This makes it work + like the cuda and nvml components with respect to configuring how to find + files from a package the component depends on. There are also changes in + this patch file to remove an unneeded variable in the dlopen code to resolve + some defects reported by coverity.
+ +2013-06-11 + + * d5be5643 src/components/rapl/tests/rapl_basic.c: rapl tests: make the error + messages a little more verbose + + * 0c9f1a8c src/run_tests_exclude.txt src/run_tests_exclude_cuda.txt: + run_tests_exclude files: Exclude a template file + ------------------------------------------- It also adds the cpi.pbs file to + the list of files to excluded when the tests are run. This file is just a + template and attempts to run it hang the run_tests script on our systems. + ------------------------------------------- + + * 0a063619 src/run_tests.sh: run_tests.sh: fix exclude check. The script + failed to remove .cu files, this patch fixes the check. Thanks Gary Mohr for + reporting/patching. + +2013-06-10 + + * 87399477 src/components/cuda/linux-cuda.c: cuda component: Address a + coverity issue The library linking code saved return values in a local var + but never used them. Thanks to Gary Mohr for submitting this patch. + + * 99b5b685 src/components/coretemp/tests/coretemp_basic.c: coretemp_basic: + update test to properly enumerate events The code was old and was searching + the entire native event list for ones that started with "hwmon". This + updates the test to first find the coretemp component, then enumerate all + events contained within. + + * b5c0795b src/components/rapl/tests/rapl_overflow.c: rapl component: address + potential looping issue in test. A rapl component test has a do/while which + only exited when PAPI_add_named_event returned 0 ( and only 0; the PAPI_E* + error codes would not terminate a while( retval ) loop), this felt fragile, + minimal checks are now inplace. + + * 4e9484a5 src/components/rapl/tests/rapl_overflow.c: rapl components: + coverity fixes Reported/patched by Gary Mohr ----------------------------- + The rapl component also has 1 defect in a test case. The complaint is that + there is code that can never be executed. 
But this one is not as clear, it + says that you can not exit the do/while loop that preceeds a test of retval + until retval=0 which means the test can never be true. The patch I am + providing is to again remove the if test and its contents. But I am + concerned that the do/while loop preceeding the test could result in a hard + loop that would hang the test case forever. It seems to me like something + should also be done to insure the loop will exit at some point. Here is a + patch that provides at least part of the fix: ----------------------------- + + * 0a533810 src/components/net/tests/net_values_by_name.c: net components: + coverity fixes Reported/patched by Gary Mohr ----------------------------- + The net component has one defect in one of the test cases. The complaint is + that there is code that can never be executed. There is a test to see if + event_count == 0 which can never be true at that place in the code. So I + removed the if statement and its contents. Here is the patch: + ----------------------------- + +2013-06-07 + + * b784b063 src/components/nvml/Rules.nvml src/components/nvml/configure + src/components/nvml/configure.in...: nvml: Apply Gary Mohr's dlopen patch. + Move the nvml component over to using the dlopen and weak linking + infrastructure of the cuda component. Thanks, Gary. + + * d6505b76 src/components/rapl/utils/rapl_plot.c: rapl: update the rapl_plot + utility Get the event names by enumerating the ones available with the RAPL + component rather than having a hard-coded list. + + * 2094c5b1 src/components/rapl/linux-rapl.c: rapl: add better error messages + on component init failure + + * d0e668fb src/ctests/Makefile src/ctests/high-level.c + src/ctests/hl_rates.c...: First round of changes to implement a PAPI high + level event per cycle call. Untested. + +2013-06-05 + + * 63074f82 src/components/rapl/linux-rapl.c: rapl: Add Ivb-EP support The + Intel docs are spotty on what is actually supported. 
They state: 14.7.2 RAPL + Domains and Platform Specificity The specific RAPL domains available in a + platform varies across product segments. Platforms targeting client segment + support the following RAPL domain hierarchy: * Package * Two power planes: + PP0 and PP1 (PP1 may reflect to uncore devices) Platforms targeting server + segment support the following RAPL domain hierarchy: * Package * Power plane: + PP0 * DRAM + +2013-05-31 + + * 31b4702d src/cpus.c: cpus.c: Don't run init_thread/shutdown_thread for + disabled components. + +2013-05-29 + + * c48087d2 ChangeLogP511.txt RELEASENOTES.txt: Grab the updated ChangeLog + from 5.1.1 Create a ChangeLog and update RELEASENOTES for a 5.1.1 release. + +2013-05-24 + + * d1c8769e src/components/perf_event/tests/Makefile + src/components/perf_event/tests/event_name_lib.c + .../perf_event/tests/perf_event_user_kernel.c: Add perf_event user/kernel + domain test This will be useful if/when we start handling domains properly. + + * 89e1aeba src/components/perf_event/tests/Makefile + src/components/perf_event/tests/event_name_lib.c + src/components/perf_event/tests/event_name_lib.h...: Add perf_event offcore + response test Does a quick check to see if offcore response events are + working. + + * bda86616 .../perf_event_uncore/perf_event_uncore.c + src/ctests/get_event_component.c src/papi_internal.c: Some more ctest fixes + involving disabled components. We enforce disabled components sometime in + the PAPI routines and sometimes in the components themselves. A bit + confusing. It is tough with perf_event and perf_event_uncore because we + share libpfm4 by both, so the naming library for perf_event_uncore will be + active even if the component is disabled, which can cause some confusing + results if your test code ignores PAPI_ENOCMP error messages and accesses a + disabled component anyway. This at least fixes our test cases, we might have + to revisit this later. 
+ + * b596621e doc/Doxyfile-common papi.spec src/Makefile.in...: Bump version + numbers Call this 5.2.0.0 simply because it's greater than (and some + components are completely incompatible with) 5.1.1 + + * eb77a91e .../perf_event_uncore/perf_event_uncore.c src/papi.c: Disallow + enumerating events on disabled components. This was causing segfaults on + tests where enumeration was trying to enumerate uncore events on machines w/o + uncores. + + * 4e991a8a .../perf_event/tests/perf_event_system_wide.c: + perf_event_system_wide: SKIP instead of FAIL if we don't have proper + permissions + + * 7654bb1f src/Makefile.inc src/components/perf_event/tests/Makefile + .../perf_event/tests/perf_event_system_wide.c...: move the perf_event + specific tests to be with their component This means the perf_event tests + will only be run if perf_event is enabled + + * d82e343f src/ctests/perf_event_uncore_multiple.c: + ctests/perf_event_uncore_multiple: Improve this test a bit + + * b1a594bf src/perf_events.c src/sys_perf_event_open.c: Remove the no-longer + needed perf_events files Now we use the versions in the + components/perf_event directory + + * a9a277f3 src/Makefile.in src/Makefile.inc src/configure...: Split up + CPUCOMPONENT configure variable Now it is CPUCOMPONENT_NAME CPUCOMPONENT_C + CPUCOMPONENT_OBJ This allows having setups with no CPUCOMPONENT set + (perf_event used as a component) while keeping backward compatible with + non-component CPU components. This has been tested on perf_event and + perfctr. It might break other architectures, so test if you can.
+ + * 69e29526 src/configure src/configure.in: configure: have --with-components + append components to existing value This allows configure to earlier set the + components value to include "perf_event" if detected and then later append + the values passed in with --with-components + + * 9d28df4c src/components/perf_event/Rules.perf_event + src/components/perf_event/perf_event.c + src/components/perf_event/perf_event_lib.c...: add perf_event and + perf_event_uncore components This adds perf_event as a standalone component. + Currently it is not compiled or built, some changes need to be made to the + build system before this will work. + +2013-05-21 + + * ea996661 src/components/cuda/linux-cuda.c: eliminate warnings of unused + vars + + * 691bf114 src/components/cuda/linux-cuda.c: eliminate warnings of unused + vars + + * 221bfdab src/components/cuda/linux-cuda.c + src/components/cuda/tests/HelloWorld.cu: Problem with cleanup_eventset(): + after destroying the CUDA eventset, update_control_state() is called again + which operates on the already destroyed eventset. + +2013-05-17 + + * 84925f50 src/components/cuda/linux-cuda.c: When adding multiple CUDA events + to an event set, PAPI_add_event() error 14 (CUPTI_ERROR_NOT_COMPATIBLE) is + being raised from the CUPTI library. Turns out that the CUDA update control + state wasn't cleaning the event set up properly before adding new events. + It's fixed now. + + * 2337aa3a src/perf_events.c: perf_event: allow running with + perf_event_paranoid is 2 perf_event_paranoid set to 2 means allow user + monitoring only (no kernel domain). The code before this mistakenly disabled + all events in this case. Also set the allowed domains to exclude + PAPI_DOM_KERNEL. + +2013-05-16 + + * 617d9fbb src/papi_events.csv: papi_events.csv Revert a little mishap in + adding ivbep support Somehow the contents of papi_hl.c ended up in the + events file.
+ + * 2aff4596 src/papi_events.csv: Add identifier for ivb_ep + + * 1810ddf9 src/papi_libpfm4_events.c src/papi_libpfm4_events.h + src/perf_events.c: papi_libpfm4_events: allow specifying + core/uncore/os_generic PMUs This allows you to specify you only want your + perf_event/libpfm4 based component to only export the PMU types you want. + Now we can have an uncore-only component. + + * 6554f3f0 src/papi_libpfm4_events.c: papi_libpfm4_events.c: only enable + presets for component 0 If we have multiple events using libpfm4, we only + want to load the presets if it is component 0. + + * 6a4a4594 src/papi.c: PAPI_get_component_index() was matching names + improperly For example, it was matching perf_event and perf_event_uncore as + the same component. + + * 1b94e157 src/papi_hl.c: papi_hl.c : fix IPC calculation I broke it a while + back while trying to clear out use of MHz. The code was uncommented and very + confusing. It is slightly better now. + + * 92d4552e src/papi_libpfm4_events.c src/papi_libpfm4_events.h + src/perf_events.c: papi_libpfm4_events: code changes to allow multiple + component access the PAPI libpfm4 code has been modified to allow multiple + users at once. This will allow multiple components to use libpfm4, for + example a CPU component and an uncore component. + + * 7902b30e src/cpus.c: cpus: fix debug compile I always forget to compile + with --with-debug and miss changes in the DEBUG statements. + +2013-05-15 + + * 7ddc05ff src/cpus.c src/cpus.h: cpus.c: Add reference count to cpu + structure It is possible to have multiple eventsets all attached to the same + CPU, as long as only one eventset is running at a time. At EventSet cleanup, + PAPI would free the CpuInfo_t structure even if other EventSets were still + using it. This patch adds a reference count to the structure and only frees + it after the last user is cleaned up. I also fixed a few locking bugs, + hopefully I didn't introduce any new ones. 
+ + * 6a61f9a2 src/cpus.c: more cleanup of the cpus.c file mostly formatting and + added comments. + + * 710d269f src/cpus.c src/cpus.h src/papi.c...: cleanup cpus.h It had a lot + of extraneous stuff in it. Also make sure it only gets included in files + that need it. + + * 422226c9 src/papi.c: papi.c: add some extra debug messages + + * b1297058 src/cpus.c: Clean up cpus.c a bit Tracking down a segfault in the + cpu attach cleanup code. + + * 7b6023cf src/ctests/perf_event_system_wide.c: + ctests/perf_event_system_wide: much improved output It segfaults at the end + though, unclear if this is a bug in the test or a bug in PAPI. Will + investigate. + + * 38397aa3 src/components/cuda/configure src/components/cuda/configure.in + src/components/cuda/linux-cuda.c...: Cuda component: Update library search + path From Gary Mohr: It turns out that with the changes I gave you the path + to the libcuda.so library is still hard coded to /usr/lib64. This assumes + that the NVIDIA-Linux package is installed on the system where the build is + being done. In Bull's case (and probably other users also) this is not + always the case. To add the flexibility we need, I have added a new + configure argument to the cuda configure script. The new argument is + "--with-cudrv_dir" and it allows the user to specify where the cuda driver + package (ie: NVIDIA-Linux) to be used for the build can be found. This new + argument is optional and if not provided a value of "/usr" will be used. This + allows existing configure calls to continue to work like before. + + * f8873d1c src/ctests/perf_event_system_wide.c: + ctests/perf_event_system_wide: clean up the output a lot Still working on + understanding it. + + * ebf20589 src/ctests/perf_event_system_wide.c: perf_event_system_wide: + testing various DOMAIN and GRANULARITY settings pushing the limits of + PAPI/perf_event trying to see why system-wide measurement doesn't work. 
+ +2013-05-14 + + * 0c1ef3f5 src/components/cuda/linux-cuda.c: CUDA component: Update + description field Also removes a strcpy in the init code, which overwrote + the name field. Thanks to Gary Mohr + + * 474fc00e src/ctests/perf_event_uncore_lib.c: Add AMD fam15h northbridge + event to ctests/perf_event_uncore_lib.c + +2013-05-13 + + * cf56cdac src/perf_events.c: perf_event component: update error returns + This passes more error return values back to PAPI. Before this change a lot + of places were hardcoded to PAPI_EPERM even if sys_perf_event_open() was + reporting a different error. + + * c824471b src/ctests/Makefile src/ctests/perf_event_system_wide.c + src/ctests/perf_event_uncore.c...: Update the perf_event specific tests. + This adds a few more uncore tests, which are currently showing some bugs in + the implementation. The tests all need root permissions to run, so should + default to "SKIPPED" for most users. + +2013-05-08 + + * e0204914 src/configure src/configure.in: Force the use of pthread_mutexes + on ARM This lets the system libraries worry about the best way to define + mutexes, rather than trying to hand-code in assembly around all of the + various issues there are with atomic instructions in the ARM architecture. + It might make sense to enable this for *all* Linux architectures, but for now + just do it for ARM. + + * f21b1b27 src/linux-lock.h: Commit 59d3d7584b2925bd05b4b5d0f4fe89666eb8494a + removed the definition of mb(). mb() was defined as rmb(). This just + corrects it back. (Note from VMW -- this fixes some things, but ARM still + won't build on a Cortex A9 pandaboard due to the use of the "swp" + instruction. Proper fix is probably to enforce posix-mutexes on ARM) + +2013-05-06 + + * 913f0795 src/components/nvml/configure src/components/nvml/configure.in: + NVML: Update wording for configure options. Thanks for pointing out the + ambiguous wording, Heike.
+ + * 81a86c2b src/components/infiniband/Rules.infiniband + src/components/infiniband/linux-infiniband.c + src/components/infiniband/tests/Makefile: Infiniband component: use + dlopen/dlsym for symbols Apply Gary Mohr's patch to switch the infiniband + component over to dl* with the same motivations as the cuda component. + +2013-05-02 + + * 2e6bcb2a src/utils/native_avail.c: Add two command line switches: -i + EVENTSTR includes only events whose names contain EVENTSTR; -x EVENTSTR + excludes all events whose names contain EVENTSTR. These two switches can be + combined, but only one string per switch can be used. This allows you to, for + example, filter events by component name, or eliminate all uncore events on + Sandy Bridge… + +2013-05-01 + + * 3163cc83 src/ctests/perf_event_uncore.c: ctests/perf_event_uncore: add + IvyBridge support this needs an updated libpfm4 to work + +2013-04-30 + + * 55c89673 src/examples/add_event/Papi_add_env_event.c + src/examples/overflow_pthreads.c: Examples: Missed two instances of %x printf + formating. + +2013-04-29 + + * b3c5bd47 src/components/appio/tests/appio_list_events.c + src/components/appio/tests/appio_values_by_code.c + src/components/appio/tests/appio_values_by_name.c...: Address TRAC 174: Let + printf do the formatting https://icl.cs.utk.edu/trac/papi/ticket/174 174: + PAPI's debuggin/info output should use %# conversions for octal and hex + ------------------------+-------------------- Reporter: sbk@… | + Owner: Type: enhancement | Status: new Priority: normal | + Component: All Version: HEAD | Severity: normal Keywords: + | ------------------------+-------------------- Email sent to James + Ralph: Seeing your latest change reminded me: Anytime there is a value + issued in hex or octal the "%#" conversion should be used so the value is + always preceded with a "0" for octal or a "0x" for hex. 
Otherwise when a + value is printed one can not tell the base it is in (one shouldn't have to + rely on internal knowledge of the code or the context to tell). For variables + that are pointers the "%p" conversion can be used (this will always use an + hex syntax). It would be nice to apply this to all PAPI print statements in + their entirety. + +2013-04-25 + + * 87ec9286 src/components/vmware/Rules.vmware: Rules.vmware: Use $(LDL) no + -ldl Minor cleanup, but configure sets it, so why not use it. + +2013-04-26 + + * 8dddd587 src/papi_hl.c: papi_hl: Use PAPI_get_virt_usec() for process time + The code was using cycles / MHz which is not guaranteed to work on modern + machines. It also was sometimes using (instructions / estimated IPC) / MHz + which hopefully isn't necessary for any machine PAPI currently supports. + Instead use PAPI_get_virt_usec() which should give the right value. + +2013-04-25 + + * 9dd36088 src/ctests/perf_event_uncore.c: ctests/perf_event_uncore: make + more modular Cleans up the code to make it easier to add tests for + architectures other than SandyBridge-EP. I was doing this so I could add + support for IvyBridge but it turns out neither Linux nor libpfm4 supports + uncore on IvyBridge yet. hmmm. + + * 52ff0293 src/components/cuda/Rules.cuda: Rules.cuda: The cuda component + now depend on the dynamic linking loader and on some systems one has to + explicitly link to it. Add $(LDL) to LD_FLAGS, configure sets it if we need + it. + + * 97a4a5ea src/components/cuda/Rules.cuda src/components/cuda/linux-cuda.c + src/components/cuda/tests/Makefile: Cuda component enhancement. + ---------------- From Gary's submission--------------------------------- The + current packaging of the cuda component in PAPI has a fairly unfriendly side + effect. When PAPI is built with the cuda component, then that copy of PAPI + can only be used on systems where the cuda libraries are installed. 
If it is + installed on a system without these libraries then all PAPI services fail + because they have references to libraries which can not be found. Even + papi_avail which you would think has nothing to do with cuda reports the + error. This issue significantly complicates the delivery and install of the + PAPI package on large clusters where some of the nodes have NVIDIA GPU's (and + the cuda libraries to talk to them) and other nodes do not have GPU's (and + therefore no software to access them). I have been working with the help of + Phil Mucci to eliminate this dependency so that a copy of PAPI built with a + cuda component could be installed on all nodes in the cluster and if the node + had NVIDIA GPU's (and libraries available) then the cuda component would get + enabled and could be used. If the node did not have the hardware or the + access libraries were not available, then the cuda component would just + disable itself at component initialization so it could not be used (but all + other PAPI services would still work). Phil has provided some gentle + prodding and lots of valuable suggestions to assist this effort. I now think + that I have a working version of this capability and am ready to share it + with the community. + ----------------------------------------------------------------------- Many + thanks to Gary Mohr and Phil Mucci for this much needed functionality. + +2013-04-23 + + * 99c8e352 src/papi_internal.c: papi_internal.c: Print an eventcode in hex vs + decimal. Thanks, Gary Mohr. + +2013-04-22 + + * 1fc5dae2 src/run_tests.sh: The test for determining whether to run valgrind + was backwards. Correcting that allow the run_test.sh script to stay the same + and one just needs to define "VALGRIND=yes" (or any non-null string) to make + run_test.sh use valgrind. 
--- src/run_tests.sh | 6 ++---- 1 file changed, 2 + insertions(+), 4 deletions(-) diff --git a/src/run_tests.sh + b/src/run_tests.sh index d1ce205..9337ff2 100755 --- a/src/run_tests.sh +++ + b/src/run_tests.sh @@ -19,10 +19,8 @@ else export TESTS_QUIET fi -if [ + "x$VALGRIND" = "x" ]; then -# Uncomment the following line to run tests using + Valgrind -# VALGRIND="valgrind --leak-check=full"; - VALGRIND=""; +if [ + "x$VALGRIND" != "x" ]; then + VALGRIND="valgrind --leak-check=full"; fi + #CTESTS=`find ctests -maxdepth 1 -perm -u+x -type f`; -- + +2013-04-19 + + * 4cf16234 src/components/README src/components/bgpm/README + src/components/coretemp_freebsd/README...: Restructure README files for + components so that the file in the components directory doesn't document + individual component details. Add README files to each component directory + that requires further installation detail. Update RAPL instructions to + capture how to enable reading the MSRs. These files are supposedly configured + with Doxygen markup, but I don't think the master README ever got built. It + probably should. + +2013-04-17 + + * bf75d226 src/components/cuda/tests/HelloWorld.cu: cuda/tests/HelloWorld.cu: + workaround a segfault. Report from Gary Mohr + + I was running the Cuda test case on a system which did not actually have any + NVIDIA GPU's installed on it (but the cuda software was installed and papi + was built with the cuda component). I modified the test case to put a real + cuda event in the source (as suggested in the source). When I run the test + case the cuda component gets disabled in PAPI_library_init (because + detectDevice function can not find any GPU's) which is the correct behavior. + The test case then calls PAPI_event_name_to_code which failed because the + cuda component was disabled. The test case then created an event set and + called PAPI_add_events with an empty list of events to be added. This led to + a segfault somewhere inside libpfm4.
The attached patch makes some minor + changes to protect against this problem. I noticed this test case does not + use the PAPI test framework utilities (test_xxxx functions) so I did not + modify the test to use them. + +2013-04-15 + + * 457bfd74 src/components/cuda/linux-cuda.c: When creating two event sets - + one for the CUDA and one for the CPU component - the order of event set + creation appears crucial. When the CPU event set has been created before the + CUDA event set then PAPI_start() for the CUDA event set works fine. However, + if the CUDA event set has been created before the CPU event set, then + PAPI_start(CUDA_event_set) forces the CUDA control state to be updated one + more time, even if the CUDA event set has not been modified. The CUDA control + state function did not properly handle this case and hence cause PAPI_start() + to fail. This has been fixed. + + * 807120b6 src/components/cuda/linux-cuda.h: linux-cuda.c + +2013-03-28 + + * 7b0eec7a src/run_tests.sh: run_tests.sh: further refine component test find + Exclude *.cu when looking for component tests. + +2013-03-25 + + * 6a40c8ba src/run_tests.sh: run_tests.sh: File mode changes. run_tests.sh + is now expected to run from the install location in addition to src. The + script tried to remove execute from *.[c|h], now it just excludes *.[c|h] + from the find commands. + +2013-03-18 + + * 2ba9f473 src/perfctr-x86.c: perfctr: don't read in event table multiple + times papi_libpfm3_events.c now reads in the predefined events, we don't + also need to do this in perfctr setup_x86_presets() + + * 326401b1 src/perfctr.c: Fix segfault in perfctr.c The preset lookup uses + the cidx index, but in perfctr.c we weren't passing a cidx value (it was + being left off). The old perfctr code plays games with defining extern + functions so the compiler wasn't giving us a warning. 
+ +2013-03-14 + + * 50130c6f src/components/bgpm/L2unit/linux-L2unit.c src/linux-bgq.c: If a + counter is not set to overflow (threshold==0; happens when PAPI_shutdown is + called) then we do not want to rebuild the BGPM event set, even if the event + set has been used previously and hence "applied or attached". Usually if an + event set has been applied or attached prior to setting overflow, the BGPM + event set needs to be deleted and recreated (which implies malloc() from + within BGPM). Not so, though, if threshold is 0 which is the case when + PAPI_shutdown is called. Note, this only applies to Punit and L2unit, not + IOunit since an IOunit event set is not applied or attached. + +2013-03-13 + + * 1a143003 src/components/bgpm/IOunit/linux-IOunit.c + src/components/bgpm/IOunit/linux-IOunit.h + src/components/bgpm/L2unit/linux-L2unit.c...: Overflow issue on BG/Q + resolved. Overflow with multiple components worked; overflow with multiple + components and multiple events did not work as supposed to. + + * 42741a40 src/components/cuda/Rules.cuda: Added one more library to linker + command. + +2013-03-12 + + * 1431eb3f src/components/nvml/Makefile.nvml.in + src/components/nvml/Rules.nvml src/components/nvml/configure...: NVML + component: build system work Adopt the cuda component's method for + specifying library location. + +2013-03-11 + + * ce66feac src/components/mx/linux-mx.c: mx component: Modernize init + routine. Add component index to _mx_component_init()'s signature and set the + bit in component info. + + * 1c1bc177 src/components/cuda/Makefile.cuda.in + src/components/cuda/Rules.cuda src/components/cuda/configure...: Resolve + configure issues for CUDA component. + +2013-03-07 + + * f3572537 src/linux-common.c src/linux-memory.c: Fix the build on + Linux-SPARC I dug out an old SPARC machine and fixed the PAPI build on it.
+ + * 2c7f102c src/perf_events.c: More comprehensive sys_perf_open to PAPI error + mappings This tries to cover more of the errors returned by sys_perf_open + and map them to better results. EINVAL is a problem because it can mean + Conflict as well as Event not found and many other things, so it's unclear + what to do with it. + + * 299070ef src/perf_events.c src/sys_perf_event_open.c: Return proper error + codes for sys_perf_event_open For some reason on x86 and x86_64 we were + trying to set errno manually and thus over-writing the proper errno value, + causing all errors to look like PAPI_EPERM This removes that code, as well + as adds code to report ENOENT as PAPI_ENOEVENT. With this change, on IVY + this happens which looks more correct. ./utils/papi_command_line + perf::L1-ICACHE-PREFETCHES Failed adding: perf::L1-ICACHE-PREFETCHES because: + Event does not exist command_line.c PASSED + +2013-03-06 + + * baa557ca src/papi_libpfm4_events.c src/papi_user_events.c: Coverity fixes: + Coverity pointed out that there was a case where load_user_event_table() could + leak memory. The change in the location of the papi_free(foo) ensures that + the allocated memory is freed. Coverity pointed out one path through the + code in _papi_libpfm4_ntv_code_to_descr() that did not free up memory + allocated in the function. Added a free on the path to free up that memory. + Thanks Will Cohen. + +2013-02-14 + + * 395b7bc7 src/Makefile.inc src/components/README + src/components/appio/tests/Makefile...: Add component tests' to the + install-[all|tests] target. Thanks to Gary Mohr. ------------------- This + makes a fairly small change to src/Makefile.inc to add logic that adds a new + install-comp_tests target which calls the install target for each component + being built. This new target is listed as a dependency on the install-tests + target so it will happen when the 'install-all', 'install-tests', or + 'install-comp_tests' targets are used.
A note about this change, I am not + real familiar with the auto make and auto conf tools. This change was enough + to make it work for me but if there is another file that should also be + changed for this modification, please help me out here. The patch also adds + install targets to the Makefiles for all of the components which have 'tests' + directories and updates the README file which talks about how to create + component tests. Another note, I only compile with a couple of components + (ours, rapl, and example) so if I fat fingered something in one of the other + components Makefiles I would not have noticed. Please keep me honest and make + sure you compile with them all enabled. Thanks for adding this capability + for us. Gary --------------------------- Makefile.inc: Add run_tests and + friends to install-tests target. Component test Makefiles' get their install + location to mirror what runtests expects. + +2013-03-04 + + * 448d21ab src/components/rapl/linux-rapl.c: Remove a stray debug statement. + Thanks to Harald Servat for catching this. + +2013-03-01 + + * df1a75cc src/utils/command_line.c: Wrestled some horribly convoluted + indexing into shape. The -u and -x options now print as expected (I think). + +2013-01-31 + + * b0f5f4d6 src/components/nvml/linux-nvml.c: linux-nvml.c: Fix type warning. + CUDA and NVML have an signed vs unsigned thing going on in their returned + device counts, cast away the warning. + +2013-01-29 + + * 8490b4ee src/papi.c: General doxygen cleanup: remove all "No known bugs" + messages; correct and cleanup examples for PAPI_code_to_name and + PAPI_name_to_code + +2013-01-23 + + * 89e45a9b src/linux-memory.c src/linux-timer.c: ia64 fixes. Thanks to Tony + Jones for patches. + +2013-01-16 + + * 23e0ba2d src/components/nvml/linux-nvml.c: nvml component: cleanup a memory + leak We did not free a buffer at shutdown time. + +2013-01-15 + + * f3db85fc src/papi.h: papi.h bump version number. 
+ + * dfa80287 src/buildbot_configure_with_components.sh: Buildbot configure + script. Add cuda and nvml components, if configured, to the buildbot + coverage test. Note: Script now checks for existence of Makefile.cuda and + then Makefile.nvml to see if it can build the cuda component and then if it + can build the nvml component. + + * cf416e27 src/threads.c: Cleaned up compiler warning (gcc version 4.4.6) + + * 59cbc8fc src/components/bgpm/CNKunit/linux-CNKunit.c + src/components/bgpm/IOunit/linux-IOunit.c + src/components/bgpm/L2unit/linux-L2unit.c...: Cleaned up compiler warnings on + BG/Q (gcc version 4.4.6 (BGQ-V1R1M2-120920)) + +2013-01-14 + + * 3af71658 .../build/lib.linux-x86_64-2.7/perfmon/__init__.py + .../lib.linux-x86_64-2.7/perfmon/perfmon_int.py + .../build/lib.linux-x86_64-2.7/perfmon/pmu.py...: libpfm4: remove extraneous + build artifacts. Steve Kaufmann reported differences between the libpfm4 I + imported into PAPI and the libpfm4 that can be attained with a git clone + git://perfmon2.git.sourceforge.net/gitroot/perfmon2/libpfm4 Self: Do libpfm4 + imports from a fresh clone of libpfm4. diff --git a/ChangeLogP530.txt b/ChangeLogP530.txt new file mode 100644 index 0000000..ed744af --- /dev/null +++ b/ChangeLogP530.txt @@ -0,0 +1,424 @@ +2013-11-25 + + * a40c96c5 src/components/nvml/linux-nvml.c: nvml component: Add missing } + + * 166971ba src/components/nvml/linux-nvml.c: nvml component: modify api + checks To check if nvmlDeviceGetEccMode and nvmlDeviceGetPowerUsage are + supported, we just call the functions and see if nvml thinks it's supported by + the card.
+ +2013-11-21 + + * 78192de9 delete_before_release.sh: Kill the .gitignore files in + delete_before_release + + * 60fb1dd4 src/utils/command_line.c: command_line utility: Initialize a + variable Initialize data_type to PAPI_DATATYPE_INT64 Addresses a coverity + error Error: COMPILER_WARNING: [#def19] + papi-5.2.0/src/utils/command_line.c:133:4: warning: 'data_type' may be used + uninitialized in this function [-Wmaybe-uninitialized] switch (data_type) { ^ + +2013-11-20 + + * da2925f6 src/ctests/data_range.c: Make data_range test use prginfo + Coverity complained about prginfo being an unused variable for data_range.c. + The code is modified to be stylistically like the code for hw_info in the + preceding lines which also is not used elsewhere in the test. This is more + to reduce the amount of output in the Coverity scan than to fix this minor + issue. + + * 3386953d src/ctests/data_range.c: Check the return values of PAPI_start() + and PAPI_stop() for the data_range test The ia64 data_range test did not + check the return values of PAPI_start() or PAPI_stop(). There are probably + few people running this test on ia64 machines, but this is more to eliminate a + couple of errors noted by a Coverity scan and reduce the clutter in the Coverity + scan. + +2013-11-19 + + * e704e8f1 src/configure src/configure.in: configure: Build fpapi.h and co + for mic When building for mic, set the cross_compiling var in configure to + use a native c compiler to build genpapif.
+ +2013-11-18 + + * d32b1dae man/man1/papi_avail.1 man/man1/papi_clockres.1 + man/man1/papi_command_line.1...: Rebuild the man pages for a 5.3 release + + * 4e735d11 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump version + numbers for a pending 5.3 + + * efe026cd src/Makefile.inc: Makefile.inc: Pass LINKLIB, not SHLIB to the + comp_tests + + * f0598acb src/ctests/Makefile.target.in: ctests/Makefile.target.in: Properly + catch LINKLIB LINKLIB=$(SHLIB) or $(LIBRARY), so we have to have configure + fill in those as well. + + * 1744c23e src/ctests/Makefile.target.in: ctests/Makefile.target.in: Respect + static-tools the --with-static-tools configure flag sets STATIC, not + LDFLAGS. This gets passed to the tests' make subprocesses via + LDFLAGS="$(LDFLAGS) $(STATIC)" We mimic this in the installed Makefile. + + * e9347373 src/ctests/Makefile: ctests/Makefile: Don't clobber value of + LIBRARY TOOD: write a better message + + * 237219d1 src/Makefile.inc: Makefile.inc: Add enviro vars to fulltest recipe + The fulltest target didn't set LD_LIBRARY_PATH and as a result, several + tests wouldn't find libpfm and fail to run. The fix is to call our SETPATH + command first (as all of the other testing targets do) See + ------------------------------------------------------------------ icc + -diag-disable 188,869,271 -g -g -DSTATIC_PAPI_EVENTS_TABLE + -DPEINCLUDE="libpfm4/include/perfmon/perf_event.h" -D_REENTRANT -D_GNU_SOURCE + -DUSE_COMPILER_TLS -Ilibpfm4/include -I../../../testlib -I../../.. -I. 
-o + perf_event_offcore_response perf_event_offcore_response.o event_name_lib.o + ../../../testlib/libtestlib.a ../../../libpapi.so.5.2.0.0 ld: warning: + libpfm.so.4, needed by ../../../libpapi.so.5.2.0.0, not found (try using + -rpath or -rpath-link) ../../../libpapi.so.5.2.0.0: undefined reference to + `pfm_get_event_attr_info' ../../../libpapi.so.5.2.0.0: undefined reference to + `pfm_initialize' ../../../libpapi.so.5.2.0.0: undefined reference to + `pfm_get_pmu_info' ../../../libpapi.so.5.2.0.0: undefined reference to + `pfm_get_version' ../../../libpapi.so.5.2.0.0: undefined reference to + `pfm_get_os_event_encoding' ../../../libpapi.so.5.2.0.0: undefined reference + to `pfm_get_event_next' ../../../libpapi.so.5.2.0.0: undefined reference to + `pfm_get_event_info' ../../../libpapi.so.5.2.0.0: undefined reference to + `pfm_strerror' ../../../libpapi.so.5.2.0.0: undefined reference to + `pfm_find_event' ../../../libpapi.so.5.2.0.0: undefined reference to + `pfm_terminate' make[2]: *** [perf_event_offcore_response] Error 1 + ------------------------------------------------------------------ + +2013-11-17 + + * a7f642d2 src/Makefile.inc src/configure src/configure.in: Switch LINKLIB to + not have relative pathing + +2013-11-15 + + * 91a6fa54 src/components/lustre/tests/Makefile: Fix a typo in the lustre + tests' Makefile + +2013-11-13 + + * 9a5f9ad4 src/papi_preset.c: papi_preset.c: Fix _papi_load_preset_table func + Patch by Gleb Smirnoff ---------------------- The _papi_load_preset_table() + loses last entry from a static table. The code in get_event_line() returns + value of a char next to the line we are returning. Obviously, for the last + entry the char is '\0', so function returns false value and + _papi_load_preset_table() ignores the last line. Patch attached. The most + important part of my patch is only: - ret = + **tmp_perfmon_events_table; + return i; This actually fixes + the lost last line. 
However, I decided to make the entire get_event_line() + more robust, protected from bad input, and easier to read. + ---------------------- + +2013-11-12 + + * 579139a6 src/utils/hybrid_native_avail.c: more doxygen xml tag cleanup + + * 952bb621 src/components/bgpm/CNKunit/linux-CNKunit.c + src/components/bgpm/CNKunit/linux-CNKunit.h + src/components/bgpm/IOunit/linux-IOunit.c...: Fix doxygen Unsupported + xml/html tag warnings + + * 0c161015 src/components/micpower/linux-micpower.h: micpower: fix doxygen + warning + + * b187f065 src/components/host_micpower/README: host_micpower: update docs + +2013-11-11 + + * 4d379c6f src/ctests/p4_lst_ins.c: ctests/p4_lst_ins: Narrow scope of test + This test attempted to ensure that it was running on a P4, the test missed + for all non intel systems. + +2013-11-10 + + * ee1c7967 .../host_micpower/utils/host_micpower_plot.c: Added energy + consumption to host_micpower utility. + +2013-11-08 + + * eee49912 src/ctests/shlib.c: shlib.c: Check for NULL Thanks to Will Cohen + for reporting. Coverity picked up an instance of a value that could be NULL + and strlen would barf on it. Error: FORWARD_NULL (CWE-476): + papi-5.2.0/src/ctests/shlib.c:70: var_compare_op: Comparing "shinfo->map" to + null implies that "shinfo->map" might be null. + papi-5.2.0/src/ctests/shlib.c:74: var_deref_model: Passing "shinfo" to + function "print_shlib_info_map(PAPI_shlib_info_t const *)", which + dereferences null "shinfo->map". papi-5.2.0/src/ctests/shlib.c:13:26: + var_assign_parm: Assigning: "map" = "shinfo->map". + papi-5.2.0/src/ctests/shlib.c:24:3: deref_var_in_call: Function "strlen(char + const *)" dereferences an offset off "map" (which is a copy of + "shinfo->map"). + + * 83c31e25 src/components/perf_event/perf_event.c: perf_event.c: Check return + value of ioctl Thanks to Will Cohen for reporting based upon output of + coverity. 
+ + * e5b33574 src/utils/multiplex_cost.c: multiplex_cost: check return value on + PAPI_set_opt Thanks to Will Cohen for reporting based upon output of + coverity. + + * 04f95b14 src/components/.gitignore: Ignore component target makefile + + * cbf7c1a8 src/components/rapl/linux-rapl.c + src/components/rapl/tests/Makefile src/components/rapl/tests/rapl_basic.c: + Modify linux-rapl to support one wrap-around of the 32-bit registers for + reading energy. This ensures availability of the full 32-bit dynamic range. + However, it does not protect against two wrap-arounds. Care must be taken not + to exceed the expected dynamic range, or to check reasonableness of results + on completion. Modifications were also made to report rapl events as unscaled + binary values in order to compute dynamic ranges. Modify rapl-basic to add a + test (rapl_wraparound) to estimate maximum measurement time for a naive gemm. + With a -w option, measurement for this amount of time will be performed. The + gemm can be replaced with a user kernel for more accurate time estimates. + Makefile was modified to support the new test case. + +2013-11-07 + + * 7784de21 src/ctests/data_range.c src/ctests/zero_shmem.c: Modernize some + ctests Add tests_quiet check to data_range and zero_shmem + +2013-11-06 + + * 7c953490 src/configure src/configure.in: More MPICC checking Have + configure check for mpcc on AIX, in addition to mpicc. + + * 5c8d2ce0 src/ctests/zero_shmem.c: zero_shmem.c: Fix compiler warning The + worker threads in the test print an ID, the test was set up to call + pthread_self(), this is problematic. Since each thread is started with a + unique work load, use this to label threads.
+ + * 993a6e96 src/ctests/Makefile.recipies src/ctests/Makefile.target.in: + ctests/Makefile.recipies: conditionally build the MPI test + + * b29d5f56 src/Makefile.inc src/configure src/configure.in: Check for mpicc + at configure time configure[.in]: look for mpicc Makefile.inc: Pass MPICC to + ctests' make + +2013-11-05 + + * b2d643df src/papi_events.csv: Add floating point events for IvyBridge Now + that Intel has documented them and libpfm4 supports them, PAPI can use them. + We just use the same events as on sandybridge. Tested on an ivybridge + system. + +2013-11-01 + + * c5be5e26 src/components/micpower/linux-micpower.c: micpower: check return + of fopen before use Issue reported by Will Cohen from results of Coverity + run. + + * 5c1405ab src/components/host_micpower/utils/Makefile + src/components/host_micpower/utils/README + .../host_micpower/utils/host_micpower_plot.c: Add host_micpower utility to + gather power (and voltage) measurements on Intel Xeon Phi chips using + MicAccessAPI. + + * 46b9bdf5 src/components/host_micpower/linux-host_micpower.c: Added more + detailed event description and correct units to host mic power events. + + * b97c0126 src/components/host_micpower/linux-host_micpower.c: host_micpower: + Better error reporting grab output of dlerror on library load failure + +2013-10-31 + + * 84da7fd3 src/components/host_micpower/Rules.host_micpower + src/components/host_micpower/tests/Makefile: host_micpower: Fix some makefile + bits tests/Makefile needed to define a target to work with the + Make_comp_tests install machinery. Rules.host_micpower had a typo + +2013-10-30 + + * 14f3e4c4 src/components/host_micpower/linux-host_micpower.c: host_micpower: + fix function signature shutdown_thread took wrong arguments. + +2013-10-28 + + * a4cc1113 release_procedure.txt: Update release_procedure.txt Bug in the + version of doxygen we were using to produce the documentation led to some of + the Fortran functions being left out in the cold. 
We now prescribe 1.8.5 + + * a1d6ae34 src/components/host_micpower/README: host_micpower: Add a README + file. + +2013-10-25 + + * 859dbc2c src/Makefile.inc src/components/Makefile_comp_tests + src/components/Makefile_comp_tests.target.in...: Make the testsuite as a + stand-alone copy-able directory of code These changes to the Makefiles + allow the testsuite to be compiled separately from the papi sources. This + is useful for people wanting to experiment with the tests and verify that the + existing installation of papi works. We put absolute paths to the installed + library and include files into the installed makefile for the tests. + + * c307ad18 src/ctests/Makefile src/ctests/attach_target.c + src/testlib/do_loops.c: Refactor the driver in do_loops.c into its own file. + (ctests/Makefile, ctests/attach_target.c testlib/do_loops.c) + +2013-10-23 + + * ace71699 src/components/bgpm/CNKunit/linux-CNKunit.c + src/components/bgpm/IOunit/linux-IOunit.c + src/components/bgpm/L2unit/linux-L2unit.c...: Passing BGPM errors up to PAPI. + +2013-10-22 + + * 2ee090ec src/components/bgpm/NWunit/linux-NWunit.c + src/components/bgpm/NWunit/linux-NWunit.h: Fixed the behavior in BGQ's NWunit + component after attaching an event set to a specific thread that owns the + target resource counting + + * 8ab071ee src/components/cuda/linux-cuda.c: CUDA component: Set the number + of native events Patch by Steve Kaufmann When running papi_component_avail I + notice that the number of CUDA events was always zero when the component was + available. The following change correctly sets the number of native events + for the component: + +2013-10-11 + + * 071943b6 src/configure src/configure.in src/linux-context.h...: add + preliminary aarch64 (arm64) support There has been some work to build fedora + 19 on 64-bit arm armv8 machines (aarch64). I took a look at why the + papi build was failing. The attached is a set of minimal patches to get papi + to build.
The patch is just a step toward getting aarch64 support for papi. + Things are not all there for papi to work in that environment. Still need + libpfm to support aarch64 and papi_events.csv describing mappings to machine + specific events. + +2013-10-01 + + * 096eb7fc src/ctests/zero_shmem.c: zero_shmem: cleanup compiler warnings + Remove unused variables. + + * d9669053 src/ctests/earprofile.c: ctests/earprofile.c: Fix compiler warning + Both PAPI_get_hardware_info and PAPI_get_executable_info expect const + pointers, (get_executable_info is called by prof_init in profile_utils). + +2013-09-30 + + * 87e7e387 src/ctests/p4_lst_ins.c: ctests/p4_lst_ins.c: Fix the P4 load + test. This test relied upon a removed symbol to decide if it should run. The + symbol unsigned char PENTIUM4 was removed in 2011, update the logic. + + * 737d91ff src/ctests/zero_shmem.c: ctests/zero_shmem: Update the test * + add_test_events expects another argument, update the zero_shmem test's + invocation * Protect[Hide] OpenSHMEM calls with ifdefs + +2013-09-27 + + * 86c11829 src/ctests/zero_shmem.c: zero_shmem: Include pthread.h + + * 2d0e666c src/ctests/zero_smp.c: zero_smp: Change a compile time error to a + test_skip In 8d1f2c1, we changed the default assumption to be that all + ctests are built. This change allows the test to gracefully skip if it does + not have 'native SMP' threads. + +2013-09-26 + + * 8d1f2c16 src/ctests/Makefile: ctests/Makefile: Default to building + everything Set target all to depend upon ALL + + * ffd051cf src/ftests/Makefile src/testlib/Makefile: testlib, ftests + Makefiles: cleanup ifort generated files ifort produces mod and f90 + intermediate files which clean does not cleanup + + * c720bb59 src/components/coretemp/tests/coretemp_basic.c + src/components/coretemp/tests/coretemp_pretty.c: Coretemp tests: Fix skipping + logic The coretemp_basic test was failing if coretemp was disabled, skip + seems more appropriate. Add this logic to the coretemp_pretty test.
+ + * af7f7508 src/configure src/configure.in: configure: refactor CTEST_TARGETS + Problem: The set of ctests to build is determined at configure time, in + CTEST_TARGETS. This is set in each OS detection section and suffers from + neglect. Solution: Try to push the decisions about which tests to build out + of configure, ask for them all. Ideally the tests will be written in such a + way as to fail/skip gracefully if they lack functionality, teething problems + are expected initially. + + * 14421695 src/testlib/Makefile: testlib: Fix the Makefile variable + assignment Consider: src=a.c b.c c.F obj=$(src:.c=.o) c.o After this + substitution, obj is {a.o b.o c.F c.o}, not quite the nut. Change the logic to + correct that. + +2013-09-17 + + * 05a4e17b .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore: + cleanup a compiler warning _peu_read does not use the hwd_context argument. + + * f2056857 src/papi_events.csv: papi_events.csv: Add PAPI_L1_ICM for Haswell + Thanks to Maurice Marks of Unisys for the contribution ------------- I've + continued testing on Haswell. By comparison with Vtune and Emon on Haswell I + found that we can use the counter L2_RQSTS:ALL_CODE_RD for PAPI_L1_ICM, which + is a very useful measure. Attached is my current version of papi_events.csv + with Haswell fixes. ------------- + +2013-08-28 + + * efe3533d src/Makefile.inc src/components/Makefile_comp_tests + src/ctests/Makefile...: testlib: library-ify testlib * Move ftests_util to + testlib * Naively create libtestlib.a * utils link to the testlib library * + [c|f]tests Switch the tests over to linking libtestlib.a * Component tests + link libtestlib.a + +2013-08-26 + + * d2a76dde src/configure src/configure.in src/utils/hybrid_native_avail.c: + Gabrial's mic with icc changes to configure.
Specify --with-mic at configure + time and upon finding icc as the C compiler, it adds --mmic + + * 4c0349c0 src/papi_events.csv: papi_events.csv: First draft preset events on + HSW Contributed by Nils Smeds ------------------------- Here is a suggestion + for addition to Hsw counters. These are not rigorously tested. It compiles + and loads. I'm rather uncertain on many of the events so I am hoping that + adding events like this will get some useful feedback from the community so + that we can improve. ------------------------- + +2013-08-20 + + * 1b8ff589 src/utils/command_line.c: command_line util: Fix skipping event + bug. The command line utility had an extraneous index increment which + resulted in skipping the reporting of event counts. Remove the increment. + Reported by Steve Kaufmann -------------------------- I am getting some funny + results when I use papi_command_line with the RAPL events. If I request them + all: $ papi_command_line THERMAL_SPEC:PACKAGE0 MINIMUM_POWER:PACKAGE0 + MAXIMUM_POWER:PACKAGE0 MAXIMUM_TIME_WINDOW:PACKAGE0 PACKAGE_ENERGY:PACKAGE0 + DRAM_ENERGY:PACKAGE0 PP0_ENERGY:PACKAGE0 Successfully added: + THERMAL_SPEC:PACKAGE0 Successfully added: MINIMUM_POWER:PACKAGE0 Successfully + added: MAXIMUM_POWER:PACKAGE0 Successfully added: + MAXIMUM_TIME_WINDOW:PACKAGE0 Successfully added: PACKAGE_ENERGY:PACKAGE0 + Successfully added: DRAM_ENERGY:PACKAGE0 Successfully added: + PP0_ENERGY:PACKAGE0 THERMAL_SPEC:PACKAGE0 : 115.000 W <<<<< + MINIMUM_POWER:PACKAGE0 ?? MAXIMUM_POWER:PACKAGE0 : 180.000 W + PACKAGE_ENERGY:PACKAGE0 : 2003784180(u) nJ DRAM_ENERGY:PACKAGE0 + : 438751220(u) nJ PP0_ENERGY:PACKAGE0 : 1248748779(u) nJ + ---------------------------------- Verification: Checks for valid event name. + This utility lets you add events from the command line interface to see if + they work. command_line.c PASSED Note that a value for + MINIMUM_POWER:PACKAGE0 is not displayed even though it was successfully added + to the event set.
In fact, if combined with other events, the value for this + event is never displayed. If you specify it on its own it is displayed: + ------------------------------------ + +2013-08-16 + + * 0cb63d6e src/components/lustre/linux-lustre.c: lustre component: fix memory + leak + +2013-08-13 + + * c810cd0d src/components/micpower/linux-micpower.c src/linux-memory.c + src/papi_preset.c: Close resource leaks User dcb reported several resource + leaks in trac bug #184. -------------------- I just ran the static analysis + checker "cppcheck" over the source code of papi-5.2.0 It said 1. + [linux-memory.c:711]: (error) Resource leak: sys_cpu 2. [papi_preset.c:735]: + (error) Resource leak: fp 3. [components/micpower/linux-micpower.c:166]: + (error) Resource leak: fp I've checked them all and they all look like + resource leaks to me. Suggest code rework. + ---------------------------------- + +2013-08-07 + + * 8d479895 doc/Makefile: Doxygen makefile: update dependencies The manpages + are generated from comments in papi.h, papi.c, papi_hl.c and + papi_fwrappers.c; update the make dependencies to reflect this.
diff --git a/ChangeLogP532.txt b/ChangeLogP532.txt new file mode 100644 index 0000000..0537923 --- /dev/null +++ b/ChangeLogP532.txt @@ -0,0 +1,84 @@ +2014-06-30 + + * 511d05bc man/man1/papi_avail.1 man/man1/papi_clockres.1 + man/man1/papi_command_line.1...: Regenerate man pages for a pending 5.3.2 + release + + * a07adc91 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump version + number for a 5.3.2 release + +2014-06-27 + + * 43070347 src/components/coretemp/linux-coretemp.c + src/components/micpower/linux-micpower.c src/components/net/linux-net.c: Fix + a warning in component initialization A copy/paste perpetuated a multiple + definition of available_domains + +2014-06-24 + + * ea216f5a src/run_tests.sh src/run_tests_exclude.txt + src/run_tests_exclude_cuda.txt: Fix excluded files for run_tests Gary Mohr + pointed out that in a Makefile refactor we neglected to update the tests + exclude criteria -------------------------------- It turns out that you guys + have made changes in the way the tests are built which have introduced errors + when running the scripts. The script attempted to automatically remove the + makefiles from the list of files it would execute but your build changes + broke the makefiles up into several files and renamed all of them. So the + script was trying to execute them. I decided the most flexible way to handle + this is to remove the code from the script that looks for makefiles and just + add them to the exclude files used by the script. The script will not execute + any file listed in the exclude file. The attached patch implements these + changes so the script runs correctly with the current papi build files. Gary + -------------------------------- + +2014-05-16 + + * aacf9628 man/man3/PAPI_enum_cmp_event.3 man/man3/PAPI_enum_event.3 + man/man3/PAPI_get_overflow_event_index.3...: Printf formatting change... + Based upon a patch by Steve Kaufmann.
I took the liberty of removing all the + leading "0x" as part of formatting output strings. Now the "%#" syntax is + used to print out hexadecimal values with a leading "0x" (letting the printf + function do the work). + +2014-03-26 + + * 0c93f0a1 src/components/nvml/linux-nvml.c src/components/nvml/linux-nvml.h: + Add units to NVML component Thanks to Brian Lemke at Bull for the patch. + +2014-02-26 + + * 2c79fab8 src/configure src/configure.in: configure: respect + --with-walltimer and virtualtimer For whatever reason configure would not + check for the --with-walltimer argument if we had already determined one (See + BG/P and CLE sections also --with-mic option). This is not desirable, kill + this behaviour. + +2014-01-30 + + * 284f25c2 src/components/lustre/linux-lustre.c + src/components/net/linux-net.c: Use correct specification for signed and + unsigned int A run of cppcheck showed some mismatches between the + specifications for sscanf and the variables being used to store the values. + This corrects those minor issues. + +2014-02-04 + + * 291bad11 src/components/coretemp/linux-coretemp.c + src/components/coretemp_freebsd/coretemp_freebsd.c + src/components/host_micpower/linux-host_micpower.c...: Update the + domain/granularity of many components Many PAPI components only report + system wide events, here we attempt to match up entries in the .cmp_info + struct with reality by only allowing PAPI_GRAN_SYS and PAPI_DOM_ALL. + +2013-12-30 + + * eeefec5c src/ctests/attach2.c src/ctests/attach3.c: ctests/attach[2,3]: Fix + ptrace call for BSD "The SunOS man page describes ptrace() as "unique and + arcane", which it is." + + * 27d416c8 src/configure src/configure.in: Configure.in: Remove Bash-isms + from comp selection Part of a patch set by Gleb Smirnoff + + * 52f8979f src/configure src/configure.in: Configure.in: Correctly detect + FreeBSD OS version The script incorrectly parsed "FreeBSD 10" as "FreeBSD + 1".
Part of a series of patches by Gleb Smirnoff diff --git a/ChangeLogP540.txt b/ChangeLogP540.txt new file mode 100644 index 0000000..4a545cc --- /dev/null +++ b/ChangeLogP540.txt @@ -0,0 +1,459 @@ +2014-11-13 + + * 8f524875 RELEASENOTES.txt: Prepare release notes for a 5.4.0 release + +2014-11-12 + + * a8b4613b man/man1/papi_avail.1 man/man1/papi_clockres.1 + man/man1/papi_command_line.1...: Rebuild the doxygen manpages + + * fbea4897 src/run_tests_exclude.txt: Remove omptough from standard + run_tests.sh testing On some platforms (e.g. some AMD machines), if + OMP_NUM_THREADS is not set, then this test does not complete in a reasonable + time. That is because on these platforms too many threads are spawned inside + a large loop. + +2014-11-11 + + * 23c2705b src/components/bgpm/CNKunit/linux-CNKunit.c + src/components/bgpm/IOunit/linux-IOunit.c + src/components/bgpm/IOunit/linux-IOunit.h...: Fix number of counters and + events for each of the 5 BGPM units as well as emon on BG/Q + +2014-11-07 + + * 93c69ded src/linux-timer.c: Patch linux-timer.c to provide cycle counter + support on aarch64 (64-bit ARMv8 architecture) Thanks to William Cohen for + this patch and the message below: --- The aarch64 has cycle counter available + to userspace and this resource should be made available in papi. --- This + patch is not tested by the PAPI team (no easily available hardware). + +2014-11-06 + + * 038b2f31 src/components/rapl/linux-rapl.c: Extension of the RAPL energy + measurements on Intel via msr-safe. + (https://github.com/scalability-llnl/msr-safe) msr-safe is a linux kernel + module that allows user access to a whitelisted set of MSRs. It is nearly + identical in structure to the stock msr kernel module, with the important + exception that the "capabilities" check has been removed. The LLNL sysadmins + did a security review for the whitelist. + + * 67e0b3f6 src/components/rapl/tests/rapl_basic.c: Fixed string null + termination. 
+ +2014-10-30 + + * 2a1805ec src/components/perf_event/pe_libpfm4_events.c + src/components/perf_event/pe_libpfm4_events.h + src/components/perf_event/perf_event.c...: Patch to reduce cost of using + PAPI_name_to_code and add list of supported pmu names to papi_component_avail + output Thanks to Gary Mohr for this patch and its documentation: --- This + patch file contains code to look for either pmu names or component names on + the front of event strings passed to PAPI_name_to_code calls. If found the + name will be compared to values in each component info structure to see if + the component supports this event. If the pmu name or component name does + not match the values in the component info structure then there is no need to + call this component for this event. If the event string does not contain + either a pmu name or a component name then all components will be called. + This reduces the overhead in PAPI when converting event names to event codes + when either component names or pmu names are provided in the event name + string. To support the above checks, there is also code in this patch to add + an array of pmu names to the component info structure and modifications to + the core and uncore components to save the pmu names supported by each of + these components in this new array. This patch also adds code to the + papi_component_avail tool to display the pmu names supported by each active + component. --- + +2014-10-28 + + * a91db97b src/components/net/linux-net.c src/components/nvml/linux-nvml.c + src/components/perf_event/perf_event.c...: This patch file contains + additional changes to resolve defects reported by Coverity. Thanks to Gary + Mohr for this patch. ------ This patch file contains additional changes to + resolve defects reported by Coverity. Mostly these just make sure that + character buffers get null terminated so they can be used as C strings. 
+ There is also a change in the RAPL component to improve the message to + identify why the component may have been disabled. ------ + +2014-10-22 + + * 3f913658 src/ctests/tenth.c: Fix percent error calculation in + ctests/tenth.c Thanks to Carl Love for this patch and the following + documentation: Do the division first then multiply by 100 when calculating + the percent error. This will keep the magnitude of the numbers closer. If + you multiply by 100 before dividing you may exceed the representable + number size. Additionally, by casting the values to floats then dividing we + get more accuracy in the calculation of the percent error. The integer + division will not give us a percent error less than 1. + + * ba5ef24a src/papi_events.csv: PPC64, fix L1 data cache read, write and all + access equations. Thanks to Carl Love for this patch and the following + documentation: The current POWER 7 equations for all accesses over counts + because it includes non load accesses to the cache. The equation was changed + to be the sum of the reads and the writes. The read accesses to the two + units, can be counted with the single event PM_LD_REF_L1 rather than counting + the events to the two LSU units independently. The number of reads to the L1 + must be adjusted by subtracting the misses as these become writes. Power 8 + has four LSU units. The same equations can be used since PM_LD_REF_L1 counts + across all four LSU units. + + * 882f5765 src/utils/native_avail.c: This patch file fixes two problems and + adds a performance improvement in "papi_native_avail". Thanks to Gary Mohr + for this patch and the following information: First it corrects a problem + when using the -i or -x options. The code was putting out too many event + divider lines (lines with all '-' characters). This has been corrected. + Second it improves the results from "papi_native_avail --validate" when being + used on SNBEP systems.
This system has some events which require multiple + masks to be provided for them to be valid. The validate code was showing + these events as not available because it did not try to use the event with + the correct combination of masks. The fix checks to see if a valid form of + the event has not yet been found and if so then it tries the event with + specific combinations of masks that have been found to make these events + work. It also adds a check before trying to validate the event with a new + mask to see if a valid form of the event has already been found. If it has + then there is no need to try and validate the event again. + + * 94985c8a src/config.h.in src/configure src/configure.in...: Fix build error + when no Fortran is installed Thanks to Maynard Johnson for this patch. Fix up + the build mechanism to properly handle the case where no Fortran compiler is + installed -- i.e., don't build or install testlib/ftest_util.o or the ftests. + +2014-10-16 + + * de05a9d8 src/linux-common.c: PPC64 add support for the Power non + virtualized platform Thanks to Carl Love for this patch and the following + description: The Power 8 system can be run as a non-virtualized machine. In + this case, the platform is "PowerNV". This patch adds the platform to the + possible IBM platform types. + + * 547f4412 src/ctests/byte_profile.c: byte_profile.c: PPC64 add support for + PPC64 Little Endian to byte_profile.c Thanks to Carl Love for this patch and + the following description: The POWER 8 platform is Little Endian. It uses + ELF version 2 which does not use function descriptors. This patch adds the + needed #ifdef support to correctly compile the test case for Big Endian or + Little Endian. This patch is untested by the PAPI developers (hardware not + easily accessible).
+ +2014-10-15 + + * 14f70ebc src/ctests/sprofile.c: PPC64 add support for PPC64 Little Endian + to sprofile.c Thanks to Carl Love for this patch and the following + description: The POWER 8 platform is Little Endian. It uses ELF version 2 + which does not use function descriptors. This patch adds the needed #ifdef + support to correctly compile the test case for Big Endian or Little Endian. + + * 6d41e208 src/linux-memory.c: PPC64 sys_mem_info array size is wrong Thanks + to Carl Love for this patch and the following description: The variable + sys_mem_info is an array of type PAPI_mh_info_t. It is statically declared + as size 4. The data for POWER8 is statically declared in entry 4 of the + array which is beyond the allocated array. The array should be declared + without a size so the compiler will automatically determine the correct size + based on the number of elements being initialized. This patch makes the + change. + + * 061817e0 src/papi_events.csv: Remove stray Intel Haswell events from Intel + Ivy Bridge presets Thanks to William Cohen for this patch and the following + description: 'Commit 4c87d753ab56688acad5bf0cb3b95eae8aa80458 added some + events meant for Intel Haswell to the Intel Ivy bridge presets. This patch + removes those stray events. Without this patch on Intel Ivy Bridge machines + would see messages like the following: PAPI Error: papi_preset: Error finding + event L2_TRANS:DEMAND_DATA_RD. PAPI Error: papi_preset: Error finding event + L2_RQSTS:ALL_DEMAND_REFERENCES.' This patch was not tested by the PAPI team + (no appropriate hardware). + +2014-10-14 + + * 8bc1ff85 src/papi_events.csv: Update papi_events.csv to match libpfm + support for Intel family 6 model 63 (hsw_ep) Thanks to William Cohen for + this patch and its information: 'A recent September 11, 2014 patch (98c00b) + to the upstream libpfm split out Intel family 6 model 63 into its own name of + "hsw_ep". The papi_events.csv needs to be updated to support that new name. 
+ This should have no impact for older libpfms that still identify Intel family + 6 model 63 as "hswv" and "hsw_ep" map to the same papi presets.' + + * 32a8b758 src/papi_events.csv: Support for the ARM X-Gene processor. Thanks + to William Cohen for this patch. The events for the Applied Micro X-Gene + processor are slightly different from other ARM processors. Thus, need to + define those presets for the X-Gene processor. Note: This patch is not + tested by the PAPI team because we do not have the appropriate hardware. + + * 0a97f54e src/components/perf_event/pe_libpfm4_events.c + .../perf_event_uncore/peu_libpfm4_events.c src/papi_internal.c...: Thanks to + Gary Mohr for the patch --------------------- Fix for bugs in + PAPI_get_event_info when using the core and uncore components: + PAPI_get_event_info returns incorrect / incomplete information. The errors + were in how the code handled event masks and their descriptions so the errors + would not lead to program failures, just the possibility of incorrect + labeling of output. --------------------- + +2014-10-09 + + * 77960f71 src/components/perf_event/pe_libpfm4_events.c: Record + encode_failed error by setting attr.config to 0xFFFFFFF. This causes any + later validate attempts to fail, which is the desired behavior. Note: as of + 2014/10/09 this has not been thoroughly tested, since a failure case is not + known. This patch simply copies a fix that was applied to the + perf_event_uncore component. + +2014-09-25 + + * 00ae8c1e src/components/perf_event_uncore/peu_libpfm4_events.c: Based on + Gary Mohr's suggestion. If an event fails when we try to add it + (encode_failed), then we note that error by setting attr.config = 0xFFFFFF + for that event. Then, if there is a later check to validate this event, the + check will correctly return an error. + + * 3801faaf src/utils/native_avail.c: Adding the NativeAvailValidate patch + provided by Gary Mohr. 
The problem being addressed is that if there were any + problems validating event masks, then those problems would result in the + entire event being invalid. The desired action was to test each event mask, + and if any basic event mask can make the event succeed, then the event should + be returned as valid and available. The solution is to create a large buffer + and write events and masks into this buffer as they are processed, tracking + their validity. At the end go back and mark the validity of the entire + event. This matches the standard output of PAPI. + +2014-09-24 + + * 6abc8196 src/components/emon/README src/components/emon/Rules.emon + src/components/emon/linux-emon.c: Emon power component for BG/Q + +2014-09-23 + + * 62b9f2a9 .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore.c: + Check schedulability of events Patch by Gary Mohr, ------------------- This + patch file adds code to the uncore component to check and make sure that the + events being opened from an event set can be scheduled by the kernel. This + kind of code exists in the core component but was not moved into the uncore + component because it was felt that it would not be an issue with uncore. + Turns out the kernel has the same kind of issues when scheduling uncore + events. The symptoms of this problem will show that the kernel will report + that all events in the event set were opened successfully but when trying to + read the results one (or more) of the events will get a read failure. Seen + in the traces and on stderr (if papi configured with debug=yes) as a "short + read" error. The logic is slightly different than what is in the core + component because the events in the core component are grouped and the ones + in the uncore are not grouped. When events are grouped, you only need to + enable/disable and read results on the group leader. But when they are not + grouped you need to do these operations on each event in the event set.
+ +2014-09-19 + + * 8e6bf887 src/components/cuda/linux-cuda.c + src/components/infiniband/linux-infiniband.c + src/components/lustre/linux-lustre.c...: Address coverity defects in + src/components Thanks Gary Mohr ---------------- This patch file contains + fixes for defects reported by Coverity in the /src/components directory. + Mostly these changes just make sure that char buffers get null terminated so + that when they get used as a C string (they usually do) we will not end up + with unpredictable results. A problem has been reported by one of our + testers that the lustre component produced very long event names with lots of + unprintable garbage in the names. It turns out this was caused by a buffer + that filled up and never got null terminated. Then string functions were + used on the buffer which picked up the whole buffer and lots more. These + changes fixed the problem. + + * 266c61a4 src/linux-common.c src/papi_hl.c src/papi_internal.c...: Address + coverity reported issues in src/ Thanks to Gary Mohr ------------------- + Changes in this patch file: linux-common.c: Add code to insure that cpu + info vendor_string and model_string buffers are NULL terminated strings. + Also insure that the value which gets read into mdi->exe_info.fullname gets + NULL terminated. This makes it safe to use the 'strxxx' functions on the + value (which is done immediately after it is read in). papi_hl.c: Fix call + to _hl_rate_calls() where the third argument was not the correct data type. + papi_internal.c: Add code to insure that event info name, short_desc, and + long_desc buffers are NULL terminated strings. papi_user_events.c: While + processing define symbols, insure that the 'local_line', 'name', and 'value' + buffers get NULL terminated (so we can safely use 'strxxx' functions on + them). Insure that the 'symbol' field in the user defined event ends up NULL + terminated. Rearrange code to avoid falling through from one case to the next + in a switch statement. 
Coverity flagged falling out the bottom of a case + statement as a potential defect but it was doing what it should. + sw_multiplex.c: Unnecessary test. The value of ESI can not be NULL when + this code is reached. x86_cpuid_info.c: The variable need_leaf4 is set but + not used. The only place it gets set returns without checking its value. + The place that checks its value never could have set its value non-zero. + +2014-09-12 + + * d72277fc release_procedure.txt: Update release procedure, check buildbot! + +2014-09-08 + + * a0e4f9a7 src/components/perf_event_uncore/perf_event_uncore.c: Uncore + component fix: By Gary Mohr, The line that sets exclude_guests in the uncore + component is there because it also there in the core component. But when I + look at it closer, it is an error in both cases. I will submit a patch to + remove them and get rid of some commented out code that no longer belongs in + the source. The uncore events do not support the concept of excluding the + host or guest os so we should never set either bit. But the core events do + support this concept and libpfm4 provides event masks "mg" and "mh" to + control counting in these domains. By default if neither is set then libpfm4 + excludes counting in the guest os, if either "mh" or "mg" is provided as an + event mask then it is counted but the other is excluded, and if both are + provided then both are counted. So when the code forces the exclude_guest + bit to be set it breaks the ability to fully control what will happen with + the masks. I did not notice the uncore part of this problem when testing on + my SNB system, probably because we use an older kernel which tolerated the + bit being set (or maybe because HSW is handled differently). + +2014-09-02 + + * 4499fee7 src/papi_internal.c: Thanks to Gary Mohr for the patch + --------------------- Fix in papi_internal.c where it was trying to look up + the event name. 
The RAPL component found the event and returned a code but + papi_internal.c exited the enum loop for that component but failed to exit + the loop that checks all of the components. This caused it to keep looking + at other components until it fell out of the outer loop and returned an + error. In addition to the actual change, some formatting issues were fixed. + --------------------- + + * f5835c26 src/components/perf_event/perf_event_lib.h: Bump NUM_MPX_COUNTERS + for linux-perf Uncore on SNB and newer systems has enough counters to go + beyond the 64 array spaces we allocate. This needs a better long term + solution. Reported by Gary Mohr --------------------- When running on snbep + systems with the uncore component enabled, if papi is configured with + debug=yes then the message "Warning! num_cntrs is more than num_mpx_cntrs" + gets written to stderr. This happens because the snbep uncore pmu's have a + total of 81 counters and PAPI is set to only accept a maximum of 64 counters. + This change increases the amount PAPI will accept to 100 (prevents the + warning message from being printed). --------------------- + + * 07990f85 src/ctests/branches.c src/ctests/calibrate.c + src/ctests/describe.c...: ctests/ Address coverity reported defects Thanks + to Gary Mohr for the patch --------------------------------- The contents of + this patch file fix defects reported by Coverity in the directory + 'papi/src/ctests'. The defect reported in branches.c was that a comparison + between different kinds of data was being done. The defect reported in + calibrate.c was that the variable 'papi_event_str' could end up without a + null terminator. The defects reported in describe.c, get_event_component.c, + and krentel_pthreads.c were that return values from function calls were being + stored in a variable but never being used. I also did a little clean-up in + describe.c. This test had been failing for me on Intel NHM and SNBEP but now + it runs and reports that it PASSED.
--------------------------------- + +2014-08-29 + + * 74cb07df src/testlib/test_utils.c: testlib/test_util.c: Check enum return + value Addresses an issue found by Coverity. Thanks Gary Mohr, + ---------------- The changes in this patch file fixes the only defect in the + src/testlib directory. The defect reported that the return value from a call + to PAPI_enum_cmp_event was being ignored. This call to enum events is to get + the first event for the component index passed into this function. It turns + out that the function that contains this code is only ever called by the + overflow_allcounters ctest and it only calls once and always passes a + component index of 0 (perf_event). So I added code to check the return value + and fail the test if an error was returned. ---------------- + + * 74041b3e src/utils/event_info.c: event_info utility: address coverity + defect From Gary Mohr -------------- This patch corrects a defect reported + by Coverity. The defect reported that the call to PAPI_enum_cmp_event was + setting retval which was never getting used before it got set again by a call + to PAPI_get_event_info. After looking at the code, I decided that we should + not be trying to get the next event inside a loop that is enumerating masks + for the current event. It makes more sense to break out of the loop to get + masks and let the outer loop that is walking the events get the next event. + -------------- + +2014-08-28 + + * 62dceb9b src/utils/native_avail.c: Extend 'papi_native_event --validate' to + check for umasks. + +2014-08-27 + + * a5c2beb2 src/components/perf_event/pe_libpfm4_events.c + src/components/perf_event/pe_libpfm4_events.h + src/components/perf_event/perf_event.c...: perf_event[_uncore]: switch to + libpfm4 extended masks Patch due to Gary Mohr, many thanks. + ------------------------------------ This patch file contains the changes to + make the perf_event and perf_event_uncore components in PAPI use the libpfm4 + extended event masks. 
This adds a number of new masks that can be entered + with events supported by these components. They include a mask 'u' which can + be used control if counting in the user domain should be enabled, a mask 'k' + which does the same for the kernel domain, and a mask 'cpu' which will cause + counting to only occur on a specified cpu. There are also some other new + masks which may work but have not been tested yet. + ------------------------------------ + +2014-08-20 + + * e76bbe66 src/components/perf_event/pe_libpfm4_events.c + src/components/perf_event/perf_event.c + .../perf_event_uncore/perf_event_uncore.c...: General code cleanup and + improved debugging Thanks to Gary Mohr ------------------- This patch file + does general code cleanup. It modifies the code to eliminate compiler + warnings, remove defects reported by coverity, and improve traces. + +2014-08-11 + + * 8f2a1cee src/utils/error_codes.c: error_codes utility: remove internal bits + Remove dependency on _papi_hwi_num_errors, just keep calling PAPI_strerror + until it fails. We shouldn't be using internal symbols anyways. + +2014-08-04 + + * a7136edd src/components/nvml/README: Update nvml README We changed the + options to simplify the configure line. Bad information is worse than no + information... + +2014-07-25 + + * a37160c1 src/components/perf_event/perf_event.c: perf_event.c: cleanup + error messages Thanks to Gary Mohr ------------------- This patch contains + general cleanup code. Calls to PAPIERROR pass a string which does not need + to end with a new line because this function will always add one. New lines + at the end of strings passed to this function have been removed. These + changes also add some additional debug messages. + +2014-07-24 + + * bf55b6b7 src/papi_events.csv: Update HSW presets Thanks to Gary Mohr + ------------------- Previously we sent updates to the PAPI preset event + definitions to improve the preset cache events on Haswell processors. 
In + checking the latest source, it looks like the L1 cache events changes did not + get applied quite right. Here is a patch to the latest source that will make + it the way we had intended. + + * eeaef9fa src/papi.c: papi.c: Add information to API entry debugging Thanks + to Gary Mohr ------------------- This patch contains the results of taking a + second pass to cleanup the debug prints in the file papi.c. It adds entry + traces to more functions that can be called from an application. It also + adds lots of additional values to the trace entries so that we can see what + is being passed to these functions from the application. + +2014-07-23 + + * ee736151 src/run_tests.sh: run_tests.sh: more exclude cleanups Thanks Gary + Mohr ---------------- This patch removes an additional check for Makefiles in + the script. The exclude files are now used to prevent Makefiles from getting + run by this script. I missed this one when providing the previous patch to + make this change. + + * c37afa23 src/papi_internal.c: papi_internal.c: change SUBDBG to INTDBG + Thanks to Gary Mohr ------------------- This patch contains changes to + replace calls to SUBDBG with calls to INTDBG in this source file. This + source file should be using the Internal debug macro rather than the + Substrate debug macro so that the PAPI debug filters work correctly. These + changes also add some new debug calls so that we will get a better picture of + what is going on in the PAPI internal layer. There are a few calls to the + SUBDBG macro that are in code that I have modified to add support for new + event level masks which are not converted by this patch. They will be + corrected when the event level mask patch is provided. + + * e43b1138 src/utils/native_avail.c: native_avail.c: Bug fixes and updates + Thanks to Gary Mohr -------------------------------------------------- This + patch fixes a couple of problems found in the papi_native_avail program.
+ First change fixes a problem introduced when the -validate option was added. + This option causes events to get added to an event set but never removes + them. This change will remove them if the add works. This change also fixes + a coverity detected error where the return value from PAPI_destroy_eventset + was being ignored. Second change improves the delimitor check when + separating the event description from the event mask description. The + previous check only looked for a colon but some of the event descriptions + contain a colon so descriptions would get displayed incorrectly. The new + check finds the "masks:" substring which is what papi inserts to separate + these two descriptions. Third change adds code to allow the user to enter + events of the form pmu:::event or pmu::event when using the -e option in the + program. + diff --git a/ChangeLogP541.txt b/ChangeLogP541.txt new file mode 100644 index 0000000..03fbcf3 --- /dev/null +++ b/ChangeLogP541.txt @@ -0,0 +1,201 @@ +2015-03-02 + + * bcc508a9 src/components/perf_event/pe_libpfm4_events.c: Thanks much to Gary + Mohr for the patch: This patch fixes a problem in the perf_events component + that could cause get event info to produce incorrect results. The problem was + reported by Harold Servat and occurs when the functions + PAPI_event_name_to_code and PAPI_get_event_info are called for an event with + a mask (name:mask) and then called again for the event without a mask (name). + When this is done the second call to PAPI_get_event_info will incorrectly + return the event name and mask from the first call (name:mask). This patch + also corrects a problem found with valgrind which was causing memory on the + heap to get stranded. We were passing a char **event_string to the libpfm4 + encode function and he was allocating some memory and giving us back a + pointer to the allocated space. The code in PAPI was responsible for freeing + this space but failed to do so. 
After looking closer at the PAPI code, it + does not need the information returned in this space so the patch changes the + code to not ask for the information so that libpfm4 no longer allocates heap + space. + + * 62e90303 src/Makefile.inc src/configure src/configure.in...: Generating + pkg-config files for papi Thanks to William Cohen for this patch (and to + Phil Mucci for the patch review). Some software makes use of pkg-config + (http://www.freedesktop.org/wiki/Software/pkg-config/) when using libraries. + pkg-config selects compiler flags and libraries for compiling user code based + on the installation location of the package. It could make it a bit easier to + build other software on papi by abstracting where the libraries are + installed. Rather than having some complicated path to the installed + library, users could use "pkg-config --libs --cflags papi" to get that + information for the compile. If there are multiple versions of papi + available on the machine, the user could get a particular one with something + like "pkg-config --libs --cflags papi-5.4.0". + +2015-02-28 + + * f6bc16c6 src/papi_events.csv: Add support for ARM 1176 cpus This is the + chip in the original Raspberry Pi. With the recently released Raspberry Pi + 3.18.8 kernel perf_event support is finally enabled by default. + +2015-02-27 + + * 74801065 src/papi_events.csv: Add ARM Cortex A7 support. Tested on a + Raspberry Pi 2 board. + +2015-02-25 + + * 71e6e5e5 src/ctests/krentel_pthreads.c: Sync thread exit in + krentel_pthreads.c Thanks to William Cohen for this patch and to Phil Mucci + for approving it. William Cohen and Michael Petlan noticed that this test + can have threads dangling after the main thread is done. This patch tracks + the created threads and ensures that they are joined before the code exits. + Note: There is still some problem remaining. For example, the following test + will sometimes (maybe 1 of 10 runs) generate an error message.
> + ./ctests/krentel_pthreads 8 2000 10 .... [10] time = 8, count = 38110, iter = + 20, rate = 1905500.0/Kiter PAPI Error: thread->running_eventset == NULL in + _papi_pe_dispatch_timer for fd 14!. [0] time = 8, count = 38161, iter = 20, + rate = 1908050.0/Kiter krentel_pthreads.c PASSED + +2015-02-20 + + * c0de16d8 INSTALL.txt: Added additional notes and examples for the MIC. + Specify how to use qualifiers to set exclude_guest and exclude_host bits to + 0. Use micnativeloadex to run the utilites. + +2015-02-11 + + * 65825ef7 src/utils/native_avail.c: Change papi_native_avail to refer to + event qualifiers (qual) rather than event masks. Thanks to Gary Mohr for + this patch and the following notes. This patch file fixes one bug and + replaces the term "Unit Mask" and other names used to identify a unit mask + with the term "event qualifier". This renaming was done because the term + "Unit Mask" has a very specific meaning in the hardware. Many of the flags + and other fields we can now provide with an event to control how it is + counted have nothing to do with the unit masks defined in the manuals + provided by the hardware vendors. Summary of what changed: Removed the -d + command line argument. It controlled if units should be displayed in output. + Now we always display units if they are defined (only place I have seen them + defined is with rapl events). Fixed bug when displaying event units. It was + displaying the units information in front of the event name and description. + It now displays the units information after the description. Renamed the + -noumasks argument to -noqual. This prevents event qualifiers (previously + known as unit masks) from being displayed. Replaced headings "Unit Mask" and + "Mask Name" with "Qualifiers" and "Name" (when displaying a single event). 
+ +2015-02-10 + + * 91e36312 src/ctests/Makefile.recipies src/ctests/attach_cpu.c: Test case + for attaching an eventset to a single CPU rather than a thread (attach_cpu) + Thanks to Gary Mohr for this contribution. This patch adds a test case to + demonstrate how to attach an event set to a cpu so that the event counts for + events in that event set reflect how many of those events occurred on the + attached cpu (instead of the number of events that occurred in a thread of + execution). See comments in attach_cpu.c to see how and why to probe with + specific cpus (e.g. ./attach_cpu 3). + +2015-02-02 + + * 1fc57875 src/components/cuda/Makefile.cuda.in src/components/cuda/README + src/components/cuda/Rules.cuda...: Updated CUDA component supporting multiple + GPUs and multiple CUDA contexts. This PAPI CUDA component uses the CUPTI + library to get information about the event counters. NOTE: To use this PAPI + CUDA component, there is a difference from standard PAPI usage. When adding + PAPI events to the CUDA component, each event needs to be added from the + correct CUDA context. To repeat, for each CUDA device, switch to that device + and add the events relevant to that device! If there is only one CUDA + device, then the default context will be used and things should work as + before. + + * 40151180 src/ftests/Makefile: Reported by Mark Maurice: On linux systems + without a fortran compiler installed we get an error when building the PAPI + fortran tests. The reason for the error is that in the Makefile in the + ftests directory the @echo lines start with spaces instead of tabs. 'make' is + fussy about tabs and spaces and gives a 'missing separator' error if a + command starts with spaces instead of a tab. 
+ +2015-01-20 + + * 1dec8a9d src/components/lustre/linux-lustre.c: Thanks to Gary Mohr for the + patch:The patch provided solves the segmentation faults produced by the + lustre component.The changes done by the patch are in the + _lustre_shutdown_component() by adding lustre_native_table=NULL statement and + Later num_events=0 and table_size=32 were added in the same function to fully + solve the segmentation faults + +2014-12-17 + + * aba85b18 man/man1/PAPI_derived_event_files.1 man/man1/papi_avail.1 + src/Makefile.inc...: User defined events: Enhance PAPI preset events allow + user defined events via a user event definition file. Thanks to Gary Mohr + for this patch and its documentation. + -------------------------------------------------------- This patch file + enhances the code that processes PAPI preset event definition files + (papi_events.csv) so that it can also now be used to process a user provided + event definition file. PAPI still looks for an environment variable + 'PAPI_USER_EVENTS_FILE' and if found uses its value as the pathname of the + user event definition file to process (same behavior as before). The change + is that this is done right after processing the PAPI preset events rather + than at the end of PAPI_library_init (after all components were initialized). + An advantage of using this approach is that now user defined events, like + preset events, can define multiple versions of the same event where each + version is customized to a particular hardware platform (or pmu name). The + code which processes preset events was also enhanced in the following ways: + The papi_avail command was updated to also list user defined events in its + output. The papi_avail help and man page have been updated to include user + defined events in the descriptions. The man page was also updated to add a + "see also" reference to a new 'PAPI_derived_event_files' man page. 
A new + 'PAPI_derived_event_files' man page to provide the user information about how + to build an event definition file has been added. This patch file contains + both the source file changes (needed by doxygen) and updated copies of the + man pages created by doxygen. The code now allows both postfix (Reverse + Polish Notation) and infix (algebraic) formulas to be entered. There is a new + derived event type 'DERIVED_INFIX' to specify that the formula is provided in + the algebraic format. The formulas will always be converted to postfix + format as part of the event definition processing so if the user does a + 'papi_avail -e ' later it will always be displayed as a postfix + formula. When defining a new derived event (either preset or user defined), + it is now possible to use any already known native event, preset event or + user defined event. This means that new derived events can be created as a + relationship between other already known (their definitions had to already be + processed) derived events. When derived events are created, there is a list + of native events needed by that defined event created and optionally a + formula to compute the derived events value. If a new derived event is + created that depends on another derived event, then the new event will + inherit all the native events used by the event it depends on and the new + derived events formula will be merged with the formula from the event it + depends on (if there was one or if it had an implied formula like derived add + or sub). This means that after event definition processing completes the + event tables inside PAPI always contain the list of all native events needed + to compute the derived events results and a postfix formula that will be used + to compute the events result. So if a user does a 'papi_avail -e + ', the output will show what events PAPI is going to count and how + they will be used to generate the events final value. 
A new command 'EVENT' + has been added to the code which is intended to be used for user defined + events. It is identical to the existing command 'PRESET' used to define + preset events. They are interchangeable and both can be used in both preset + and user defined event definition files. The code now allows the user to + provide a short and long description for the derived event. The event + definition commands 'PRESET' and 'EVENT' now support tags of "LDESC" and + "SDESC" to identify what is found in the following string. This was done the + same way as the already supported 'NOTE' tag. These changes do not support + the ability to create #define variables that can then be used in event + definition formulas. This was supported by the old user event definition + code. These changes delete the existing papi_user_event code (two files that + are no longer needed). + +2014-12-15 + + * f8b722a9 src/components/perf_event/tests/event_name_lib.c: perf_event + tests: add sample haswell offcore event + +2014-12-11 + + * adbae8cd src/papi_events.csv: Update presets for Intel Haswell and + Haswell-EP (according to the updates of the libpfm4 event table for Intel + Haswell and Haswell-EP). These mods have not been tested due to lacking + access to an Intel Haswell system. + +2014-11-14 + + * ca1ba786 doc/Doxyfile-common papi.spec src/Makefile.in...: Bump master to + 5.4.1, we just released out of the stable-5.4 branch. + diff --git a/ChangeLogP543.txt b/ChangeLogP543.txt new file mode 100644 index 0000000..463ab96 --- /dev/null +++ b/ChangeLogP543.txt @@ -0,0 +1,305 @@ +2016-01-25 + + * d779d1172a6e4c73b5ece9939c4d067c2b3d7b8d Update libpfm4 current + with Jan 25 08:33:02 2016 version. 
+ +2016-01-07 + + * 0d9776b8 src/components/stealtime/linux-stealtime.c: Free allocated memory + in the stealtime component when component is shutdown Thanks to William + Cohen for contributing this patch and the following explanation: Running + examples with "valgrind --leak-check=full ..." showed a number of items + allocated by the stealtime component were not freed when PAPI_shutdown() was + called. This patch frees those unused memory allocations. + +2016-01-06 + + * de40668c src/papi_preset.c: Fixed memory leak in papi_preset.c by updating + the infix_to_postfix function. Thanks to William Cohen for discovering the + leak. The infix_to_postfix function was re-written and tested using user + defined events. + +2015-12-30 + + * db37e115 src/utils/avail.c src/utils/native_avail.c: Added "-check" flag to + papi_avail and papi_native_avail to test counter availability/validity. This + patch updates the papi_avail and papi_native_avail utilities to use the + "-check" flag to test the actual availability of counters. There were + previously two different flags for this capability, papi_native_avail used + "-validate" and papi_avail used "-avail_test". Based on a mailing list + discussion these flags have been consolidated as "-check". + +2015-12-29 + + * 72e0ffe8 src/components/lmsensors/linux-lmsensors.c: Fixed minor error with + multiple initializers for lmsensors_vector .default_granularity. Thanks to + William Cohen for the bug report. + +2015-12-07 + + * ec3582d8 src/utils/avail.c: papi_avail to test actual availability of + counters using "papi_avail --avail-test" This problem and the associated + patch were detected and contributed by Harald Servat. Thanks. On an Intel(R) + Xeon(R) CPU E5-2660 v2 @ 2.20GHz system with PAPI 5.4.1 installed The + papi_avail command indicates that both PAPI_LD_INS and PAPI_SR_INS are + available, however the papi_event_chooser does not accept them (see below) + and return -1. 
This problem can occur in kernels from version 3.1 till 4.1. + The kernel devs blocked all uses of the MEM_OPS events (including load and + store). The patch modifies papi_avail to test the counters to see if they + can be added. papi_avail # gets all PAPI counters papi_avail -a # gets + all available PAPI counters papi_avail -at # shows all available PAPI + counters that can be added [Ptools-perfapi: Oct 14 2015] + +2015-11-30 + + * 1fab922e src/components/libmsr/README src/components/libmsr/configure + src/components/libmsr/configure.in...: The libmsr component is updated to + match major changes in LLNL libmsr library and the LLNL msr-safe kernel + module + +2015-11-18 + + * 242b16d3 src/Makefile.inc src/components/cuda/configure + src/components/cuda/configure.in...: Added papi_cuda_sampling utility in + /src/components/cuda/sampling changed src/Makefile.inc , + src/components/cuda/configure.in to build the utiltiy during PAPI + installation Added in /src/components/cuda/tests/Makefile which is -ldl + switch because 3.10.0-229.14.1.el7.x86_64 had issues using libpapi.a during + compilation of cuda component test programs + +2015-10-21 + + * a10e8331 src/papi_events.csv: papi_events: add Intel Skylake presets This + just shares all of teh broadwell events with skylake. Some quick tests show + that this probably works. Someone with skylake hardware should validate this + at some point. + +2015-10-08 + + * 91736851 src/papi_internal.c: Thanks to David Eberius of ICL for reporting + a bug in PAPI_get_event_info() in papi_internal.c, (info->component_index = + (unsigned int) cidx) was missing at line 2554, of papi_internal.c + +2015-08-27 + + * 502df070 src/Makefile.inc: Thanks to Steve Kaufmann for reporting about the + redundant () paramater in the OBJECTS expression of src/Makefile.inc file. 
+ Updated Makefile.inc by removing the redundant parameter + +2015-08-24 + + * 69fdc2e0 src/papi.c: Thanks to Harald Servat for reporting the + PAPI_overflow issue for multiple eventsets. The problem was in the + PAPI_start() function in the branch at line-2166:papi.c , if(is_dirty). After + update_control_state(), it is required to re-initialize the overflow settings + using set_overflow() + +2015-07-29 + + * be81dc43 src/components/perf_event/perf_event.c: perf_event: update the ARM + domain workaround older ARM processors could not separate out KERNEL vs USER + events. ARMv7 starting with the Cortex A15 can, as can all ARMv8 (ARM64). + This updates the code with a whitelist to properly allow setting the domains. + + * 43be2588 src/linux-common.c: linux-common: clean up ARM cpu detection + Parsing cpuinfo is always a pain. Extra work because of Raspberry Pi + (ARM1176) lying and saying it's ARMv7 rather than ARMv6. + + * 5a101a50 src/linux-common.c: linux-common: split up x86, power and arm + cpuinfo parsing + + * 0d7772d9 src/linux-common.c: linux-common: clean up and comment the cpuinfo + parsing code + +2015-07-16 + + * 59489b1f src/components/libmsr/Makefile.libmsr.in + src/components/libmsr/README src/components/libmsr/Rules.libmsr...: Create + libmsr component for reading power information and writing power constraints + using MSRs on some Intel processors The PAPI libmsr component supports + measuring and capping power usage on recent Intel architectures using the + RAPL interface exposed through MSRs (model-specific registers). Lawrence + Livermore National Laboratory has released a library (libmsr) designed to + provide a simple, safe, consistent interface to several of the model-specific + registers (MSRs) in Intel processors. The problem is that permitting open + access to the MSRs on a machine can be a safety hazard, so access to MSRs is + usually limited. 
In order to encourage system administrators to give wider + access to the MSRs on a machine, LLNL has released a Linux kernel module + (msr_safe) which provides safer, white-listed access to the MSRs. PAPI has + created a libmsr component that can provide read and write access to the + information and controls exposed via the libmsr library. This PAPI component + introduces a new ability for PAPI; it is the first case where PAPI is writing + information to a counter as well as reading the data from the counter. + +2015-07-13 + + * d326ecc9 src/components/perf_event/perf_event_lib.h src/papi_internal.c: + Thanks to Steve Kaufman for providing a patch that increases the + PERF_EVENT_MAX_MPX_COUNTERS to 192 from 128 and enhances the corresponding + warning message in papi_internal.c + +2015-06-29 + + * e829baa5 src/components/cuda/tests/Makefile + src/components/cuda/tests/cuda_ld_preload_example.README + src/components/cuda/tests/cuda_ld_preload_example.c: Example of using + LD_PRELOAD with the CUDA component. A short example of using LD_PRELOAD on a + Linux system to intercept function calls and PAPI-enable an un-instrumented + CUDA binary. Several CUDA events (e.g. SM PM counters) require a CUcontext + handle to be a provided since they are context switched. This means that we + cannot use a PAPI_attach from an external process to measure those events in + a preexisting executable. These events can only be measured from within the + CUcontext, that is, within the CUDA enabled code we are trying to measure. + If the user is unable to change the source code, they may be able to use + LD_PRELOAD's ability to trap functions and measure the events for within the + executable. See src/components/cuda/tests/cuda_ld_preload_example.README for + details. + +2015-06-26 + + * 0829a4f5 src/papi_events.csv: Add future broadwell-ep support. libpfm4 + doesn't support it yet, but add it for when it appears. 
+ +2015-06-25 + + * 36c5b5b6 src/papi_events.csv: add broadwell predefined events For now they + are the same as Haswell, as that's what the Linux kernel does. + + * f42eda64 src/papi_events.csv: Added definitions to Power8 for PAPI_SP_OPS, + PAPI_DP_OPS. + +2015-06-18 + + * f87542f7 src/components/perf_event/tests/event_name_lib.c: Added [case 63: + /*Haswell EP*/] line to the src/components/perf_event/tests/event_name_lib.c + file to support offcore for haswell EP + + * fbfc641f src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c + src/components/perf_event_uncore/tests/perf_event_uncore_lib.c: Added support + for Haswell-EP processor with model-63 in + src/components/perf_event_uncore/tests/perf_event_uncore_lib.c and + src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c files. As a + result perf_event_uncore, perf_event_uncore_multiple and + perf_event_uncore_cbox tests get passed. Tested and verified on Intel(R) + Xeon(R) CPU E5-2650 v3 @ 2.30GHz with linux kernel 4.0.4-1.el6.elrepo.x86_64 + +2015-06-17 + + * 56698211 src/components/lustre/linux-lustre.c: Thanks to Gary Mohr for the + patch that removes the error message (PAPI Error: Error Code -7,Event does + not exist) on executing papi_native_avail in PAPI built with lustre component + +2015-06-16 + + * 1b9fd867 src/components/rapl/linux-rapl.c: rapl: allow DRAM to have + separate scaling factor from CPU on Haswell-EP the DRAM scaling value is + different and cannot be detected. See https://lkml.org/lkml/2015/3/20/582 + + * 1aa74f85 src/components/rapl/linux-rapl.c: rapl: add support for Broadwell + +2015-06-11 + + * a5ecda79 src/components/rapl/linux-rapl.c: Thanks to William Cohen for the + patch which does the following: Checking the cpu family and module number is + not sufficient to determine whether RAPL can be used. If the papi is running + inside a guest VM, the MSR used by the PAPI RAPL component may not be + available. 
There should be a simple read test to verify the RAPL MSR + registers are available. This allows the component to more clearly report + that RAPL is unsupported rather than just exiting program when the RAPL + +2015-05-19 + + * 54c45107 src/components/rapl/utils/rapl_plot.c: Updated rapl_plot utility + so that the correct values/units are reported (e.g. scaled and fixed value + counts should not be converted) + +2015-05-04 + + * a34fbc62 src/papi_events.csv: papi_events.csv: typo in the ARM Cortex A53 + definitions + +2015-04-30 + + * caa3af72 src/papi_events.csv: papi_events.csv: add preset events for ARM + Cortex A53 This is based purely on the names in the libpfm4 output, these + were not validated in any way. + +2015-04-20 + + * 66553715 INSTALL.txt: added compile incantation for compiling programs that + offload code to MIC + +2015-04-16 + + * 8914dcfc src/papi_events.csv: Bug reported by William Cohen in + papi_events.csv for the event PAPI_L1_TCM + +2015-03-31 + + * 023af5ec src/components/nvml/configure: Updated the NVML configure script + which requires an autoconf and an updated configure script + + * 2385c1b2 src/components/nvml/Makefile.nvml.in + src/components/nvml/Rules.nvml src/components/nvml/configure.in: Updated the + NVML configure script to allow separate include and library paths + +2015-03-30 + + * 3d509095 src/components/infiniband_umad/linux-infiniband_umad.c: Bugfix + linux-infiniband_umad.c to include linux-infiniband_umad.h rather than + linux-infiniband.h. Thanks to Aurelien Bouteiller for pointing out this bug. + + * b865f227 src/components/vmware/vmware.c: Corrected function name in + _vmware_vector from _vmware_init to _vmware_init_thread. + +2015-03-24 + + * 2f58a4d8 src/configure: Regenerated configure to match the PAPI_GRN_SYS + patch + + * 12e6ef31 src/components/perf_event/tests/perf_event_system_wide.c: Support + PAPI_GRN_SYS granularity for perf component, updating the system wide test + (patch 2 of 2). 
Thanks to William Cohen for this patch and the documentation + Make sure that a sane cpu number is selected with PAPI_GRN_SYS Corrections + to output and comments of perf_event_system_wide.c test + + * 42879693 src/components/perf_event/perf_event.c + src/components/perf_event/tests/perf_event_system_wide.c src/config.h.in...: + Support PAPI_GRN_SYS granularity for perf component, picking a sane CPU + number (patch 1 of 2). Thanks to William Cohen for this patch and the + documentation The checks in perf_event_open syscall cause a failure when + both pid=-1 and cpu=-1. The perf_event component was passing in pid=-1 and + cpu=-1 when PAPI_GRN_SYS was selected. If possible, the code should pick the + current processor that the command is running so that the permission check + works properly when PAPI_GRN_SYS is used. The patch also adds a test fail if + PAPI_GRN_SYS unable to add PAPI_TOT_CYC. + + * 0ab9b0c8 src/ctests/krentel_pthreads.c: Added call to unregister the + overflow handler.. plus small code cleanup + +2015-03-05 + + * d886c49c src/papi.c src/papi_libpfm4_events.c src/utils/avail.c: Clean + output from papi_avail tools when there are no user defined events Thanks to + Gary Mohr for this patch. The changes in this patch improve the output from + the papi_avail tool. It was printing the user defined events header and a + PAPI Error message when no user defined events existed. These changes add + code in the enum call to return an error when trying to fetch the first user + defined event if no user events are defined. This allows the application to + detect that no user events are known and skip printing the user defined event + heading. It also prevents the application from calling PAPI_get_event_info + with a user defined event code that does not exist which avoids the PAPI + Error message. Also a one line change to modify a debug message type to make + the debug output produced by papi_libpfm4_events.c consistent. 
+ +2015-03-03 + + * ee0c58d7 src/components/cuda/linux-cuda.c: Do not generate an error if the + CUDA libraries cannot be loaded, just write a debug message + + * 08bb9bf0 src/configure: Updating the number to 5.4.1 + +2015-03-02 + + * 01f742c1 release_procedure.txt: Minor change to specify locations of some + files diff --git a/ChangeLogP550.txt b/ChangeLogP550.txt new file mode 100644 index 0000000..d47fe5f --- /dev/null +++ b/ChangeLogP550.txt @@ -0,0 +1,235 @@ +2016-09-08 + + * dfa52d3f man/man1/PAPI_derived_event_files.1 man/man1/papi_avail.1 + man/man1/papi_clockres.1...: Generated man files for release + +2016-08-18 + + * 43c1be67 src/ctests/all_native_events.c: ctests all_native: Make sure we + count all native events for KNL. + + * adc47828 src/components/perf_event_uncore/tests/perf_event_uncore_lib.c: + perf_event_uncore tests: KNL has uncore support. + + * 0a9e1a8d src/components/perf_event/tests/event_name_lib.c: perf_event + tests: add KNL offcore event. + + * e9144b9b src/papi_events.csv: Added preset definitions for KNL. + +2016-08-12 + + * 03c766a6 src/components/rapl/linux-rapl.c: linux-rapl: update KNL support + Knight's Landing does not support pp0, and also it uses a different unit for + DRAM RAPL (much like the hsw-ep does) + +2016-08-04 + + * ce57b7a7 src/testlib/test_utils.c: testlib: give better error message if + component failed to initialize Old message: ./zero test_utils.c + FAILED Line # 697 Error: Zero Counters Available! PAPI Won't + like this! New message: ./zero Component perf_event disabled due to Error + initializing libpfm4 test_utils.c FAILED Line # 702 + Error: ERROR! Zero Counters Available! + +2016-07-25 + + * ae00a502 src/papi_internal.c: add William Cohen's rewrite of the + _papi_hwi_postfix_calc function which corrects the parsing and makes the + parser more robust by catching any errors in the parsing early with asserts + in the code rather than silently corrupting memory. 
+ +2016-07-22 + + * a6359b9d src/papi_preset.c: This was another bug of smashing the stack. + This code declared the stack as: static char stack[PAPI_HUGE_STR_LEN]; But + then did this later. memset(stack, 0, 2*PAPI_HUGE_STR_LEN); How our static + analysis tools didn't catch this one? + +2016-06-30 + + * f35e6e77 doc/Doxyfile-common papi.spec src/Makefile.in...: Updated PAPI + version to 5.5 for upcoming release. + +2016-06-29 + + * 48aee8e1 src/Makefile.inc src/components/cuda/Rules.cuda + src/components/cuda/linux-cuda.c: cuda/sampling, cuda: Move sampling build + rules to the cuda component. Minor bugfix in linux-cuda.c to check ok return + status. + +2016-06-28 + + * 78249608 src/components/cuda/sampling/libactivity.so + src/components/cuda/sampling/path.h + src/components/cuda/sampling/test/matmul...: cuda/sampling: Removing + generated files that should not be in the repository + +2016-06-27 + + * 10385c63 src/components/cuda/sampling/Makefile: Adding the missing Makefile + for cuda/sampling. + +2016-06-22 + + * 45c2935e src/papi_events.csv: Correct IBM Power7 and Power8 computation of + PAPI_L1_DCA When reviewing the test results for IBM Power7 and Power8 + Michael Petlan found that the PAPI_L1_DCA preset was incorrectly computed. + The L1 cache misses need to be subtracted rather than added to the result. + +2016-06-23 + + * 0364d397 src/components/powercap/README + src/components/powercap/linux-powercap.c + src/components/powercap/tests/powercap_basic.c: Cleanup powercap component. + Most changes are cosmetic and achieved by runing thru astyle and cleaning up + manually. README file should match the powercap component now rather than + inheriting generic comments from other components. + + * 1c64bfc0 src/papi_events.csv: Added FP (SP, DP) presets for Broadwell. 
NOT + TESTED yet due to lack of access to bdw hardware + +2016-06-22 + + * 0d006ea3 src/components/rapl/linux-rapl.c: add Intel Skylake and Knights + Landing RAPL support + + * bd921b74 src/ftests/fmatrixpapi.F src/testlib/ftests_util.F: Eliminate the + sole use of ftests_skip subroutine There was only one test using ftests_skip + subroutine, fmatrixpapi.F. Converted fmatrixpapi.F to use ftest_skip + subroutine like all the other Fortran tests. + +2016-06-21 + + * e9cde551 src/ctests/tenth.c: Correct the event string names for tenth.c + There are stray ": " at the end of the event names in ctests/tenth.c. These + are unneeded because the ctests support routines already insert a ": " after + then event name when the error is printed out. + + * 97fb93c3 src/testlib/ftests_util.F: Have Fortran test support code report + errors more clearly When a Fortran test called the ftest_skip or ftest_fail + the support code would attempt to print out error strings. However, this + support code would print out gibberish because the string was not properly + initialized. There doesn't seem to be a easy way in Fortran to get the error + string, for the time being just print out the error number and people will + need to manually map it back to the string. + +2016-06-17 + + * db9c70f5 src/papi_events.csv: Added FP (SP, DP) presets for Skylake. + Corrected L1_LDM|STM, L2_DCW|TCW, PRF_DM, STL_ICY presets for Skylake. 
+ + * 9de0c97f src/components/libmsr/linux-libmsr.c: Bugfix: libmsr component can + now disable itself without printing an error message to the screen + + * d09657bf src/components/cuda/linux-cuda.c: Bugfix: CUDA component can now + disable itself without printing an error message to the screen + +2016-05-19 + + * 4718b481 src/components/perf_event/perf_event.c + src/components/perf_event_uncore/perf_event_uncore.c: Force all processors to + check event schedulability by reading the counters There are situations + where the perf_event_open syscall will return a file descriptor for a set of + events even when they cannot be scheduled together. This occurs on 32-bit + and 64-bit ARM processors and MIPS processors. This problem also occurs on + linux kernels older than 2.6.33 and when the watchdog timer steals a + performance counter. To check that the performance counters are properly + setup PAPI needs to check that the counter values can be successfully read. + Rather than trying to avoid this test PAPI will now always do it. + +2016-03-30 + + * 35264ea6 src/papi.h: update the caddr_t compatibility hack in papi.h Erik + Schnetter reported that the workaround failed with a + C11 compiler. Really, we should replace all instances of caddr_t with + something better, but I'm not sure what that does for older compilers or + breakage of ABI. + +2016-03-16 + + * 504d05c3 src/Rules.pfm4_pe src/papi.h src/papi_fwrappers.c: Only expose the + shared library symbols listed in *papi.h files The shared library should avoid + exposing internal symbols of the library. This change hides the PAPI's + internal symbols when it is built with libpfm 4. Only the functions in papi.h + and the associated fortran wrapper functions are visible to code using the + library. This change also makes libpapi.so slightly smaller (29KB for + stripped x86_64 shared library or about 6%). 
Note that a similar patch has + been proposed for upstream libpfm4 and would be needed for the bundled libpfm + if papi is being built with the bundled libpfm4. + +2016-03-10 + + * 943fb056 INSTALL.txt: Fix leftover doxygen reference in INSTALL file. Fix + leftover doxygen reference in INSTALL file. Noticed this while working + through build/install steps on a local system, Looks like the doxygen command + was switched from Doxyfile-everything to Doxyfile-html as part of revision + bfee45 "Rework the doxygen configuration files" This fixes up the INSTALL + reference to match. + + * 947f6cb3 src/Makefile.inc: Fix a bashism found in Makefile.inc While + building on an ubuntu system, hit an error that took a bit to run down. + /bin/sh: 1: [: perf_event: unexpected operator /bin/sh: 1: [: + perf_event_uncore: unexpected operator This was on an ubuntu system where + /bin/sh is actually /bin/dash, and is due to a bashism in Makefile.inc for + the build and clean of cuda_samples. Swapping out the '==' for an '=' should + be safe. + +2016-02-29 + + * 7996d480 src/components/coretemp/linux-coretemp.c: Make coretemp internal + functions static where possible As much of the internal of the papi shared + library should be hidden. A number of the internal functions for the + perf_event and coretemp components should be static since they are only used + within the individual component. Making the functions static allows the + compiler to generate better code and reduce the number of entries in the PLT + (Procedure Link Tables). 
+ +2016-02-26 + + * a0240d5a src/components/perf_event/perf_event_lib.h: Removed the re + declaration of the static functions in the perf_event_lib.h + + * 5d6e8295 src/components/appio/appio.c src/components/example/example.c + src/components/lmsensors/linux-lmsensors.c...: Thanks to William Cohen of + RedHat for providing the patches with following description Make perf_event + and perf_event_uncore internal functions static where possible Make appio + component internal functions static where possible Make example component + internal functions static where possible Make lmsensors component internal + functions static where possible Make lustre component internal functions + static where possible Make micpower component internal functions static where + possible Make mx component internal functions static where possible Make net + component internal functions static where possible Make rapl component + internal functions static where possible Make stealtime component internal + functions static where possible. + +2016-02-24 + + * 0eb308b4 src/components/cuda/README: Fixed cuda component README to use the + correct configure flags. Thanks to Jianqiao Liu for pointing out errors in + the README file. + +2016-02-15 + + * 70bd7584 src/components/powercap/utils/README + src/components/powercap/utils/powercap_write_test.c: Cleanup powercap + utility. 
Removed mention of libmsr and no-longer-needed union type left over + from the libmsr example + +2016-01-31 + + * 03afa3fe src/components/powercap/README + src/components/powercap/utils/Makefile + src/components/powercap/utils/README...: added intial powercap write test and + readme + +2016-01-26 + + * 8fd9e4e3 src/components/powercap/tests/Makefile + src/components/powercap/tests/powercap_basic.c: added power cap read test + + * edf8af95 src/components/powercap/Rules.powercap + src/components/powercap/linux-powercap.c: added PAPI component + + * 66df01be ChangeLogP542.txt ChangeLogP543.txt RELEASENOTES.txt...: PAPI + 5.4.3 release (releasenotes, changelog, man files, ...) + diff --git a/ChangeLogP551.txt b/ChangeLogP551.txt new file mode 100644 index 0000000..b4dffbc --- /dev/null +++ b/ChangeLogP551.txt @@ -0,0 +1,51 @@ +2016-11-17 + + * 4b7c2c8b src/components/coretemp/linux-coretemp.c + src/components/cuda/configure src/components/cuda/configure.in...: Handing + some of the problems exposed by Coverity Mostly adding strncpy termination + to some components (coretemp, lmsensors, micpower). Removed some unused + component writing functions (lustre, mx). Fixed CUDA component configure.in + to get the correct version of nvcc. Fixed division so it works in double + precision rather than integer in the rapl component. Fixed a minor complaint + about a stack counter variable in papi_preset. Thanks to William Cohen for + sending the Coverity results report. + +2016-11-15 + + * 7384d4d1 src/components/rapl/linux-rapl.c: Enable RAPL for Broadwell-EP + +2016-11-04 + + * 0e90ecd4 src/Makefile.inc: Minor change: Removed unneeded characters in + src/Makefile.inc. (Thanks to Steve Kaufmann) + +2016-10-24 + + * b72df977 src/components/perf_event/perf_event_lib.h: Increase + PERF_EVENT_MAX_MPX_COUNTERS to 384 to support KNL uncore events + + * Update libpfm4 to enable Intel Knights Landing untile PMU support. 
+ +2016-09-18 + + * b92abb7c src/components/powercap/utils/Makefile + src/components/powercap/utils/powercap_plot.c + src/components/powercap/utils/powercap_write_test.c: changed the tool in + /powercap/utils to behave as the similar tool in /rapl/utils does. removed + the old code residing in /powercap/utils. + +2016-09-16 + + * 51d76878 src/threads.c: threads: silence compiler warning our_tid is only + being used in debug statements + + * 33aacc65 src/papi_preset.c: papi_preset: quiet a compiler warning we were + setting the papi_preset variable but only using it in debug statements. tell + the compiler to not warn in this case. + + * 7ff9a01c src/ctests/zero_omp.c: tests/zero_omp: fix warning in zero_omp we + weren't using the maxthr variable + + * 33deefbd src/components/rapl/tests/rapl_basic.c: components/rapl: fix + compiler warning in rapl_basic test + diff --git a/ChangeLogP560.txt b/ChangeLogP560.txt new file mode 100644 index 0000000..4e051a9 --- /dev/null +++ b/ChangeLogP560.txt @@ -0,0 +1,2394 @@ +Tue Dec 5 20:10:50 2017 -0800 William Cohen + + * src/libpfm4/lib/events/power9_events.h, + src/libpfm4/tests/validate_power.c: Update libpfm4 Current with + commit 206dea666e7c259c7ca53b16f934660344293475 Ensure unique + names for IBM Power 9 events Older versions of PAPI use the event + name to look up the libpfm event number when doing the enumeration + of the available events. If there were multiple events with the + same name in libpfm, the earliest one would be selected. This + selection would cause the enumeration of events in + papi_native_avail to get stuck looping on the first duplicated + named event in a pmu. In the case of IBM Power 9 the enumeration + would get stuck on PM_CO0_BUSY. Gave each event a unique name to + avoid this unfortunate behavior. + +2017-11-16 Will Schmidt + + * src/papi_events.csv: revised papi_derived patch. [PATCH, papi] + Updated derived entries for power9. 
This is a re-implementation of + the patch that Will Cohen posted earlier, which uses the (newly + defined) PM_LD_MISS_ALT entry instead of the PM_LD_MISS_FIN . + Thanks, -Will + +2017-12-05 Heike Jagode (jagode@icl.utk.edu) + + * release_procedure.txt: Updated notes for release procedure. + +2017-12-05 Vince Weaver + + * src/extras.c: extras.c: add string.h include to make the ffsll + warning go away + +2017-12-04 Heike Jagode (jagode@icl.utk.edu) + + * src/configure, src/configure.in: Fixed configure bug: Once ffsll + support is detected, set HAVE_FFSLL to 1 in config.h. Tested + without configure flag --with-ffsll, with --with-ffsll=yes, --with- + ffsll=no. + +2017-12-04 Vince Weaver + + * src/ctests/Makefile.recipies, src/ctests/locks_pthreads.c: ctests: + locks_pthreads: adjust run count again linear slowdown makes + things run really quickly. This patch scales it down by the square + root of the number of cores which is maybe a better compromise. + * src/ctests/locks_pthreads.c: ctests: locks_pthreads, minor cleanups + +2017-11-20 William Cohen + + * src/ctests/locks_pthreads.c: Keep locks_pthreads test's amount of + work reasonable on many core machines The runtime of + locks_pthreads test scaled by the number of processors on the + machine because of the serialized increment operation in the test. + As more machines are available with 100+ processors the runtime of + locks_pthreads is becoming excessive. Revised the test to specify + the approximate total number of iterations and split the work among the + threads. + +Fri Dec 4 11:31:46 2015 -0500 sangamesh + + * src/extras.c, src/papi.h: Revert change that added ffsll to papi.h + This reverts commit 2f1ec33a9e585df1b6343a0ea735f79974c080df. 
+ commit 2f1ec33a9e585df1b6343a0ea735f79974c080df changed #if + (!defined(HAVE_FFSLL) || defined(__bgp__)) int ffsll( long long lli + ); #endif --- to --- extern int ffsll( long long lli in extras.c + to avoid warning when --with-ffsll is used as config option + +Thu Apr 20 11:31:38 2017 -0400 Stephen Wood + + * src/extras.c, src/papi.h: revert part of patch that added extra + attributes to ffsll This manually reverts part of: commit + 9e199a8aee48f5a2c62d891f0b2c1701b496a9ca cast pointers + appropriately to avoid warnings and errors + +Sun Dec 3 09:42:44 2017 -0800 Will Schmidt + + * src/libpfm4/lib/events/power9_events.h, + src/libpfm4/tests/validate_power.c: Updated libpfm4 Current with: + ---------------- commit ed3f51c4690685675cf2766edb90acbc0c1cdb67 + (HEAD -> master, origin/master, origin/HEAD) Add alternate event + numbers for power9. I had previously missed adding the _ALT + entries, which allow some events to be specified on different + counters. This patch fills those in. This patch also adds a few + validation tests for the ALT events. ---------------- + +2017-11-28 Heike Jagode (jagode@icl.utk.edu) + + * src/utils/papi_avail.c, src/utils/papi_native_avail.c: Fixed + utility option inconsistencies between papi_avail and + papi_native_avail. There are more inconsistencies with other PAPI + utilities, which will be addressed eventually. + +2017-11-28 Heike Jagode + + * README.md: README.md edited online with Bitbucket + * README.md: README.md edited online with Bitbucket + * README.md: README.md edited online with Bitbucket + * README.md: README.md edited online with Bitbucket + +2017-11-27 Heike Jagode + + * src/components/powercap/linux-powercap.c: More clean-ups and + checking of return values. 
+ +Mon Nov 13 23:15:53 2017 -0800 Thomas Richter + + * src/libpfm4/lib/pfmlib_common.c: Update libpfm4 Current with commit + f5331b7cbc96d9f9441df6a54a6f3b6e0fab3fb9 better fix for + pfmlib_getl() The following commit: commit + 9c69edf67f6899d9c6870e9cb54dcd0990974f81 better param check in + pfmlib_getl() Fixed parameter checking of pfmlib_getl() but missed + one condition on the buffer argument. It is char **buffer. + Therefore we need to check if *buffer is not NULL before we can + check *len. + +2017-11-19 Asim YarKhan + + * src/components/cuda/linux-cuda.c: CUDA component: Bug fix for + releasing and resetting event list When an event addition failed + because the event (or metric) requires multiple-runs the eventlist + and event-context structure was not being cleaned up properly. + This fixes the event cleanup process. + +2017-11-17 Asim YarKhan + + * src/components/powercap/tests/powercap_basic.c, + src/components/powercap/tests/powercap_limit.c: Powercap component: + Updated tests to handle no-event-counters (num_cntrs==0) and skip + some compiler warnings (argv, argc unused) + +2017-11-16 William Cohen + + * src/components/lmsensors/linux-lmsensors.c: Make more of lmsensors + component internal state hidden There are a number of function + pointers stored in variables that are only used within the lmsensors + component. Making those static ensures they are not visible + outside the lmsensors component. + * src/components/lmsensors/linux-lmsensors.c: Make internal + cached_counts variable static Want to make as little information + about the internals of the PAPI lmsensors component visible to the + outside. Thus, making cached_counts variable static.
+ +2017-11-15 William Cohen + + * src/components/lmsensors/linux-lmsensors.c: Avoid statically + limiting the number of lmsensor events allowed Some high-end + server machines provide more events than the 512 entries limit + imposed by the LM_SENSORS_MAX_COUNTERS define in the lmsensor + component (observed 577 entries on one machine). When this limit + was exceeded the lmsensor component would write beyond the array + bounds causing ctests/all_native_events to crash. Modified the + lmsensor code to dynamically allocate the required space for all + the available lmsensor entries on the machine. This allows + ctests/all_native_events to run to completion. + * src/components/appio/appio.c, src/components/coretemp/linux- + coretemp.c, src/components/example/example.c, + src/components/infiniband/linux-infiniband.c, src/components/lustre + /linux-lustre.c, src/components/rapl/linux-rapl.c: Use correct + argument order for calloc function calls Some calls to calloc in + PAPI have the order of the arguments reversed. According to the + calloc man page the number of elements is the first argument and + the size of each element is the second argument. Due to alignment + constraints the second argument might be rounded up. Thus, it is + best not to swap the arguments to calloc. + +2017-11-15 Philip Vaccaro + + * src/components/powercap/linux-powercap.c, + src/components/powercap/tests/powercap_basic.c: Updates and changes + to the powercap component to address a few areas. Various things + were changed but mainly things were simplified and made more + streamlined. Main focus was on simplifying managing the system + files.
+ +Mon Nov 13 23:15:53 2017 -0800 Thomas Richter + + * src/libpfm4/docs/man3/pfm_get_event_encoding.3, + src/libpfm4/docs/man3/pfm_get_os_event_encoding.3, + src/libpfm4/lib/events/amd64_events_fam11h.h, + src/libpfm4/lib/events/amd64_events_fam12h.h, + src/libpfm4/lib/pfmlib_common.c, src/libpfm4/lib/pfmlib_priv.h, + src/libpfm4/tests/validate_x86.c: Update libpfm4 Current with + commit 9c69edf67f6899d9c6870e9cb54dcd0990974f81 better param check + in pfmlib_getl() This patch ensures that len >= 2 because we do: m + = l - 2; Reviewed-by: Hendrik Brueckner + + +2017-11-13 Vince Weaver + + * src/components/perf_event/pe_libpfm4_events.c: pe_libpfm4_events: + properly notice if trying to add invalid umask this passes the + broken-event test case and all of the unit tests, but it would be + good to test this on codes that do a lot of native event tests. + the pe_libpfm4_events code *really* needs a once-over, it is + currently a confusing mess. + * src/components/perf_event/tests/Makefile, + src/components/perf_event/tests/broken_events.c, + src/components/perf_event/tests/event_name_lib.c, + src/components/perf_event/tests/event_name_lib.h: perf_event/tests: + add broken event name test we were wrongly accepting event names + with invalid umasks + +2017-11-13 Philip Mucci + + * src/utils/print_header.c: Removed extraneous colon in VM vendor + output + +2017-11-10 Vince Weaver + + * src/validation_tests/papi_l1_dcm.c, + src/validation_tests/papi_l2_dcm.c, + src/validation_tests/papi_l2_dcr.c, + src/validation_tests/papi_l2_dcw.c: validation_tests: fix compiler + warnings on arm32 On Raspberry Pi we were getting warnings where + we were printing sizeof() values with %ld. Convert to %zu instead. + +2017-11-09 Vince Weaver + + * src/validation_tests/papi_l2_dca.c: validation_tests: papi_l2_dca + fix crash on ARM32 On raspberry pi it's not possible to detect L2 + cache size so the test was dividing by zero.
+ * src/linux-common.c: linux-common: remove warning on not finding mhz + in cpuinfo This was added recently and is not needed. Most ARM32 + devices don't have MHz in the cpuinfo file and it's not really a + bug. + * src/components/perf_event/perf_event.c: perf_event: disable the old + pre-Linux-2.6.34 workarounds by default There were a number of + bugs in perf_event that PAPI had to work around, but most of these + were fixed by 2.6.34 In order to hit these bugs you would need to + be running a kernel from before 2010 which wouldn't support any + recent hardware. Unfortunately these bugs are hard to test for. + We were enabling things based on kernel versions, but this caught + vendors (such as Redhat) shipping 2.6.32 kernels that had + backported fixes. This fix just #ifdefs things out, if no one + complains then we can fully remove the code. + * src/components/perf_event/perf_event.c: perf_event: decrement the + available counter count if NMI_WATCHDOG is stealing one + * src/components/perf_event/perf_event.c: perf_event: move the + paranoid handling code to its own function + * src/components/perf_event/perf_event.c: perf_event: centralize + fast_counter_read flag just use the component version of the flag, + rather than having a shadow global version. + +2017-11-09 William Cohen + + * src/linux-memory.c: Make the fallback generic_get_memory_info + function more robust On the aarch64 processor linux 4.11.0 kernels + /sys/devices/system/cpu/cpu0/cache is available, but the index[0-9] + subdirectories are not fully populated with information about cache + and line size, associativity, or number of sets. These missing + files would cause the generic_get_memory_info function to attempt + to read data using a NULL file descriptor causing the program to + crash. Added checks to see if every fopen was and fscan was + successful and just say there is no cache if there is any failure. 
+ +2017-11-09 Asim YarKhan + + * src/components/cuda/linux-cuda.c, + src/components/cuda/tests/Makefile, + src/components/nvml/tests/Makefile, src/configure, + src/configure.in: Enable icc and nvcc to work together in cuda and + nvml components. For nvcc to work with Intel icc to compile cuda + and nvml components and tests , it needs to use nvcc -ccbin=<$CC- + compilerbin> . The compiler name in CC also needs to be clean, so + CC= and any other flags are pushed to CFLAGS (changed + in src/configure.in script). + * src/ctests/mpifirst.c: Minor correction to mpifirst.c test + +2017-11-09 Vince Weaver + + * src/utils/print_header.c: utils: print fast_counter_read (rdpmc) + status in the utils header + +2017-11-08 William Cohen + + * src/validation_tests/cache_helper.c: Ensure access to array within + bounds Coverity reported the following issues. Need the test to + be "type>=MAX_CACHE" rather than "type>MAX_CACHE". Error: OVERRUN + (CWE-119): papi-5.5.2/src/validation_tests/cache_helper.c:85: + cond_at_most: Checking "type > 4" implies that "type" may be up to + 4 on the false branch. + papi-5.5.2/src/validation_tests/cache_helper.c:90: overrun-local: + Overrunning array "cache_info" of 4 24-byte elements at element + index 4 (byte offset 96) using index "type" (which evaluates to 4). + Error: OVERRUN (CWE-119): + papi-5.5.2/src/validation_tests/cache_helper.c:101: cond_at_most: + Checking "type > 4" implies that "type" may be up to 4 on the false + branch. papi-5.5.2/src/validation_tests/cache_helper.c:106: + overrun-local: Overrunning array "cache_info" of 4 24-byte elements + at element index 4 (byte offset 96) using index "type" (which + evaluates to 4). Error: OVERRUN (CWE-119): + papi-5.5.2/src/validation_tests/cache_helper.c:117: cond_at_most: + Checking "type > 4" implies that "type" may be up to 4 on the false + branch. 
papi-5.5.2/src/validation_tests/cache_helper.c:122: + overrun-local: Overrunning array "cache_info" of 4 24-byte elements + at element index 4 (byte offset 96) using index "type" (which + evaluates to 4). + * src/ctests/overflow_pthreads.c: Eliminate coverity overflow warning + about expression + * src/components/perf_event_uncore/tests/perf_event_uncore_lib.c: + Remove dead code from perf_event_uncore_lib.c + +2017-11-09 Vince Weaver + + * src/components/perf_event/perf_event.c: perf_event: don't + initialize globals statically from the mucci-5.5.2 tree + +2017-11-08 phil@minimalmetrics.com + + * src/linux-common.c: linux-common: clean up the /proc/cpuinfo + parsing code From the mucci-cleanup branch + * src/components/perf_event/perf_event.c, + .../perf_event_uncore/perf_event_uncore.c, + src/papi_libpfm4_events.c, src/papi_libpfm4_events.h: perf_event: + clean up _papi_libpfm4_shutdown() From the mucci-cleanup branch + * src/utils/print_header.c: utils: clean up the cpuinfo header From + the mucci-cleanup branch + * src/papi_internal.c, src/papi_internal.h: papi_internal: add + PAPI_WARN() function From the mucci-cleanup branch + * src/components/perf_event/pe_libpfm4_events.c: perf_event: clean up + pe_libpfm4_events From the mucci-cleanup branch -- + +2017-11-08 Vince Weaver + + * src/utils/papi_avail.c: utils/papi_avail: update the manpage info + based on changes by Phil Mucci + * .../perf_event/tests/perf_event_system_wide.c: perf_event tests: + perf_event_system_wide: don't fail if permissions restrict system- + wide events right now we just skip if we get EPERM, we should also + maybe check the perf_event_paranoid setting and print a more + meaningful report + * src/ctests/locks_pthreads.c: ctests/locks_pthreads: avoid printing + values when in quiet mode + +2017-08-31 phil@minimalmetrics.com + + * src/Makefile.inc: Better symlink creation for shared library in + make phase + +2017-08-28 phil@minimalmetrics.com + + * doc/Makefile, src/.gitignore, 
src/Makefile.inc, + src/components/.gitignore, src/components/Makefile_comp_tests, + src/ctests/.gitignore, src/ctests/Makefile.recipies, + src/ftests/.gitignore, src/ftests/Makefile.recipies, + src/testlib/.gitignore, src/utils/.gitignore, src/utils/Makefile, + src/validation_tests/.gitignore, + src/validation_tests/Makefile.recipies: Full cleanup, including + removal of .gitignore files that prevented us from realizing we + were really cleaning/clobbering properly + * src/validation_tests/.gitignore: .gitignore Makefile.target + * src/papi.c: Remove PAPI_VERB_ECONT setting by default from + initialization path. This prints all kinds of needless errors on + virtual platforms. + * src/x86_cpuid_info.c: Remove leftover printf + +2017-08-21 phil@minimalmetrics.com + + * src/ctests/locks_pthreads.c: Test now performs a fixed number of + iterations, and reports lock/unlock timings per thread. + * src/components/perf_event/perf_event.c: Added more descriptive + error message to exclude_guest check + * src/papi_internal.c: Removed leading newline and trailing . from + error messages + * src/papi_preset.c: Updated message for derived event failures + +2017-11-07 Vince Weaver + + * src/Makefile.inc, src/ctests/Makefile, + src/ctests/Makefile.target.in, src/ftests/Makefile, + src/ftests/Makefile.target.in, src/testlib/Makefile.target.in, + src/utils/Makefile.target.in, src/validation_tests/Makefile, + src/validation_tests/Makefile.target.in: tests: make sure DESTDIR + and DATADIR are passed in when doing an install + * src/ctests/Makefile, src/ctests/Makefile.target.in, + src/ftests/Makefile, src/ftests/Makefile.target.in, + src/utils/Makefile, src/utils/Makefile.target.in, + src/validation_tests/Makefile, + src/validation_tests/Makefile.target.in: + ctests/ftests/utils/validation_tests: get shared library linking + working again This should let the various tests and utils be + linked as shared libraries again. 
+ * src/validation_tests/Makefile: validation_tests: add an + installation target this makes the validation tests have an + install target, like the ctests and ftests + * src/ctests/Makefile, src/ftests/Makefile: ctests/ftests: fix + "install" target at some point DATADIR was renamed datadir and the + install targets were not updated. + +2017-11-07 Asim YarKhan + + * bitbucket-pipelines.yml: Bitbucket pipeline testing: Inspired by + Phil Mucci's branch; copied the functionality tests run in that + branch. + * src/components/lmsensors/linux-lmsensors.c: lmsensors component: + Changed event names to use lm_sensors (only once) instead of + LM_SENSORS (twice) to be consistent with other events + +2017-11-02 William Cohen + + * src/components/appio/tests/iozone/gnu3d.dem: gnu3d.dem should not + be executed by the test framework This file is a gnuplot file and + should not be executed as part of the tests. Removing the + executable perms will signal to the testing framework that it + shouldn't be executed. + * src/components/appio/tests/iozone/Gnuplot.txt: Gnuplot.txt should + not be executed by the test framework This file is a readme file + and should not be executed as part of the tests. Removing the + executable perms will signal to the testing framework that it + shouldn't be executed. + * .../appio/tests/iozone/iozone_visualizer.pl, + src/components/appio/tests/iozone/report.pl: Fix perl scripts so + they run on Linux machines The DOS style newlines were preventing + Linux from selecting the appropriate interpreter for these scripts + and causing these tests to fail.
+ +2017-11-07 Asim YarKhan + + * src/components/lmsensors/configure: lmsensors component: Regenerate + the configure file for the component + +2017-11-02 William Cohen + + * src/components/lmsensors/Makefile.lmsensors.in, + src/components/lmsensors/configure.in, src/components/lmsensors + /linux-lmsensors.c: Make the lmsensors dynamically load the needed + shared library When attempting to build the current git repo of + papi the build of the files in the utils subdirectory failed + because the lmsensors libraries were not being linked in. Rather + than forcing the papi to link in the lmsensor library during the + build the lmsensors component has been modified to dynamically load + the needed libraries and enable the lmsensors events when + available. This allows machines missing the lmsensor libraries + installed to still use papi. + +2017-11-06 Asim YarKhan + + * src/components/cuda/linux-cuda.c: CUDA component: On architectures + without CUDA Metrics (e.g. Tesla C2050), skip metric registration + rather than returning errors + +2017-11-06 Vince Weaver + + * src/validation_tests/papi_l2_dca.c, + src/validation_tests/papi_l2_dcm.c, + src/validation_tests/papi_l2_dcr.c, + src/validation_tests/papi_l2_dcw.c: validation_tests: make the + papi_l2 tests fail with warnings On Haswell/Broadwell and newer + these tests fail for unknown reasons. This isn't new behavior, + it's just that the tests are new. It's unlikely we will have time + to completely sort this out before the upcoming release, so change + the FAIL to WARN so testers won't be unnecessarily alarmed. + +2017-11-05 Vince Weaver + + * src/components/perf_event/perf_event.c, src/configure, + src/configure.in: perf_event: enable rdpmc support by default It + can still be disabled at configure time with --enable-perfevent- + rdpmc=no This speeds up PAPI_read() by at least a factor of 5x + (see the ESPT'17 workshop presentation) It is only enabled on + Linux 4.13 and newer due to bugs in previous versions. 
+ +2017-11-03 Vince Weaver + + * src/ctests/sdsc-mpx.c: ctests: sdsc: fix issue where the error + message is not printed correctly + +2017-11-01 Heike Jagode + + * src/components/powercap/linux-powercap.c: Intermediate check-in: + Fixed a whole bunch of careless file handling (missing closing of + open files, missing setting of open/close flag, etc). Still more + rigorous checks needed. + +Mon Oct 30 17:16:32 2017 -0700 Stephane Eranian + + * src/libpfm4/lib/events/intel_skl_events.h: Update + libpfm4 Current with commit + 21405fb3c247a0d16861483daf0696cf4fa0cc43 update SW_PREFETCH event + for Intel Skylake Event was renamed SW_PREFETCH_ACCESS, but we + keep SW_PREFETCH as an alias. Added PREFETCHW umask. Enabled + support for both Skylake client and server as per official event + table from 10/27/2017. See download.01.org/perfmon/ + +2017-10-30 Vince Weaver + + * src/validation_tests/Makefile.recipies, + src/validation_tests/cycles.c, + src/validation_tests/cycles_validation.c: validation_tests: add + cycles_validation test this is the old zero test, which does a + number of cycles tests It should be extended to add more. + +2017-10-30 Vince Weaver + + * src/ctests/attach2.c, src/ctests/attach3.c, src/ctests/calibrate.c, + src/ctests/child_overflow.c, src/ctests/code2name.c, + src/ctests/earprofile.c, src/ctests/exec_overflow.c, + src/ctests/fork_overflow.c, src/ctests/hwinfo.c, src/ctests/mendes- + alt.c, src/ctests/prof_utils.c, src/ctests/prof_utils.h, + src/ctests/profile.c, src/ctests/remove_events.c, + src/ctests/shlib.c, src/ctests/system_child_overflow.c, + src/ctests/system_overflow.c, src/ctests/zero_named.c, + src/testlib/papi_test.h, src/testlib/test_utils.c: papi: c++11 + fixes: fix various ctests that c++ complains on mostly just const + warnings, some K+R function declarations, and possibly an actual + char/char* bug.
+ * src/papi.c, src/papi.h: papi: c++11 conversion: + PAPI_get_component_index() + * src/papi.c, src/papi.h: papi: c++11 conversion: convert + PAPI_perror() + * src/aix.c, src/components/appio/appio.c, + src/components/bgpm/CNKunit/linux-CNKunit.c, + src/components/bgpm/IOunit/linux-IOunit.c, + src/components/bgpm/L2unit/linux-L2unit.c, + src/components/bgpm/NWunit/linux-NWunit.c, src/components/emon + /linux-emon.c, src/components/net/linux-net.c, + src/components/perf_event/pe_libpfm4_events.c, + src/components/perf_event/pe_libpfm4_events.h, + src/components/perf_event/perf_event.c, + .../perf_event_uncore/perf_event_uncore.c, + src/components/perfmon_ia64/perfmon-ia64.c, src/freebsd.c, src + /linux-bgq.c, src/papi.c, src/papi.h, src/papi_internal.c, + src/papi_internal.h, src/papi_libpfm3_events.c, + src/papi_libpfm_events.h, src/papi_vector.c, src/papi_vector.h: + papi: start converting papi.h to be C++11 clean Most of the issues + have to do with string to char * conversion. This first patch + converts PAPI_event_name_to_code() The issue was first reported by + Brian Van Straalen + * src/validation_tests/papi_l2_dca.c: validation_tests/papi_l2_dca: + update some comments + * src/ctests/zero.c, src/validation_tests/cycles.c: ctests/zero: make + test pass on recent intel machines The test was failing due to the + PAPI_get_real_cycles() validation on recent Intel chips. This is + probably something that should be tested in a separate test and not + in zero which is supposed to be a bare-bones are-things-working + test. + +2017-10-27 Philip Vaccaro + + * src/components/powercap/README: updated powercap README to be more + concise. includes more details on interacting with energy counters + and power limits. 
+ +2017-10-27 Asim YarKhan + + * src/components/cuda/linux-cuda.c, src/components/nvml/linux-nvml.c: + CUDA/NVML components: Handled segfault which can occur when + dlclosing libcudart from both components by adding an additional + flag to dlopen + +2017-10-24 Asim YarKhan + + * src/components/cuda/linux-cuda.c, + src/components/cuda/tests/simpleMultiGPU.cu: CUDA component: Clean + up fulltest by moving some output from stdout to SUBDBG, removed + some commented out lines + * src/components/nvml/linux-nvml.c: nvml component: To support V100 + (Volta) updated to get nvmlDevice handle ordered by index rather + than pci busid. + +2017-10-23 Asim YarKhan + + * src/components/cuda/linux-cuda.c: CUDA component: Minor fix to + remove some unneeded stdout which shows up during fulltest + +2017-10-20 Asim YarKhan + + * src/components/cuda/linux-cuda.c, + src/components/cuda/tests/Makefile, + src/components/cuda/tests/simpleMultiGPU.cu: CUDA component test + update: Remove some debug output. Do not build cupti_only test + binary. + +Thu Oct 19 11:23:44 2017 -0700 Stephane Eranian + + * src/libpfm4/examples/showevtinfo.c, + src/libpfm4/lib/events/intel_skl_events.h: Update + libpfm4\n\nCurrent with\n commit + 2e98642dd331b15382256caa380834d01b63bef8 Fix Intel Skylake + EXE_ACTIVITY.1_PORTS_UTIL event Was missing a umask name. + +2017-10-17 Vince Weaver + + * src/ctests/version.c: ctests: version, add INCREMENT field at the + request of Steve Kaufmann + * src/ctests/Makefile.recipies, src/ctests/version.c: ctests: re- + enable version test not sure why it was disabled + * src/ctests/Makefile.recipies: ctests: alphabetize SERIAL tests in + Makefile.recipes + +2017-10-13 Philip Vaccaro + + * src/components/powercap/tests/Makefile, + src/components/powercap/tests/powercap_limit.c: added simple limit + test for the powercap component. 
+ +2017-10-09 Asim YarKhan + + * src/components/nvml/linux-nvml.c: Bug Fix NVML component: Fix + problem with names when there are multiple identical GPUs If + multiple identical GPUs were available, the names were not mapped + correctly. Fixed event names to be + "nvml:::Tesla_K40c:device_0:myevent" rather than + "nvml:::Tesla_K40c_0:myevent". + +Fri Sep 29 00:25:09 2017 -0700 Stephane Eranian + + * src/libpfm4/include/perfmon/perf_event.h, + src/libpfm4/lib/events/intel_skl_events.h, + src/libpfm4/lib/events/s390x_cpumf_events.h, + src/libpfm4/lib/pfmlib_s390x_cpumf.c, + src/libpfm4/perf_examples/Makefile, + src/libpfm4/perf_examples/branch_smpl.c, + src/libpfm4/perf_examples/perf_util.c: Update libpfm4 Current + with commit d1e7c96df60a00a371fdaa3b635ad4a38cee4c2f add new + branch_smpl.c perf_events example This patch adds a new example to + demo how to sample and parse the PERF_SAMPLE_BRANCH_STACK record + format of perf_events. It will dump branches taken from the sampled + command. + +2017-10-05 Asim YarKhan + + * src/components/nvml/README, src/components/nvml/linux-nvml.c, + src/components/nvml/linux-nvml.h, + src/components/nvml/tests/HelloWorld.cu, + src/components/nvml/tests/Makefile, + .../nvml/tests/nvml_power_limiting_test.cu: Update NVML component: + Support for power limiting using NVML PAPI has added support for + power limiting using NVML (on supported devices from the Kepler + family or later). The executable needs to have root permissions to + change the power limits on the device. We have added new events to + the NVML component to support power management limits. The + nvml:::DEVICE:power_management_limit can be written (as well as + read), but requires higher permissions (root level). The limit is + constrained between a min and a max value, which can be read. + When the component is unloaded, the power_management_limit should + be reset to the initial value.
+ nvml:::DEVICE:power_management_limit + nvml:::DEVICE:power_management_limit_constraint_min + nvml:::DEVICE:power_management_limit_constraint_max A new test + (nvml/tests/nvml_power_limiting_test.cu)/ was written to check if + the writing functionality works (with the proper hardware and + permissions). + +2017-10-04 Asim YarKhan + + * src/components/nvml/linux-nvml.c, src/components/nvml/linux-nvml.h, + src/components/nvml/tests/HelloWorld.cu: Style consistency and + refactoring via astyle command. No changes to the actual code were + made here. + +2017-10-04 Vince Weaver + + * src/components/rapl/linux-rapl.c: rapl: add support for some Intel + Atom models Goldmont / Gemini_Lake / Denverton + * src/components/rapl/linux-rapl.c: rapl: fix skylake SoC measurement + support + * src/components/rapl/linux-rapl.c: rapl: add support for skylake SoC + energy measurements + * src/components/rapl/linux-rapl.c: rapl: add Skylake-X / Kabylake + support + * src/components/rapl/linux-rapl.c: rapl: centralize the "different + DRAM units" code + * src/components/rapl/linux-rapl.c: rapl: merge like processors + * src/components/rapl/linux-rapl.c: rapl: convert chip detection to a + switch statement + * src/components/rapl/linux-rapl.c: rapl: update the whitespace a bit + +2017-09-12 Heike Jagode (jagode@icl.utk.edu) + + * .../infiniband_umad/linux-infiniband_umad.c, .../infiniband_umad + /linux-infiniband_umad.h: Fixed papi_vector for infiniband_umad + component. The array of function pointers that the component + defines must use the naming convention papi_vector_t _x_vector + where x is the name of the component directory. In this case, the + name of the component directory is infiniband_umad and not + infiniband. This change has not been tested yet due to OFED lib + issues on our local machines. There may be more changes required in + order to get the infiniband_umad component to work properly. 
+ +2017-09-11 Hanumanth + + * man/man1/papi_avail.1, man/man1/papi_native_avail.1, + src/utils/papi_avail.c, src/utils/papi_native_avail.c: Updating man + and help pages for papi_avail and papi_native_avail + +2017-09-07 Asim YarKhan + + * src/components/cuda/tests/nvlink_bandwidth.cu, + .../cuda/tests/nvlink_bandwidth_cupti_only.cu: Update to CUDA + component to support NVLink. The CUDA component has been cleaned + up and updated to support NVLink. NVLink metrics can not be + measured properly in KERNEL event collection mode, so the CUPTI + EventCollectionMode is transparently set to + CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS when a NVLink metric is + being measured in an eventset. For all other events and metrics, + the CUDA component uses the KERNEL event collection mode. A bug in + the earlier version was that repeated calls to add CUDA events were + failing because some structures were not cleaned up. This should + now be fixed. A new nvlink test was added to the CUDA component + tests. 
+ +2017-08-31 Phil Mucci + + * man/man1/papi_avail.1, man/man1/papi_clockres.1, + man/man1/papi_command_line.1, man/man1/papi_component_avail.1, + man/man1/papi_cost.1, man/man1/papi_decode.1, + man/man1/papi_error_codes.1, man/man1/papi_event_chooser.1, + man/man1/papi_hybrid_native_avail.1, man/man1/papi_mem_info.1, + man/man1/papi_multiplex_cost.1, man/man1/papi_native_avail.1, + man/man1/papi_version.1, man/man1/papi_xml_event_info.1, + man/man3/PAPI_cleanup_eventset.3, man/man3/PAPI_destroy_eventset.3: + Updating options for papi_avail/native_avail as well as all + references to old mailing list + +2017-08-31 Asim YarKhan + + * src/components/nvml/linux-nvml.c, + src/components/nvml/tests/HelloWorld.cu, + src/components/nvml/tests/Makefile: Minor updates to NVML component + to enable it to compile and run without complaints + +2017-08-30 Vince Weaver + + * src/validation_tests/papi_br_prc.c, + src/validation_tests/papi_br_tkn.c: validation: update papi_br_prc + and papi_br_tkn for amd fam15h amd fam15h doesn't have a + conditional branch event so the measures have to be against total. + for now print warning, maybe we should let it go w/o a warning. + * src/papi_events.csv: papi_events: add PAPI_BR_PRC event to amd + fam15h + * src/papi_events.csv: papi_events: update PAPI_BR_PRC and + PAPI_BR_TKN on sandybridge/ivybridge They were using TOTAL + branches for the derived branch events rather than CONDITIONAL like + the other modern x86 processors were using. 
+ * src/validation_tests/papi_br_tkn.c: validation_tests: papi_br_tkn: + update to only count conditional branches + * src/validation_tests/papi_br_prc.c: validation_tests: papi_br_prc: + make sure it is comparing conditional branches was doing total + branches, which made the test fail on skylake + +Mon Aug 21 23:55:46 2017 -0700 Stephane Eranian + + * src/libpfm4/lib/pfmlib_intel_x86.c: Update libpfm4 Current + with commit a290dead7c1f351f8269a265c0d4a5f38a60ba29 fix usage + of is_model_event() for Intel X86 This patch fixes a couple of + problems introduced by commit: 77a5ac9d43b1 add model field to + intel_x86_entry_t The code in pfm_intel_x86_get_event_first() was + incorrect. It was calling is_model_event() before checking if the + index was within bounds. It should have been the opposite. Same + issue in pfm_intel_x86_get_next_event(). This could cause SEGFAULT + as reported by Phil Mucci. The patch also fixes the return value of + pfm_intel_x86_get_event_first(). It was not calculated correctly. + Reported-by: Phil Mucci + +2017-08-20 Vince Weaver + + * src/ctests/Makefile.recipies, src/ctests/failed_events.c: ctests: + add failed_events test it tries to create invalid events to make + sure the event parser properly handles invalid events. + +2017-08-19 Vince Weaver + + * src/components/perf_event_uncore/tests/Makefile, + .../perf_event_uncore/tests/perf_event_uncore.c, + .../tests/perf_event_uncore_attach.c: perf_event_uncore: tests: + update perf_event_uncore to use :cpu=0 This is the more common way + of specifying uncore events.
Rename the old test that uses + PAPI_set_opt() to perf_event_uncore_attach + * .../tests/perf_event_uncore_cbox.c, + .../tests/perf_event_uncore_lib.c, + .../tests/perf_event_uncore_lib.h: perf_event_uncore: tests: update + uncore events for recent processors + * src/ctests/zero_pthreads.c: ctests: zero_pthreads: remove + extraneous printf when in quiet mode + * .../tests/perf_event_uncore_lib.c: perf_event_uncore: event list, + add recent processors libpfm4 still doesn't support regular + Haswell, Broadwell, or Skylake machines + * .../perf_event_uncore/tests/perf_event_uncore.c, + .../tests/perf_event_uncore_cbox.c, + .../tests/perf_event_uncore_multiple.c: perf_event_uncore: tests: + print a message indicating the problem on skip also some + whitespace cleanups + * src/components/perf_event/tests/event_name_lib.c: perf_event: + tests: update event_name_lib for recent Intel processors + * src/components/perf_event/tests/event_name_lib.c: perf_event: + tests: event_name_lib, clean up whitespace + * .../perf_event/tests/perf_event_offcore_response.c: perf_event: + tests: update perf_event_offcore_response test print an indicator + of why we are skipping the test also some gratuitous whitespace + cleanups + * src/ctests/zero_shmem.c: ctests: zero_shmem: document the code a + little better + * src/ctests/zero_smp.c: ctests: zero_smp: make it actually do + something on Linux Linux can use the pthread code just like AIX + although we don't validate the results, so this test could be + another candidate for not being necessary anymore. + * src/ctests/zero_shmem.c: ctests: zero_shmem: minor cleanups we + pretty much always skip this test. Is it needed anymore? What was + it testing in the first place? The code it calls (start_pes() ) + doesn't seem to exist anymore + * src/ctests/zero_omp.c, src/ctests/zero_pthreads.c: ctests: zero_omp + and zero_pthread were skipping due to a typo when updating the + code I had left a stray ! 
before PAPI_query_event() + +2017-08-19 Vince Weaver + + * src/papi_events.csv: papi_events: the skylake fixes broke hsw/bdw + this skylake-x change is way more trouble than it was worth. + +2017-08-19 Vince Weaver + + * src/papi_events.csv: papi_events: on skylake the SNP_FWD umask was + renamed to SNP_HIT_WITH_FWD This broke presets on skylake, + skylake-x + * src/components/perf_event/pe_libpfm4_events.c: perf_event: fix + uninitialized descr issue reported by valgrind I don't think this + is the skylake-x bug though + +2017-08-18 Vince Weaver + + * src/components/perf_event/pe_libpfm4_events.c: perf_event: clean up + some whitespace in pe_libpfm4_events.c + * src/linux-memory.c: linux-memory: various errors when compiling + with debug enabled the new proc memory code had some mistakes in + the debug messages that only appeared when compiled with --with- + debug Reported-by: Steve Kaufmann + +2017-08-17 Vince Weaver + + * src/papi_events.csv: papi_events: missed one of the skx event + locations + +2017-08-16 Vince Weaver + + * src/papi_events.csv: papi_events: enable Skylake X support + +Sun Aug 6 00:22:52 2017 -0700 Stephane Eranian + + * src/libpfm4/include/perfmon/pfmlib.h, + src/libpfm4/lib/events/intel_skl_events.h, + src/libpfm4/lib/pfmlib_common.c, + src/libpfm4/lib/pfmlib_intel_skl.c, + src/libpfm4/lib/pfmlib_intel_snbep_unc.c, + src/libpfm4/lib/pfmlib_intel_x86.c, + src/libpfm4/lib/pfmlib_intel_x86_priv.h, + src/libpfm4/lib/pfmlib_priv.h, src/libpfm4/tests/validate_x86.c: + Update libpfm4\n\nCurrent with\n commit + efd16920194999fdf1146e9dab3f7435608a9479 add support for Intel + Skylake X This patch adds support for Intel Skylake X core PMU + events. Based on + download.01.org/perfmon/SKX/skylakex_core_v25.json. New PMU is + called skx. 
+ +2017-08-07 Vince Weaver + + * src/papi_events.csv: papi_events: add initial AMD fam17h support + not tested on actual hardware yet + * src/papi_events.csv: papi_events: fix the amd_fam16h PMU name The + way libpfm4 reports fam16h was modified a bit from my initial + patches. fam16h seems to be working now. + +Thu Jul 27 23:30:20 2017 -0700 Stephane Eranian + + * src/libpfm4/README, src/libpfm4/docs/Makefile, + src/libpfm4/docs/man3/libpfm_amd64_fam16h.3, + src/libpfm4/docs/man3/libpfm_amd64_fam17h.3, + src/libpfm4/docs/man3/libpfm_intel_bdx_unc_cbo.3, + src/libpfm4/docs/man3/libpfm_intel_bdx_unc_ha.3, + src/libpfm4/docs/man3/libpfm_intel_bdx_unc_imc.3, + src/libpfm4/docs/man3/libpfm_intel_bdx_unc_irp.3, + src/libpfm4/docs/man3/libpfm_intel_bdx_unc_pcu.3, + src/libpfm4/docs/man3/libpfm_intel_bdx_unc_qpi.3, + .../docs/man3/libpfm_intel_bdx_unc_r2pcie.3, + src/libpfm4/docs/man3/libpfm_intel_bdx_unc_r3qpi.3, + src/libpfm4/docs/man3/libpfm_intel_bdx_unc_sbo.3, + src/libpfm4/docs/man3/libpfm_intel_bdx_unc_ubo.3, + src/libpfm4/examples/showevtinfo.c, + src/libpfm4/include/perfmon/pfmlib.h, src/libpfm4/lib/Makefile, + src/libpfm4/lib/events/amd64_events_fam16h.h, + src/libpfm4/lib/events/amd64_events_fam17h.h, + src/libpfm4/lib/events/intel_bdx_unc_cbo_events.h, + src/libpfm4/lib/events/intel_bdx_unc_ha_events.h, + src/libpfm4/lib/events/intel_bdx_unc_imc_events.h, + src/libpfm4/lib/events/intel_bdx_unc_irp_events.h, + src/libpfm4/lib/events/intel_bdx_unc_pcu_events.h, + src/libpfm4/lib/events/intel_bdx_unc_qpi_events.h, + .../lib/events/intel_bdx_unc_r2pcie_events.h, + .../lib/events/intel_bdx_unc_r3qpi_events.h, + src/libpfm4/lib/events/intel_bdx_unc_sbo_events.h, + src/libpfm4/lib/events/intel_bdx_unc_ubo_events.h, + src/libpfm4/lib/pfmlib_amd64.c, + src/libpfm4/lib/pfmlib_amd64_fam16h.c, + src/libpfm4/lib/pfmlib_amd64_fam17h.c, + src/libpfm4/lib/pfmlib_amd64_priv.h, + src/libpfm4/lib/pfmlib_common.c, + src/libpfm4/lib/pfmlib_intel_bdx_unc_cbo.c, + 
src/libpfm4/lib/pfmlib_intel_bdx_unc_ha.c, + src/libpfm4/lib/pfmlib_intel_bdx_unc_imc.c, + src/libpfm4/lib/pfmlib_intel_bdx_unc_irp.c, + src/libpfm4/lib/pfmlib_intel_bdx_unc_pcu.c, + src/libpfm4/lib/pfmlib_intel_bdx_unc_qpi.c, + src/libpfm4/lib/pfmlib_intel_bdx_unc_r2pcie.c, + src/libpfm4/lib/pfmlib_intel_bdx_unc_r3qpi.c, + src/libpfm4/lib/pfmlib_intel_bdx_unc_sbo.c, + src/libpfm4/lib/pfmlib_intel_bdx_unc_ubo.c, + src/libpfm4/lib/pfmlib_intel_snbep_unc.c, + src/libpfm4/lib/pfmlib_intel_snbep_unc_priv.h, + src/libpfm4/lib/pfmlib_priv.h, + src/libpfm4/perf_examples/self_count.c, + src/libpfm4/tests/validate_x86.c: Update libpfm4 Current with + commit 72474c59d88512e49d9be7c4baa4355e8d8ad10a fix typo in AMD + Fam17h man page PMU name was mistyped. + +2017-08-04 Vince Weaver + + * src/validation_tests/papi_l1_dcm.c, + src/validation_tests/papi_l2_dcm.c: validation_tests: for the DCM + tests up the allowed error to 5% We don't want to fail too easily, + and 5% seems reasonable. This lets the test pass on ARM64 + Dragonboard 401c + * src/linux-memory.c: linux-memory: add fallback generic Linux /sys + cache size detection This will allow getting cache sizes on + architectures we don't have custom code for. Currently this mostly + means ARM64. + * src/validation_tests/papi_l1_dcm.c, + src/validation_tests/papi_l2_dcm.c: validation_tests: don't crash + if cachesize reported as zero + * src/validation_tests/branches_testcode.c: branches_testcode: add + arm64 support + +2017-07-27 Vince Weaver + + * src/papi_events.csv, src/validation_tests/papi_l2_dca.c: + validation_tests: trying to find out why PAPI_L2_DCA fails on + Haswell it's a mystery still. One alternative is to switch the + event to be the same as PAPI_L1_DCM but that seems like it would be + cheating.
+ * src/validation_tests/papi_l2_dcw.c: validation_tests: papi_l2_dcw: + shorten a warning message + * src/papi_events.csv: papi_events: note that libpfm4 Kaby Lake + support is treated as part of Skylake + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_l2_dcw.c: validation_tests: add + PAPI_L2_DCW test + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_l2_dcr.c: validation_tests: add + PAPI_L2_DCR test + * src/validation_tests/papi_l2_dcm.c: validation_tests: PAPI_L2_DCM + figured out a test that made sense + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_l1_dcm.c: validation_tests: add + PAPI_L1_DCM test + * src/validation_tests/Makefile.recipies, + src/validation_tests/cache_testcode.c, + src/validation_tests/papi_l2_dcm.c, + src/validation_tests/testcode.h: validation_tests: first attempt at + papi_l2_dcm test disabled for now, as it's really hard to make a + workable cache miss test on modern hardware. + +2017-07-26 Vince Weaver + + * src/ctests/Makefile, src/ctests/Makefile.recipies, + src/ctests/child_overflow.c, src/ctests/exec_overflow.c, + src/validation_tests/Makefile.recipies, + src/validation_tests/busy_work.c, src/validation_tests/testcode.h: + ctests: clean up the exec/child overflow tests The exec_overflow + test segfaults when using rdpmc This is a bug in Linux. I'm + working on getting it fixed. + +2017-07-21 Vince Weaver + + * src/validation_tests/Makefile.recipies, + src/validation_tests/cache_helper.c, + src/validation_tests/cache_helper.h, + src/validation_tests/cache_testcode.c, + src/validation_tests/papi_l1_dca.c, + src/validation_tests/papi_l2_dca.c, + src/validation_tests/testcode.h: validation_tests: add PAPI_L2_DCA + test also adds some generic cache testing infrastructure + * src/validation_tests/papi_l1_dca.c: validation_tests: PAPI_L1_DCA + fixes had to find a machine that actually supported the event. On + AMD Fam15h the write count is 3x expected? 
Need to investigate + further. + * src/validation_tests/papi_br_prc.c: validation_tests: papi_br_prc, + properly skip if event not found + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_l1_dca.c: validation_tests: add + PAPI_L1_DCA test + +2017-07-20 Vince Weaver + + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_br_msp.c, + src/validation_tests/papi_br_prc.c: validation_tests: add + PAPI_BR_PRC test + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_br_tkn.c: validation_tests: add + PAPI_BR_TKN test + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_br_ntk.c: validation_tests: add + PAPI_BR_NTK test + +2017-07-07 Vince Weaver + + * src/papi_events.csv: papi_events: move haswell, skylake, and + broadwell to traditional PAPI_REF_CYC there's a slight chance this + might break things for people, if so we can revert it. + * src/linux-timer.c: linux-timer: fix build warning on non-power + build + * src/ctests/flops.c, src/validation_tests/flops_testcode.c, + src/validation_tests/papi_dp_ops.c, + src/validation_tests/papi_fp_ops.c, + src/validation_tests/papi_sp_ops.c: validation: make the flops + tests handle that POWER has fused multiply-add PAPI_DP_OPS and + PAPI_SP_OPS still fail, need to audit what the event is doing + * src/papi_events.csv: POWER8: add a few branch preset events they + pass the validation tests, not sure why they weren't enabled + originally + * src/validation_tests/branches_testcode.c: validation: add POWER + branches testcode not sure I got the clobbers right + * src/components/perf_event/perf_helpers.h, + src/validation_tests/papi_tot_ins.c: POWER: fix some compiler + warnings + +2016-10-18 Phil Mucci + + * src/linux-timer.c: Ensure stdint gets included for all Linuxen. + * src/linux-timer.c: Some Linuxen need stdint to get the uint64_t + type. + +2016-10-14 Phil Mucci + + * src/linux-lock.h: Restructured unlock code to avoid warnings. 
+ Tested against 80 threads on Power8 + +2016-10-12 Phil Mucci + + * src/linux-timer.c: PPC64/PPC fast timer fixup. + +2017-07-07 Vince Weaver + + * src/linux-timer.c: linux-timer: allow using fast timer for + get_real_cycles() on POWER + +2016-07-12 Phil Mucci + + * src/linux-timer.c, src/linux-timer.h: First pass at good rdtsc for + Power7/8 + +2017-07-03 Vince Weaver + + * src/ctests/flops.c, src/ctests/hl_rates.c, + src/validation_tests/Makefile.recipies, + src/validation_tests/flops.c, + src/validation_tests/flops_testcode.c, + src/validation_tests/flops_validation.c, + src/validation_tests/papi_dp_ops.c, + src/validation_tests/papi_fp_ops.c, + src/validation_tests/papi_sp_ops.c, + src/validation_tests/testcode.h: validation_tests: add tests for + PAPI_SP_OPS and PAPI_DP_OPS extend the flops_testcode as well, to + have both float and double versions. + * src/validation_tests/papi_ref_cyc.c: validation_tests: + papi_ref_cyc: update test to work on older systems it's actually + the newer (haswell/broadwell/skylake) that are using a different + event than the older systems. Make the test check for the old + behavior. + +2017-07-02 Vince Weaver + + * src/ctests/Makefile.recipies, src/ctests/cycle_ratio.c, + src/validation_tests/Makefile.recipies, + src/validation_tests/flops_testcode.c, + src/validation_tests/papi_ref_cyc.c, + src/validation_tests/testcode.h: validation_tests: move cycle_ratio + test to be papi_ref_cyc test + * src/ctests/cycle_ratio.c: ctests: rewrite cycle_ratio test on + Intel platforms PAPI_REF_CYC is a fixed 100MHz cycle count the + test was making the assumption that PAPI_REF_CYC was equal to the + max design freq (not turboboost) and thus as far as I can tell it + never would return the right answer. This test should probably be + moved to validation_tests. 
+ +2017-07-01 Vince Weaver + + * src/ctests/Makefile.recipies, src/ctests/branches.c, src/ctests + /sdsc-mpx.c, src/ctests/sdsc2.c: ctests: migrate all other users of + dummy3() workload + * src/ctests/Makefile.recipies, src/ctests/sdsc4-mpx.c, + src/validation_tests/flops_testcode.c, + src/validation_tests/testcode.h: ctests: move the "dummy3" workload + to the common workload library + * src/ctests/sdsc4-mpx.c: ctests: sdsc4-mpx: fix failing on recent + Intel machines the multiplexing of an event with small results + (PAPI_SR_INS in this case) has high variance, so don't use it for + validation. There was code trying to do this but it wasn't + working. + +2017-06-30 Vince Weaver + + * src/ctests/first.c, src/ctests/matrix-hl.c, src/ctests/zero_omp.c, + src/ctests/zero_pthreads.c: ctests: catch lack of CPU component + earlier gets rid of extraneous SKIPPED in the output of + run_tests.sh + * src/components/cuda/tests/HelloWorld.cu, + src/components/cuda/tests/Makefile: tests:cuda: make the HelloWorld + test more like a standard PAPI test + * src/validation_tests/Makefile.recipies: validation_tests: fix + linking against a CUDA enabled PAPI Fix suggested by Steve + Kaufmann + * src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: make it + so it can compile with c++ this lets us link against it from the + CUDA tests + * src/components/cuda/sampling/gpu_activity.c: tests: cuda: fix + sampling/gpu_activity to compile without warnings + * src/Makefile.inc: tests: make the component tests build command be + the same as ctests/ftests + * src/ctests/calibrate.c: ctests: calibrate: turn off printf if + TEST_QUIET missed this one when testing because test machine + skipped it due to lack of floating point events + +2017-06-29 Vince Weaver + + * .../tests/perf_event_amd_northbridge.c, + src/ctests/Makefile.recipies, src/ctests/cycle_ratio.c, + src/ctests/derived.c, src/ctests/multiplex1_pthreads.c, + src/ctests/multiplex3_pthreads.c, src/ctests/overflow.c, +
src/ctests/overflow_allcounters.c, src/ctests/overflow_index.c, + src/ctests/overflow_pthreads.c, src/ctests/overflow_twoevents.c, + src/ctests/prof_utils.c, src/ctests/prof_utils.h, + src/ctests/profile.c, src/ctests/profile_twoevents.c, + src/ctests/realtime.c, src/ctests/reset.c, + src/ctests/reset_multiplex.c, src/ctests/sdsc-mpx.c, + src/ctests/sdsc.c, src/ctests/sdsc4-mpx.c, src/ctests/sdsc4.c, + src/ctests/shlib.c, src/ctests/tenth.c, src/ctests/thrspecific.c, + src/testlib/papi_test.h: testlib: remove the hack where all + printf's are #defined to something else Explicitly check + everywhere for TESTS_QUIET or equivalent, rather than using c-pre- + processor macros to redefine printf + * src/papi.c, src/testlib/test_utils.c: tests: set the ctest debug + mode to VERBOSE by default for tests the TESTS_QUIET mode was + turning *off* verbose debugging, which meant that PAPIERROR() calls + wouldn't show up during a ./run_tests.sh + * src/components/perf_event/perf_event.c: perf_event: properly + initialize the mmap_addr structure It wasn't always being set to + NULL, and so on some tests the code would try to munmap() it even + though it wasn't mapped. + * src/testlib/test_utils.c: tests: enable color in test status + messages this has been an optional feature for a long time, if you + enabled the environment variable TESTS_COLOR=y this change makes + it default to being on (you can disable with export TESTS_COLOR=n) + also it should automatically detect if you are piping to a file and + disable colors in that case too + * src/validation_tests/Makefile, + src/validation_tests/Makefile.recipies: validation_tests: always + include -lrt on the tests Should be harmless, and I don't always + test on an old enough machine to trigger the problem.
+ * src/ctests/forkexec.c, src/ctests/forkexec2.c, + src/ctests/forkexec3.c, src/ctests/forkexec4.c, + src/ctests/multiplex3_pthreads.c, + src/ctests/system_child_overflow.c: ctests: make the fork/exec + tests only print "PASSED" once this makes the run_test.sh input + look a lot nicer + * src/run_tests.sh, src/testlib/test_utils.c: tests: make the output + from run_tests.sh more compact + +2017-06-28 Vince Weaver + + * .../perf_event/tests/perf_event_system_wide.c: perf_event: tests, + make perf_event_system_wide use INS rather than CYC cycles varied + too much, making the validation fail + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_br_cn.c, + src/validation_tests/papi_br_ucn.c: validation_tests: add tests for + PAPI_BR_CN and PAPI_BR_UCN + * src/validation_tests/flops.c: validation_tests: flops: wasn't + falling back properly if no FLOPS event + * src/utils/Makefile, src/validation_tests/Makefile.recipies: tests: + clean up the Makefiles + * src/utils/print_header.c: utils: print_header: print the operating + system version in the header + * .../tests/perf_event_amd_northbridge.c: perf_event_uncore: the + perf_event_amd_northbridge test wasn't working it maybe never + worked at all? It was hardcoded to thinking it was running on a + 3.9 kernel always. + * src/ctests/Makefile, src/ctests/Makefile.recipies, + src/ctests/zero.c: ctests: zero: complete transition from FLOPS to + INS as metric this will make it more likely to be runnable on + modern machines. + * src/ctests/vector.c, src/validation_tests/vector_testcode.c: + validation_tests: move the unused vector.c code maybe we should + remove it. It was never built as far as I can tell. + * src/validation_tests/Makefile.recipies, + src/validation_tests/flops.c: validation_tests: add a generic flops + test based on hl_rates we do a lot of testing of the high-level + interface but not as much of the regular PAPI interface. 
+ * src/ctests/Makefile.recipies, src/ctests/hl_rates.c, + src/validation_tests/flops_testcode.c, + src/validation_tests/testcode.h: ctests: hl_rates: clean up and fix + extraneous error message the error message was due to the way + TESTS_QUIET is passed as a command line argument. also made it use + the same matrix-multiply code that the flops test uses. also added + some validation to the results. + * src/ctests/all_events.c: ctests: all_events: issue warning if + preset cannot be created specifically this came up on an AMD + fam15h system where the PAPI_L1_ICH event cannot be created due to + Linux stealing a counter for the NMI watchdog + * src/validation_tests/papi_hw_int.c: validation_tests: papi_hw_int + explicitly mark large constant as ULL compiler was warning on + 32-bit machine + * src/validation_tests/papi_ld_ins.c, + src/validation_tests/papi_sr_ins.c, + src/validation_tests/papi_tot_cyc.c: validation_tests: a few tests + had the !quiet check inverted + * src/validation_tests/papi_hw_int.c: validation_tests: fix + papi_hw_int looping forever somehow the loop exit line got lost + * src/validation_tests/Makefile.recipies, + src/validation_tests/matrix_multiply.c, + src/validation_tests/matrix_multiply.h, + src/validation_tests/papi_ld_ins.c, + src/validation_tests/papi_sr_ins.c: validation_tests: add + PAPI_SR_INS test + * src/validation_tests/Makefile.recipies, + src/validation_tests/matrix_multiply.c, + src/validation_tests/matrix_multiply.h, + src/validation_tests/papi_hw_int.c, + src/validation_tests/papi_ld_ins.c: validation_tests: add + PAPI_LD_INS test + * src/run_tests.sh, src/validation_tests/Makefile.recipies, + src/validation_tests/papi_hw_int.c: validation_tests: add + PAPI_HW_INT test + +2017-06-27 Vince Weaver + + * src/run_tests_exclude.txt: run_tests_exclude: add attach_target + not really a test so we shouldn't run it + * src/ctests/byte_profile.c, src/ctests/earprofile.c, + src/ctests/prof_utils.c, src/ctests/prof_utils.h: + 
ctests/prof_utils: remove prof_init() helper It didn't do much + more than a papi_init, probably better to have each file do that in + the open. + * src/ctests/inherit.c, src/ctests/ipc.c, src/ctests/johnmay2.c, + src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, src/ctests/low- + level.c, src/ctests/mendes-alt.c, src/ctests/multiplex1.c, + src/ctests/multiplex1_pthreads.c, src/ctests/multiplex2.c, + src/ctests/multiplex3_pthreads.c, src/ctests/overflow.c, + src/ctests/overflow2.c, src/ctests/overflow3_pthreads.c, + src/ctests/overflow_allcounters.c, src/ctests/overflow_index.c, + src/ctests/overflow_one_and_read.c, + src/ctests/overflow_single_event.c, + src/ctests/overflow_twoevents.c, src/ctests/prof_utils.c, + src/ctests/profile.c, src/ctests/profile_pthreads.c, + src/ctests/profile_twoevents.c, src/ctests/remove_events.c, + src/ctests/sprofile.c, src/ctests/zero.c, src/ctests/zero_flip.c, + src/ctests/zero_named.c, src/testlib/test_utils.c: ctests: skip + rather than fail if no events available + +2017-06-26 Vince Weaver + + * src/ctests/first.c, src/ctests/mpifirst.c, + src/ctests/multiattach.c, src/ctests/multiattach2.c, + src/testlib/test_utils.c: testlib: fix add_two_events() was not + setting some values, causing many tests to fail + * src/ctests/attach2.c, src/ctests/system_overflow.c: ctests: + compiler warning caught two lack-of-braces mistakes + * src/ctests/byte_profile.c, src/ctests/code2name.c, + src/ctests/describe.c, src/testlib/test_utils.c: tests: more + changes to skip instead of fail if no events available + * src/ctests/Makefile.recipies, src/ctests/child_overflow.c, + src/ctests/exec_overflow.c, src/ctests/fork_exec_overflow.c, + src/ctests/fork_overflow.c, src/ctests/system_child_overflow.c, + src/ctests/system_overflow.c: ctests: break up the + fork_exec_overflow test it was really four benchmarks with some + ifdefs the proper way to do that would be to have a common C file + and link against it for the shared routines, rather than using
the + pre-processor + * src/ctests/attach2.c, src/ctests/attach3.c, + src/ctests/attach_cpu.c: ctests: have attach tests cleanly skip if + no events available + * src/testlib/test_utils.c: testlib: update add_two_events to skip() + if no events found + * src/ctests/mendes-alt.c, src/ctests/multiplex2.c, + src/ctests/multiplex3_pthreads.c, src/ctests/sdsc.c, + src/ctests/sdsc2.c, src/ctests/sdsc4.c, src/testlib/papi_test.h, + src/testlib/test_utils.c: testutils: remove init_multiplex() test + helper the only benefit it had over calling PAPI_multiplex_init() + was a domain workaround for perfctr+power6 systems. Ideally not + many of those systems are around anymore, and in any case a proper + fix would have the perfctr component handle that, not the testing + library. + * .../perf_event/tests/perf_event_system_wide.c, + .../perf_event/tests/perf_event_user_kernel.c, src/ctests/api.c, + src/ctests/byte_profile.c, src/ctests/high-level.c, + src/ctests/hl_rates.c, src/validation_tests/papi_br_ins.c, + src/validation_tests/papi_br_msp.c, + src/validation_tests/papi_tot_cyc.c, + src/validation_tests/papi_tot_ins.c: tests: try to "skip" rather + than "fail" if no events available + * src/ctests/derived.c: ctests: derived: fix warning found on older + gcc + * src/ctests/high-level2.c: ctests: clean up high-level2 test skip + on machine without flops/flips event + * src/components/Makefile_comp_tests.target.in: components test: fix + another build issue be sure to use local copy of papi.h + * src/components/Makefile_comp_tests.target.in: component tests: fix + build issue was trying to use the system version of libpapi.a + instead of local version + * src/components/appio/tests/Makefile, + src/components/appio/tests/appio_list_events.c, + src/components/appio/tests/appio_values_by_code.c, + src/components/coretemp/tests/Makefile, + src/components/example/tests/Makefile, + src/components/host_micpower/tests/Makefile, + src/components/infiniband/tests/Makefile, +
.../infiniband/tests/infiniband_values_by_code.c, + src/components/infiniband_umad/tests/Makefile, + .../tests/infiniband_umad_values_by_code.c, + src/components/lustre/tests/Makefile, + src/components/micpower/tests/Makefile, + src/components/mx/tests/Makefile, + src/components/net/tests/Makefile, + src/components/perf_event/tests/Makefile, + src/components/perf_event_uncore/tests/Makefile, + src/components/powercap/tests/Makefile, + src/components/rapl/tests/Makefile, + src/components/stealtime/tests/Makefile: components: update + component test Makefiles to include Makefile_comp_test.target + * src/components/Makefile_comp_tests.target.in: components: update + Makefile_comp_test.target.in should now be usable by the + components without many Makefile changes + * src/components/perf_event/tests/Makefile, + src/components/perf_event/tests/nmi_watchdog.c, + src/ctests/Makefile.recipies, src/ctests/nmi_watchdog.c: ctests: + nmi_watchdog is a perf_event specific test, move it there + * src/components/Makefile_comp_tests.target.in, + src/components/README, src/components/perf_event/tests/Makefile: + components: update the autoconfigure to generate more useful + Makefile.target.in although I don't think most components are + using it at all + +2017-06-26 Asim YarKhan + + * src/components/cuda/Makefile.cuda.in, src/components/cuda/README, + src/components/cuda/Rules.cuda, src/components/cuda/configure, + src/components/cuda/configure.in, src/components/cuda/linux-cuda.c, + src/components/cuda/sampling/Makefile, + src/components/cuda/tests/HelloWorld.cu, + src/components/cuda/tests/Makefile, + src/components/cuda/tests/simpleMultiGPU.cu: CUDA component update: + Support for CUPTI metrics (early release) This commit adds support + for CUPTI metrics, which are higher level measures that may be + decomposed into multiple lower level CUPTI events.
Known problems + and limitations in early release of metric support * Only sets of + metrics and events that can be gathered in a single pass are + supported. Transparent multi-pass support is expected * All + metrics are returned as long long integers, which means that CUPTI + double precision values will be truncated, possibly severely. * The + NVLink metrics have been disabled for this alpha release. + +2017-06-23 Vince Weaver + + * src/validation_tests/papi_fp_ops.c: validation: papi_fp_ops, skip + (not fail) if PAPI_FP_OPS unavailable + * src/ctests/Makefile, src/ctests/Makefile.recipies, + src/ctests/Makefile.target.in, src/ctests/flops.c: ctests: flops, + update to use some of the validate_tests infrastructure + * src/validation_tests/Makefile.recipies, + src/validation_tests/flops_testcode.c, + src/validation_tests/papi_fp_ops.c, + src/validation_tests/testcode.h: validation_tests: add papi_fp_ops + test tested on an AMD fam15h machine + * src/components/powercap/tests/powercap_basic.c: powercap: fix + compiler warnings in the powercap_basic test + * src/ctests/flops.c: ctests: update flops test + * src/ctests/api.c: ctests: update api test only seems to test the + high-level API + * src/ctests/all_native_events.c: ctests: update all_native_events + removed some ancient warnings about uncore/offcore events. 
Should + not be a problem on libpfm4/perf_event + * src/ctests/all_events.c: ctests: clean up all_events test + * src/components/appio/tests/appio_list_events.c, + src/components/appio/tests/appio_test_blocking.c, + .../appio/tests/appio_test_fread_fwrite.c, + src/components/appio/tests/appio_test_pthreads.c, + src/components/appio/tests/appio_test_read_write.c, + src/components/appio/tests/appio_test_recv.c, + src/components/appio/tests/appio_test_seek.c, + src/components/appio/tests/appio_test_select.c, + src/components/appio/tests/appio_test_socket.c, + src/components/appio/tests/appio_values_by_code.c, + src/components/appio/tests/appio_values_by_name.c, + src/components/coretemp/tests/coretemp_basic.c, + src/components/coretemp/tests/coretemp_pretty.c, + src/components/example/tests/example_basic.c, + .../example/tests/example_multiple_components.c, + .../host_micpower/tests/host_micpower_basic.c, + .../infiniband/tests/infiniband_list_events.c, + .../infiniband/tests/infiniband_values_by_code.c, + .../tests/infiniband_umad_list_events.c, + src/components/libmsr/tests/libmsr_basic.c, + src/components/lustre/tests/lustre_basic.c, + src/components/micpower/tests/micpower_basic.c, + src/components/mx/tests/mx_basic.c, + src/components/mx/tests/mx_elapsed.c, + src/components/net/tests/net_list_events.c, + src/components/net/tests/net_values_by_code.c, + src/components/net/tests/net_values_by_name.c, + .../perf_event/tests/perf_event_offcore_response.c, + .../perf_event/tests/perf_event_system_wide.c, + .../perf_event/tests/perf_event_user_kernel.c, + .../tests/perf_event_amd_northbridge.c, + .../perf_event_uncore/tests/perf_event_uncore.c, + .../tests/perf_event_uncore_cbox.c, + .../tests/perf_event_uncore_multiple.c, + src/components/powercap/tests/powercap_basic.c, + src/components/rapl/tests/rapl_basic.c, + src/components/rapl/tests/rapl_overflow.c, + src/components/stealtime/tests/stealtime_basic.c, + src/components/vmware/tests/vmware_basic.c, + 
src/ctests/all_events.c, src/ctests/all_native_events.c, + src/ctests/api.c, src/ctests/attach2.c, src/ctests/attach3.c, + src/ctests/attach_cpu.c, src/ctests/branches.c, + src/ctests/byte_profile.c, src/ctests/calibrate.c, + src/ctests/case1.c, src/ctests/case2.c, + src/ctests/clockres_pthreads.c, src/ctests/cmpinfo.c, + src/ctests/code2name.c, src/ctests/cycle_ratio.c, + src/ctests/data_range.c, src/ctests/derived.c, + src/ctests/describe.c, src/ctests/disable_component.c, + src/ctests/dmem_info.c, src/ctests/earprofile.c, + src/ctests/eventname.c, src/ctests/exec.c, src/ctests/exec2.c, + src/ctests/exeinfo.c, src/ctests/first.c, src/ctests/flops.c, + src/ctests/fork.c, src/ctests/fork2.c, + src/ctests/fork_exec_overflow.c, src/ctests/forkexec.c, + src/ctests/forkexec2.c, src/ctests/forkexec3.c, + src/ctests/forkexec4.c, src/ctests/get_event_component.c, + src/ctests/high-level.c, src/ctests/high-level2.c, + src/ctests/hl_rates.c, src/ctests/hwinfo.c, src/ctests/inherit.c, + src/ctests/ipc.c, src/ctests/johnmay2.c, + src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, + src/ctests/locks_pthreads.c, src/ctests/low-level.c, src/ctests + /matrix-hl.c, src/ctests/max_multiplex.c, src/ctests/memory.c, + src/ctests/mendes-alt.c, src/ctests/multiattach.c, + src/ctests/multiattach2.c, src/ctests/multiplex1.c, + src/ctests/multiplex1_pthreads.c, src/ctests/multiplex2.c, + src/ctests/multiplex3_pthreads.c, src/ctests/nmi_watchdog.c, + src/ctests/omptough.c, src/ctests/overflow.c, + src/ctests/overflow2.c, src/ctests/overflow3_pthreads.c, + src/ctests/overflow_allcounters.c, + src/ctests/overflow_force_software.c, src/ctests/overflow_index.c, + src/ctests/overflow_one_and_read.c, src/ctests/overflow_pthreads.c, + src/ctests/overflow_single_event.c, + src/ctests/overflow_twoevents.c, src/ctests/p4_lst_ins.c, + src/ctests/profile.c, src/ctests/profile_pthreads.c, + src/ctests/profile_twoevents.c, src/ctests/pthrtough.c, + src/ctests/pthrtough2.c, src/ctests/realtime.c, + 
src/ctests/remove_events.c, src/ctests/reset.c, + src/ctests/reset_multiplex.c, src/ctests/sdsc.c, + src/ctests/sdsc2.c, src/ctests/sdsc4.c, src/ctests/second.c, + src/ctests/shlib.c, src/ctests/sprofile.c, src/ctests/tenth.c, + src/ctests/thrspecific.c, src/ctests/timer_overflow.c, + src/ctests/virttime.c, src/ctests/zero.c, src/ctests/zero_attach.c, + src/ctests/zero_flip.c, src/ctests/zero_fork.c, + src/ctests/zero_named.c, src/ctests/zero_omp.c, + src/ctests/zero_pthreads.c, src/ctests/zero_smp.c, + src/testlib/papi_test.h, src/testlib/test_utils.c, + src/validation_tests/papi_br_ins.c, + src/validation_tests/papi_br_msp.c, + src/validation_tests/papi_tot_cyc.c, + src/validation_tests/papi_tot_ins.c: testlib: remove the "free + variables" option from test_pass() It was only used by a small + handful of tests, and wasn't really strictly necessary anyway. + test_pass() should pass the test and that's all. + * src/ctests/zero.c: ctests: zero: start cleaning up this test + * src/validation_tests/Makefile.recipies: validation_tests: + clock_gettime() requires -lrt on older versions of glibc + +2017-06-22 Will Schmidt + + * src/linux-memory.c, src/papi_events.csv: PAPI power9 event list + presets Here is an initial set of events and changes to help + support Power9. This is based on similar changes that were made + for power8 when initial support was added there. I've updated the + event names to match what we expect to have in power9, and have + done compile/build/ sniff tests.
+ +2017-06-22 Vince Weaver + + * src/ftests/Makefile.target.in: ftests: fortran tests weren't + getting the TOPTFLAGS var set + * src/testlib/test_utils.c: testlib: fix colors not turning off in + pass/fail indicator + * src/ctests/api.c, src/ctests/attach2.c, src/ctests/attach3.c, + src/ctests/attach_cpu.c, src/ctests/inherit.c, + src/ctests/multiattach.c, src/ctests/multiattach2.c, + src/ctests/zero_attach.c, src/testlib/papi_test.h, + src/testlib/test_utils.c: testlib: update the way pass/fail is + printed It's been bugging me for years that they don't line up + * src/run_tests.sh: run_tests.sh: run the validation tests too + * src/Makefile.inc: Makefile.inc: make it compile the + validation_tests + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_br_msp.c: validation-tests: add + papi_br_msp test + * src/validation_tests/Makefile.recipies, + src/validation_tests/branches_testcode.c, + src/validation_tests/matrix_multiply.c, + src/validation_tests/matrix_multiply.h, + src/validation_tests/papi_br_ins.c, + src/validation_tests/testcode.h: validation_tests: add papi_br_ins + test + * src/validation_tests/Makefile.recipies, + src/validation_tests/papi_tot_cyc.c: validation_tests: add + papi_tot_cyc test + * src/Makefile.inc: fix "make install-all" had some extraneous ".." + after some previous changes + * src/configure, src/configure.in, + src/validation_tests/Makefile.target.in, + src/validation_tests/papi_tot_ins.c: validation_tests: update + configure so it sets up the Makefile + * src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: + papi_print_header() lives with the utils code now + * src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: make + tests_quiet() return an integer This way we don't have to depend + on the global var TESTS_QUIET if we don't want to. 
+ * src/validation_tests/Makefile, + src/validation_tests/Makefile.recipies, + src/validation_tests/Makefile.target.in, + src/validation_tests/display_error.c, + src/validation_tests/display_error.h, + src/validation_tests/instructions_testcode.c, + src/validation_tests/papi_tot_ins.c, + src/validation_tests/testcode.h: validation_tests: add initial + papi_tot_ins test it is not hooked up to the build system yet + * src/ctests/multiplex1.c, src/ctests/multiplex2.c, + src/ctests/second.c, src/ctests/sprofile.c, src/ctests/virttime.c, + src/ctests/zero_attach.c, src/ctests/zero_flip.c, + src/ctests/zero_fork.c, src/ctests/zero_omp.c, + src/ctests/zero_pthreads.c: ctests: more printf/TESTS_QUIET + conversions + * src/testlib/fpapi_test.h: ftests: missing define was making + second.F fail + * src/ctests/johnmay2.c, src/ctests/krentel_pthreads.c, + src/ctests/kufrin.c, src/ctests/locks_pthreads.c, + src/ctests/memory.c, src/ctests/multiattach.c, + src/ctests/multiattach2.c, src/ctests/multiplex1.c: ctests: more + printf/TESTS_QUIET fixes + +2017-06-21 Vince Weaver + + * src/ctests/all_events.c, src/ctests/all_native_events.c, + src/ctests/attach2.c, src/ctests/attach3.c, + src/ctests/attach_cpu.c, src/ctests/byte_profile.c, + src/ctests/calibrate.c, src/ctests/cmpinfo.c, + src/ctests/code2name.c, src/ctests/cycle_ratio.c, + src/ctests/exeinfo.c, src/ctests/fork_exec_overflow.c, + src/ctests/hl_rates.c, src/ctests/hwinfo.c: ctests: explicitly + block printfs with TESTS_QUIET There was some hackery with the + preprocessor to avoid this but that wasn't a good solution. 
+ * src/testlib/do_loops.h, src/testlib/papi_test.h, + src/testlib/test_utils.c: testlib: minor papi_test.h cleanups + * .../perf_event/tests/perf_event_offcore_response.c, + .../perf_event/tests/perf_event_system_wide.c, + .../perf_event/tests/perf_event_user_kernel.c, + .../tests/perf_event_amd_northbridge.c, + .../perf_event_uncore/tests/perf_event_uncore.c, + .../perf_event_uncore/tests/perf_event_uncore_cbox.c, + .../tests/perf_event_uncore_multiple.c, src/ctests/attach2.c, + src/ctests/attach3.c, src/ctests/attach_cpu.c, + src/ctests/attach_target.c, src/ctests/branches.c, + src/ctests/burn.c, src/ctests/byte_profile.c, + src/ctests/cycle_ratio.c, src/ctests/derived.c, + src/ctests/dmem_info.c, src/ctests/earprofile.c, + src/ctests/first.c, src/ctests/high-level.c, src/ctests/inherit.c, + src/ctests/johnmay2.c, src/ctests/krentel_pthreads.c, + src/ctests/kufrin.c, src/ctests/locks_pthreads.c, src/ctests/low- + level.c, src/ctests/matrix-hl.c, src/ctests/memory.c, + src/ctests/multiattach.c, src/ctests/multiattach2.c, + src/ctests/multiplex1.c, src/ctests/multiplex1_pthreads.c, + src/ctests/multiplex2.c, src/ctests/multiplex3_pthreads.c, + src/ctests/overflow.c, src/ctests/overflow2.c, + src/ctests/overflow3_pthreads.c, src/ctests/overflow_allcounters.c, + src/ctests/overflow_force_software.c, src/ctests/overflow_index.c, + src/ctests/overflow_one_and_read.c, + src/ctests/overflow_single_event.c, + src/ctests/overflow_twoevents.c, src/ctests/p4_lst_ins.c, + src/ctests/prof_utils.c, src/ctests/profile.c, + src/ctests/profile_twoevents.c, src/ctests/remove_events.c, + src/ctests/reset.c, src/ctests/reset_multiplex.c, + src/ctests/sdsc.c, src/ctests/sdsc2.c, src/ctests/sdsc4.c, + src/ctests/second.c, src/ctests/sprofile.c, src/ctests/tenth.c, + src/ctests/zero.c, src/ctests/zero_attach.c, + src/ctests/zero_flip.c, src/ctests/zero_fork.c, + src/ctests/zero_named.c, src/ctests/zero_omp.c, + src/ctests/zero_pthreads.c, src/ctests/zero_shmem.c, + 
src/ctests/zero_smp.c, src/testlib/Makefile, + src/testlib/fpapi_test.h, src/testlib/papi_test.h, + src/testlib/test_utils.h: testlib: more papi_test.h reduction + * src/testlib/Makefile: testlib: turn off optimization on the + validation loops it's making tests fail, need to go back and be + sure we are properly tricking the compiler. + * src/Makefile.inc, src/components/Makefile_comp_tests, + src/components/perf_event/tests/Makefile, + src/components/perf_event_uncore/tests/Makefile, + src/components/rapl/tests/Makefile, + src/components/rapl/tests/rapl_overflow.c, src/ctests/Makefile, + src/ctests/Makefile.recipies, src/ctests/overflow_pthreads.c, + src/ctests/profile_pthreads.c, src/ftests/Makefile, + src/ftests/Makefile.recipies, src/ftests/Makefile.target.in, + src/testlib/Makefile, src/testlib/do_loops.c, + src/testlib/do_loops.h, src/testlib/papi_test.h: testlib: start + splitting the validation code off from the pass/fail code + * src/components/perf_event/tests/perf_event_offcore_response.c, + src/components/perf_event/tests/perf_event_system_wide.c, + src/components/perf_event/tests/perf_event_user_kernel.c, src/compo + nents/perf_event_uncore/tests/perf_event_amd_northbridge.c, + src/components/perf_event_uncore/tests/perf_event_uncore.c, + src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c, sr + c/components/perf_event_uncore/tests/perf_event_uncore_multiple.c, + src/components/rapl/tests/rapl_basic.c, + src/components/rapl/tests/rapl_overflow.c, + src/ctests/all_native_events.c, src/ctests/attach2.c, + src/ctests/attach3.c, src/ctests/attach_cpu.c, + src/ctests/attach_target.c, src/ctests/branches.c, + src/ctests/burn.c, src/ctests/byte_profile.c, + src/ctests/calibrate.c, src/ctests/case1.c, src/ctests/case2.c, + src/ctests/clockres_pthreads.c, src/ctests/cmpinfo.c, + src/ctests/code2name.c, src/ctests/cycle_ratio.c, + src/ctests/data_range.c, src/ctests/derived.c, + src/ctests/describe.c, src/ctests/disable_component.c, + 
src/ctests/dmem_info.c, src/ctests/earprofile.c, + src/ctests/eventname.c, src/ctests/exec.c, src/ctests/exec2.c, + src/ctests/exeinfo.c, src/ctests/first.c, src/ctests/flops.c, + src/ctests/fork.c, src/ctests/fork2.c, src/ctests/forkexec.c, + src/ctests/forkexec2.c, src/ctests/forkexec3.c, + src/ctests/forkexec4.c, src/ctests/get_event_component.c, + src/ctests/high-level.c, src/ctests/high-level2.c, + src/ctests/hl_rates.c, src/ctests/hwinfo.c, src/ctests/inherit.c, + src/ctests/ipc.c, src/ctests/johnmay2.c, + src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, + src/ctests/locks_pthreads.c, src/ctests/low-level.c, src/ctests + /matrix-hl.c, src/ctests/memory.c, src/ctests/mendes-alt.c, + src/ctests/multiattach.c, src/ctests/multiattach2.c, + src/ctests/multiplex1.c, src/ctests/multiplex1_pthreads.c, + src/ctests/multiplex2.c, src/ctests/multiplex3_pthreads.c, + src/ctests/nmi_watchdog.c, src/ctests/omptough.c, + src/ctests/overflow.c, src/ctests/overflow2.c, + src/ctests/overflow3_pthreads.c, src/ctests/overflow_allcounters.c, + src/ctests/overflow_force_software.c, src/ctests/overflow_index.c, + src/ctests/overflow_one_and_read.c, src/ctests/overflow_pthreads.c, + src/ctests/overflow_single_event.c, + src/ctests/overflow_twoevents.c, src/ctests/p4_lst_ins.c, + src/ctests/prof_utils.c, src/ctests/profile.c, + src/ctests/profile_pthreads.c, src/ctests/profile_twoevents.c, + src/ctests/pthrtough.c, src/ctests/pthrtough2.c, + src/ctests/realtime.c, src/ctests/remove_events.c, + src/ctests/reset.c, src/ctests/reset_multiplex.c, + src/ctests/sdsc.c, src/ctests/sdsc2.c, src/ctests/sdsc4.c, + src/ctests/second.c, src/ctests/shlib.c, src/ctests/sprofile.c, + src/ctests/tenth.c, src/ctests/thrspecific.c, + src/ctests/timer_overflow.c, src/ctests/virttime.c, + src/ctests/zero.c, src/ctests/zero_attach.c, + src/ctests/zero_flip.c, src/ctests/zero_fork.c, + src/ctests/zero_named.c, src/ctests/zero_omp.c, + src/ctests/zero_pthreads.c, src/ctests/zero_shmem.c, + 
src/ctests/zero_smp.c, src/testlib/do_loops.c, + src/testlib/papi_test.h, src/testlib/test_utils.c: testlib: remove + include of papi.h Need to explicitly include it in your test if + you need it. + * src/testlib/Makefile, src/testlib/do_loops.c, + src/testlib/do_loops.h, src/testlib/dummy.c, src/utils/Makefile, + src/utils/papi_command_line.c, src/utils/papi_cost.c: utils: remove + last uses of testlib + * src/utils/Makefile, src/utils/papi_hybrid_native_avail.c: utils: + update papi_hybrid_native_avail to not depend on testlib + * src/utils/papi_multiplex_cost.c: utils: clean up + papi_multiplex_cost remove dependeicnes on papi_test.h print + message warning that it can take a long time to run + * .../perf_event/tests/perf_event_offcore_response.c, + .../perf_event/tests/perf_event_system_wide.c, + .../perf_event/tests/perf_event_user_kernel.c, + .../perf_event_uncore/perf_event_uncore.c, + .../tests/perf_event_amd_northbridge.c, + .../perf_event_uncore/tests/perf_event_uncore.c, + .../tests/perf_event_uncore_cbox.c, + .../tests/perf_event_uncore_multiple.c, + src/components/rapl/tests/rapl_basic.c, + src/components/rapl/tests/rapl_overflow.c, + src/ctests/all_native_events.c, src/ctests/attach2.c, + src/ctests/attach3.c, src/ctests/branches.c, + src/ctests/byte_profile.c, src/ctests/calibrate.c, + src/ctests/data_range.c, src/ctests/describe.c, + src/ctests/disable_component.c, src/ctests/earprofile.c, + src/ctests/exec.c, src/ctests/exec2.c, src/ctests/exeinfo.c, + src/ctests/first.c, src/ctests/forkexec.c, src/ctests/forkexec2.c, + src/ctests/forkexec3.c, src/ctests/forkexec4.c, + src/ctests/get_event_component.c, src/ctests/inherit.c, + src/ctests/krentel_pthreads.c, src/ctests/kufrin.c, src/ctests + /matrix-hl.c, src/ctests/multiplex1.c, + src/ctests/multiplex1_pthreads.c, src/ctests/multiplex2.c, + src/ctests/nmi_watchdog.c, src/ctests/overflow_allcounters.c, + src/ctests/overflow_force_software.c, + src/ctests/overflow_pthreads.c, 
src/ctests/overflow_single_event.c, + src/ctests/overflow_twoevents.c, src/ctests/prof_utils.c, + src/ctests/profile_pthreads.c, src/ctests/remove_events.c, + src/ctests/reset.c, src/ctests/reset_multiplex.c, + src/ctests/sdsc.c, src/ctests/sdsc2.c, src/ctests/sdsc4.c, + src/ctests/second.c, src/ctests/shlib.c, + src/ctests/timer_overflow.c, src/ctests/zero_named.c, + src/testlib/do_loops.c, src/testlib/papi_test.h, + src/testlib/test_utils.c, src/utils/Makefile, + src/utils/cost_utils.c, src/utils/papi_command_line.c, + src/utils/papi_cost.c, src/utils/papi_event_chooser.c: testlib: + more header removal from papi_test.h + * src/components/perf_event/tests/perf_event_system_wide.c, + src/ctests/attach2.c, src/ctests/attach3.c, + src/ctests/multiattach.c, src/ctests/multiattach2.c, + src/ctests/zero_attach.c, src/testlib/papi_test.h, + src/utils/cost_utils.c: testlib: remove a few more includes from + papi_test.h + * src/components/rapl/tests/rapl_basic.c, src/ctests/all_events.c, + src/ctests/all_native_events.c, src/ctests/api.c, + src/ctests/attach2.c, src/ctests/attach3.c, + src/ctests/attach_cpu.c, src/ctests/attach_target.c, + src/ctests/branches.c, src/ctests/burn.c, src/ctests/calibrate.c, + src/ctests/case1.c, src/ctests/case2.c, + src/ctests/clockres_pthreads.c, src/ctests/code2name.c, + src/ctests/cycle_ratio.c, src/ctests/data_range.c, + src/ctests/derived.c, src/ctests/describe.c, + src/ctests/dmem_info.c, src/ctests/earprofile.c, + src/ctests/eventname.c, src/ctests/exec.c, src/ctests/exec2.c, + src/ctests/exeinfo.c, src/ctests/flops.c, src/ctests/fork.c, + src/ctests/fork2.c, src/ctests/forkexec.c, src/ctests/forkexec2.c, + src/ctests/forkexec3.c, src/ctests/forkexec4.c, src/ctests/high- + level.c, src/ctests/high-level2.c, src/ctests/hl_rates.c, + src/ctests/hwinfo.c, src/ctests/inherit.c, src/ctests/ipc.c, + src/ctests/johnmay2.c, src/ctests/kufrin.c, + src/ctests/locks_pthreads.c, src/ctests/low-level.c, + src/ctests/max_multiplex.c, 
src/ctests/memory.c, + src/ctests/multiattach.c, src/ctests/multiattach2.c, + src/ctests/multiplex1.c, src/ctests/multiplex1_pthreads.c, + src/ctests/multiplex2.c, src/ctests/multiplex3_pthreads.c, + src/ctests/overflow.c, src/ctests/overflow2.c, + src/ctests/overflow3_pthreads.c, src/ctests/overflow_allcounters.c, + src/ctests/overflow_force_software.c, src/ctests/overflow_index.c, + src/ctests/overflow_one_and_read.c, src/ctests/overflow_pthreads.c, + src/ctests/overflow_single_event.c, + src/ctests/overflow_twoevents.c, src/ctests/p4_lst_ins.c, + src/ctests/prof_utils.c, src/ctests/profile.c, + src/ctests/profile_pthreads.c, src/ctests/profile_twoevents.c, + src/ctests/pthrtough.c, src/ctests/pthrtough2.c, + src/ctests/realtime.c, src/ctests/sdsc.c, src/ctests/sdsc2.c, + src/ctests/sdsc4.c, src/ctests/second.c, src/ctests/shlib.c, + src/ctests/sprofile.c, src/ctests/tenth.c, + src/ctests/thrspecific.c, src/ctests/timer_overflow.c, + src/ctests/virttime.c, src/ctests/zero.c, src/ctests/zero_attach.c, + src/ctests/zero_flip.c, src/ctests/zero_fork.c, + src/ctests/zero_omp.c, src/ctests/zero_pthreads.c, + src/ctests/zero_shmem.c, src/ctests/zero_smp.c, + src/testlib/do_loops.c, src/testlib/dummy.c, + src/testlib/papi_test.h, src/testlib/test_utils.c, + src/utils/papi_command_line.c, src/utils/papi_cost.c: testlib: + split some headers out of papi_test.h Too much is going on in that + header, no need to have every include in the world in it. Trying + to make the testcode more standalone so it is easier to follow. 
+ * src/testlib/Makefile, src/testlib/Makefile.target.in: testlib: let + testlib build properly from within the testlib directory + * src/testlib/clockcore.c: testlib: clockcore wasn't protecting all + the output with !quiet + * src/ctests/Makefile: ctests: make sure tests link against the right + papi.h file + * src/Makefile.inc, src/ctests/Makefile, + src/ctests/Makefile.target.in: ctests: allow running "make" in the + ctests directory to work + +2017-06-20 Vince Weaver + + * src/Matlab/PAPI_Matlab.readme, src/papi.c, src/utils/papi_avail.c, + src/utils/papi_clockres.c, src/utils/papi_command_line.c, + src/utils/papi_component_avail.c, src/utils/papi_cost.c, + src/utils/papi_decode.c, src/utils/papi_error_codes.c, + src/utils/papi_event_chooser.c, + src/utils/papi_hybrid_native_avail.c, src/utils/papi_mem_info.c, + src/utils/papi_multiplex_cost.c, src/utils/papi_native_avail.c, + src/utils/papi_version.c, src/utils/papi_xml_event_info.c: update + the ptools-perfapi e-mail address in the auto-generated manpages + it was still using the old ptools.org address. + * doc/Makefile: docs: fix the manpage build after renaming the utils + Thanks to Steve Kaufmann for catching this. 
+ * src/utils/Makefile, src/utils/papi_native_avail.c: utils: + papi_native_avail: remove extraneous testing code + * src/utils/Makefile, src/utils/papi_mem_info.c: utils: + papi_mem_info: remove extraneous test code + * src/utils/Makefile, src/utils/papi_xml_event_info.c: utils: + papi_xml_event_info: remove extraneous test code + * src/utils/Makefile, src/utils/papi_decode.c: utils: papi_decode: + remove extraneous test code + * src/utils/Makefile, src/utils/papi_error_codes.c: utils: + papi_error_codes: remove extraneous test code + * src/utils/Makefile, src/utils/papi_component_avail.c: utils: + papi_component_avail: remove extraneous test code + * src/ctests/clockres_pthreads.c, src/testlib/clockcore.c, + src/testlib/clockcore.h, src/testlib/papi_test.h, + src/utils/Makefile, src/utils/papi_clockres.c: utils: + papi_clockres, remove extraneous test code + * src/utils/Makefile, src/utils/papi_avail.c, + src/utils/print_header.c, src/utils/print_header.h: utils: update + papi_avail to not depend on testlibs It's not a test. + * src/utils/Makefile: utils: add target for papi_hybrid_native_avail + do not build it by default though? Should only be built if + compiling for MIC? 
+ * src/utils/Makefile, src/utils/avail.c, src/utils/clockres.c, + src/utils/command_line.c, src/utils/component.c, src/utils/cost.c, + src/utils/decode.c, src/utils/error_codes.c, + src/utils/event_chooser.c, src/utils/event_info.c, + src/utils/hybrid_native_avail.c, src/utils/mem_info.c, + src/utils/multiplex_cost.c, src/utils/native_avail.c, + src/utils/papi_avail.c, src/utils/papi_clockres.c, + src/utils/papi_command_line.c, src/utils/papi_component_avail.c, + src/utils/papi_cost.c, src/utils/papi_decode.c, + src/utils/papi_error_codes.c, src/utils/papi_event_chooser.c, + src/utils/papi_hybrid_native_avail.c, src/utils/papi_mem_info.c, + src/utils/papi_multiplex_cost.c, src/utils/papi_native_avail.c, + src/utils/papi_xml_event_info.c: utils: rename the utils so the + executable matches the filename This has bothered me for years, + you want to fix "papi_native_avail" but there is no file in the + tree called "papi_native_avail.c" + * src/utils/Makefile, src/utils/papi_version.c, src/utils/version.c: + utils: rename version.c to papi_version.c Also minor cleanups to + the utility. + * src/Makefile.inc, src/configure, src/configure.in, + src/utils/Makefile, src/utils/Makefile.target.in: utils: clean up + Makefile and build process of utils Now should be able to run + "make" in the utils subdir and have it build. Also move the list + of util files to build out of configure as I don't think there's + any reason for having them there. + * src/components/perf_event/pe_libpfm4_events.c: perf: fall back to + operating system default events if libpfm4 lacks support This will + allow use of PAPI on machines that Linux has support for, but + libpfm4 has not added events yet. Still some limitations, for + example the PAPI preset events won't work. 
+ * src/components/perf_event/pe_libpfm4_events.c, + src/components/perf_event/perf_event.c: perf: report better errors + if libpfm4 initialization fails + * src/components/perf_event/pe_libpfm4_events.c: perf: + pe_libpfm4_events: minor whitespace fixup + * src/components/perf_event/pe_libpfm4_events.c: perf: + pe_libpfm4_events: whitespace changes to make code easier to follow + +2017-06-19 Vince Weaver + + * src/ctests/code2name.c: ctests/code2name: fix uninitialized + variable warning + * src/ctests/calibrate.c: ctests/calibrate: fix uninitialized + variable warning + * src/ctests/thrspecific.c: ctests: thrspecific fix so it finishes + It's actually really unclear what this code is trying to test, but + with optimization enabled it hung forever. Marking the variable + being spun on as volatile fixes things but I think there is more + wrong with the test than just that. + * src/ctests/branches.c, src/ctests/sdsc.c, src/ctests/sdsc4.c: + ctests: fix tests using "dummy3()" as a workload Now that we + enable optimization on the ctests this breaks some of the + benchmarks. dummy3() was being optimized away which caused + segfaults and other problems. The tests don't crash now, but they + still fail. Still investigating. + +2016-10-12 Phil Mucci + + * src/configure: Regenerated configure with recent autoconf + * src/configure.in: By default, we want -O1 on tests (TOPTFLAGS). -O0 + is too literal and causes a number of tests that depend on peephole + optimization to run. + * src/utils/Makefile: Utils are installed therefore they should be + built with production flags not test/debug flags + * src/Makefile.inc: Make clean should not clean up libpfm. That's for + make distclean. We're not developing libpfm! + +2016-07-04 Phil Mucci + + * src/ctests/mendes-alt.c, src/ctests/zero.c: Moved function + definitions to top of file to eliminate non-ANSI-C prototypes + inside main. 
Modified message in zero to note that turbo boost will also + cause errors (cycles > real-time-cycle) + * src/Makefile.in, src/Makefile.inc, src/configure, src/configure.in: + Remove EXTRA_CFLAGS, now CFLAGS. Added FTOPTS so compiling Fortran + tests have same flags as ctests. Fix proper testing at configure + time of libpfm for proper combinations of libpfm options + * src/ftests/Makefile: Homogenize include flags + * src/ctests/Makefile: Homogenize include flags + * src/testlib/Makefile: Removed unnecessary defs and options + * src/utils/Makefile: Removed unnecessary definitions and compiler + options + +2016-07-01 Phil Mucci + + * src/Makefile.in, src/Makefile.inc, src/Rules.perfctr-pfm, + src/Rules.perfmon2, src/Rules.pfm4_pe, + src/components/Makefile_comp_tests.target.in, + src/components/perf_event/pe_libpfm4_events.c, src/configure, + src/configure.in, src/ctests/Makefile, + src/ctests/Makefile.target.in, src/ftests/Makefile, + src/ftests/Makefile.target.in: Makefile.in: - Removed DEBUGFLAGS, + NOTLS, PAPI_EVENTS_TABLE from being generated. These were not + properly used. - Added LIBCFLAGS generated from configure for + CFLAGS that ONLY apply to the library and the library code. NOT + tests nor utilities. Previously we were propagating all kinds of + bogus flags to the tests and utils. - CFLAGS is now properly set + for compiler flags not defines etc. Makefile.inc: - Put + papi_events_table.h in the right place. This is always the same + name. Previous attempts at parameterizing this were broken and/or + unnecessary. - Added dependency for the above in the right place + and ALWAYS generate it, regardless of whether we actually include + it in the library (vs load the CSV at runtime). Rules.perfctr-pfm + - Removed conditional removal of events table during clean. + Rules.perfmon2 - Removed conditional removal of events table during + clean. Rules.pfm4_pe - Stopped mussing with CFLAGS which would + pollute child builds but refer to LIBCFLAGS. 
CFLAGS is for + everything! - Removed conditional removal of events table during + clean. - Removed duplicate reference to papi_events_table.h + components/perf_event/pe_libpfm4_events.c: - Removed HARDCODED + include of a libpfm4 private header file. Wrong path and + unnecessary include. This would break if you linked against another + libpfm using any of the config options. + components/perf_event/peu_libpfm4_events.c: - Removed HARDCODED + include of a libpfm4 private header file. Wrong path and + unnecessary include. This would break if you linked against another + libpfm using any of the config options. + components/Makefile_comp_tests.target.in: - Refer to datarootdir to + make autoconf happy configure/configure.in: Regenerated using + autoconf 2.69 and many modifications to serious brokenness. Lots + of fixes: - Sanitize options for static inclusion of user and papi + presets - Fix options that do not print out a result - Fix + debug=yes to not include PAPI_MEMORY_MANAGEMENT. That's only + enabled with debug=memory. This will reduce false positives when we + debug. We don't want our own malloc/free changing behavior when we + are trying to debug! - Fix CFLAGS/LIBCFLAGS/DEBUGFLAGS. configure + now exports a variable called PAPICFLAGS which gets stuffed into + LIBCFLAGS in Makefile.in. This variable IS ONLY for compiler flags + relevant to the library. Previously we were exporting all sorts of + stuff that would make our passes behave differently than user code. + _GNU_SOURCE and -D_REENTRANT. That stuff is for the library and + components. Not user code. - Update compile tests to use + AC_LANG_SOURCE as required. - Fix clock timer checking output to + now say what timer we picked instead of just skipping an answer - + Same for virtual clock timer - Remove broken --with-papi-events + option. - Fixed --with-static-tools option - Fixed/added --with- + static-papi-events option (default) and --with-static-user-events + option. 
- Fixed modalities of configuring whether to build a + static/shared or both. - Fixed link of tests with shared libraries + when above options don't support it. Modality again. Remove + SETPATH/LIBPATH define, which won't work for ANY combination of + --with-pfm-prefix/root/libdir except our included library. Woefully + broken and would result in many false positive failures. If you are + going to run the tests on the shared library it is now the user's + responsibility to set LD_LIBRARY_PATH/LIBPATH correctly. I suspect + this may irritate some, but broken 90% of the time is no excuse for + correct 10% of the time especially when it could generate bug + reports falsely. - Fixed with-static-tools, with-shlib-tools + options to correct modalities. - Fixed all modalities with --with- + pfm-prefix/root/libdir/incdir. Previously the build, configure and + source files were still referring to pieces of code INSIDE our + libpfm4 resulting in version skew and breakage. The way to test + this stuff is to use --root or --prefix after removing the internal + libpfm4 library. - Removed unnecessary and confusing + force_pfm_incdir - Fixed with-pe-incdir option which, like before + was most of the time referring to the libpfm4 included header file. + Not good if one has a custom kernel! PECFLAGS now only appended to + PAPICFLAGS(LIBCFLAGS). - Removal of DEBUGFLAGS. aix.c needs + testing. Anyone have one? - Fixed CFLAGS for BSD - Add message for + papi_events.csv ctests/Makefile ftests/Makefile - Don't redefine + CC/CC_R/CFLAGS/FFLAGS. - Make these files consistent + ctests/Makefile.target.in ftests/Makefile.target.in - refer to + datarootdir as required + +2016-06-27 Phil Mucci + + * src/testlib/Makefile, src/testlib/Makefile.target.in: Added + explicit target for libtestlib.a. The all target should have been + marked as .PHONY so as to avoid constant rebuilding. Also, we really + should merge these two files into a master and an include. + Maintaining two makefiles stinks! 
+ +2017-06-16 Vince Weaver + + * src/papi_fwrappers.c: fwrappers: papif_unregister_thread was + misspelled as papif_unregster_thread This was noticed by Vedran + Novakovic. For an extremely long time (10+ years?) the fortran + wrapper was misspelled as papif_unregster_thread(). It's probably + too late to fix this without potentially breaking things, so just + add a duplicate function with the proper spelling and leave the old + one too. + * src/papi_preset.c: papi_preset: fix compiler warning This really + confusing warning has been around for a while. gcc-6.3 reports it + in a really odd way: papi_preset.c: In function + ‘check_derived_events’: papi_preset.c:513:19: warning: + ‘__s’ may be used uninitialized in this function$ int val = + atoi(&subtoken[1]); ^~~~~~~~~~~~ papi_preset.c:464:1: note: + ‘__s’ was declared here ops_string_merge(char **original, char + *insertion, int replaces, int start_ind$ ^~~~~~~~~~~~~~~~ But + there is no __s variable, or anything to do with where the arrows + are pointing. gcc-5 gives a better warning: papi_preset.c: In + function ‘check_derived_events’: papi_preset.c:513:14: warning: + ‘tok_save_ptr’ may be used uninitialized in this$ int val = + atoi(&subtoken[1]); ^ papi_preset.c:472:8: note: ‘tok_save_ptr’ + was declared here char *tok_save_ptr; So the thing it seems to be + complaining about is that the *saveptr parameter to strtok_r() is + not set to NULL. According to the manpage I don't think this + should be needed? But I think it should be safe to initialize it + anyway. + +Tue Jun 6 11:09:17 2017 -0500 Will Schmidt + + * src/libpfm4/lib/events/power9_events.h, + src/libpfm4/perf_examples/self_count.c, + src/libpfm4/tests/validate_power.c: Update libpfm4 Current with + commit ce5b320031f75f9a9881333c13902d5541f91cc8 add power9 entries + to validate_power.c Hi, Update the validate_power test to include + power9 entries. 
sniff-test run output: $ ./validate Libpfm + structure tests: libpfm ABI version : 0 pfm_pmu_info_t : Passed + pfm_event_info_t : Passed pfm_event_attr_info_t : Passed + pfm_pmu_encode_arg_t : Passed pfm_perf_encode_arg_t : Passed Libpfm + internal table tests: checking power9 (946 events): + Passed Architecture specific tests: 20 PowerPC events: 0 errors All + tests passed + +2017-06-15 Vince Weaver + + * src/components/perf_event/pe_libpfm4_events.c, + src/components/perf_event/pe_libpfm4_events.h, + .../perf_event_uncore/Rules.perf_event_uncore, + .../perf_event_uncore/perf_event_uncore.c, + .../perf_event_uncore/peu_libpfm4_events.c, + .../perf_event_uncore/peu_libpfm4_events.h: perf_event: merge the + libpfm4 helper libraries perf_event and perf_event_uncore had + their own almost exactly the same libpfm4 helper libraries. + Maintaining both was a chore, and it looks like it is possible to + just share one copy. This does mean that it is now not possible to + configure the perf_event_uncore component without perf_event being + enabled, but I am not sure if that was even possible to begin with. + * src/components/perf_event/pe_libpfm4_events.c, + .../perf_event_uncore/perf_event_uncore.c, + .../perf_event_uncore/peu_libpfm4_events.c, + .../perf_event_uncore/peu_libpfm4_events.h: perf_event_uncore: make + the libpfm4 routines match even more + * src/components/perf_event/pe_libpfm4_events.c, + .../perf_event_uncore/peu_libpfm4_events.c: perf_event: make + perf_event and perf_event uncore libpfm4 more similar it's a bad + idea to have more or less two copies of the same code + * src/components/perf_event/pe_libpfm4_events.c, + .../perf_event_uncore/peu_libpfm4_events.c: perf_event: Avoid + unintended libpfm build dependency due to PFM_PMU_MAX enum This + patch is based on one sent by William Cohen + The libpfm pfmlib.h file enumerates the each of performance + monitoring units (PMUs) it can program in pfm_pmu_t type. The last + enum in this type is PFM_PMU_MAX. 
Depending on which specific + version of libpfm is being used this specific value could vary. The + problem is that PFM_PMU_MAX is statically defined in the pfmlib.h + file and this was being used as a loop bound when iterating to + determine which PMUs are potentially available. If PAPI was built + with an older version of libpfm and then run with a newer libpfm + shared library on a machine with a larger PFM_PMU_MAX value, none + of the PMUs past the smaller PFM_PMU_MAX used for the build + would be examined or enabled. + +2017-06-15 Heike Jagode (jagode@icl.utk.edu) + + * src/components/infiniband/linux-infiniband.c: Updated infiniband + component so that it works for mofed driver version 4.0, where + directory counters_ext in sysfs fs has changed to hw_counters. + This update to the component makes it work for both directory + names: - counters_ext for mofed driver version <4.0, and - + hw_counters for mofed driver version >=4.0 This change has not + been fully tested yet due to missing access to machine with updated + version of mofed driver. (CORAL machines will have an updated + version of this driver.) + +2017-05-04 Vince Weaver + + * src/components/rapl/linux-rapl.c: rapl: broadwell-ep DRAM units are + special (like Haswell-EP) The Linux kernel perf interface had this + wrong too. I noticed this in my cluster computing class, the + Broadwell-EP DRAM results were unrealistically high values. 
+ +Fri Apr 21 17:33:15 2017 -0700 William Cohen + + * src/libpfm4/README, src/libpfm4/include/perfmon/pfmlib.h, + src/libpfm4/lib/Makefile, src/libpfm4/lib/events/power9_events.h, + src/libpfm4/lib/pfmlib_common.c, src/libpfm4/lib/pfmlib_power9.c, + src/libpfm4/lib/pfmlib_power_priv.h, src/libpfm4/lib/pfmlib_priv.h, + src/libpfm4/lib/pfmlib_s390x_cpumf.c: Update libpfm4. Current + with commit 8385268c98553cb5dec9ca86bbad3e5c44a2ab16 fix + internal pfm_event_attr_info_t use for S390X Commit 321133e + converted most of the architectures to use the internal + perflib_event_attr_info_t type. However, the s390 was missed in + that previous commit. This patch corrects the issue so libpfm + compiles on s390. + +2017-04-20 Stephen Wood + + * src/extras.c, src/papi.h, src/papi_fwrappers.c, src/papi_hl.c, + src/papi_internal.c: cast pointers appropriately to avoid warnings + and errors + +2017-04-19 Sangamesh Ragate + + * src/papi_events.csv: Mapped PAPI_L2_ICM preset event to + PM_INST_FROM_L2MISS native event for Power8 + +2017-04-06 Asim YarKhan + + * src/ftests/fmatrixlowpapi.F: Fixed: This fortran test exceeded 72 + columns and made the default Intel ifort compilation unhappy + +Wed Apr 5 23:35:44 2017 -0700 Andreas Beckmann + + * src/libpfm4/docs/man3/libpfm_arm_ac53.3, + src/libpfm4/docs/man3/libpfm_arm_ac57.3, + src/libpfm4/docs/man3/libpfm_arm_xgene.3, src/libpfm4/lib/Makefile, + src/libpfm4/lib/events/arm_cortex_a53_events.h, + src/libpfm4/lib/events/intel_glm_events.h, + src/libpfm4/lib/events/intel_hswep_unc_imc_events.h, + src/libpfm4/lib/events/intel_ivbep_unc_imc_events.h, + src/libpfm4/lib/events/intel_knl_events.h, + src/libpfm4/lib/events/intel_knl_unc_cha_events.h, + src/libpfm4/lib/events/power4_events.h, + src/libpfm4/lib/events/ppc970_events.h, + src/libpfm4/lib/events/ppc970mp_events.h, + src/libpfm4/perf_examples/self_smpl_multi.c: Update + libpfm4. Current with commit + 71a960d9c17b663137a2023ce63edd2f3ca115f5 fix various event + description 
typos. This patch fixes the typos in several event + descriptions for Intel, Arm, and Power event tables. + +2017-03-30 William Cohen + + * src/ftests/cost.F, src/ftests/first.F, src/ftests/fmatrixlowpapi.F, + src/ftests/second.F: Eliminate warnings about implicit type + conversions in Fortran tests The gfortran compiler on Fedora 25 + was giving warnings indicating that a few of the tests were doing + implicit type conversion between reals and ints. Those implicit + conversions have been made explicit to eliminate the fortran + compiler warning messages. + +Tue Apr 4 09:42:25 2017 -0700 Stephane Eranian + + * src/libpfm4/include/perfmon/pfmlib.h, + src/libpfm4/lib/pfmlib_amd64.c, + src/libpfm4/lib/pfmlib_amd64_priv.h, src/libpfm4/lib/pfmlib_arm.c, + src/libpfm4/lib/pfmlib_arm_priv.h, src/libpfm4/lib/pfmlib_common.c, + src/libpfm4/lib/pfmlib_intel_netburst.c, + src/libpfm4/lib/pfmlib_intel_nhm_unc.c, + src/libpfm4/lib/pfmlib_intel_snbep_unc.c, + src/libpfm4/lib/pfmlib_intel_snbep_unc_priv.h, + src/libpfm4/lib/pfmlib_intel_x86.c, + src/libpfm4/lib/pfmlib_intel_x86_perf_event.c, + src/libpfm4/lib/pfmlib_intel_x86_priv.h, + src/libpfm4/lib/pfmlib_mips.c, src/libpfm4/lib/pfmlib_mips_priv.h, + src/libpfm4/lib/pfmlib_perf_event.c, + src/libpfm4/lib/pfmlib_perf_event_pmu.c, + src/libpfm4/lib/pfmlib_perf_event_raw.c, + src/libpfm4/lib/pfmlib_power_priv.h, + src/libpfm4/lib/pfmlib_powerpc.c, src/libpfm4/lib/pfmlib_priv.h, + src/libpfm4/lib/pfmlib_sparc.c, + src/libpfm4/lib/pfmlib_sparc_priv.h, + src/libpfm4/lib/pfmlib_torrent.c, src/libpfm4/tests/validate.c, + src/libpfm4/tests/validate_x86.c: Update libpfm4. Current with + commit 5e311841e5d70efb93d11826109cb5acab6e051c enable 38-bit raw + umasks for Intel offcore_response events This patch enables + support for passing and encoding of 38-bit offcore_response matrix + umask. 
Without the patch, the raw umask was limited to 32-bit which + is not enough to cover all the possible bits of the + offcore_response event available since Intel SandyBridge. $ + examples/check_events offcore_response_0:0xffffff Requested Event: + offcore_response_0:0xffffff Actual Event: + ivb::OFFCORE_RESPONSE_0:0xffffff:k=1:u=1:e=0:i=0:c=0:t=0 PMU + : Intel Ivy Bridge IDX : 155189325 Codes : + 0x5301b7 0xffffff The patch also adds tests to the validation + code. + +2017-03-29 Vince Weaver + + * src/components/perfctr/perfctr-x86.c: perfctr: fix perfctr + component to actually work Simple one-line typo means perfctr was + not working, probably for years. I've tested on a 2.6.32-perfctr + kernel and it works again. + +2017-03-28 Vince Weaver + + * src/papi_events.csv: papi_events: add AMD fam16h jaguar events + These will become useful if/when the contributed libpfm4 jaguar + patches get applied. + +2017-03-27 Vince Weaver + + * src/papi_events.csv: events: p4: change the PAPI_TOT_CYC event + PAPI_TOT_CYC wasn't working on Pentium4 because the + GLOBAL_POWER_EVENT:RUNNING event was being grabbed by the hardware + watchdog. perf cycles:u was still working, that's because the + kernel transparently remaps the cycles event to an alias when + global_power_event's slot is taken. The aliased event is the + unwieldly: execution_event:nbogus0:nbogus1:nbogus2:nbogus3:bogus0:b + ogus1:bogus2:bogus3:cmpl:thr=15 which does seem to give the right + results. 
Use this event instead by default on Pentium 4 + * src/components/perf_event/perf_event.c: perf_event: fix warning + when compiling with debug enabled the flags field is an unsigned + long, not an int + +2017-03-22 Vince Weaver + + * src/components/perf_event/perf_event.c: perf_event: don't allocate + a mmap page if not rdpmc or sampling + * src/components/perf_event/perf_event.c: perf_event: only allocate 1 + mmap page (rather than 3) if not sampling Next step is to allocate + 0 mmap pages unless rdpmc is enabled + * src/components/perf_event/perf_event.c, + src/components/perf_event/perf_event_lib.h: perf_event: update the + _pe_set_overflow() call Working on making it more obvious which + events are sampling (and thus need mmap buffers) or not. Also + there were some bugs in the handling of having multiple overflow + sources per eventset, though I'm not sure if PAPI actually handles + that. + * src/components/perf_event/perf_event.c: perf_event: turn off + fast_counter_read if mmaps fail By default on Linux perf_event + can't use more than 516kB of mmap space. So perf_event-rdpmc would + fail after you added a large number (>32) of events. This shows up + on the kufrin benchmark on some machines. This fix makes PAPI fall + back to non-rdpmc if an mmap error happens. I'm also going to try + to tune the mmap usage a bit to make the limits a bit higher. + +2017-03-21 Asim YarKhan + + * src/configure: configure script updated using autoconf-2.59 + +2017-03-20 Vince Weaver + + * src/components/perf_event/perf_event.c, src/configure.in: + configure: enable rdpmc with --enable-perfevent-rdpmc=yes Make + this an option to configure. Defaults to no. Need to find a + machine with autoconf 2.59 on and I'll regenerate configure as + well. + +2017-03-16 Vince Weaver + + * src/components/perf_event/perf_event.c: perf_event: try to work + around exclude_guest issue run a test at startup to see if events + with exclude_guest fail. 
libpfm4 sets this by default, but older + kernels will fail because this was previously a reserved (must be + zero) field. + +2017-03-14 Vince Weaver + + * src/ctests/multiattach.c: tests: multiattach: + whitespace/comments/clarifications digging through the code trying + to figure out why it fails with rdpmc enabled. it turns out it is + seeing wrong running/enabled multiplexing results even though we + aren't multiplexing tracking this down is a pain because we can't + strace/ltrace due to the code using ptrace to start/stop processes. + +2017-03-09 Vince Weaver + + * src/components/perf_event/perf_event.c: perf_event: can't mmap() an + inherited event this is why the inherit test was failing + * src/components/perf_event/perf_event.c, + src/components/perf_event/perf_helpers.h: perf_event: add rdpmc + support (but disabled) finally add the rdpmc code, but it still + fails on a few tests so it is disabled by default. + * src/components/perf_event/perf_event.c, + src/components/perf_event/perf_event_lib.h: perf_event: make all + events come with a mmap buffer This wastes some address space, but + having separate codepaths for rdpmc/regular/sampling/profiling + would be hard to maintain. Had to remove some assumptions from the + profiling/sampling code that mmap_buf means sampling is happening. + * src/components/perf_event/perf_event.c: perf_event: add check for + paranoid==3 Recent distributions are *completely* disablng + perf_event by default with their vendor kernels (this is not + upstream yet). Have PAPI detect and disable the perf_event + component if this is detected. 
+ * src/components/perf_event/perf_event.c: perf_event: split + close_pe_events() into two functions + * src/components/perf_event/perf_event.c, + src/components/perf_event/perf_helpers.h: perf_event: more + whitespace / rearrangement should not be any changes to actual + code, is just whitespace/comment/function movement I know changes + like this make the git history harder to follow, but it really + helps when trying to follow the code when working on major changes. + +2017-03-08 Vince Weaver + + * src/components/perf_event/perf_event.c: perf_event: more + whitespace/comment cleanups digging through the code, still + prepping for rdpmc + +2017-03-07 Vince Weaver + + * src/components/perf_event/perf_helpers.h: perf_event: rdpmc: need + to sign extend offset too Otherwise things stop working after a + PAPI_reset() + * src/components/perf_event/perf_event.c: perf_event: split up + _pe_read() makes the code a bit easier to follow. also prep for + rdpmc() + * src/components/perf_event/perf_event.c: perf_event: clean up + whitespace in _pe_read + +2017-03-08 Vince Weaver + + * src/ctests/first.c: ctests: first: white space cleanups minor + things noticed when trying to figure out why it was failing with + rdpmc (the answer was rdpmc code not handling PAPI_reset()) + +2017-03-07 Vince Weaver + + * src/components/perf_event/perf_helpers.h: perf_event: recent + changes broke build on non-x86 an ifdef was in the wrong location. + * src/components/perf_event/perf_event.c, + src/components/perf_event/perf_helpers.h: perf_event: update rdpmc + detection + * src/utils/component.c: utils: component_avail: clean up -d + (detailed) results print rdpmc status, as well as line things up. + Also don't print redundant info, now that a lot more fields are + printed by default. 
+ * src/utils/component.c: utils: component_avail: whitespace/grammar + fixes + * src/components/perf_event/Rules.perf_event, + src/components/perf_event/perf_helpers.h: perf_event: add + mmap/rdpmc routine we don't use it yet + +2017-03-06 Vince Weaver + + * src/components/perf_event/perf_helpers.h: perf_event: add rdtsc() + and rdpmc() inline-assembly + * src/components/perf_event/perf_event.c, + src/components/perf_event/perf_helpers.h: perf_event: move + perf_event_open() code to a helper file We'll be adding some other + helpers to this file too. + +2017-03-03 Vince Weaver + + * src/components/perf_event/perf_event.c: perf_event: move + bug_sync_read() check out of line we should eventually just phase + out a lot of these checks for older kernels, but it gets tricky as + long as RHEL is shipping 2.6.32. With this change on my IVB + machine PAPI_read() cost went from mean cycles : 932.158549 + std deviation: 358.752461 to mean cycles : 896.642644 std + deviation: 305.568268 + * src/components/perf_event/pe_libpfm4_events.c, + src/components/perf_event/pe_libpfm4_events.h, + src/components/perf_event/perf_event.c: perf_event: remove + _pe_libpfm4_get_cidx() helper function easier to explicitly pass + it to the libpfm4 event code + * src/components/perf_event/perf_event_lib.h: perf_event: wakeup_mode + field is no longer used + * src/components/perf_event/perf_event.c: perf_event: remove + WAKEUP_MODE_ defines These date back to initial perf_event + support, but were never used. Probably were meant in case advanced + sampling/profiling was ever implemented, but it wasn't. 
+ * src/components/perf_event/perf_event.c: perf_event.c: split + setup_mmap() to its own function non-sampling events will need to + have mmap buffers when we move to rdpmc() + * src/components/perf_event/perf_event.c: perf_event: rename + tune_up_fd to configure_fd_for_sampling makes it a bit more clear + what is going on + * src/components/perf_event/perf_event.c: perf_event: remove + extraneous whitespace + +2017-02-24 Vince Weaver + + * src/utils/cost.c: papi_cost: wasn't properly resetting the event + search after POSTFIX This means some architectures could have + skipped the ADD/SUB test even though such events were available. + +Wed Feb 22 01:16:42 2017 -0800 Stephane Eranian + + * src/libpfm4/lib/events/intel_bdw_events.h, + src/libpfm4/lib/events/intel_skl_events.h, + src/libpfm4/lib/pfmlib_intel_rapl.c, + src/libpfm4/tests/validate_x86.c: Update libpfm4\n\nCurrent with\n + commit 1bd352eef242f53e130c3b025bbf7881a5fb5d1e update Intel RAPL + processor support Added Kabylake, Skylake X Added PSYS RAPL event + for Skylake client. + +2017-02-17 Vince Weaver + + * src/utils/cost.c: papi_cost: clear eventset before derived add test + we weren't clearing the eventset after the derived postfix test to + the add test was actually measuring two derived events. This was + noticed on broadwell-ep where papi_cost would fail due to the lack + of enough counters to have both the postfix and add events at the + same time. + +2017-01-23 Asim YarKhan + + * RELEASENOTES.txt: Fixing the date in the RELEASENOTES file. 
diff --git a/INSTALL.txt b/INSTALL.txt new file mode 100644 index 0000000..9673b4f --- /dev/null +++ b/INSTALL.txt @@ -0,0 +1,565 @@ +/* +* File: INSTALL.txt +* CVS: $Id$ +* Author: Kevin London +* london@cs.utk.edu +* Mods: Dan Terpstra +* terpstra@cs.utk.edu +* Mods: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ + +***************************************************************************** +HOW TO INSTALL PAPI ONTO YOUR SYSTEM +***************************************************************************** + +On some of the systems that PAPI supports, you can install PAPI right +out of the box without any additional setup. Others require drivers or +patches to be installed first. + +The general installation steps are below, but first find your particular +Operating System's section for any additional steps that may be necessary. +NOTE: the configure and make files are located in the papi/src directory. + +General Installation + +1. % ./configure + % make + +2. Check for errors. + + a) Run a simple test case: (This will run ctests/zero) + + % make test + + If you get good counts, you can optionally run all the test programs + with the included test harness. This will run the tests in quiet mode, + which will print PASSED, FAILED, or SKIPPED. Tests are SKIPPED if the + functionality being tested is not supported by that platform. + + % make fulltest (This will run ./run_tests.sh) + + To run the tests in verbose mode: + + % ./run_tests.sh -v + +3. Create a PAPI binary distribution or install PAPI directly. 
+ + a) To install PAPI libraries and header files from the build tree: + + % make install + + b) To install PAPI manual pages from the build tree: + + % make install-man + + c) To install PAPI test programs from the build tree: + + % make install-tests + + d) To install all of the above in one step from the build tree: + + % make install-all + + e) To create a binary kit, papi-.tgz: + + % make dist + +***************************************************************************** +MORE ABOUT CONFIGURE OPTIONS +***************************************************************************** + +There is an extensive array of options available from the configure +command-line. These can differ significantly from version to version of +PAPI. For complete details on the command-line options, use: + % ./configure --help + + +***************************************************************************** +DOCUMENTATION BY DOXYGEN +***************************************************************************** + +PAPI now ships with documentation generated by doxygen. +Documentation for the public apis can be created by running +doxygen from the doc directory. + +More complete documentation of all internal apis and structures can be +generated with: + % doxygen Doxyfile-html + +Doxygen documentation for the currently released version of PAPI is also +available on the website. + + +***************************************************************************** +Operating System Specific Installation Steps (In Alphabetical Order by OS) +***************************************************************************** + +AIX - IBM POWER5 and POWER6 and POWER7 +***************************************************************************** +PAPI is supported on AIX 5.x for POWER5 and POWER6. +PAPI is also tested on AIX 6.1 for POWER7. +Use ./configure to select the desired make options for your system, +specifying the --with-bitmode=32 or --with-bitmode=64 to select wordlength. 
+32 bits is the default. + +1. On AIX 5.x, the bos.pmapi is a product level fileset (part of the OS). + However, it is not installed by default. Consult your sysadmin to + make sure it is installed. +2. Follow the general instructions for installing PAPI. + +WARNING: PAPI requires XLC version 6 or greater. +Your version can be determined by running 'lslpp -a -l | grep -i xlc'. + +BG/P +***************************************************************************** +BG/P is a cross-compiled environment. The machine on which PAPI is compiled +is not the machine on which PAPI runs. To compile PAPI on BG/P, specify the +BG/P environment as shown below: + + % ./configure --with-OS=bgp + % make + +NOTE: ./configure might fail if the cross compiler is not in your path. + If that is the case, just add it to your path and everything should work: + + % export PATH=$PATH:/bgsys/drivers/ppcfloor/gnu-linux/bin + +By default this will make a subset of tests in the ctests directory and all + tests in the ftests directory. + +There is an additional C test program provided for the BG/P environment +that exercises the specific BG/P events and demonstrates how to +intermix the PAPI and BG/P UPC native calls. This test program is built with +the normal make sequence and can be found in the ctests/bgp directory. + +The testing targets in the make file will not work in the BG/P environment. +Since BG/P supports multiple queuing systems, you must manually execute +individual programs in the ctests and ftests directories to check for successful +library creation. You can also manually edit the run_tests.sh script to +automate testing for your installation. + +Most papi utilities work for BGP, including papi_avail, papi_native_avail, and +papi_command_line. Many ctests pass for BGP, but many others produce errors due +to the non-traditional architecture of BGP. In particular, PAPI_TOT_CYC always +seems to produce 0 counts, although papi_get_virt_usec and papi_get_real_usec +appear to work. 
+ +The IBM RedPaper: http://www.redbooks.ibm.com/abstracts/redp4256.html provides +further discussion about PAPI on BGP along with other performance issues. + +BG/Q +***************************************************************************** +Five new components have been added to PAPI to support hardware performance +monitoring for the BG/Q platform; in particular the BG/Q network, the I/O system, +the Compute Node Kernel in addition to the processing core. There are no specific +component configure scripts for L2unit, IOunit, NWunit, CNKunit. In order to +configure PAPI for BG/Q, use the following configure options at the papi/src level: +% ./configure --prefix=< your_choice > \ + --with-OS=bgq \ + --with-bgpm_installdir=/bgsys/drivers/ppcfloor \ + CC=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gcc \ + F77=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gfortran \ + --with-components="bgpm/L2unit bgpm/CNKunit bgpm/IOunit bgpm/NWunit" + +CLE - Cray XT and XE Opteron +***************************************************************************** +The Cray XT/XE is a cross-compiled environment. You must specify the +perfmon version to configure as shown below. + +Before running configure to create the makefile that supports a Cray XT/XE CLE +build of PAPI, execute the following module commands: + % module purge + % module load gcc +Note: do not load the programming environment module (e.g. PrgEnv-gnu) +but the compiler module (e.g. gcc) as shown above. 
+ +Check CLE compute nodes for the version of perfmon2 that it supports: + % aprun -b -a xt cat /sys/kernel/perfmon/version + +and use this version when configuring PAPI for a perfmon2 substrate: + % configure CFLAGS="-D__crayxt" \ + --with-perfmon=2.82 --prefix= \ + --with-virtualtimer=times --with-tls=__thread \ + --with-walltimer=cycle --with-ffsll --with-shared-lib=no \ + --with-static-tools + +Configure PAPI for a perf events substrate: + % configure CFLAGS="-D__crayxt" \ + --with-perf-events --with-pe-incdir= \ + --with-assumed-kernel=2.6.34 --prefix= \ + --with-virtualtimer=times --with-tls=__thread \ + --with-walltimer=cycle --with-ffsll --with-shared-lib=no \ + --with-static-tools + +Invoke the make accordingly: + % make CONFIG_PFMLIB_ARCH_CRAYXT=y CONFIG_PFMLIB_SHARED=n + % make CONFIG_PFMLIB_ARCH_CRAYXT=y CONFIG_PFMLIB_SHARED=n install + +The testing targets in the makefile will not work in the XT/XE CLE environment. +It is necessary to log into an interactive session and run the tests +manually through the job submission system. For example, instead of: + % make test +use: + % aprun -n1 ctests/zero +and instead of: + % make fulltest +use: + % ./run_cat_tests.sh +after substituting "aprun -n1" for "yod -sz 1" in run_cat_tests.sh. + +FreeBSD - i386 & amd64 +***************************************************************************** +PAPI requires FreeBSD 6 or higher to work. + +Kernel needs some modifications to provide PAPI access to the performance +monitoring counters. Simply, add "options HWPMC_HOOKS" and "device hwpmc" in +the kernel configuration file. For i386 systems, add also "device apic". +(You can obtain more information in hwpmc(4), see NOTE 1 to check the +supported HW) + +After this step, just recompile the kernel and boot it. + +FreeBSD 7 (or greater) does not ship with a fortran compiler. To compile +fortran tests you will need to install a fortran compiler first (e.g. 
+installing it from /usr/ports/lang/gcc42), and setup the F77 environment +variable with the compiler you want to use (e.g. gfortran42). + +Fortran compilers may issue errors due to "Integer too big for its kind *". +Add to FFLAGS environment variable a compiler option to use int*8 by default +(in gfortran42 it is -fdefault-integer-8). + +Follow the "General Installation" steps. + +NOTE 1: +-- +HWPMC driver supports the following processors: Intel Pentium 2, +Intel Pentium Pro, Intel Pentium 3, Intel Pentium M, Intel Celeron, +Intel Pentium 4, AMD K7 (AMD Athlon) and AMD K8 (AMD Athlon64 / Opteron). + +FreeBSD 8 also adds support for Core/Core2/Core-i[357]/Atom processors. +There is also a patch for FreeBSD 7/7.1 in http://wiki.freebsd.org/PmcTools + +Linux - Xeon Phi [MIC, KNC, Knight's Corner] +***************************************************************************** +Full PAPI support of the MIC card requires MPSS Gold Update 2 or above, and a +cross-compilation toolchain from Intel, the Intel C compiler is also +supported. + +The compiler +----------------------------------------------------------------------------- +* Download one of the MPSS full source bundles at + [http://software.intel.com/en-us/articles/intel-manycore-platform-software-stack-mpss] +* Untar the download. +* Extract gpl/package-cross-k1om.tar.bz2 + +Building PAPI - gcc cross compiler +----------------------------------------------------------------------------- +* Add usr/linux-k1om-4.7/bin or equivalent to your PATH so PAPI can find the + cross-build utils. (see above for instructions on acquiring the cross + compilation toolchain) +* You will need to invoke configure with options: + > ./configure --with-mic --host=x86_64-k1om-linux --with-arch=k1om + + This sets up cross-compilation and sets options needed by PAPI. +* Run make to build the library. 
+ +Building PAPI - icc +----------------------------------------------------------------------------- +If icc is in your path, + > ./configure --with-mic +You may have to provide additional configuration options... try + > ./configure --with-mic --with-ffsll --with-walltimer=cycle --with-tls=__thread --with-virtualtimer=clock_thread_cputime_id +This builds a mic native version of the library. + +Offload Code +------------ +To use PAPI in MIC offload code, build a mic-native version of PAPI +as detailed above. + +The PAPI utility programs can be run on the MIC using the +micnativeloadex tool provided by Intel. The MIC events may require +additional qualifiers to set the exclude_guest and exclude_host bits +to 0 (eventname:mg=1:mh=1). For example, get a list of events +available on the MIC by calling: +micnativeloadex ./utils/papi_native_avail +Then get an event count while setting the appropriate qualifiers +micnativeloadex ./utils/papi_command_line -a "CPU_CLK_UNHALTED:mg=1:mh=1" + +To add offload code into your program, wrap the papi.h header as +follows: +#pragma offload_attribute (push,target(mic)) +#include "papi.h" +#pragma offload_attribute (pop) + +Make PAPI calls from offload code as normal. + +Finally add -offload-option,mic,ld,$(path_to_papi)/libpapi.a +to your compile incantation or if that does not recognise papi library try +-offload-option,mic,compiler,"-lpapi -L" to +your compile incantation + + + +Linux - Itanium II & Montecito +***************************************************************************** +PAPI on Itanium Linux links to the perfmon library. The library version and +the Itanium version are automatically determined by configure. +If you wish to override the defaults, a number of pfm options are available +to configure. Use: + % ./configure --help +to learn more about these options. + +Follow the general installation instructions to complete your installation. + +PLATFORM NOTES: +The earprofile test fails under perfmon for Itanium II. 
It has been +reconfigured to work on the upcoming perfmon2 interface. + +Linux - PPC64 (POWER5, POWER5+, POWER6 and PowerPC970) +**************************************************************************** +Linux/PPC64 requires that the kernel be patched and recompiled with the +PerfCtr patch if the kernel is version 2.6.30 or older. The required patches +and complete installation instructions are provided in the +papi/src/perfctr-2.7.x directory. PPC64 is the ONLY platform that REQUIRES +use of PerfCtr 2.7.x. + +*- IF YOU HAVE ALREADY PATCHED YOUR KERNEL AND/OR INSTALLED PERFCTR -* + +WARNING: You should always use a PerfCtr distribution that has been distributed +with a version of PAPI or your build will fail. The reason for this is that +PAPI builds a shared library of the Perfctr runtime, on which libpapi.so +depends. PAPI also depends on the .a file, which it decomposes into component +objects files and includes in the libpapi.a file for convenience. If you +install a new perfctr, even a shared library, YOU MUST REBUILD PAPI to get +a proper, working libpapi.a. + +There are several options in configure to allow you to specify your perfctr +version and location. Use: + % ./configure --help +to learn more about these options. + +Follow the general installation instructions to complete your installation. + +Linux Perf Events ( with kernel 2.6.32 and newer ) +***************************************************************************** + +Performance counter support has been merged as the "Perf Events" +subsystem as of Linux 2.6.32. This means that PAPI can be built +without patching the kernel on new enough systems. + +Perf Events support is new, and certain functionality does not work. +If you need any of the functionality listed below, we recommend +you install the PerfCtr patchset and use that in conjunction with PAPI. + + + PAPI requires at least Linux kernel 2.6.32, as the earlier 2.6.31 + version had some significant API changes. 
+ + Kernels before 2.6.33 have extra overhead when determining + whether events conflict or not. + + Counter multiplexing is handled by PAPI (rather than perf_events) + on kernels before 2.6.33 due to a bug in the kernel perf_events code. + + Nehalem EX support requires kernel 2.6.34 or newer. + + Pentium 4 support requires kernel 2.6.35 or newer. + +The PAPI configure script should auto-detect the availability of +Perf Events on new enough distributions (this mainly requires +that perf_event.h be available in /usr/include/linux) + +On older distributions (even ones that include the 2.6.32 kernel) +the perf_event.h file might not be there. One fix is to install +your distributions linux kernel headers package, which is often +an optional package not installed by default. + +If you cannot install the kernel headers, you can obtain the +perf_event.h file from your kernel and run configure as such: + ./configure --with-pe-incdir=INCDIR +replacing INCDIR with the directory that perf_event.h is in. + + +Linux PerfCtr (requires patching the kernel) +***************************************************************************** +When using Linux kernels before 2.6.32 the kernel must be patched with +the PerfCtr patch set. (This patchset can also be used on more recent +kernels if the support provided by Perf Events is not enough for your +workload). The required patches and complete installation instructions +are provided in the papi/src/perfctr-x.y directory. Please see the INSTALL +file in that directory. + +Do not forget, you also need to build your kernel with APIC support in order +for hardware overflow to work. This is very important for accurate statistical +profiling ala gprof via the hardware counters. + +So, when you configure your kernel to build with PERFCTR as above, make +sure you turn on APIC support in the "Processor type and features" section. +This should be enabled by default if you are on an SMP, but it is disabled +by default on a UP. 
+ +In our 2.4.x kernels: +> grep PIC /usr/src/linux/.config +/usr/src/linux/.config:CONFIG_X86_GOOD_APIC=y +/usr/src/linux/.config:CONFIG_X86_UP_APIC=y +/usr/src/linux/.config:CONFIG_X86_UP_IOAPIC=y +/usr/src/linux/.config:CONFIG_X86_LOCAL_APIC=y +/usr/src/linux/.config:CONFIG_X86_IO_APIC=y + +You can verify the APIC is working after rebooting with the new kernel +by running the 'perfex -i' command found in the perfctr/examples/perfex +directory. + +PAPI on x86 assumes PerfCtr 2.6.x. NOTE: THE VERSIONS OF PERFCTR DO NOT +CORRESPOND TO LINUX KERNEL VERSIONS. + +*- IF YOU HAVE ALREADY PATCHED YOUR KERNEL AND/OR INSTALLED PERFCTR -* + +WARNING: You should always use a PerfCtr distribution that has been distributed +with a version of PAPI or your build may fail. Newer versions with backward +compatibility may also work. PAPI builds a shared library of the Perfctr +runtime, on which libpapi.so depends. PAPI also depends on the .a file, +which it decomposes into component object files and includes in the libpapi.a +file for convenience. If you install a new PerfCtr, even a shared library, +YOU MUST REBUILD PAPI to get a proper, working libpapi.a. + +There are several options in configure to allow you to specify your perfctr +version and location. Use: + % ./configure --help +to learn more about these options. + +Follow the general installation instructions to complete your installation. + +*- IF PERFCTR IS INSTALLED BUT PAPI FAILS TO INITIALIZE -* + +You may be running udev, which is not smart enough to know the permissions of +dynamically created devices. 
To fix this, find your udev/devices directory, +often /lib/udev/devices or /etc/udev/devices and perform the following actions: + + mknod perfctr c 10 182 + chmod 644 perfctr + +On Ubuntu 6.06 (and probably other debian distros), add a line to +/etc/udev/rules.d/40-permissions.rules like this: + +KERNEL=="perfctr", MODE="0666" + +On SuSE, you may need to add something like the following to +/etc/udev/rules.d/50-udev-default.rules: + (SuSE does not have the 40-permissions.rules file in it.) + +# cpu devices +KERNEL=="cpu[0-9]*", NAME="cpu/%n/cpuid" +KERNEL=="msr[0-9]*", NAME="cpu/%n/msr" +KERNEL=="microcode", NAME="cpu/microcode", MODE="0600" +KERNEL=="perfctr", NAME="perfctr", MODE="0644" + +These lines tell udev to always create the device file with the appropriate permissions. +Use 'perfex -i' from the perfctr distribution to test this fix. + +PLATFORM NOTES: +Opteron fails the matrix-hl test because the default definition of PAPI_FP_OPS +overcounts speculative floating point operations. + +Solaris 8 - Ultrasparc +***************************************************************************** +The only requirement for Solaris is that you must be running version 2.8 or +newer. As long as that requirement is met, no additional steps are required +to install PAPI and you can follow the general installation guide. + +Solaris 10 - UltraSPARC T2/Niagara 2 +***************************************************************************** +PAPI supports the Niagara 2 on Solaris 10. The substrate offers support for +common basic operations like adding/reading/etc and the advanced features +multiplexing (see below), overflow handling and profiling. The implementation +for Solaris 10 is based on libcpc 2, which offers access to the underlying +performance counters. Performance counters for the UltraSPARC architecture +are described in the UltraSPARC architecture manual in general with detailed +descriptions in the actual processor manual. 
In case of this substrate the +documentation for performance counters can be found at: + + - http://www.opensparc.net/publications/specifications/ + +In order to install PAPI on this platform make sure the packages SUNWcpc and +SUNWcpcu are installed. For the compilation Sun Studio 12 was used while the +substrate has been developed. GNU GCC has not been tested and would require +to modify the makefiles Makefile.solaris-niagara2 (32 bit) and +Makefile.solaris-niagara2-64bit (64 bit). + +The steps required for installation are as follows: + + ./configure --with-bitmode=[32|64] --prefix=/is/optional + + If no --with-bitmode parameter is present a default of + 32 bit is assumed. + + If no --prefix is used, a default of /usr/local is assumed. + + make + make install + +If you want to link your application against your installation you should +make sure to include at least the following linker options: + + -lpapi -lcpc + +PLEASE NOTE: This is the first revision of Niagara 2/libcpc 2/Solaris 10 +support and needs further testing! Contributions, especially for the preset +definitions, would be very appreciated. + +MULTIPLEXING: As the Niagara 2 offers no native event to count the cycles +elapsed, a "synthetic event" was created offering access to the cycle count. +This event is neither as accurate as the native events, nor it should be +used for anything else than the multiplexing mode, which needs the cycle +count in order to work. Therefore multiplexing and the preset PAPI_TOT_CYC +should be only used with caution. BEWARE OF WRONG COUNTER RESULTS! 
+ +Windows XP/2000/Server 2003 - Intel Pentium III or AMD Athlon / Opteron +***************************************************************************** +Please use PAPI 3.7 (http://icl.cs.utk.edu/projects/papi/downloads/papi-3.7.2.tar.gz) + +The Windows source tree comes with Microsoft Visual Studio Version 8 projects +to build a graphical shell application, the PAPI library as a DLL, a kernel +driver to provide access to the counters, and a collection of C test programs. + +The WinPMC driver must be installed with administrator privileges. See the +winpmc.html file in the papi/win2k/winpmc directory for details on building +and installing this driver. + +The general installation instructions are irrelevant for Windows. + +Other Platforms +***************************************************************************** +PAPI can be compiled and installed on most platforms that have GNU compilers +regardless of operating system or hardware. This includes, for example, +Macintosh systems running recent versions of OSX. However, PAPI can only +provide access to the CPU hardware counters on platforms that are directly +supported. Unsupported platforms will run, but only provide basic timing +functions, and potential access to some non-cpu components. + + +***************************************************************************** +CREATING AND RUNNING COMPONENTS +***************************************************************************** + +Basic instructions on how to create a new component can be found in +src/components/README. The components directory contains several components +developed by the PAPI team along with a simple yet functional "example" +component which can be used as a guide to aid third-party developers. +Assuming components are developed according to the specified guidelines, +they will function within the PAPI framework without requiring any changes +to PAPI source code. 
+ +Before running any component that requires configuration, the configure +script for that component must be executed in order to generate the +Makefile which contains the configuration settings. Normally, the script +will only need to be executed once. Depending on the component, configure +may require that one or more configuration settings be specified by the user. + +The components to be added to PAPI are specified during the configuration of +PAPI by adding the --with-components= command line option to +configure. For example, to add the acpi, lustre, and net components, the +option would be: + % ./configure --with-components="acpi lustre net" + +Attempting to add a component to PAPI which requires configuration and has +not been configured will result in a compilation error because the PAPI +build environment will be unable to find the Makefile for that component. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..1780c1e --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,33 @@ + Copyright (c) 2005 - 2010 + Innovative Computing Laboratory + Dept of Electrical Engineering & Computer Science + University of Tennessee, + Knoxville, TN. + All Rights Reserved. + + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the University of Tennessee nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +This open source software license conforms to the BSD License template. diff --git a/README b/README new file mode 100644 index 0000000..a8c1fa8 --- /dev/null +++ b/README @@ -0,0 +1,83 @@ +PAPI: Performance Application Programming Interface +=================================================== + +** Innovative Computing Lab ** + +** University of Tennessee, Knoxville, TN ** + +*** +[TOC] +*** + +About +----- + +PAPI provides the tool designer and application engineer with a +consistent interface and methodology for use of the performance +counter hardware found in most major microprocessors. PAPI enables +software engineers to see, in near real time, the relation between +software performance and processor events. + +In addition, PAPI provides access to a collection of components that +expose performance measurement opportunities across the hardware and +software stack. + + +Getting Started +--------------- + +If this is the first file you've opened in the PAPI tree, we'll try to give you +a few tips on where to go from here. + +* Read the license found in LICENSE.txt. It's pretty short, and not very + restrictive, but it'll give you an idea of what you can and can't do with the + PAPI sources. 
+* Visit the website at: + There you can find late-breaking news that may be more current than in these + files. You can also find documentation in a greater variety of formats than + in the papi/doc/ directory. +* Sign up for the PAPI mailing list(s). Instructions are on our home page. +* Read the RELEASENOTES.txt file to get an idea of what's new in the current release. + + +Installing PAPI +--------------- + +To install PAPI on your system: + +* Find the section in INSTALL.txt that pertains to your hardware and operating + system. +* Follow the directions to install required components and build the PAPI + libraries. +* Run the test suite when you are finished to verify that everything went ok. + NOTE: Although we make every attempt to get all tests to PASS or SKIP on all + platforms, there are occasional instances of FAILures due to excessively + tight compliance thresholds or platform idiosyncrasies. Don't panic if one + or two tests FAIL. Contact us with complete output and we'll see what we can do. + + +Using PAPI +---------- + +To use PAPI in your own programs: + +* Read the PAPI Overview found at: + http://icl.utk.edu/projects/papi/wiki/Main_Page. +* Try out the utility programs in /utils to see what's in your system. +* Try a test program. Source for a number of tests in both C and FORTRAN is + available in the src/tests/ and src/ftests/ directories. Find a program + that's similar to what you want to do. Make sure you can build it and run it. +* Write a test program of your own, exercising the PAPI events and features of + interest to you. +* Go for broke. Fold PAPI calls into your sources and see what you can learn. + + +Bugs and Questions +------------------ + +* Visit our FAQ at: + or read a snapshot of the FAQ in papi/PAPI_FAQ.html +* Subscribe to the PAPI mailing list at: + +* Read historical postings to the list. +* Post questions to the list. 
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..229e868 --- /dev/null +++ b/README.md @@ -0,0 +1,97 @@ +PAPI: Performance Application Programming Interface +=================================================== + +** Innovative Computing Lab ** + +** University of Tennessee, Knoxville, TN ** + +*** +[TOC] +*** + +About +----- + +PAPI provides the tool designer and application engineer with a +consistent interface and methodology for use of the performance +counter hardware found in most major microprocessors. PAPI enables +software engineers to see, in near real time, the relation between +software performance and processor events. + +In addition, PAPI provides access to a collection of components that +expose performance measurement opportunities across the hardware and +software stack. + + +Getting Started +--------------- + +If this is the first file you've opened in the PAPI tree, we'll try to give you +a few tips on where to go from here. + +* Read the license found in LICENSE.txt. It's pretty short, and not very + restrictive, but it'll give you an idea of what you can and can't do with the + PAPI sources. +* Visit the website at: + There you can find late-breaking news that may be more current than in these + files. You can also find documentation in a greater variety of formats than + in the papi/doc/ directory. +* Sign up for the PAPI mailing list(s). Instructions are on our home page. +* Read the RELEASENOTES.txt file to get an idea of what's new in the current release. + + +Downloading PAPI +---------------- + +* Clone the PAPI repository the first time with the following command: + `> git clone https://bitbucket.org/icl/papi.git` + +* This creates a complete copy of the papi git repository on your computer + in a folder called 'papi'. 
+ +* To make sure your copy is up to date with the repository: + `> cd papi` + `> git pull https://bitbucket.org/icl/papi.git` + + +Installing PAPI +--------------- + +To install PAPI on your system: + +* Find the section in INSTALL.txt that pertains to your hardware and operating + system. +* Follow the directions to install required components and build the PAPI + libraries. +* Run the test suite when you are finished to verify that everything went ok. + NOTE: Although we make every attempt to get all tests to PASS or SKIP on all + platforms, there are occasional instances of FAILures due to excessively + tight compliance thresholds or platform idiosyncrasies. Don't panic if one + or two tests FAIL. Contact us with complete output and we'll see what we can do. + + +Using PAPI +---------- + +To use PAPI in your own programs: + +* Read the PAPI Overview found at: + http://icl.utk.edu/projects/papi/wiki/Main_Page. +* Try out the utility programs in /utils to see what's in your system. +* Try a test program. Source for a number of tests in both C and FORTRAN is + available in the src/tests/ and src/ftests/ directories. Find a program + that's similar to what you want to do. Make sure you can build it and run it. +* Write a test program of your own, exercising the PAPI events and features of + interest to you. +* Go for broke. Fold PAPI calls into your sources and see what you can learn. + + +Bugs and Questions +------------------ + +* Visit our FAQ at: + or read a snapshot of the FAQ in papi/PAPI_FAQ.html +* Subscribe to the PAPI mailing list at: + +* Read historical postings to the list. +* Post questions to the list. \ No newline at end of file diff --git a/RELEASENOTES.txt b/RELEASENOTES.txt new file mode 100644 index 0000000..6fd90c4 --- /dev/null +++ b/RELEASENOTES.txt @@ -0,0 +1,1404 @@ +This file documents changes in recent PAPI releases in inverse chronological +order. 
+ +For details on installing PAPI on your machine, consult the INSTALL.txt file +in this directory. + +=============================================================================== +PAPI 5.6.0 RELEASE NOTES 19 Dec 2017 +=============================================================================== + +PAPI 5.6.0 contains a major cleanup of the source code and the build +system to have consistent code structure, eliminate errors, and reduce +redundancies. A number of validation tests have been added to PAPI to +verify the PAPI preset events. Improvements and changes to multiple +PAPI components have been made, varying from supporting new events to +fixes in the component testing. + +For specific and detailed information on changes made in this release, +see ChangeLogP560.txt for keywords of interest or go directly to the +PAPI git repository. + +Major changes + +* Validation tests: A substantial effort to add validation tests to + PAPI to check and detect problems in the definition of PAPI preset + events. +* Event testing: Thorough cleanup of code in the C and Fortran testing + to add processor support, cleanup output and make the testing + behavior consistent. +* CUDA component: Updated and rewritten to support CUPTI Metric API + (combinations of basic events). This component now supports NVLink + information through the Metric API. Updated testing for the + component. +* NVML component: Updated to support power management limits and + improved event names. Minor other bug fixes. +* RAPL component: Added support for: Intel Atom models Goldmont / + Gemini_Lake / Denverton, Skylake-X / Kabylake +* PAPI preset events: Many updates to the PAPI preset event mappings; + Skylake X support, initial AMD fam17h, fix AMD fam16h, added more + Power8 events, initial Power9 events. + +Other changes + +* Updating man and help pages for papi_avail and papi_native_avail. +* Powercap component: Added test for setting power caps via PAPI + powercap component. 
+* Infiniband component: Bugfix for infiniband_umad component. +* Uncore component: Updated to support recent processors. +* Lmsensors component updated to support correct runtime linking, + better events name, and a number of bug fixes. +* Updated and fixed timer support for multiple architectures. +* All components: Cleanup and standardize testing behavior in the + components. +* Build system: Much needed cleanup of configure and make scripts. +* Support for C++ was enhanced. +* Enabling optional support for reading events using perfevent-rdpmc + on recent Linux kernels can speed up PAPI_read() by a factor of 5. +* Pthread testing limited to avoid excessive CPU consumption on highly + parallel machines. + +Acknowledgements: This release is the result of efforts from many +people, with special Thanks to Vince Weaver, Phil Mucci, Steve +Kauffman, William Cohen, Will Schmidt, and Stephane Eranian (for +libpfm4) from the internal PAPI team. + +=============================================================================== +PAPI 5.5.1 RELEASE NOTES 18 Nov 2016 +=============================================================================== + +PAPI 5.5.1 is now available. This is a point release intended +primarily to add support for uncore performance monitoring events on +Intel Xeon Phi Knights Landing (KNL). Other minor bugfixes have also +been made. + +For specific and detailed information on changes made in this release, +see ChangeLogP551.txt for keywords of interest or go directly to the +PAPI git repository. + +New Platforms: +* Added Knights Landing (KNL) uncore event support via libpfm4. + +Bug Fixes: +* Fix some possible string termination problems. +* Cleanup lustre and mx components. +* Enable RAPL for Broadwell-EP. + + +=============================================================================== +PAPI 5.5.0 RELEASE NOTES 14 Sep 2016 +=============================================================================== + +PAPI 5.5 is now available. 
This release introduces a new component that +provides read and write access to the information and controls exposed +via the Linux powercap interface. The PAPI powercap component supports +measuring and capping power usage on recent Intel architectures. + +We have added core support for Knights Landing (uncore support will be +released later) as well as power monitoring via the RAPL and powercap +components. + +For specific and detailed information on changes made in this release, +see ChangeLogP550.txt for keywords of interest or go directly to the +PAPI git repo. + +New Platforms: +* Added Knights Landing (KNL) core events and preset events. +* Added Intel Broadwell/Skylake/Knights Landing RAPL support +* Updated PAPI preset event support for Intel Broadwell/Skylake + +New Component: +* Powercap component: PAPI now supports the Linux Power Capping + Framework which exposes power capping devices and power measurement + to user space via a sysfs virtual file system interface. + +Enhancements: +* Add support for multiple flavors of POWER8 processors. +* Force all processors to check event schedulability by checking that + PAPI can successfully read the counters. +* Support for Intel Broadwell-EP, Skylake, Goldmont, Haswell-EP + inherited from libpfm4. +* Shared memory object (.so) naming is made more limited so that minor + updates do not break ABI compatibility. + +Bug Fixes: +* Improve testlib error messages if a component fails to initialize. +* Fix _papi_hwi_postfix_calc parsing and robustness. +* Clean build rules for CUDA sampling subcomponent. +* Correct IBM Power7 and Power8 computation of PAPI_L1_DCA. +* Eliminate the sole use of ftests_skip subroutine. +* Correct the event string names for tenth.c. +* Have Fortran test support code report errors more clearly. +* Cleanup output from libmsr component. +* PAPI internal functions were marked as static to avoid exposing them + externally. 
+* Multiple components were fixed to make internal functions static + where possible, to avoid exposing the functions as externally + accessible entry points. +* CUDA component configuration bug fixed. + + +=============================================================================== +PAPI 5.4.3 RELEASE NOTES 26 Jan 2016 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP543.txt for keywords of interest or go directly to the PAPI git +repo. + +GENERAL NOTES +=============================================================================== + +New Implementations: +------------------- + * libmsr component: Using LLNL's libmsr library to access Intel + RAPL (Running Average Power Limit) library adds power capping + abilities to PAPI. + * CUDA PC sampling: A new standalone CUDA sampling tool + (papi_cuda_sampling) has been added to the CUDA component + (components/cuda/sampling/) and can be used as a preloader to + perform PC sampling on Nvidia GPUs which support the CUPTI + sampling interface (e.g. Maxwell). + * ARM Cortex A53 support: Event definitions added. + +Enhancements: +------------ + * Added Haswell-EP uncore support + * Initial Broadwell, Skylake support + * Added a general CUDA example (components/cuda/test) that uses + LD_PRELOAD to attach to a running CUcontext. + * Added "-check" flag to papi_avail and papi_native_avail to + test counter availability/validity. + +Bug Fixes: +---------- + * Clean output from papi_avail tool when there are no user defined events. + * Support PAPI_GRN_SYS granularity for perf component. + * Bug fix for infiniband_umad component. + * Bug fix for vmware component. + * Bug fix for NVML component. + * Fixed RAPL component so it reports unsupported inside a guest VM. + * Cleanup ARM CPU detection. + * Bug fix for PAPI_overflow issue for multiple eventsets. + * Increased PERF_EVENT_MAX_MPX_COUNTERS to 192 from 128. 
+ * Fixed memory leak in papi_preset.c. + * Free allocated memory in the stealtime component. + + +=============================================================================== +PAPI 5.4.1 RELEASE NOTES 02 Mar 2015 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP541.txt for keywords of interest or go directly to the PAPI git +repo. + +GENERAL NOTES +=============================================================================== +The PAPI CUDA component is updated to support CUDA 6.5 with multiple GPUs. + +New Platforms: +------------- + * Updated support for Intel Haswell and Haswell-EP + * Added ARM Cortex A7 + * Added ARM 1176 cpu (original Raspberry Pi) + +Enhancements: +------------ + * Enhance PAPI preset events to allow user defined events. + * User defined events are set up via a user event definition file. + * CUDA component is updated to support multiple devices and contexts. + * Tested under and supports CUDA 6.5. + * Note: Events for different CUDA context MUST be added from within the context. + * New test demonstrating attaching an eventset to a single CPU rather than a thread. + * Use the term "event qualifiers" instead of "event masks" to clarify understanding. + * Added pkg-config support to PAPI. + +Bug Fixes: +---------- + * Fixed lustre segfault bug. + * Fixed compilation in the absence of a Fortran compiler. + * Fixed bug in krental_pthreads ctest to join threads properly on exit. + * Fixed bug in perf_events where event masks were not getting cleared properly. + * Fixed memory leak bug in perf_events. 
+ +=============================================================================== +PAPI 5.4.0 RELEASE NOTES 13 Nov 2014 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP540.txt for keywords of interest or go directly to the PAPI git +repo. + +GENERAL NOTES +=============================================================================== +Full support for CUDA 6.5 has been delayed and will be included in the next +release. + +New Platforms: +------------- + * EMON power component for IBM Blue Gene/Q + * Support for the Applied Micro X-Gene processor + * Support for IBM POWER non-virtualized platform + * RAPL support for Intel Haswell models (60,69,71) + +Enhancements: +------------ + * Added list of supported PMU names (core/uncore components) + * Support for extended event masks (core/uncore components) + * Extension of the RAPL energy measurements on Intel via msr-safe + * Updated IBM POWER7, POWER8 presets + * 'papi_native_avail --validate' supports events that require + multiple masks to be valid + +Bug Fixes: +---------- + * HW counter and event count added/fixed for BGPM components + * Reduce cost of using PAPI_name_to_code + * Non-null terminated strings fixed + * Growing list of native events in core/uncore components fixed + * Cleaned up Intel IvyBridge presets + * Addressed Coverity reported issues + +=============================================================================== +PAPI 5.3.2 RELEASE NOTES 30 Jun 2014 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP532.txt for keywords of interest or go directly to the PAPI git +repo. + +GENERAL NOTES +=============================================================================== +An internal 5.3.1 is skipped, changes since 5.3 are detailed below. 
+ +New Platforms: +------------- + * Intel Silvermont + * ARM Qualcomm Krait + +Enhancements: +------------ + * Rapl component support for Intel Haswell-EP + * Add units to NVML component + * Refine the definition of a Flop on the *-Bridge Intel chips. + * Updated Intel Haswell presets + +Bug Fixes: +---------- + * FreeBSD build and component fixes + * Uncore enumeration + * Printf format specifiers standardized (use # for hex) + +=============================================================================== +PAPI 5.3.0 RELEASE NOTES 18 Nov 2013 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP530.txt for keywords of interest or go directly to the PAPI git +repo. + +GENERAL NOTES +=============================================================================== +New Platforms: +------------- + * Intel Xeon Phi ( for offload code ) + +Enhancements: +------------ + * RAPL component better deals with counter wrap + * Floating point support added for Intel IvyBridge + * PAPI_L1_ICM event added for Intel Haswell + * AMD Fam15h gets Core select umasks + * CUDA component now sets the number of native events supported + * Installed tests' code can now be built. + * host-micpower utility + +Bug Fixes: +---------- + * command_line utility event skipping bug + * remove extraneous -openmp flag from icc builds + * Default to building all ctests, clean up much bit rot + +=============================================================================== +PAPI 5.2.0 RELEASE NOTES 06 Aug 2013 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP520.txt for keywords of interest or go directly to the PAPI git +repo. 
+ +GENERAL NOTES +=============================================================================== +This release represents a major overhaul of several components. Support for +Intel Haswell and Power 8 has been added. Processor support code has been +moved to the components directory. + +New Platform: +------------- + * Intel Haswell (initial support) + * Power 8 (initial support) + +New Components: +--------------- + * Host-side MIC power component + +Enhancements: +------------ + * Component tests are now included with install-tests make target. + * Components with external library dependencies load them at runtime + allowing better distribution (infiniband, cuda, vmware, nvml and + host-side micpower) + * Perf_events, perfctr[_ppc] and perfmon2[_ia64] have been moved under the + components directory + * (Intel) Uncore support has been split into its own component + * Lustre component better handles large numbers of filesystems + +=============================================================================== +PAPI 5.1.1 RELEASE NOTES 21 May 2013 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP511.txt for keywords of interest or go directly to the PAPI git +repo. + +GENERAL NOTES +=============================================================================== +This is a bug fix release. 
+ +New Platform: +------------- + * Intel IvyBridge-EP + +Bug Fixes: +---------- + * Many perf_event fixes + * Cuda component fixes + * IA64 and SPARC build fixes + +Enhancements: +------------ + * Better logic in run_tests.sh script + * ARM builds now use pthread_mutexes + * BG/Q overflow enhancements + +=============================================================================== +PAPI 5.1.0 RELEASE NOTES 11 Jan 2013 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP510.txt for keywords of interest or go directly to the PAPI git +repo. + +New Platform: +------------- + * Intel Xeon Phi ( Knight's Corner or KNC or MIC ) + +Bug Fixes: +---------- + * Various build system fixes. + * NVML component fix. + * Work around a sampling bug on Power Linux + +Enhancements: +------------ + * ARM Cortex A15 support. + * New API entry, PAPI_get_eventset_component + * Add options to papi_command_line to print in hex and unsigned formats + +New Components: +--------------- + * MIC Power component. + +=============================================================================== +PAPI 5.0.1 RELEASE NOTES 20 Sep 2012 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP501.txt for keywords of interest or go directly to the PAPI git +repo. + +GENERAL NOTES +=============================================================================== +This is a bug fix release of PAPI. Because it includes a major bug fix in the preset +code, we recommend that all users of PAPI 5.0 upgrade; see commit 866bd51c +for a detailed discussion. + +Bug Fixes: +---------- + * Debugging macros without variadic macro support. + * Building PAPI with an external libpfm4 installation. + * Fix a major bug in the preset code. 
+ +Enhancements: +------------- + * CUDA configure script better supports Kepler architecture. + * rapl support for IvyBridge. + * Libpfm4 updates for SandyBridge-EP counters. + +=============================================================================== +PAPI 5.0.0 RELEASE NOTES 23 Aug 2012 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP500.txt for keywords of interest or go directly to the PAPI git +repo. + +GENERAL NOTES +=============================================================================== +This is a major release of PAPI. Parts of both the internal component and +external low-level interfaces have changed, this will break your 4.4 compliant +components. Numerous bug fixes are also included in this release. + +New Platforms: +------------- + * Intel IvyBridge + * Intel Atom Cedarview + +New / Improved Components: +--------------- + * nVidia Management library component - support for various system health + and power measurements on supported nVidia gpus. + * stealtime - When running in a VM, this provides information on how much + time was "stolen" by the hypervisor due to the VM being disabled. + This is currently only supported on KVM. + * RAPL - a SandyBridge RAPL (Running Average Power Level) Component + providing for energy measurement at the package-level. + * VMware component for VMware pseudo-counters + * appio - This application I/O component enables PAPI-C to determine + I/O used by the application. + +Bug Fixes: +---------- + * Numerous memory leaks, thread races, and compiler warnings corrected. + +Enhancements: +------------- + * Major overhaul of the component interface. + * Update perf_event.c rdpmc support + * Minor uncore fixes plus changes for rdpmc. + * Add a PAPI_REF_CYC preset event, defined as UNHALTED_REFERENCE_CYCLES for + all Intel platforms on which this native event is supported. 
+ * Component names are now standardized in a meaningful way. + * Multiplexing under perf_events has been improved. + * FreeBSD cleanup/updates + * appio component now intercepts recv() + * Power7 definition of L1_DCA and LST_INS updated to a countable definition + * Added BGPM's opcode and generic event functionality to PAPI for BG/Q + (requires Q32 driver V1R1M2). + +Open Issues: +------------- + * SandyBridge PAPI_FP_* events only produce reasonable results when counted + by themselves. + * Ivy Bridge does not support floating point events. + +Experimental: +------------- + +Known Bugs: +----------- + * Software multiplexing is known to have a memory leak. + * The byte-profile test is known to fail on Power7/AIX + +Deprecated: +--------------------- + * Java PAPI wrappers + * Windows + + +=============================================================================== +PAPI 4.4.0 RELEASE NOTES 17 Apr 2012 +=============================================================================== +For specific and detailed information on changes made in this release, grep +ChangeLogP440.txt for keywords of interest or go directly to the PAPI git +repo. + +GENERAL NOTES +=============================================================================== +This is a major release of PAPI-C. Support for IBM Blue Gene/Q has been added. +Multiple bug fixes are also included in this release. +This is also the first release of papi made from the git repository; +git clone http://icl.cs.utk.edu/git/papi.git + +Visit the PAPI Reference pages for more information at: +http://icl.cs.utk.edu/projects/papi/wiki/Main_Page +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + +RECENT CHANGES IN PAPI 4.4.0 +=============================================================================== +New Platforms: +------------- + * src/Rules.bgpm... Added PAPI support for Blue Gene/Q. 
+ +Bug Fixes: +---------- + * Fix buffer overrun in lmsensors component + * libpfm4: Update to current git libpfm4 snapshot + * Fix broken Pentium 4 Prescott support; we were missing the netburst_p + declaration in papi_events.csv + * Fix various locking issues in the threaded code. + * Fix multiplexing of large eventsets on perf_events systems. + This presented when using more than 31 multiplexed events on perf_event + +Enhancements: +------------- + * Update the release machinery for git. + +=============================================================================== +PAPI 4.2.1 RELEASE NOTES 13 Feb 2012 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP421.txt file for keywords of interest or go directly to the PAPI +cvs tree. + +GENERAL NOTES +=============================================================================== +This is a minor release of PAPI-C. It does not break binary or semantic +compatibility with previous versions. + +Visit the PAPI Reference pages for more information at: +http://icl.cs.utk.edu/projects/papi/wiki/Main_Page +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + +RECENT CHANGES IN PAPI 4.2.1 +=============================================================================== +Bug Fixes: +---------- + * solaris substrate set_domain call was added. + * multiplexing math errors were fixed in perf_events.c + * more multiplexing read path errors were identified and fixed + * src/linux-timer.c: Fix compilation warning if you specify + --with-walltime=gettimeofday + * src/linux-timer.c: Fix the build on Linux systems using mmtimer + * src/linux-common.c: Update the linux MHz detection code to use + bogoMIPS when there is no MHz field available in /proc/cpuinfo. 
+ * src/: configure, configure.in: Fix a typo in the perfctr section; + it was causing a machine to default to perfctr when it had no + performance interface. ( a centos vm image with a 2.6.18 kernel) + Also checks that we actually have perfctr if we specify + --with-perfctr. + * Fix SMP ARM issues reported by Harald Servat. + Also, adds proper header dependency checking in the Rules files. + * src/ctests/api.c: Make the api test actually test PAPI_flops() as + it claims to do, rather than PAPI_flips(). + * src/papi_events.csv: Update the coreduo (not core2) events. Most + notably the FP events were wrong. + * src/papi_events.csv: Modify Intel Sandybridge PAPI_FP_OPS and + PAPI_FP_INS events to not count x87 fp instructions. + The problem is that the current predefines were made by adding 5 + events. With the NMI watchdog stealing an event and/or + hyperthreading reducing the number of available counters by half, + we just couldn't fit. + This now raises the potential for people using x87-compiled + floating point on Sandybridge and getting 0 FP_OPS. This is only + likely if running a 32-bit kernel and *not* compiling your code + with -msse. + A long-term solution might be trying to find a better set of FP + predefines for sandybridge. + * src/components/lmsensors/: Rules.lmsensors, configure.in: Fixed + configure error message and rules link error for shared object + linking. Thanks Will Cohen. + * src/components/lmsensors/linux-lmsensors.h: Added missing string + header + * src/components/net/tests/: net_values_by_code.c, + net_values_by_name.c: Apply patch suggested by Will Cohen to + check for system return values. + * src/Makefile.inc: Patch to cleanup dependencies, allowing for + parallel makes. Patch due to Will Cohen from redhat + * src/: papi_internal.c, threads.c: Fix two race + conditions that are probably the cause of the pthrtough + double-free error. + When freeing a thread, we remove and free all eventsets belonging + to that thread. 
This could race with the thread itself removing + the eventset, causing some ESI fields to be freed twice. + The problem was found by using the Valgrind 3.8 Helgrind tool + valgrind --tool=helgrind --free-is-write=yes ctests/pthrtough + In order for Helgrind to work, I had to temporarily modify PAPI + to use POSIX pthread mutexes for locking. + +Enhancements: +------------- + * general doxygen cleanups + * cleanup output of overflow_allcounters for clarity in debugging + * updates to most recent (as of Feb 1) libpfm4 + * remove now-opaque event codes from papi_native_avail + and papi_xml_event_info + * src/: papi_internal.c Update the component initialization code + so that it can handle a PAPI ERROR return gracefully. Previously + there was no way to indicate initialization failure besides just + setting num_native_events to 0. + +New Platforms: +------------- + * src/libpfm4/lib/: pfmlib_amd64_fam11h.c, events/amd64_events_fam11h.h + Support for AMD Family 11. + * src/libpfm4/lib/: pfmlib_amd64_fam12h.c, events/amd64_events_fam12h.h + Support for AMD Family 12. + +Deprecated Platforms: +--------------------- + * remove obsolete ACPI component + +New / Improved Components: +--------------- + * PAPI CUDA component updated for CUDA / CUPTI 4.1. + * SetCudaDevice() now works with the latest CUDA 4.1 version. + * Auto-detection of CUDA version for backward compatibility. + * PAPI_read() now accumulates event values. This fixes a bug + in earlier versions. + * extensive updates and cleanups to the example and coretemp components. + * significant updates of lustre, and mx components + * The linux net component underwent extensive updates and cleanups. + In particular, it now dynamically detects the network + interface names [1] and exports 16 counters for each interface + (see also src/components/net/{CHANGES,README}). 
+ +Open Issues: +------------- + * multiplex1.c was rewritten to expose a multiplexing bug in the perf_events + kernel (3.0.3) for MIPS + * src/components/lmsensors/: Latest versions of lmsensors are incompatible + with current lmsensors component. Interface needs to be updated for forward + compatibility. + * There's a problem with broken overflow on POWER6 linux systems. + We suspect a kernel problem, but don't know exactly which version(s) + We're running a 2.6.36 kernel where the problem has been identified. + It may be fixed in newer versions. + +Experimental: +------------- + * a new vmware component has been added to report a variety of soft events + when running as a guest in a VMWare environment + + +=============================================================================== +PAPI 4.2.0 RELEASE NOTES 26 Oct 2011 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP420.txt file for keywords of interest or go directly to the PAPI +cvs tree. + +GENERAL NOTES +=============================================================================== +This is a major release of PAPI-C. It adds a significant new feature in +user-defined events. It also marks a shift from external (and out-dated) +man pages to doxygen generated man pages. These pages can be found online at: +http://icl.cs.utk.edu/papi/docs/. They are also installable with "make install", +and you can build your own versions using doxygen. + +Visit the PAPI Reference pages for more information at: +http://icl.cs.utk.edu/projects/papi/wiki/Main_Page +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + +RECENT CHANGES IN PAPI 4.2.0 +=============================================================================== +Bug Fixes: +---------- + * Bug in CUDA v4.0 fixed. 
It caused a threaded application to hang when + parent called cuInit() before fork() and child also called cuInit(). + All fork ctests pass now if papi is configured with cuda component. + * If papi is configured with cuda component and running a threaded + application, we need to make sure that a thread doesn't free the same + memory location(s) more than once. Now all pthread ctests pass with cuda. + * ctests/thrspecific works now with the CUDA component + * Added CudaRemoveEvent functionality (broken in earlier CUDA RC versions). + * ctests/all_native_events works now for the default CUDA device. + * Add locking to papi_pfm4_events so that adding/looking up event names + doesn't have a race condition when multiple threads are doing it at once. + * Fixed a series of problems with Itanium builds. + * Set FD_CLOEXEC on the overflow signal handler fd. Otherwise if we exec() + with overflow enabled, the exec'd process will quickly die due to lack + of signal handler. This patch is needed due to a change in behavior in + Linux 3.0. Mark Krentel first noticed this problem. + * Recent Ubuntu versions use the ld flag --as-needed by default, which + breaks the PAPI configure step for the libdl check, as the + --as-needed flag enforces the rule that libraries must come after the + object files on the command line, not before. The fix for this is to put + the libdl check in LIBS instead of in LDFLAGS. + * Removed an fopen() without an fclose() on /proc/cpuinfo in papi.c. + This was being done to set the event masks properly for itanium and p4. + Since the platform code sets CPU vendor and family for us we don't + really have to open cpuinfo. This fix may also work on non-Linux + systems too. + * Update papi.h to properly detect if being built with a C99 compiler. 
+ +Enhancements: +------------- + * Default support for libpfm4 + * ./configure --with-libpfm3 to support legacy libpfm3 builds + * PERF_COUNT_SW software events are available under perf_events with + libpfm4 + * Nehalem/Westmere/SandyBridge Offcore event support is ready, + but support is not yet available in the Linux kernel. + * Add new utility to display PAPI error codes and description strings. + * Add API to access error descriptions: PAPI_descr_error( int error_code). + * Add support for handling multiattach properly. + * Cleanups to avoid gcc-4.6 warnings. + * Added ability to add tests to components. All component tests are + compiled with PAPI when typing 'make' and cleaned up with 'make clean' + or 'make clobber'. Also added tests to the example and cuda components. + * CUDA component is now thread-safe. Multiple CPU + threads can access the same CUDA context. Note, it's possible to + create a different CUDA context for each thread, but then we are + likely running into a limitation that only one context can be + profiled at a time. + * LOTS of code cleanup thanks to Will Cohen of RedHat. + * Refactored test code so no-cpu-counters can build with components + * Build all utilities with no-cpu-counters + * Modify run_tests.sh so that you can set the VALGRIND command + externally via environment variable without having to edit + run_tests.sh itself. Also adds Date and cpuinfo information to the + beginning of run_tests.sh results. This can help when run_tests.sh + output is passed around when debugging a problem. + * Parallel make now works. + +New Platforms: +------------- + * AMD Family 14h Bobcat (libpfm4 only) + * Intel SandyBridge (libpfm4 only) + * ARM Cortex-A8 and Cortex-A9 (libpfm4 only) + +Deprecated Platforms: +--------------------- + * although still technically supported, we are no longer actively testing + platforms based on the perfmon and perfctr patches. All linux kernels + > 2.6.32 provide internal support for perf_events. 
+ +New / Improved Components: +--------------- + * Add a number of 'native' events to the component info structure in + example component. + * Introduce a papi_component_avail utility; lists the components we were + built with, optionally with native/preset counts and version number. + +Open Issues: +------------- + * On newer Linux kernels (2.6.34+) the nmi_watchdog counter can steal one + of the counters, reducing by one the total available. + There's a bug in Linux where if you try to use the full number of + counters on such a system with a group leader, the sys_perf_open() + call will succeed only to fail at read time. + (instead of the proper error code at open time). + I do wish there were a way to notify the user more visibly, + because losing a counter (when you might only have 4 total to + begin with) is a big deal, and most Linux vendors are starting to + ship kernels with the nmi_watchdog enabled. + +Experimental: +------------- + * Preliminary support for MIPS 74K. + +=============================================================================== +PAPI 4.1.4 RELEASE NOTES 29 Aug 2011 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP414.txt file for keywords of interest or go directly to the PAPI +cvs tree. + +GENERAL NOTES +=============================================================================== +This is an internal release of PAPI-C targeted specifically for a Cray tools +release. It precedes a more general 4.2.0 release and incorporates changes and +updates since PAPI 4.1.3. + +Detailed changes will be documented in the 4.2.0 release. Meanwhile the list +below highlights the most significant changes since 4.1.3. 
+ + * Intel SandyBridge is now supported + * libpfm4 support has been updated + * internal doxygen documentation has been added for the entire API + * the man pages have been replaced with doxygen generated man pages + * CUDA component support has been improved + * an infrastructure for testing components only has been implemented + * various bugs have been addressed + +If you find issues with the 4.1.4 release, please bring them to our attention +ASAP, so they can be addressed prior to the general 4.2.0 release. + +Visit the PAPI Reference pages for more information at: +http://icl.cs.utk.edu/projects/papi/wiki/Main_Page +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + +=============================================================================== +PAPI 4.1.3 RELEASE NOTES 06 May 2011 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP413.txt file for keywords of interest or go directly to the PAPI +cvs tree. + + +GENERAL NOTES +=============================================================================== +This is a minor release of PAPI-C. It addresses a number of bugs and other +issues that have surfaced since the 4.1.2 release. + +Visit the PAPI Reference pages for more information at: +http://icl.cs.utk.edu/projects/papi/wiki/Main_Page +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + + +CHANGES IN PAPI 4.1.3 SINCE PAPI 4.1.2 +=============================================================================== +Bug Fixes: +---------- + * Fixed a linux-timer.c compile error that only shows up on PPC. + * Fixed ctests/all_native_events.c: + It failed when PAPI was built with several components because the + eventset failed to add events that were not from the first component. + * Redefined PAPI_FP_OPS for Nehalem; Now counts properly for 32-bit code. 
+ * Uncovered and resolved bugs in attaching to fork/exec'd code. + * Reworked eventset cleanup code to avoid an error situation in perf_events + where events were being removed from a terminated attached process. + * Fixed a configure bug preventing non-default bitmode builds of perf_event + versions of PAPI. + + +Enhancements: +------------- + * consolidated a bunch of duplicated linux code into "linux-xxx.c" files. + * Split WIN32 specific code out from linux common code. + * Renamed various perfctr functions to be _perfctr_ rather than _linux_. + * Added function pointer destroy_eventset to the PAPI vector table. + Needed for the CUDA Component. + * PAPI_assign_eventset_component now refuses to reassign components. + * Implemented inherit feature for perf_events. Thanks to Gary Mohr. + * Added a case to utils/cost.c to test for processing derived events. + * Added utils/multiplex_cost.c. + * Added --with-assumed-kernel to configure + +New Platforms: +------------- + * POWER7 / AIX support is now available (see Known Bugs below) + * Intel Westmere for perfctr. + * AMD Family 15h (Interlagos) and 10h RevE processors. + +Deprecated Platforms: +--------------------- + +New Components: +--------------- + * NVidia CUDA: still in pre-release until NVidia releases official CUDA4. + +Open Issues: +------------- + * Currently using PAPI_attach() to attach to multiple processes at the same + time will not work. On the perf_events substrate this may fail with a + PAPI_EISRUN error for the subsequent attaches. On other substrates the + additional attaches may work but results read back will be invalid. + This behavior will be fixed in a subsequent PAPI release. 
+ +Experimental: +------------- + * libpfm4 support is experimentally available but subject to change + +Known Bugs: +----------- + * POWER7 / AIX has some known bugs in this version: + * PAPI_FP_OPS overcounts by 50% in many cases + * multiplexing does not work correctly + * memory limits for threaded tests are causing problems + +=============================================================================== +PAPI 4.1.2 RELEASE NOTES 20 Jan 2011 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP412.txt file for keywords of interest or go directly to the PAPI +cvs tree. + + +GENERAL NOTES +=============================================================================== +This is a minor release of PAPI-C. It addresses a number of bugs and other +issues that have surfaced since the 4.1.1 release. + +Visit the PAPI Reference pages for more information at: +http://icl.cs.utk.edu/projects/papi/wiki/Main_Page +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + + +CHANGES IN PAPI 4.1.2 SINCE PAPI 4.1.1 +=============================================================================== +Bug Fixes: +---------- + * fixed a long-standing subtle bug identified by Richard Strong that caused + segfaults when multiplexing + * fixed several bugs that were causing test failures on POWER6/AIX + * properly detect Pentium M in configure + * fixed a problem with perf_events not properly handling overflows; first + identified by Mark Krentel + * fixed a problem where perfctr was silently adding uncountable events + * fixed a lock bug identified by Martin Schindewolf + * fixed forking order for {multi|zero}_attach.c + +Enhancements: +------------- + * updated support for freeBSD submitted by Harald Servat + * a plethora of code cleanups submitted by Robert Richter + * addressed compatibility issues in run_tests.sh to make it POSIX compliant 
+ * refreshed PAPI_Matlab support + * reimplemented SUBDBG print capabilities to address an issue first + identified by Maynard Johnson + * refreshed preset event definitions for Nehalem, including implementations + for PAPI_HW_INT; submitted by Michel Brown + * added 3 new error codes: PAPI_EATTR, PAPI_ECOUNT, and PAPI_ECOMBO. + These provide more detail on why an event add fails + * implement cpuid leaf4 metrics required by Intel Westmere + +New Platforms: +------------- + * Intel Westmere on perfctr and perf_events + +Deprecated Platforms: +--------------------- + +New Components: +--------------- + +Open Issues: +------------- + * PowerPC970 / linux is currently not supported by configure + * POWER7 / AIX support is in development + +Experimental: +------------- + * libpfm4 support is experimentally available and subject to change + +Known Bugs: +----------- + + +=============================================================================== +PAPI 4.1.1 RELEASE NOTES 01 Oct 2010 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP411.txt file for keywords of interest or go directly to the PAPI +cvs tree. + + +GENERAL NOTES +=============================================================================== +This is a minor release of PAPI-C. It addresses a number of bugs and other +issues that have surfaced since the 4.1.0 release. + +Visit the PAPI Reference pages for more information at: +http://icl.cs.utk.edu/projects/papi/wiki/Main_Page +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + + +CHANGES IN PAPI 4.1.1 SINCE PAPI 4.1.0 +=============================================================================== +Bug Fixes: +---------- + * resolved confusion in event table naming for Intel Core, Core2 and Core + Duo processors; cleaned up Nehalem and Westmere event definitions. 
+ * the --with-no-cpu-counters function and timing functions for AIX + were fixed. + * compiler flags for AIX Fortran were fixed. + * doc directory is now preserved to prevent 'make clean' from entering an + infinite loop. + * prevent passing -Wextra to libpfm build, which was throwing errors in + that build under certain circumstances. + * fix a subtle problem in multiplexing in which final counter values could + be under-reported. Changes the behavior of PAPI_stop when multiplexing. + See the ChangeLog for further details. + +Enhancements: +------------- + * now supports attach/detach for perf_events, thanks to Gary Mohr. + * update cache information for recent Intel x86 processors. + * F_SETOWN_EX was implemented in perf_events to guarantee that each + process receives its own interrupts. This fixes a bug in high + interrupt rates reported by Rice. + * perf_events checks permissions at configuration rather than at start. + Thanks to Gary Mohr. + * Pentium IV now supported under perf_events in kernel 2.6.35 + * add a WARNING for test cases that don't fail but have issues + that may need to be addressed. + * add OS kernel version to component info struct; useful for enabling / + disabling features in PAPI based on kernel version + * updated to terminal release (3.10) of libpfm. + * mmtimer support added for Altix / perf_events. + +New Platforms: +------------- + +Deprecated Platforms: +--------------------- + * support for perf_counters in the 2.6.31 Linux kernel has been deprecated + +New Components: +--------------- + * CoreTemp: exposes stuff in the /sys/class/hwmon directory + +Open Issues: +------------- + * support for cross-compiling perf-events on new Cray architectures is + still in development. 
+ +Experimental: +------------- + +Known Bugs: +----------- + + +=============================================================================== +PAPI 4.1.0 RELEASE NOTES 22 Jun 2010 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP410.txt file for keywords of interest or go directly to the PAPI +cvs tree. + + +GENERAL NOTES +=============================================================================== +This is the second release of Component PAPI, or PAPI-C. See other references +to PAPI-C, including the description in this file under PAPI 4.0.0 for details +on the differences between Classic PAPI and PAPI-C. + +This release includes significant code cleanup to eliminate compiler warnings +and type inconsistencies and to eliminate memory leaks. + +We also now support embedded doxygen comments for documentation. See the PAPI +website for more details. + +The component build environment has been restructured to make it easier to add +and build components without modifying baseline PAPI code. See +/src/components/README for details. + +Visit the PAPI Reference pages for more information at: +http://icl.cs.utk.edu/projects/papi/wiki/Main_Page +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + + +CHANGES IN PAPI 4.1.0 SINCE PAPI 4.0.0 +=============================================================================== +Bug Fixes: +---------- + * configure was mis-identifying some Pentium 4 processors + * the ctests/shlib test now tests against the shared math library, libm.so, + instead of libpapi.so, which works more predictably with library renaming. + * multiplexing was silently returning without setting multiplex TRUE in + cases where no event had been assigned to an eventset. An event must be + added to an eventset or PAPI_assign_eventset_component() must be called + before multiplexing can be enabled. 
This silent error has been removed. + * the perfmon and perf_events counter interfaces were not properly handling + event unit masks. This has been fixed. + * PAPI_name_to_code() was not exiting properly in certain circumstances, + failing on events where there should have been a match. This is corrected. + * a serious but insidious bug in the overflow logic was corrected. This bug + would only show up when PAPI_overflow was called between calls to + PAPI_add_event. Overflow would only be set for the last call of + PAPI_overflow. This has been corrected. + * IBM Blue Gene P systems were corrupting stack frames and crashing when the + papi_get_event_info call was executed. This has been fixed. + * The PAPI cycles event was not working for IBM Blue Gene P. This is fixed. + * papi_native_avail was exiting improperly when using the -e option. + This caused problems with batch execution systems (like Blue Gene P). + This has been fixed. + * a significant number of memory leaks have been purged. + * compiler warning flags have been tightened and a range of warnings have + been eliminated. + * removed implicit type conversions in prototypes. + + +Enhancements: +------------- + * the utils/papi_version utility now reports four digits where the last digit + matches the patch number. + * Pentium II and Athlon now use libpfm for event decoding like all other x86 + platforms. + * Doxygen documentation has been added to the API and components. + * Component compilation has been completely restructured. See + /papi/src/components/README for details. + * PAPI can now be compiled with a no-cpu-counters option. + + +New Platforms: +------------- + * the ultrasparc architecture has been resurrected + * freebsd support was migrated from PAPI 3.7 + * Intel Nehalem EX and Westmere support has been added + +Deprecated Platforms: +--------------------- + * IBM BG/L has been deprecated. 
+ * POWER 3 and POWER4 have been deprecated + +New Components: +--------------- + * Infiniband: Experimental + * Lustre: Experimental + * example: provides simple test case and template code. + +Open Issues: +------------- + +Experimental: +------------- + +Known Bugs: +----------- + +=============================================================================== +PAPI 4.0.0 RELEASE NOTES 19 Jan 2010 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP400.txt file for keywords of interest or go directly to the PAPI +cvs tree. + + +GENERAL NOTES +=============================================================================== +This is the inaugural release of Component PAPI, or PAPI-C. It represents a +significant architectural change from PAPI 3.7.x and earlier. As such, your +application must be recompiled and relinked to libpapi, the PAPI library for +this version to work. +PAPI-C is backward compatible with earlier versions of PAPI. All new library +features are supported through new APIs and all old APIs still work as expected. +Applications instrumented for PAPI should continue to work as expected with no +changes. +The major change in PAPI-C is the support of multiple components, or counting +domains, in addition to the traditional hardware counters found in the cpu. The +goal of this first release of PAPI-C is to provide a stable technology platform +within which to explore the development and implementation of additional +components. +Although a small number of components are provided with this release, the major +objective has been to guarantee that PAPI-C works at least as well as earlier +PAPI releases and on the same range of hardware platforms. We think we have +achieved that goal. 
+Visit the PAPI Reference pages for more information at: +http://icl.cs.utk.edu/projects/papi/wiki/Main_Page +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + + +CHANGES IN PAPI 4.0.0 SINCE PAPI 3.7.2 +=============================================================================== +Bug Fixes: +---------- + +Enhancements: +------------- +- The perf_events linux kernel interface is supported for POWER and x86 in + linux kernels 2.6.31 and above. +- PAPI info now includes information on multicore hierarchy. This is reported + in the header of many tests. + +New Platforms: +------------- +- IBM Blue Gene P has been fully integrated into the code base. It still + suffers the same quirks and limitations as the earlier pre-release. + +Open Issues: +------------- +- Components are invoked from the configure line; Requires PAPI source code + modifications to add new components. + +Experimental: +------------- + +Known Bugs: +----------- +- some tests involving overflow and profiling fail with linux perf_events +- multiple event overflow only works for last event enabled on (at least) + Intel Core2 and Itanium architectures. +- clock speeds on variable speed Intel systems can be misreported, leading to + incorrect calculations of mflops +- memory leaks may lead to (rare) seg faults on Pentium4 systems + + +=============================================================================== +PAPI 3.7.2 RELEASE NOTES 02 Dec 2009 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP372.txt file for keywords of interest or go directly to the PAPI +cvs tree. + + +GENERAL NOTES +=============================================================================== +This release is an incremental upgrade to PAPI 3.7.1. 
It fixes a mistake in +the 3.7.1 release by updating configure to better detect the proper counter +interface in linux kernels. Along the way, it also cleans up a few issues +found in the 3.7.1 release. + +As always, if you identify strange behavior or reproducible bugs, please +contact the PAPI team or visit the PAPI User Forum. + +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + + +CHANGES IN PAPI 3.7.2 SINCE PAPI 3.7.1 +=============================================================================== +Bug Fixes: +---------- +- fixed L3 cache size reporting for AMD Family 10h processors +- fixed std deviation underflow in sdsc2 and sdsc4 tests +- fixed bug in counter assignment for FreeBSD Atom implementation + +Enhancements: +------------- +- updated cache tables for Intel Nehalem i7 processors +- configure provides better autodetection of 2.6.31 or 2.6.32 kernels and + perf_counter interface (in most cases) +- configure provides better detection and autoselection of perfctr or perfmon + drivers for linux +- configure and sources have been modified to support perf_counter on + kernel 2.6.31 and perf_event on kernel 2.6.32 +- a papi.spec file has been added to simplify creation of rpms + +=============================================================================== +PAPI 3.7.1 RELEASE NOTES 13 Nov 2009 +=============================================================================== + +This file documents changes in recent PAPI releases in inverse chronological +order. + +For details on installing PAPI on your machine, consult the INSTALL.txt file +in this directory. + +For specific and detailed information on changes made in this release, grep +the ChangeLogP371.txt file for keywords of interest or go directly to the PAPI +cvs tree. + + +GENERAL NOTES +=============================================================================== +This release is an incremental upgrade to PAPI 3.7.0. 
It cleans up several +issues found in the 3.7.0 release and provides better support for the +perf_counter interface introduced in Linux kernel 2.6.31. + +As always, if you identify strange behavior or reproducible bugs, please +contact the PAPI team or visit the PAPI User Forum. + +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + +NOTE: If you are looking for the man pages and other user documentation, look +online. We decided we could provide better and more timely support by +maintaining just the online documentation. Let us know if you think this is a +bad decision. + + +CHANGES IN PAPI 3.7.1 SINCE PAPI 3.7.0 +=============================================================================== +Bug Fixes: +---------- +- fixed long standing subtle multiplexing bug in which TIDs and PIDs would get + confused. TIDs would then get lost leading to long term instability. +- fixed unit mask handling in perf_counters +- fixed uninitialized string issue in /proc/cpuinfo parsing +- fixed event reporting errors for various Opteron Family 10h models + +Enhancements: +------------- +- FreeBSD support for Intel i7 +- cleaned up libpapi.so naming for RedHat rpms +- cleaned up various other issues for rpms per RedHat +- autodetection of 2.6.31 perf_counter interface (in most cases) +- enhanced packaging options in configure to support building either static or + shared libraries independently + +New Platforms: +------------- +- Support for the perf_counters (PCL: Performance Counters for Linux) interface + for Linux kernel 2.6.31 and later has been more completely tested on a + broader range of platforms, including Opteron, Core2, i7, and POWER. + It successfully performs basic counting operations and handles many multiplex, + overflow and profiling situations. It is still not as extensively tested as the + perfmon or perfctr interfaces, but is ready for work. Caveat Emptor. 
+ +Major Issues: +------------- +- see 3.7.0 + +Experimental: +------------- + +Known Bugs: +----------- +- see 3.7.0 + +=============================================================================== +PAPI 3.7.0 RELEASE NOTES 08 Sep 2009 +=============================================================================== +For specific and detailed information on changes made in this release, grep +the ChangeLogP370.txt file for keywords of interest or go directly to the PAPI +cvs tree. + + +GENERAL NOTES +=============================================================================== +This release is a recommended upgrade to PAPI 3.6.x. It addresses a number of +open issues and introduces support for several new platforms, including Intel +Nehalem (Core i7), Atom, POWER7 and Niagara2. + +If you are currently using PAPI 3.6.x or earlier, it is recommended that you +upgrade to this version. + +As always, if you identify strange behavior or reproducible bugs, please +contact the PAPI team or the PAPI User Forum. + +And visit the PAPI website for the latest updates: +http://icl.cs.utk.edu/papi/ + +NOTE: If you are looking for the man pages and other user documentation, look +online. We decided we could provide better and more timely support by +maintaining just the online documentation. Let us know if you think this is a +bad decision. 
+ + +CHANGES IN PAPI 3.7.0 SINCE PAPI 3.6.2 +=============================================================================== +Bug Fixes: +---------- +- many minor bugs fixed in tests and in specific cpu components +- fixed support for Intel CoreDuo (not Core2) broken in PAPI 3.6.x +- fixed library init failure on AIX Power6 when executable names > 32 char long +- fixed avail.F construct that was crashing some versions of gfortran + +Enhancements: +------------- +- A new utility has been added: papi_version +- Added 4 new PRESET events to better handle SIMD instructions on Intel cpus: + PAPI_DP_OPS - counts double precision scalar and vector FP operations + PAPI_SP_OPS - counts single precision scalar and vector FP operations + PAPI_VEC_DP - counts double precision vector instructions + PAPI_VEC_SP - counts single precision vector instructions +- FreeBSD support upgrade and new support for Atom and Intel Core2 + +New Platforms: +------------- +- Intel Core i7 (Nehalem) support for 7 core counters; no support for + Uncore counters +- Intel Atom +- AMD Opteron Barcelona, Shanghai, Istanbul event table support +- POWER7 support for Linux thanks to IBM +- Sun Niagara2 support thanks to Aachen University, Germany +- Resurrected support for PAPI on Windows; now supports Intel Core2 and Core i7 + +Major Issues: +------------- +- PAPI for Windows does not support 64-bit versions due to compiler issues. + +Experimental: +------------- +- Support for the perf_counters (PCL: Performance Counters for Linux) interface + is available as a technology pre-release for Linux kernel 2.6.31 and later. + This has been tested on IBM POWER and Intel Core2 and successfully performs + basic counting operations. It has not been stress tested. Caveat Emptor. + +Known Bugs: +----------- +- clock speeds are occasionally not reported correctly for systems with + SpeedStep technology. +- Intel Atom crashes on a small number of standard tests. 
+ +=============================================================================== +PAPI 3.6.2 RELEASE NOTES 03 Oct 2008 +NOTE: For releases prior to PAPI 3.7.0, please reference the tarball for an earlier +release, or use the on-line cvs viewer at: +http://icl.cs.utk.edu/viewcvs/viewcvs.cgi/PAPI/papi/ +to see earlier versions of this file. +=============================================================================== diff --git a/doc/Doxyfile-common b/doc/Doxyfile-common new file mode 100644 index 0000000..9692ea5 --- /dev/null +++ b/doc/Doxyfile-common @@ -0,0 +1,1721 @@ +# Doxyfile 1.7.4 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" "). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = PAPI + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. 
+ +PROJECT_NUMBER = 5.6.0.0 + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer +# a quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify an logo or icon that is +# included in the documentation. The maximum height of the logo should not +# exceed 55 pixels and the maximum width should not exceed 200 pixels. +# Doxygen will copy the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = ./ + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. 
+# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = NO + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. 
+ +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = NO + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. 
If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. 
+ +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) 
but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. 
+ +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = NO + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = YES + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). 
The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. 
+ +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. 
If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = NO + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = NO + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = NO + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= NO + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. 
Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and macros in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = NO + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = NO + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = NO + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way.
To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = NO + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text.
Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = doxyerror + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = *.c *.h + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used.
+ +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should +# be excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = */Matlab/* */CVS/* */libpfm-2.x/* */libpfm-3.x/* \ + */libpfm-3.y/* */libpfm4/* */perfctr-1.6.1/* */perfctr-2.3.12/* \ + */perfctr-2.4.1/* */perfctr-2.4.5/* */perfctr-2.4.x/* \ + */perfctr-2.6.x/* */perfctr-2.6.x.old/* */perfctr-2.7.x/* \ + */linux-bgp.c + + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included.
+ +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty or if +# none of the patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) +# and it is also possible to disable source filtering for a specific pattern +# using *.ext= (so without naming a filter). 
This option only has effect when +# FILTER_SOURCE_FILES is enabled. + +FILTER_SOURCE_PATTERNS = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). 
You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = NO + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). 
If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. 
+ +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the stylesheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). 
+ +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. 
+ +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. 
For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters +# (Qt Help Project / Custom Filters). + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. For more information please see +# http://doc.trolltech.com/qthelpproject.html#filter-attributes +# (Qt Help Project / Filter Attributes). + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. 
+ +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. 
+ +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax +# (see http://www.mathjax.org) which uses client side Javascript for the +# rendering instead of using prerendered bitmaps. Use this if you do not +# have LaTeX installed or if you want the formulas to look prettier in the HTML +# output. When enabled you also need to install MathJax separately and +# configure the path to it using the MATHJAX_RELPATH option. + +USE_MATHJAX = NO + +# When MathJax is enabled you need to specify the location relative to the +# HTML output directory using the MATHJAX_RELPATH option. The destination +# directory should contain the MathJax.js script. For instance, if the mathjax +# directory is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the +# mathjax.org site, so you can quickly see the result without installing +# MathJax, but it is strongly recommended to install a local copy of MathJax +# before deployment. + +MATHJAX_RELPATH = http://www.mathjax.org/mathjax + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. 
+ +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantages are that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. 
Possible values are: a4, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! + +LATEX_FOOTER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. 
+ +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. 
+ +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. 
+ +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. 
+# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = YES + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. 
+ +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. 
+# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. 
+ +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = YES + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will write a font called Helvetica to the output +# directory and reference it in all dot files that doxygen generates. +# When you want a differently looking font you can specify the font name +# using DOT_FONTNAME. You need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. 
Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. 
+ +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. 
Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = NO + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. 
+ +DOT_CLEANUP = YES diff --git a/doc/Doxyfile-html b/doc/Doxyfile-html new file mode 100644 index 0000000..dfdd6fb --- /dev/null +++ b/doc/Doxyfile-html @@ -0,0 +1,460 @@ +# Doxyfile 1.6.2 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") +@INCLUDE = Doxyfile-common + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = YES + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = YES + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. 
+ +PREDEFINED = DEBUG + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. + +EXPAND_AS_DEFINED = PAPIERROR LEAKDBG MEMDBG MPXDBG OVFDBG PAPIDEBUG SUBDBG PRFDBG INTDBG THRDBG APIDBG + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = YES + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = YES + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. 
If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). 
The default is YES. + +SHOW_NAMESPACES = YES + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = ../src ../src/components/README + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = YES + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed.
+ +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! 
+ +HTML_STYLESHEET = + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = YES + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. 
+ +TREEVIEW_WIDTH = 250 + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be implemented using a PHP enabled web server instead of at the web client using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server based approach is that it scales better to large projects and allows full text search. The disadvantage is that it is more difficult to set up +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback.
It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. 
Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = YES + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = YES + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = NO + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif. +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box.
Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. 
+ +DOT_CLEANUP = YES diff --git a/doc/Doxyfile-man1 b/doc/Doxyfile-man1 new file mode 100644 index 0000000..e05a338 --- /dev/null +++ b/doc/Doxyfile-man1 @@ -0,0 +1,64 @@ +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for PAPI utilities man-pages +# The following overrides default values in Doxyfile-common +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +@INCLUDE = Doxyfile-common +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = ../src/utils/ + +FILE_PATTERNS = *.c + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included.
+ +EXTRACT_LOCAL_CLASSES = NO + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = YES + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .1 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO diff --git a/doc/Doxyfile-man3 b/doc/Doxyfile-man3 new file mode 100644 index 0000000..8ea0178 --- /dev/null +++ b/doc/Doxyfile-man3 @@ -0,0 +1,59 @@ +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for PAPI utilities man-pages +# The following overrides default values in Doxyfile-common +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] 
+# Values that contain spaces should be placed between quotes (" ") + +@INCLUDE = Doxyfile-common +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = ../src/papi.h ../src/papi.c ../src/papi_hl.c \ + ../src/papi_fwrappers.c + +FILE_PATTERNS = *.c *.h + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = YES + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. 
+ +MAN_LINKS = NO diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..3bb7a44 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,27 @@ +.PHONY: clean clobber distclean install force_me all + +all: man + @echo "Built PAPI user documentation" + +html: force_me + doxygen Doxyfile-html + +man: man/man1 man/man3 + +man/man3: ../src/papi.h ../src/papi.c ../src/papi_hl.c ../src/papi_fwrappers.c + doxygen Doxyfile-man3 + +man/man1: ../src/utils/papi_avail.c ../src/utils/papi_clockres.c ../src/utils/papi_command_line.c ../src/utils/papi_component_avail.c ../src/utils/papi_cost.c ../src/utils/papi_decode.c ../src/utils/papi_error_codes.c ../src/utils/papi_event_chooser.c ../src/utils/papi_xml_event_info.c ../src/utils/papi_mem_info.c ../src/utils/papi_multiplex_cost.c ../src/utils/papi_native_avail.c ../src/utils/papi_version.c + doxygen Doxyfile-man1 + +clean: + rm -rf man html doxyerror + +distclean clobber: clean + +install: man + -rm -f man/man3/HighLevelInfo.3 + -rm -f man/man3/papi_data_structures.3 + -rm -r ../man/man1/*.1 ../man/man3/*.3 + -cp -R man/man1/*.1 ../man/man1 + -cp -R man/man3/*.3 ../man/man3 diff --git a/doc/doxygen_procedure.txt b/doc/doxygen_procedure.txt new file mode 100644 index 0000000..8caf15b --- /dev/null +++ b/doc/doxygen_procedure.txt @@ -0,0 +1,80 @@ +******************************************************************************** + + Check the version of doxygen you're using, there is a bug + with older versions ( < 1.7.4 ) + +******************************************************************************** + +USAGE +======================= +To invoke doxygen, + cd $(papi_dir)/doc + make (alternatively doxygen Doxyfile-{html,man1,man3}) + +This command produces documentation for the PAPI user-exposed api and data-structures. + +Several different configuration files are present: +Doxyfile-html - generates documentation for everything under src. 
This will +take a long time to run, and generates north of 600 megs of files. Requires +the program dot, for dependency graphs. +Doxyfile-man1 - generates man-pages for the utilities. +Doxyfile-man3 - generates man-pages for the API, see papi.h + + +Commenting the Code +======================= +To get doxygen's attention, in general, use a special comment block + +/** */ +thing_to_be_commented + +Doxygen responds to several special commands, denoted by @command +(if you're feeling texy, \command) + +As an artifact of how doxygen started life, we call our api functions 'classes' +to get doxygen to generate man-pages for the function. + +/** @class MY_FUNCTION + @brief gives a brief overview of what the function does, + limited to 1 line or 1 sentence if you need the space. + @param arg1 describes a parameter to the function + + @return describes the functions return value + + @retval allows you to enumerate return values + + Down here we have more detailed information about the function + Which can span many lines + + And paragraphs (feeling texy now?) + + @par Examples: + @code + This is the way to get examples to format nicely + code goes here.... + @endcode + + @bug + Here you get a section of freeform text to describe bugs you encounter. +*/ + +@internal keeps comment blocks marked as such out of the documentation +(unless the INTERNAL_DOCS flag is set in the config file) + + +In several places /**< */ appears, this means that the comment +pertains to the previous element. + +int foo; /**< This comment is about foo */ + + +TODO +======================= +Doxygen provides options for [ab]using the preprocessor, +Do we need to look into this? Probably not more than we already do -J + +Document the ctests? + +See +http://www.stack.nl/~dimitri/doxygen/docblocks.html +for more detail on doxygen. 
diff --git a/man/Makefile b/man/Makefile new file mode 100644 index 0000000..26b5483 --- /dev/null +++ b/man/Makefile @@ -0,0 +1,13 @@ +clean: + rm -f *~ core man3/*~ + +install: + @echo "Man pages (MANDIR) being installed in: \"$(MANDIR)\""; + -mkdir -p $(MANDIR)/man3 + -chmod go+rx $(MANDIR)/man3 + -cp man3/PAPI*.3 $(MANDIR)/man3 + -chmod go+r $(MANDIR)/man3/PAPI*.3 + -mkdir -p $(MANDIR)/man1 + -chmod go+rx $(MANDIR)/man1 + -cp man1/*.1 $(MANDIR)/man1 + -chmod go+r $(MANDIR)/man1/*.1 diff --git a/man/README b/man/README new file mode 100644 index 0000000..68f31ef --- /dev/null +++ b/man/README @@ -0,0 +1,23 @@ +/* +* File: README +* CVS: $Id$ +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ + +This directory contains: + +Makefile Installs man pages. +man1/ Man pages for the PAPI utility applications. +man3/ Man pages for the PAPI API functions. + +Makefile Usage: + + make + make install DESTDIR= + +Beginning with PAPI 4.2.0, man pages are generated from the PAPI sources +using doxygen scripts found in the papi/doc directory. +They are updated prior to each release. \ No newline at end of file diff --git a/man/man1/PAPI_derived_event_files.1 b/man/man1/PAPI_derived_event_files.1 new file mode 100644 index 0000000..b443938 --- /dev/null +++ b/man/man1/PAPI_derived_event_files.1 @@ -0,0 +1,219 @@ +.TH "PAPI_derived_event_files" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_derived_event_files \- Describes derived event definition file syntax\&. +.SH "Derived Events" +.PP +PAPI provides the ability to define events whose value will be derived from multiple native events\&. The list of native events to be used in a derived event and a formula which describes how to use them is provided in an event definition file\&. The PAPI team provides an event definition file which describes all of the supported PAPI preset events\&. 
PAPI also allows a user to provide an event definition file that describes a set of user defined events which can extend the events PAPI normally supports\&. +.PP +This page documents the syntax of the commands which can appear in an event definition file\&. +.PP + +.br + +.SS "General Rules:" +.PD 0 +.IP "\(bu" 2 +Blank lines are ignored\&. +.IP "\(bu" 2 +Lines that begin with '#' are comments (they are also ignored)\&. +.IP "\(bu" 2 +Names shown inside < > below represent values that must be provided by the user\&. +.IP "\(bu" 2 +If a user provided value contains white space, it must be protected with quotes\&. +.PP +.PP + +.br + +.SS "Commands:" +\fBCPU,\fP +.RS 4 +Specifies a PMU name which controls if the PRESET and EVENT commands that follow this line should be processed\&. Multiple CPU commands can be entered without PRESET or EVENT commands between them to provide a list of PMU names to which the derived events that follow will apply\&. When a PMU name provided in the list matches a PMU name known to the running system, the events which follow will be created\&. If none of the PMU names provided in the list match a PMU name on the running system, the events which follow will be ignored\&. When a new CPU command follows either a PRESET or EVENT command, the PMU list is rebuilt\&. +.br + +.br +.RE +.PP +\fBPRESET,,,,LDESC,"",SDESC,"",NOTE,""\fP +.RS 4 +Declare a PAPI preset derived event\&. +.br + +.br +.RE +.PP +\fBEVENT,,,,LDESC,"",SDESC,"",NOTE,""\fP +.RS 4 +Declare a user defined derived event\&. +.br + +.br +.RE +.PP +\fBWhere:\fP +.RS 4 + +.RE +.PP +\fBpmuName:\fP +.RS 4 +The PMU which the following events should apply to\&. A list of PMU names supported by your system can be obtained by running papi_component_avail on your system\&. +.br + +.RE +.PP +\fBeventName:\fP +.RS 4 +Specifies the name used to identify this derived event\&. This name should be unique within the events on your system\&. 
+.br + +.RE +.PP +\fBderivedType:\fP +.RS 4 +Specifies the kind of derived event being defined (see 'Derived Types' below)\&. +.br + +.RE +.PP +\fBeventAttr:\fP +.RS 4 +Specifies a formula and a list of base events that are used to compute the derived events value\&. The syntax of this field depends on the 'derivedType' specified above (see 'Derived Types' below)\&. +.br + +.RE +.PP +\fBlongDesc:\fP +.RS 4 +Provides the long description of the event\&. +.br + +.RE +.PP +\fBshortDesc:\fP +.RS 4 +Provides the short description of the event\&. +.br + +.RE +.PP +\fBnote:\fP +.RS 4 +Provides an event note\&. +.br + +.RE +.PP +\fBbaseEvent (used below):\fP +.RS 4 +Identifies an event on which this derived event is based\&. This may be a native event (possibly with event masks), an already known preset event, or an already known user event\&. +.br +.RE +.PP + +.br + +.SS "Notes:" +The PRESET command has traditionally been used in the PAPI provided preset definition file\&. The EVENT command is intended to be used in user defined event definition files\&. The code treats them the same so they are interchangeable and they can both be used in either event definition file\&. +.br +.PP + +.br + +.SS "Derived Types:" +This describes values allowed in the 'derivedType' field of the PRESET and EVENT commands\&. It also shows the syntax of the 'eventAttr' field for each derived type supported by these commands\&. All of the derived events provide a list of one or more events which the derived event is based on (baseEvent)\&. Some derived events provide a formula that specifies how to compute the derived events value using the baseEvents in the list\&. The following derived types are supported, the syntax of the 'eventAttr' parameter for each derived event type is shown in parentheses\&. +.br + +.br +.PP +\fBNOT_DERIVED ():\fP +.RS 4 +This derived type defines an alias for the existing event 'baseEvent'\&. 
+.br + +.RE +.PP +\fBDERIVED_ADD (,):\fP +.RS 4 +This derived type defines a new event that will be the sum of two other events\&. It has a value of 'baseEvent1' plus 'baseEvent2'\&. +.br + +.RE +.PP +\fBDERIVED_PS (PAPI_TOT_CYC,):\fP +.RS 4 +This derived type defines a new event that will report the number of 'baseEvent1' events which occurred per second\&. It has a value of ((('baseEvent1' * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC)\&. The user must provide PAPI_TOT_CYC as the first event of two events in the event list for this to work correctly\&. +.br + +.RE +.PP +\fBDERIVED_ADD_PS (PAPI_TOT_CYC,,):\fP +.RS 4 +This derived type defines a new event that will add together two event counters and then report the number which occurred per second\&. It has a value of (((('baseEvent1' + baseEvent2) * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC)\&. The user must provide PAPI_TOT_CYC as the first event of three events in the event list for this to work correctly\&. +.br + +.RE +.PP +\fBDERIVED_CMPD (,,):\fP +.RS 4 +This derived type defines a new event that will be the difference between two other events\&. It has a value of 'baseEvent1' minus 'baseEvent2'\&. +.br + +.RE +.PP +\fBDERIVED_POSTFIX (,,, \&.\&.\&. ,):\fP +.RS 4 +This derived type defines a new event whose value is computed from several native events using a postfix (reverse polish notation) formula\&. Its value is the result of processing the postfix formula\&. The 'pfFormula' is of the form 'N0|N1|N2|5|*|+|-|' where the '|' acts as a token separator and the tokens N0, N1, and N2 are place holders that represent baseEvent0, baseEvent1, and baseEvent2 respectively\&. +.br + +.RE +.PP +\fBDERIVED_INFIX (,,, \&.\&.\&. ,):\fP +.RS 4 +This derived type defines a new event whose value is computed from several native events using an infix (algebraic notation) formula\&. Its value is the result of processing the infix formula\&. 
The 'ifFormula' is of the form 'N0-(N1+(N2*5))' where the tokens N0, N1, and N2 are place holders that represent baseEvent0, baseEvent1, and baseEvent2 respectively\&. +.br +.RE +.PP + +.br + +.SS "Example:" +In the following example, the events PAPI_SP_OPS, USER_SP_OPS, and ALIAS_SP_OPS will all measure the same events and return the same value\&. They just demonstrate different ways to use the PRESET and EVENT event definition commands\&. +.br + +.br +.PP +.PD 0 +.IP "\(bu" 2 +# The following lines define pmu names that all share the following events +.IP "\(bu" 2 +CPU nhm +.IP "\(bu" 2 +CPU nhm-ex +.IP "\(bu" 2 +# Events which should be defined for either of the above pmu types +.IP "\(bu" 2 +PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES +.IP "\(bu" 2 +PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES +.IP "\(bu" 2 +PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|3|*|+|,FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,'Using a postfix formula' +.IP "\(bu" 2 +EVENT,USER_SP_OPS,DERIVED_INFIX,N0+(N1*3),FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,'Using the same formula in infix format' +.IP "\(bu" 2 +EVENT,ALIAS_SP_OPS,NOT_DERIVED,PAPI_SP_OPS,LDESC,'Alias for preset event PAPI_SP_OPS' +.IP "\(bu" 2 +# End of event definitions for above pmu names and start of a section for a new pmu name\&. +.IP "\(bu" 2 +CPU snb +.PP + diff --git a/man/man1/papi_avail.1 b/man/man1/papi_avail.1 new file mode 100644 index 0000000..e56a6ef --- /dev/null +++ b/man/man1/papi_avail.1 @@ -0,0 +1,73 @@ +.TH "papi_avail" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_avail \- papi_avail utility\&. +.PP +file papi_avail\&.c +.SH "Name" +.PP +papi_avail - provides availability and detailed information for PAPI preset and user defined events\&. 
+.SH "Synopsis" +.PP +papi_avail [-adht] [-e event] +.SH "Description" +.PP +papi_avail is a PAPI utility program that reports information about the current PAPI installation and supported preset and user defined events\&. +.SH "Options" +.PP +.PD 0 +.IP "\(bu" 2 +-h Display help information about this utility\&. +.IP "\(bu" 2 +-a Display only the available PAPI events\&. +.IP "\(bu" 2 +-c Display only the available PAPI events after a check\&. +.IP "\(bu" 2 +-d Display PAPI event information in a more detailed format\&. +.IP "\(bu" 2 +-e < event > Display detailed event information for the named event\&. This event can be a preset event, a user defined event, or a native event\&. If the event is a preset or a user defined event the output shows a list of native events the event is based on and the formula that is used to compute the events final value\&. +.br + +.PP +.PP +Event filtering options +.PD 0 + +.IP "\(bu" 2 +--br Display branch related PAPI preset events +.IP "\(bu" 2 +--cache Display cache related PAPI preset events +.IP "\(bu" 2 +--cnd Display conditional PAPI preset events +.IP "\(bu" 2 +--fp Display Floating Point related PAPI preset events +.IP "\(bu" 2 +--ins Display instruction related PAPI preset events +.IP "\(bu" 2 +--idl Display Stalled or Idle PAPI preset events +.IP "\(bu" 2 +--l1 Display level 1 cache related PAPI preset events +.IP "\(bu" 2 +--l2 Display level 2 cache related PAPI preset events +.IP "\(bu" 2 +--l3 Display level 3 cache related PAPI preset events +.IP "\(bu" 2 +--mem Display memory related PAPI preset events +.IP "\(bu" 2 +--msc Display miscellaneous PAPI preset events +.IP "\(bu" 2 +--tlb Display Translation Lookaside Buffer PAPI preset events +.PP +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. 
+.br + +.PP +\fBSee Also:\fP +.RS 4 +PAPI_derived_event_files +.RE +.PP + diff --git a/man/man1/papi_clockres.1 b/man/man1/papi_clockres.1 new file mode 100644 index 0000000..f70d43a --- /dev/null +++ b/man/man1/papi_clockres.1 @@ -0,0 +1,21 @@ +.TH "papi_clockres" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_clockres \- The papi_clockres utility\&. +.PP +file clockres\&.c +.SH "Name" +.PP +papi_clockres - measures and reports clock latency and resolution for PAPI timers\&. +.SH "Synopsis" +.PP +.SH "Description" +.PP +papi_clockres is a PAPI utility program that measures and reports the latency and resolution of the four PAPI timer functions: PAPI_get_real_cyc(), PAPI_get_virt_cyc(), PAPI_get_real_usec() and PAPI_get_virt_usec()\&. +.SH "Options" +.PP +This utility has no command line options\&. +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. diff --git a/man/man1/papi_command_line.1 b/man/man1/papi_command_line.1 new file mode 100644 index 0000000..58ee416 --- /dev/null +++ b/man/man1/papi_command_line.1 @@ -0,0 +1,24 @@ +.TH "papi_command_line" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_command_line \- executes PAPI preset or native events from the command line\&. +.SH "Synopsis" +.PP +papi_command_line < event > < event > \&.\&.\&. +.SH "Description" +.PP +papi_command_line is a PAPI utility program that adds named events from the command line to a PAPI EventSet and does some work with that EventSet\&. This serves as a handy way to see if events can be counted together, and if they give reasonable results for known work\&. +.SH "Options" +.PP +.PD 0 +.IP "\(bu" 2 +-u Display output values as unsigned integers +.IP "\(bu" 2 +-x Display output values as hexadecimal +.IP "\(bu" 2 +-h Display help information about this utility\&. 
+.PP +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. diff --git a/man/man1/papi_component_avail.1 b/man/man1/papi_component_avail.1 new file mode 100644 index 0000000..6204083 --- /dev/null +++ b/man/man1/papi_component_avail.1 @@ -0,0 +1,26 @@ +.TH "papi_component_avail" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_component_avail \- papi_component_avail utility\&. +.PP +file papi_component_avail\&.c +.SH "NAME" +.PP +papi_component_avail - provides detailed information about the components PAPI was built with\&. +.SH "Synopsis" +.PP +.SH "Description" +.PP +papi_component_avail is a PAPI utility program that reports information about the components papi was built with\&. +.SH "Options" +.PP +.PD 0 +.IP "\(bu" 2 +-h help message +.IP "\(bu" 2 +-d provide detailed information about each component\&. +.PP +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. diff --git a/man/man1/papi_cost.1 b/man/man1/papi_cost.1 new file mode 100644 index 0000000..1aad87e --- /dev/null +++ b/man/man1/papi_cost.1 @@ -0,0 +1,33 @@ +.TH "papi_cost" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_cost \- papi_cost utility\&. +.PP +file papi_cost\&.c +.SH "NAME" +.PP +papi_cost - computes execution time costs for basic PAPI operations\&. +.SH "Synopsis" +.PP +papi_cost [-dhs] [-b bins] [-t threshold] +.SH "Description" +.PP +papi_cost is a PAPI utility program that computes the min / max / mean / std\&. deviation of execution times for PAPI start/stop pairs and for PAPI reads\&. This information provides the basic operating cost to a user's program for collecting hardware counter data\&. Command line options control display capabilities\&. 
+.SH "Options" +.PP +.PD 0 +.IP "\(bu" 2 +-b < bins > Define the number of bins into which the results are partitioned for display\&. The default is 100\&. +.IP "\(bu" 2 +-d Display a graphical distribution of costs in a vertical histogram\&. +.IP "\(bu" 2 +-h Display help information about this utility\&. +.IP "\(bu" 2 +-s Show the number of iterations in each of the first 10 standard deviations above the mean\&. +.IP "\(bu" 2 +-t < threshold > Set the threshold for the number of iterations to measure costs\&. The default is 100,000\&. +.PP +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. diff --git a/man/man1/papi_decode.1 b/man/man1/papi_decode.1 new file mode 100644 index 0000000..9694fff --- /dev/null +++ b/man/man1/papi_decode.1 @@ -0,0 +1,37 @@ +.TH "papi_decode" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_decode \- papi_decode utility\&. +.PP +file papi_decode\&.c +.SH "NAME" +.PP +papi_decode - provides availability and detail information for PAPI preset events\&. +.SH "Synopsis" +.PP +papi_decode [-ah] +.SH "Description" +.PP +papi_decode is a PAPI utility program that converts the PAPI presets for the existing library into a comma separated value format that can then be viewed or modified in spreadsheet applications or text editors, and can be supplied to PAPI_encode_events (3) as a way of adding or modifying event definitions for specialized applications\&. The format for the csv output consists of a line of field names, followed by a blank line, followed by one line of comma separated values for each event contained in the preset table\&. A portion of this output (for Pentium 4) is shown below: +.PP +.nf +* name,derived,postfix,short_descr,long_descr,note,[native,\&.\&.\&.] 
+* PAPI_L1_ICM,NOT_DERIVED,,"L1I cache misses","Level 1 instruction cache misses",,BPU_fetch_request_TCMISS +* PAPI_L2_TCM,NOT_DERIVED,,"L2 cache misses","Level 2 cache misses",,BSQ_cache_reference_RD_2ndL_MISS_WR_2ndL_MISS +* PAPI_TLB_DM,NOT_DERIVED,,"Data TLB misses","Data translation lookaside buffer misses",,page_walk_type_DTMISS +* + +.fi +.PP +.SH "Options" +.PP +.PD 0 +.IP "\(bu" 2 +-a Convert only the available PAPI preset events\&. +.IP "\(bu" 2 +-h Display help information about this utility\&. +.PP +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. diff --git a/man/man1/papi_error_codes.1 b/man/man1/papi_error_codes.1 new file mode 100644 index 0000000..05e1129 --- /dev/null +++ b/man/man1/papi_error_codes.1 @@ -0,0 +1,22 @@ +.TH "papi_error_codes" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_error_codes \- papi_error_codes utility\&. +.PP +file error_codes\&.c +.SH "NAME" +.PP +papi_error_codes - lists all currently defined PAPI error codes\&. +.SH "Synopsis" +.PP +papi_error_codes +.SH "Description" +.PP +papi_error_codes is a PAPI utility program that displays all defined error codes from papi\&.h and their error strings from papi_data\&.h\&. If an error string is not defined, a warning is generated\&. This can help trap newly defined error codes for which error strings are not yet defined\&. +.SH "Options" +.PP +This utility has no command line options\&. +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. 
diff --git a/man/man1/papi_event_chooser.1 b/man/man1/papi_event_chooser.1 new file mode 100644 index 0000000..b481bac --- /dev/null +++ b/man/man1/papi_event_chooser.1 @@ -0,0 +1,22 @@ +.TH "papi_event_chooser" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_event_chooser \- papi_event_chooser utility\&. +.PP +file event_chooser\&.c +.SH "NAME" +.PP +papi_event_chooser - given a list of named events, lists other events that can be counted with them\&. +.SH "Synopsis" +.PP +papi_event_chooser NATIVE | PRESET < event > < event > \&.\&.\&. +.SH "Description" +.PP +papi_event_chooser is a PAPI utility program that reports information about the current PAPI installation and supported preset events\&. +.SH "Options" +.PP +This utility has no command line options\&. +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. diff --git a/man/man1/papi_hybrid_native_avail.1 b/man/man1/papi_hybrid_native_avail.1 new file mode 100644 index 0000000..fbf1b0e --- /dev/null +++ b/man/man1/papi_hybrid_native_avail.1 @@ -0,0 +1,55 @@ +.TH "papi_hybrid_native_avail" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_hybrid_native_avail \- papi_hybrid_native_avail utility\&. +.PP +file hybrid_native_avail\&.c +.SH "NAME" +.PP +papi_hybrid_native_avail - provides detailed information for PAPI native events\&. +.SH "Synopsis" +.PP +.SH "Description" +.PP +papi_hybrid_native_avail is a PAPI utility program that reports information about the native events available on the current platform or on an attached MIC card\&. A native event is an event specific to a specific hardware platform\&. On many platforms, a specific native event may have a number of optional settings\&. In such cases, the native event and the valid settings are presented, rather than every possible combination of those settings\&. 
For each native event, a name, a description, and specific bit patterns are provided\&. +.SH "Options" +.PP +.PD 0 +.IP "\(bu" 2 +--help, -h print this help message +.IP "\(bu" 2 +-d display detailed information about native events +.IP "\(bu" 2 +-e EVENTNAME display detailed information about named native event +.IP "\(bu" 2 +-i EVENTSTR include only event names that contain EVENTSTR +.IP "\(bu" 2 +-x EVENTSTR exclude any event names that contain EVENTSTR +.IP "\(bu" 2 +--noumasks suppress display of Unit Mask information +.IP "\(bu" 2 +--mic < index > report events on the specified target MIC device +.PP +.PP +Processor-specific options +.PD 0 + +.IP "\(bu" 2 +--darr display events supporting Data Address Range Restriction +.IP "\(bu" 2 +--dear display Data Event Address Register events only +.IP "\(bu" 2 +--iarr display events supporting Instruction Address Range Restriction +.IP "\(bu" 2 +--iear display Instruction Event Address Register events only +.IP "\(bu" 2 +--opcm display events supporting OpCode Matching +.IP "\(bu" 2 +--nogroups suppress display of Event grouping information +.PP +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. +.PP +Modified by Gabriel Marin gmarin@icl.utk.edu to use offloading\&. diff --git a/man/man1/papi_mem_info.1 b/man/man1/papi_mem_info.1 new file mode 100644 index 0000000..90b4b94 --- /dev/null +++ b/man/man1/papi_mem_info.1 @@ -0,0 +1,21 @@ +.TH "papi_mem_info" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_mem_info \- papi_mem_info utility\&. +.PP +file papi_mem_info\&.c +.SH "NAME" +.PP +papi_mem_info - provides information on the memory architecture of the current processor\&. 
+.SH "Synopsis" +.PP +.SH "Description" +.PP +papi_mem_info is a PAPI utility program that reports information about the cache memory architecture of the current processor, including number, types, sizes and associativities of instruction and data caches and Translation Lookaside Buffers\&. +.SH "Options" +.PP +This utility has no command line options\&. +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. diff --git a/man/man1/papi_multiplex_cost.1 b/man/man1/papi_multiplex_cost.1 new file mode 100644 index 0000000..454aff3 --- /dev/null +++ b/man/man1/papi_multiplex_cost.1 @@ -0,0 +1,33 @@ +.TH "papi_multiplex_cost" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_multiplex_cost \- papi_multiplex_cost utility\&. +.PP +file papi_multiplex_cost\&.c +.SH "NAME" +.PP +papi_multiplex_cost - computes execution time costs for basic PAPI operations on multiplexed EventSets\&. +.SH "Synopsis" +.PP +papi_multiplex_cost [-m, --min < min >] [-x, --max < max >] [-k,-s] +.SH "Description" +.PP +papi_multiplex_cost is a PAPI utility program that computes the min / max / mean / std\&. deviation of execution times for PAPI start/stop pairs and for PAPI reads on multiplexed eventsets\&. This information provides the basic operating cost to a user's program for collecting hardware counter data\&. Command line options control display capabilities\&. +.SH "Options" +.PP +.PD 0 +.IP "\(bu" 2 +-m < Min number of events to test > +.IP "\(bu" 2 +-x < Max number of events to test > +.IP "\(bu" 2 +-k, Do not time kernel multiplexing +.IP "\(bu" 2 +-s, Do not time software multiplexed EventSets +.IP "\(bu" 2 +-t THRESHOLD, Test with THRESHOLD iterations of counting loop\&. +.PP +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. 
diff --git a/man/man1/papi_native_avail.1 b/man/man1/papi_native_avail.1 new file mode 100644 index 0000000..6e16fcb --- /dev/null +++ b/man/man1/papi_native_avail.1 @@ -0,0 +1,53 @@ +.TH "papi_native_avail" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_native_avail \- papi_native_avail utility\&. +.PP +file papi_native_avail\&.c +.SH "NAME" +.PP +papi_native_avail - provides detailed information for PAPI native events\&. +.SH "Synopsis" +.PP +.SH "Description" +.PP +papi_native_avail is a PAPI utility program that reports information about the native events available on the current platform\&. A native event is an event specific to a specific hardware platform\&. On many platforms, a specific native event may have a number of optional settings\&. In such cases, the native event and the valid settings are presented, rather than every possible combination of those settings\&. For each native event, a name, a description, and specific bit patterns are provided\&. 
+.SH "Options" +.PP +.PD 0 +.IP "\(bu" 2 +--help, -h print this help message +.IP "\(bu" 2 +--check, -c attempts to add each event +.IP "\(bu" 2 +-e EVENTNAME display detailed information about named native event +.IP "\(bu" 2 +-i EVENTSTR include only event names that contain EVENTSTR +.IP "\(bu" 2 +-x EVENTSTR exclude any event names that contain EVENTSTR +.IP "\(bu" 2 +--noqual suppress display of event qualifiers (mask and flag) information +.br + +.PP +.PP +Processor-specific options +.PD 0 + +.IP "\(bu" 2 +--darr display events supporting Data Address Range Restriction +.IP "\(bu" 2 +--dear display Data Event Address Register events only +.IP "\(bu" 2 +--iarr display events supporting Instruction Address Range Restriction +.IP "\(bu" 2 +--iear display Instruction Event Address Register events only +.IP "\(bu" 2 +--opcm display events supporting OpCode Matching +.IP "\(bu" 2 +--nogroups suppress display of Event grouping information +.PP +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. diff --git a/man/man1/papi_version.1 b/man/man1/papi_version.1 new file mode 100644 index 0000000..57dc03d --- /dev/null +++ b/man/man1/papi_version.1 @@ -0,0 +1,19 @@ +.TH "papi_version" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_version \- papi_version utility\&. +.PP +file papi_version\&.c +.SH "Name" +.PP +papi_version - provides version information for papi\&. +.SH "Synopsis" +.PP +papi_version +.SH "Description" +.PP +papi_version is a PAPI utility program that reports version information about the current PAPI installation\&. +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. 
diff --git a/man/man1/papi_xml_event_info.1 b/man/man1/papi_xml_event_info.1 new file mode 100644 index 0000000..6ffad60 --- /dev/null +++ b/man/man1/papi_xml_event_info.1 @@ -0,0 +1,32 @@ +.TH "papi_xml_event_info" 1 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +papi_xml_event_info \- papi_xml_event_info utility\&. +.PP +file papi_xml_event_info\&.c +.SH "NAME" +.PP +papi_xml_event_info - provides detailed information for PAPI events in XML format +.SH "Synopsis" +.PP +.SH "Description" +.PP +papi_xml_event_info is a PAPI utility program that reports information about the events available on the current platform in an XML format\&. +.PP +It will attempt to create an EventSet with each event in it, which can be slow\&. +.SH "Options" +.PP +.PD 0 +.IP "\(bu" 2 +-h print help message +.IP "\(bu" 2 +-p print only preset events +.IP "\(bu" 2 +-n print only native events +.IP "\(bu" 2 +-c COMPONENT print only events from component number COMPONENT event1, event2, \&.\&.\&. Print only events that can be created in the same event set with the events event1, event2, etc\&. +.PP +.SH "Bugs" +.PP +There are no known bugs in this utility\&. If you find a bug, it should be reported to the PAPI Mailing List at ptools-perfapi@icl.utk.edu\&. diff --git a/man/man3/PAPIF_accum.3 b/man/man3/PAPIF_accum.3 new file mode 100644 index 0000000..0566e0b --- /dev/null +++ b/man/man3/PAPIF_accum.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_accum" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_accum \- +.PP +accumulate and reset counters in an event set + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_accum\fP( C_INT EventSet, C_LONG_LONG(*) values, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_accum\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_accum_counters.3 b/man/man3/PAPIF_accum_counters.3 new file mode 100644 index 0000000..61e47d9 --- /dev/null +++ b/man/man3/PAPIF_accum_counters.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_accum_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_accum_counters \- +.PP +Accumulate and reset counters\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_accum_counters\fP( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_accum_counters\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_add_event.3 b/man/man3/PAPIF_add_event.3 new file mode 100644 index 0000000..ad12ef9 --- /dev/null +++ b/man/man3/PAPIF_add_event.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_add_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_add_event \- +.PP +add PAPI preset or native hardware event to an event set + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_add_event( C_INT EventSet, C_INT EventCode, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_add_event\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_add_events.3 b/man/man3/PAPIF_add_events.3 new file mode 100644 index 0000000..7fbadc2 --- /dev/null +++ b/man/man3/PAPIF_add_events.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_add_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_add_events \- +.PP +add multiple PAPI presets or native hardware events to an event set + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_add_events\fP( C_INT EventSet, C_INT(*) EventCodes, C_INT number, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_add_events\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_add_named_event.3 b/man/man3/PAPIF_add_named_event.3 new file mode 100644 index 0000000..e371857 --- /dev/null +++ b/man/man3/PAPIF_add_named_event.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_add_named_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_add_named_event \- +.PP +add PAPI preset or native hardware event to an event set by name + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_add_named_event( C_INT EventSet, C_STRING EventName, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_add_named_event\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_assign_eventset_component.3 b/man/man3/PAPIF_assign_eventset_component.3 new file mode 100644 index 0000000..2ec6b7e --- /dev/null +++ b/man/man3/PAPIF_assign_eventset_component.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_assign_eventset_component" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_assign_eventset_component \- +.PP +assign a component index to an existing but empty EventSet + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_assign_eventset_component( C_INT EventSet, C_INT cidx, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_assign_eventset_component\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_cleanup_eventset.3 b/man/man3/PAPIF_cleanup_eventset.3 new file mode 100644 index 0000000..11e68b5 --- /dev/null +++ b/man/man3/PAPIF_cleanup_eventset.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_cleanup_eventset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_cleanup_eventset \- +.PP +empty and destroy an EventSet + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_cleanup_eventset( C_INT EventSet, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_cleanup_eventset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_create_eventset.3 b/man/man3/PAPIF_create_eventset.3 new file mode 100644 index 0000000..703cc80 --- /dev/null +++ b/man/man3/PAPIF_create_eventset.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_create_eventset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_create_eventset \- +.PP +create a new empty PAPI EventSet + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_create_eventset( C_INT EventSet, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_create_eventset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_destroy_eventset.3 b/man/man3/PAPIF_destroy_eventset.3 new file mode 100644 index 0000000..4f13964 --- /dev/null +++ b/man/man3/PAPIF_destroy_eventset.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_destroy_eventset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_destroy_eventset \- +.PP +empty and destroy an EventSet + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_destroy_eventset( C_INT EventSet, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_destroy_eventset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_enum_event.3 b/man/man3/PAPIF_enum_event.3 new file mode 100644 index 0000000..adf7550 --- /dev/null +++ b/man/man3/PAPIF_enum_event.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_enum_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_enum_event \- +.PP +Enumerate PAPI preset or native events\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_enum_event( C_INT EventCode, C_INT modifier, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_enum_event\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_epc.3 b/man/man3/PAPIF_epc.3 new file mode 100644 index 0000000..43904e6 --- /dev/null +++ b/man/man3/PAPIF_epc.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_epc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_epc \- +.PP +Get named events per cycle, real and processor time, reference and core cycles\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_epc( C_STRING EventName, C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG ref, C_LONG_LONG core, C_LONG_LONG evt, C_FLOAT epc, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_epc\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_event_code_to_name.3 b/man/man3/PAPIF_event_code_to_name.3 new file mode 100644 index 0000000..2145936 --- /dev/null +++ b/man/man3/PAPIF_event_code_to_name.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_event_code_to_name" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_event_code_to_name \- +.PP +Convert a numeric hardware event code to a name\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_event_code_to_name( C_INT EventCode, C_STRING EventName, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_event_code_to_name\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_event_name_to_code.3 b/man/man3/PAPIF_event_name_to_code.3 new file mode 100644 index 0000000..c3887ae --- /dev/null +++ b/man/man3/PAPIF_event_name_to_code.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_event_name_to_code" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_event_name_to_code \- +.PP +Convert a name to a numeric hardware event code\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_event_name_to_code( C_STRING EventName, C_INT EventCode, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_event_name_to_code\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_flips.3 b/man/man3/PAPIF_flips.3 new file mode 100644 index 0000000..1c3dffa --- /dev/null +++ b/man/man3/PAPIF_flips.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_flips" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_flips \- +.PP +Simplified call to get Mflips/s (floating point instruction rate), real and processor time\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_flips( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpins, C_FLOAT mflips, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flips\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_flops.3 b/man/man3/PAPIF_flops.3 new file mode 100644 index 0000000..681ac64 --- /dev/null +++ b/man/man3/PAPIF_flops.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_flops" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_flops \- +.PP +Simplified call to get Mflops/s (floating point operation rate), real and processor time\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_flops( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG flpops, C_FLOAT mflops, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flops\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_clockrate.3 b/man/man3/PAPIF_get_clockrate.3 new file mode 100644 index 0000000..bb923bb --- /dev/null +++ b/man/man3/PAPIF_get_clockrate.3 @@ -0,0 +1,37 @@ +.TH "PAPIF_get_clockrate" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_clockrate \- +.PP +Get the clockrate in MHz for the current cpu\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_get_clockrate( C_INT cr )\fP +.RE +.PP +\fBNote:\fP +.RS 4 +This is a Fortran only interface that returns a value from the \fBPAPI_get_opt\fP call\&. +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_dmem_info.3 b/man/man3/PAPIF_get_dmem_info.3 new file mode 100644 index 0000000..f6c6e4a --- /dev/null +++ b/man/man3/PAPIF_get_dmem_info.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_dmem_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_dmem_info \- +.PP +get information about the dynamic memory usage of the current program + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_get_dmem_info( C_INT EventSet, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_dmem_info\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_get_domain.3 b/man/man3/PAPIF_get_domain.3 new file mode 100644 index 0000000..eb1efd8 --- /dev/null +++ b/man/man3/PAPIF_get_domain.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_domain" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_domain \- +.PP +Get the domain setting for the specified EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_get_domain( C_INT eventset, C_INT domain, C_INT mode, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_event_info.3 b/man/man3/PAPIF_get_event_info.3 new file mode 100644 index 0000000..2364ebb --- /dev/null +++ b/man/man3/PAPIF_get_event_info.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_event_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_event_info \- +.PP +Get the event's name and description info\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_get_event_info\fP(C_INT EventCode, C_STRING symbol, C_STRING long_descr, C_STRING short_descr, C_INT count, C_STRING event_note, C_INT flags, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_event_info\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_get_exe_info.3 b/man/man3/PAPIF_get_exe_info.3 new file mode 100644 index 0000000..e42ef9f --- /dev/null +++ b/man/man3/PAPIF_get_exe_info.3 @@ -0,0 +1,38 @@ +.TH "PAPIF_get_exe_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_exe_info \- +.PP +get information about the address space of the current executable + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_get_exe_info\fP( C_STRING fullname, C_STRING name, +.br + C_LONG_LONG text_start, C_LONG_LONG text_end, +.br + C_LONG_LONG data_start, C_LONG_LONG data_end, +.br + C_LONG_LONG bss_start, C_LONG_LONG bss_end, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_executable_info\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_granularity.3 b/man/man3/PAPIF_get_granularity.3 new file mode 100644 index 0000000..0e0597a --- /dev/null +++ b/man/man3/PAPIF_get_granularity.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_granularity" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_granularity \- +.PP +Get the granularity setting for the specified EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_get_granularity( C_INT eventset, C_INT granularity, C_INT mode, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_get_hardware_info.3 b/man/man3/PAPIF_get_hardware_info.3 new file mode 100644 index 0000000..4e0e5f8 --- /dev/null +++ b/man/man3/PAPIF_get_hardware_info.3 @@ -0,0 +1,36 @@ +.TH "PAPIF_get_hardware_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_hardware_info \- +.PP +get information about the system hardware + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_get_hardware_info\fP( C_INT ncpu, C_INT nnodes, C_INT totalcpus, +.br + C_INT vendor, C_STRING vendor_str, C_INT model, C_STRING model_str, +.br + C_FLOAT revision, C_FLOAT mhz ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_hardware_info\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_multiplex.3 b/man/man3/PAPIF_get_multiplex.3 new file mode 100644 index 0000000..f91c157 --- /dev/null +++ b/man/man3/PAPIF_get_multiplex.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_multiplex" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_multiplex \- +.PP +Get the multiplexing status of specified event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_get_multiplex( C_INT EventSet, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_multiplex\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_preload.3 b/man/man3/PAPIF_get_preload.3 new file mode 100644 index 0000000..8b4d093 --- /dev/null +++ b/man/man3/PAPIF_get_preload.3 @@ -0,0 +1,37 @@ +.TH "PAPIF_get_preload" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_preload \- +.PP +Get the LD_PRELOAD environment variable\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_get_preload( C_STRING lib_preload_env, C_INT check )\fP +.RE +.PP +\fBNote:\fP +.RS 4 +This is a Fortran only interface that returns a value from the \fBPAPI_get_opt\fP call\&. +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_real_cyc.3 b/man/man3/PAPIF_get_real_cyc.3 new file mode 100644 index 0000000..0fe1832 --- /dev/null +++ b/man/man3/PAPIF_get_real_cyc.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_real_cyc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_real_cyc \- +.PP +Get real time counter value in clock cycles\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_get_real_cyc( C_LONG_LONG real_cyc )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_real_cyc\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_real_nsec.3 b/man/man3/PAPIF_get_real_nsec.3 new file mode 100644 index 0000000..3850ff3 --- /dev/null +++ b/man/man3/PAPIF_get_real_nsec.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_real_nsec" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_real_nsec \- +.PP +Get real time counter value in nanoseconds\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_get_real_nsec( C_LONG_LONG time )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_real_nsec\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_get_real_usec.3 b/man/man3/PAPIF_get_real_usec.3 new file mode 100644 index 0000000..ac2a1ee --- /dev/null +++ b/man/man3/PAPIF_get_real_usec.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_real_usec" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_real_usec \- +.PP +Get real time counter value in microseconds\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_get_real_usec( C_LONG_LONG time )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_real_usec\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_virt_cyc.3 b/man/man3/PAPIF_get_virt_cyc.3 new file mode 100644 index 0000000..1cbca6a --- /dev/null +++ b/man/man3/PAPIF_get_virt_cyc.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_virt_cyc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_virt_cyc \- +.PP +Get virtual time counter value in clock cycles\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_get_virt_cyc( C_LONG_LONG virt_cyc )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_virt_cyc\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_get_virt_usec.3 b/man/man3/PAPIF_get_virt_usec.3 new file mode 100644 index 0000000..01b45b2 --- /dev/null +++ b/man/man3/PAPIF_get_virt_usec.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_get_virt_usec" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_get_virt_usec \- +.PP +Get virtual time counter value in microseconds\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_get_virt_usec( C_LONG_LONG time )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_virt_usec\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_ipc.3 b/man/man3/PAPIF_ipc.3 new file mode 100644 index 0000000..b72316d --- /dev/null +++ b/man/man3/PAPIF_ipc.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_ipc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_ipc \- +.PP +Get instructions per cycle, real and processor time\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_ipc( C_FLOAT real_time, C_FLOAT proc_time, C_LONG_LONG ins, C_FLOAT ipc, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_ipc\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_is_initialized.3 b/man/man3/PAPIF_is_initialized.3 new file mode 100644 index 0000000..492fc41 --- /dev/null +++ b/man/man3/PAPIF_is_initialized.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_is_initialized" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_is_initialized \- +.PP +Check for initialization\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_is_initialized( C_INT level )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_is_initialized\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_library_init.3 b/man/man3/PAPIF_library_init.3 new file mode 100644 index 0000000..7448e73 --- /dev/null +++ b/man/man3/PAPIF_library_init.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_library_init" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_library_init \- +.PP +Initialize the PAPI library\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_library_init( C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_library_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_lock.3 b/man/man3/PAPIF_lock.3 new file mode 100644 index 0000000..263c353 --- /dev/null +++ b/man/man3/PAPIF_lock.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_lock" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_lock \- +.PP +Lock one of two mutex variables defined in \fBpapi\&.h\fP\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_lock( C_INT lock )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_lock\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_multiplex_init.3 b/man/man3/PAPIF_multiplex_init.3 new file mode 100644 index 0000000..8f7a12d --- /dev/null +++ b/man/man3/PAPIF_multiplex_init.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_multiplex_init" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_multiplex_init \- +.PP +Initialize multiplex support in the PAPI library\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_multiplex_init( C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_multiplex_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_num_cmp_hwctrs.3 b/man/man3/PAPIF_num_cmp_hwctrs.3 new file mode 100644 index 0000000..9582b42 --- /dev/null +++ b/man/man3/PAPIF_num_cmp_hwctrs.3 @@ -0,0 +1,34 @@ +.TH "PAPIF_num_cmp_hwctrs" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_num_cmp_hwctrs \- +.PP +Return the number of hardware counters on the specified component\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_num_cmp_hwctrs( C_INT cidx, C_INT num )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_num_hwctrs\fP +.PP +\fBPAPI_num_cmp_hwctrs\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_num_counters.3 b/man/man3/PAPIF_num_counters.3 new file mode 100644 index 0000000..fbad1c7 --- /dev/null +++ b/man/man3/PAPIF_num_counters.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_num_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_num_counters \- +.PP +Get the number of hardware counters available on the system\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_num_counters( C_INT numevents )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_num_counters\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_num_events.3 b/man/man3/PAPIF_num_events.3 new file mode 100644 index 0000000..ebf2098 --- /dev/null +++ b/man/man3/PAPIF_num_events.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_num_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_num_events \- +.PP +Return the number of events in an event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_num_events(C_INT EventSet, C_INT count)\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_num_events\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_num_hwctrs.3 b/man/man3/PAPIF_num_hwctrs.3 new file mode 100644 index 0000000..dc63b5c --- /dev/null +++ b/man/man3/PAPIF_num_hwctrs.3 @@ -0,0 +1,34 @@ +.TH "PAPIF_num_hwctrs" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_num_hwctrs \- +.PP +Return the number of hardware counters on the cpu\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_num_hwctrs( C_INT num )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_num_hwctrs\fP +.PP +\fBPAPI_num_cmp_hwctrs\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_perror.3 b/man/man3/PAPIF_perror.3 new file mode 100644 index 0000000..a92f4ca --- /dev/null +++ b/man/man3/PAPIF_perror.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_perror" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_perror \- +.PP +Convert PAPI error codes to strings, and print error message to stderr\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_perror( C_STRING message )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_perror\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_query_event.3 b/man/man3/PAPIF_query_event.3 new file mode 100644 index 0000000..81c6943 --- /dev/null +++ b/man/man3/PAPIF_query_event.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_query_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_query_event \- +.PP +Query if PAPI event exists\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_query_event(C_INT EventCode, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_query_event\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_query_named_event.3 b/man/man3/PAPIF_query_named_event.3 new file mode 100644 index 0000000..613791e --- /dev/null +++ b/man/man3/PAPIF_query_named_event.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_query_named_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_query_named_event \- +.PP +Query if named PAPI event exists\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_query_named_event(C_STRING EventName, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_query_named_event\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_read.3 b/man/man3/PAPIF_read.3 new file mode 100644 index 0000000..ddad511 --- /dev/null +++ b/man/man3/PAPIF_read.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_read" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_read \- +.PP +Read hardware counters from an event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_read\fP(C_INT EventSet, C_LONG_LONG(*) values, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_read\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_read_ts.3 b/man/man3/PAPIF_read_ts.3 new file mode 100644 index 0000000..20ca9b5 --- /dev/null +++ b/man/man3/PAPIF_read_ts.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_read_ts" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_read_ts \- +.PP +Read hardware counters with a timestamp\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_read_ts\fP(C_INT EventSet, C_LONG_LONG(*) values, C_LONG_LONG(*) cycles, C_INT check) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_read_ts\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_register_thread.3 b/man/man3/PAPIF_register_thread.3 new file mode 100644 index 0000000..8d8d8dc --- /dev/null +++ b/man/man3/PAPIF_register_thread.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_register_thread" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_register_thread \- +.PP +Notify PAPI that a thread has 'appeared'\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_register_thread( C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_register_thread\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_remove_event.3 b/man/man3/PAPIF_remove_event.3 new file mode 100644 index 0000000..637c4c2 --- /dev/null +++ b/man/man3/PAPIF_remove_event.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_remove_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_remove_event \- +.PP +Remove a hardware event from a PAPI event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_remove_event( C_INT EventSet, C_INT EventCode, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_remove_event\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_remove_events.3 b/man/man3/PAPIF_remove_events.3 new file mode 100644 index 0000000..4f3879e --- /dev/null +++ b/man/man3/PAPIF_remove_events.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_remove_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_remove_events \- +.PP +Remove an array of hardware event codes from a PAPI event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_remove_events\fP( C_INT EventSet, C_INT(*) EventCode, C_INT number, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_remove_events\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_remove_named_event.3 b/man/man3/PAPIF_remove_named_event.3 new file mode 100644 index 0000000..9b8b32f --- /dev/null +++ b/man/man3/PAPIF_remove_named_event.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_remove_named_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_remove_named_event \- +.PP +Remove a named hardware event from a PAPI event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_remove_named_event( C_INT EventSet, C_STRING EventName, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_remove_named_event\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_reset.3 b/man/man3/PAPIF_reset.3 new file mode 100644 index 0000000..bf370e9 --- /dev/null +++ b/man/man3/PAPIF_reset.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_reset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_reset \- +.PP +Reset the hardware event counts in an event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_reset( C_INT EventSet, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_reset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_set_cmp_domain.3 b/man/man3/PAPIF_set_cmp_domain.3 new file mode 100644 index 0000000..c7bb90f --- /dev/null +++ b/man/man3/PAPIF_set_cmp_domain.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_set_cmp_domain" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_set_cmp_domain \- +.PP +Set the default counting domain for new event sets bound to the specified component\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_set_cmp_domain( C_INT domain, C_INT cidx, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_cmp_domain\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_set_cmp_granularity.3 b/man/man3/PAPIF_set_cmp_granularity.3 new file mode 100644 index 0000000..5ff9f0a --- /dev/null +++ b/man/man3/PAPIF_set_cmp_granularity.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_set_cmp_granularity" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_set_cmp_granularity \- +.PP +Set the default counting granularity for eventsets bound to the specified component\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_set_cmp_granularity( C_INT granularity, C_INT cidx, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_cmp_granularity\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_set_debug.3 b/man/man3/PAPIF_set_debug.3 new file mode 100644 index 0000000..0f75a81 --- /dev/null +++ b/man/man3/PAPIF_set_debug.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_set_debug" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_set_debug \- +.PP +Set the current debug level for error output from PAPI\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_set_debug( C_INT level, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_debug\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_set_domain.3 b/man/man3/PAPIF_set_domain.3 new file mode 100644 index 0000000..44abb93 --- /dev/null +++ b/man/man3/PAPIF_set_domain.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_set_domain" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_set_domain \- +.PP +Set the default counting domain for new event sets bound to the cpu component\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_set_domain( C_INT domain, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_domain\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_set_event_domain.3 b/man/man3/PAPIF_set_event_domain.3 new file mode 100644 index 0000000..508b29d --- /dev/null +++ b/man/man3/PAPIF_set_event_domain.3 @@ -0,0 +1,34 @@ +.TH "PAPIF_set_event_domain" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_set_event_domain \- +.PP +Set the default counting domain for specified EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_set_event_domain( C_INT EventSet, C_INT domain, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_domain\fP +.PP +\fBPAPI_set_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_set_granularity.3 b/man/man3/PAPIF_set_granularity.3 new file mode 100644 index 0000000..e25e153 --- /dev/null +++ b/man/man3/PAPIF_set_granularity.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_set_granularity" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_set_granularity \- +.PP +Set the default counting granularity for eventsets bound to the cpu component\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_set_granularity( C_INT granularity, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_granularity\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_set_inherit.3 b/man/man3/PAPIF_set_inherit.3 new file mode 100644 index 0000000..84656b3 --- /dev/null +++ b/man/man3/PAPIF_set_inherit.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_set_inherit" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_set_inherit \- +.PP +Turn on inheriting of counts from daughter to parent process\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_set_inherit( C_INT inherit, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_set_multiplex.3 b/man/man3/PAPIF_set_multiplex.3 new file mode 100644 index 0000000..26d3302 --- /dev/null +++ b/man/man3/PAPIF_set_multiplex.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_set_multiplex" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_set_multiplex \- +.PP +Convert a standard event set to a multiplexed event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br +\fBPAPIF_set_multiplex( C_INT EventSet, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_multiplex\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_shutdown.3 b/man/man3/PAPIF_shutdown.3 new file mode 100644 index 0000000..f16e09a --- /dev/null +++ b/man/man3/PAPIF_shutdown.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_shutdown" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_shutdown \- +.PP +finish using PAPI and free all related resources\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Prototype:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_shutdown( )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_shutdown\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_start.3 b/man/man3/PAPIF_start.3 new file mode 100644 index 0000000..209b7fb --- /dev/null +++ b/man/man3/PAPIF_start.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_start" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_start \- +.PP +Start counting hardware events in an event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_start( C_INT EventSet, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_start\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_start_counters.3 b/man/man3/PAPIF_start_counters.3 new file mode 100644 index 0000000..9885f07 --- /dev/null +++ b/man/man3/PAPIF_start_counters.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_start_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_start_counters \- +.PP +Start counting hardware events\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_start_counters\fP( C_INT(*) events, C_INT array_len, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_start_counters\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_state.3 b/man/man3/PAPIF_state.3 new file mode 100644 index 0000000..43d5464 --- /dev/null +++ b/man/man3/PAPIF_state.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_state" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_state \- +.PP +Return the counting state of an EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_state(C_INT EventSet, C_INT status, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_state\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_stop.3 b/man/man3/PAPIF_stop.3 new file mode 100644 index 0000000..2bf22bd --- /dev/null +++ b/man/man3/PAPIF_stop.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_stop" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_stop \- +.PP +Stop counting hardware events in an EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_stop\fP( C_INT EventSet, C_LONG_LONG(*) values, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_stop\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_stop_counters.3 b/man/man3/PAPIF_stop_counters.3 new file mode 100644 index 0000000..215dacf --- /dev/null +++ b/man/man3/PAPIF_stop_counters.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_stop_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_stop_counters \- +.PP +Stop counting hardware events and reset values to zero\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_stop_counters\fP( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_stop_counters\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_thread_id.3 b/man/man3/PAPIF_thread_id.3 new file mode 100644 index 0000000..6536ac1 --- /dev/null +++ b/man/man3/PAPIF_thread_id.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_thread_id" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_thread_id \- +.PP +Get the thread identifier of the current thread\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_thread_id( C_INT id )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_thread_id\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_thread_init.3 b/man/man3/PAPIF_thread_init.3 new file mode 100644 index 0000000..f89ca53 --- /dev/null +++ b/man/man3/PAPIF_thread_init.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_thread_init" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_thread_init \- +.PP +Initialize thread support in the PAPI library\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_thread_init( C_INT FUNCTION handle, C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_thread_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_unlock.3 b/man/man3/PAPIF_unlock.3 new file mode 100644 index 0000000..44e3827 --- /dev/null +++ b/man/man3/PAPIF_unlock.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_unlock" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_unlock \- +.PP +Unlock one of the mutex variables defined in \fBpapi\&.h\fP\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_unlock( C_INT lock )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_unlock\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPIF_unregister_thread.3 b/man/man3/PAPIF_unregister_thread.3 new file mode 100644 index 0000000..9ff2fbc --- /dev/null +++ b/man/man3/PAPIF_unregister_thread.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_unregister_thread" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_unregister_thread \- +.PP +Notify PAPI that a thread has 'disappeared'\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_unregister_thread( C_INT check )\fP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_unregister_thread\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPIF_write.3 b/man/man3/PAPIF_write.3 new file mode 100644 index 0000000..2808598 --- /dev/null +++ b/man/man3/PAPIF_write.3 @@ -0,0 +1,32 @@ +.TH "PAPIF_write" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPIF_write \- +.PP +Write counter values into counters\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_write\fP( C_INT EventSet, C_LONG_LONG(*) values, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_write\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_accum.3 b/man/man3/PAPI_accum.3 new file mode 100644 index 0000000..f477da9 --- /dev/null +++ b/man/man3/PAPI_accum.3 @@ -0,0 +1,80 @@ +.TH "PAPI_accum" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_accum \- +.PP +Accumulate and reset counters in an EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi.h> @n +int PAPI_accum( int EventSet, long_long * values ); + +These calls assume an initialized PAPI library and a properly added event set. +PAPI_accum adds the counters of the indicated event set into the array values. +The counters are zeroed and continue counting after the operation. +Note the differences between PAPI_read and PAPI_accum, specifically +that PAPI_accum resets the values array to zero. + +@param EventSet + an integer handle for a PAPI Event Set + as created by PAPI_create_eventset +@param *values + an array to hold the counter values of the counting events + +@retval PAPI_EINVAL + One or more of the arguments is invalid. +@retval PAPI_ESYS + A system or C library call failed inside PAPI, see the errno variable. +@retval PAPI_ENOEVST + The event set specified does not exist. 
+ +@par Examples: + +.fi +.PP + +.PP +.nf +* do_100events( ); +* if ( PAPI_read( EventSet, values) != PAPI_OK ) +* handle_error( 1 ); +* // values[0] now equals 100 +* do_100events( ); +* if (PAPI_accum( EventSet, values ) != PAPI_OK ) +* handle_error( 1 ); +* // values[0] now equals 200 +* values[0] = -100; +* do_100events( ); +* if (PAPI_accum( EventSet, values ) != PAPI_OK ) +* handle_error( 1 ); +* // values[0] now equals 0 +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPIF_accum\fP +.PP +\fBPAPI_start\fP +.PP +\fBPAPI_set_opt\fP +.PP +\fBPAPI_reset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_accum_counters.3 b/man/man3/PAPI_accum_counters.3 new file mode 100644 index 0000000..7df555d --- /dev/null +++ b/man/man3/PAPI_accum_counters.3 @@ -0,0 +1,78 @@ +.TH "PAPI_accum_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_accum_counters \- +.PP +Accumulate and reset counters\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi.h> @n +int PAPI_accum_counters( long long *values, int array_len ); + +.fi +.PP +.PP +\fBParameters:\fP +.RS 4 +\fI*values\fP an array to hold the counter values of the counting events +.br +\fIarray_len\fP the number of items in the *events array +.RE +.PP +\fBPrecondition:\fP +.RS 4 +These calls assume an initialized PAPI library and a properly added event set\&. +.RE +.PP +\fBPostcondition:\fP +.RS 4 +The counters are reset and left running after the call\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. +.RE +.PP +\fBPAPI_accum_counters()\fP adds the event counters into the array *values\&. 
+.PP +.PP +.nf +do_100events(); +if ( PAPI_read_counters( values, num_hwcntrs ) != PAPI_OK ) + handle_error(1); +// values[0] now equals 100 +do_100events(); +if ( PAPI_accum_counters( values, num_hwcntrs ) != PAPI_OK ) + handle_error(1); +// values[0] now equals 200 +values[0] = -100; +do_100events(); +if ( PAPI_accum_counters(values, num_hwcntrs ) != PAPI_OK ) + handle_error(1); +// values[0] now equals 0 + * +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_opt()\fP \fBPAPI_start_counters()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_add_event.3 b/man/man3/PAPI_add_event.3 new file mode 100644 index 0000000..8594294 --- /dev/null +++ b/man/man3/PAPI_add_event.3 @@ -0,0 +1,93 @@ +.TH "PAPI_add_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_add_event \- +.PP +add PAPI preset or native hardware event to an event set + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi.h> @n +int PAPI_add_event( int EventSet, int EventCode ); + +PAPI_add_event adds one event to a PAPI Event Set. @n +A hardware event can be either a PAPI preset or a native hardware event code. +For a list of PAPI preset events, see PAPI_presets or run the avail test case +in the PAPI distribution. PAPI presets can be passed to PAPI_query_event to see +if they exist on the underlying architecture. +For a list of native events available on current platform, run the papi_native_avail +utility in the PAPI distribution. For the encoding of native events, +see PAPI_event_name_to_code to learn how to generate native code for the +supported native event on the underlying architecture. + +@param EventSet + An integer handle for a PAPI Event Set as created by PAPI_create_eventset. +@param EventCode + A defined event such as PAPI_TOT_INS. 
+ +@retval Positive-Integer + The number of consecutive elements that succeeded before the error. +@retval PAPI_EINVAL + One or more of the arguments is invalid. +@retval PAPI_ENOMEM + Insufficient memory to complete the operation. +@retval PAPI_ENOEVST + The event set specified does not exist. +@retval PAPI_EISRUN + The event set is currently counting events. +@retval PAPI_ECNFLCT + The underlying counter hardware can not count this event and other events + in the event set simultaneously. +@retval PAPI_ENOEVNT + The PAPI preset is not available on the underlying hardware. +@retval PAPI_EBUG + Internal error, please send mail to the developers. + +@par Examples: + +.fi +.PP + +.PP +.nf +* int EventSet = PAPI_NULL; +* unsigned int native = 0x0; +* if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* // Add Total Instructions Executed to our EventSet +* if ( PAPI_add_event( EventSet, PAPI_TOT_INS ) != PAPI_OK ) +* handle_error( 1 ); +* // Add native event PM_CYC to EventSet +* if ( PAPI_event_name_to_code( "PM_CYC", &native ) != PAPI_OK ) +* handle_error( 1 ); +* if ( PAPI_add_event( EventSet, native ) != PAPI_OK ) +* handle_error( 1 ); +* + +.fi +.PP +.PP +.PP +.nf +@see PAPI_cleanup_eventset @n +PAPI_destroy_eventset @n +PAPI_event_code_to_name @n +PAPI_remove_events @n +PAPI_query_event @n +PAPI_presets @n +PAPI_native @n +PAPI_remove_event.fi +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_add_events.3 b/man/man3/PAPI_add_events.3 new file mode 100644 index 0000000..0b4d2af --- /dev/null +++ b/man/man3/PAPI_add_events.3 @@ -0,0 +1,98 @@ +.TH "PAPI_add_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_add_events \- +.PP +add multiple PAPI presets or native hardware events to an event set + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi.h> @n +int PAPI_add_events( int EventSet, int * EventCodes, int number ); + +PAPI_add_event adds one event to a PAPI Event Set. PAPI_add_events does +the same, but for an array of events. @n +A hardware event can be either a PAPI preset or a native hardware event code. +For a list of PAPI preset events, see PAPI_presets or run the avail test case +in the PAPI distribution. PAPI presets can be passed to PAPI_query_event to see +if they exist on the underlying architecture. +For a list of native events available on current platform, run native_avail +test case in the PAPI distribution. For the encoding of native events, +see PAPI_event_name_to_code to learn how to generate native code for the +supported native event on the underlying architecture. + +@param EventSet + An integer handle for a PAPI Event Set as created by PAPI_create_eventset. +@param *EventCodes + An array of defined events. +@param number + An integer indicating the number of events in the array *EventCodes. + It should be noted that PAPI_add_events can partially succeed, + exactly like PAPI_remove_events. + +@retval Positive-Integer + The number of consecutive elements that succeeded before the error. +@retval PAPI_EINVAL + One or more of the arguments is invalid. +@retval PAPI_ENOMEM + Insufficient memory to complete the operation. +@retval PAPI_ENOEVST + The event set specified does not exist. +@retval PAPI_EISRUN + The event set is currently counting events. 
+@retval PAPI_ECNFLCT + The underlying counter hardware can not count this event and other events + in the event set simultaneously. +@retval PAPI_ENOEVNT + The PAPI preset is not available on the underlying hardware. +@retval PAPI_EBUG + Internal error, please send mail to the developers. + +@par Examples: + +.fi +.PP + +.PP +.nf +* int EventSet = PAPI_NULL; +* unsigned int native = 0x0; +* if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* // Add Total Instructions Executed to our EventSet +* if ( PAPI_add_event( EventSet, PAPI_TOT_INS ) != PAPI_OK ) +* handle_error( 1 ); +* // Add native event PM_CYC to EventSet +* if ( PAPI_event_name_to_code( "PM_CYC", &native ) != PAPI_OK ) +* handle_error( 1 ); +* if ( PAPI_add_event( EventSet, native ) != PAPI_OK ) +* handle_error( 1 ); +* + +.fi +.PP +.PP +.PP +.nf +@see PAPI_cleanup_eventset @n +PAPI_destroy_eventset @n +PAPI_event_code_to_name @n +PAPI_remove_events @n +PAPI_query_event @n +PAPI_presets @n +PAPI_native @n +PAPI_remove_event.fi +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_add_named_event.3 b/man/man3/PAPI_add_named_event.3 new file mode 100644 index 0000000..738db70 --- /dev/null +++ b/man/man3/PAPI_add_named_event.3 @@ -0,0 +1,87 @@ +.TH "PAPI_add_named_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_add_named_event \- +.PP +add PAPI preset or native hardware event by name to an EventSet + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi.h> @n +int PAPI_add_named_event( int EventSet, const char *EventName ); + +PAPI_add_named_event adds one event to a PAPI EventSet. @n +A hardware event can be either a PAPI preset or a native hardware event code. +For a list of PAPI preset events, see PAPI_presets or run the avail test case +in the PAPI distribution. 
PAPI presets can be passed to PAPI_query_event to see +if they exist on the underlying architecture. +For a list of native events available on current platform, run the papi_native_avail +utility in the PAPI distribution. + +@param EventSet + An integer handle for a PAPI Event Set as created by PAPI_create_eventset. +@param EventName + The name of a defined event such as PAPI_TOT_INS. + +@retval Positive-Integer + The number of consecutive elements that succeeded before the error. +@retval PAPI_EINVAL + One or more of the arguments is invalid. +@retval PAPI_ENOINIT + The PAPI library has not been initialized. +@retval PAPI_ENOMEM + Insufficient memory to complete the operation. +@retval PAPI_ENOEVST + The event set specified does not exist. +@retval PAPI_EISRUN + The event set is currently counting events. +@retval PAPI_ECNFLCT + The underlying counter hardware can not count this event and other events + in the event set simultaneously. +@retval PAPI_ENOEVNT + The PAPI preset is not available on the underlying hardware. +@retval PAPI_EBUG + Internal error, please send mail to the developers. + +@par Examples: + +.fi +.PP + +.PP +.nf +* char *EventName = "PAPI_TOT_INS"; +* int EventSet = PAPI_NULL; +* unsigned int native = 0x0; +* if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* // Add Total Instructions Executed to our EventSet +* if ( PAPI_add_named_event( EventSet, EventName ) != PAPI_OK ) +* handle_error( 1 ); +* // Add native event PM_CYC to EventSet +* if ( PAPI_add_named_event( EventSet, "PM_CYC" ) != PAPI_OK ) +* handle_error( 1 ); +* + +.fi +.PP +.PP +.PP +.nf +@see PAPI_add_event @n +PAPI_query_named_event @n +PAPI_remove_named_event.fi +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_addr_range_option_t.3 b/man/man3/PAPI_addr_range_option_t.3 new file mode 100644 index 0000000..735e27e --- /dev/null +++ b/man/man3/PAPI_addr_range_option_t.3 @@ -0,0 +1,46 @@ +.TH "PAPI_addr_range_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_addr_range_option_t \- +.PP +address range specification for range restricted counting if both are zero, range is disabled + +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBeventset\fP" +.br +.ti -1c +.RI "caddr_t \fBstart\fP" +.br +.ti -1c +.RI "caddr_t \fBend\fP" +.br +.ti -1c +.RI "int \fBstart_off\fP" +.br +.ti -1c +.RI "int \fBend_off\fP" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "caddr_t PAPI_addr_range_option_t::end" +user requested end address of an address range +.SS "int PAPI_addr_range_option_t::end_off" +hardware specified offset from end address +.SS "int PAPI_addr_range_option_t::eventset" +eventset to restrict +.SS "caddr_t PAPI_addr_range_option_t::start" +user requested start address of an address range +.SS "int PAPI_addr_range_option_t::start_off" +hardware specified offset from start address + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_address_map_t.3 b/man/man3/PAPI_address_map_t.3 new file mode 100644 index 0000000..97913de --- /dev/null +++ b/man/man3/PAPI_address_map_t.3 @@ -0,0 +1,54 @@ +.TH "PAPI_address_map_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_address_map_t \- +.PP +get the executable's address space info + +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "char \fBname\fP [1024]" +.br +.ti -1c +.RI "caddr_t \fBtext_start\fP" +.br +.ti -1c +.RI "caddr_t \fBtext_end\fP" +.br +.ti -1c +.RI "caddr_t \fBdata_start\fP" +.br +.ti -1c +.RI "caddr_t \fBdata_end\fP" +.br +.ti -1c +.RI "caddr_t \fBbss_start\fP" +.br +.ti -1c +.RI "caddr_t \fBbss_end\fP" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "caddr_t PAPI_address_map_t::bss_end" +End address of program bss segment +.SS "caddr_t PAPI_address_map_t::bss_start" +Start address of program bss segment +.SS "caddr_t PAPI_address_map_t::data_end" +End address of program data segment +.SS "caddr_t PAPI_address_map_t::data_start" +Start address of program data segment +.SS "caddr_t PAPI_address_map_t::text_end" +End address of program text segment +.SS "caddr_t PAPI_address_map_t::text_start" +Start address of program text segment + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_all_thr_spec_t.3 b/man/man3/PAPI_all_thr_spec_t.3 new file mode 100644 index 0000000..d6b276a --- /dev/null +++ b/man/man3/PAPI_all_thr_spec_t.3 @@ -0,0 +1,28 @@ +.TH "PAPI_all_thr_spec_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_all_thr_spec_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBnum\fP" +.br +.ti -1c +.RI "PAPI_thread_id_t * \fBid\fP" +.br +.ti -1c +.RI "void ** \fBdata\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_assign_eventset_component.3 b/man/man3/PAPI_assign_eventset_component.3 new file mode 100644 index 0000000..e378c34 --- /dev/null +++ b/man/man3/PAPI_assign_eventset_component.3 @@ -0,0 +1,77 @@ +.TH "PAPI_assign_eventset_component" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_assign_eventset_component \- +.PP +Assign a component index to an existing but empty EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include @n +PAPI_assign_eventset_component( int EventSet, int cidx ); + +@param EventSet + An integer identifier for an existing EventSet. +@param cidx + An integer identifier for a component. + By convention, component 0 is always the cpu component. + +@retval PAPI_ENOCMP + The argument cidx is not a valid component. +@retval PAPI_ENOEVST + The EventSet doesn't exist. +@retval PAPI_ENOMEM + Insufficient memory to complete the operation. + +PAPI_assign_eventset_component assigns a specific component index, +as specified by cidx, to a new EventSet identified by EventSet, as obtained +from PAPI_create_eventset. EventSets are ordinarily automatically bound +to components when the first event is added. This routine is useful to +explicitly bind an EventSet to a component before setting component related +options. 
+ +@par Examples: + +.fi +.PP + +.PP +.nf +* int EventSet = PAPI_NULL; +* if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* // Bind our EventSet to the cpu component +* if ( PAPI_assign_eventset_component( EventSet, 0 ) != PAPI_OK ) +* handle_error( 1 ); +* // Convert our EventSet to multiplexing +* if ( PAPI_set_multiplex( EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_opt\fP +.br + \fBPAPI_create_eventset\fP +.br + \fBPAPI_add_events\fP +.br + \fBPAPI_set_multiplex\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_attach.3 b/man/man3/PAPI_attach.3 new file mode 100644 index 0000000..d689650 --- /dev/null +++ b/man/man3/PAPI_attach.3 @@ -0,0 +1,82 @@ +.TH "PAPI_attach" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_attach \- +.PP +Attach PAPI event set to the specified thread id\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi.h> @n +int PAPI_attach( int EventSet, unsigned long tid ); + +PAPI_attach is a wrapper function that calls PAPI_set_opt to allow PAPI to +monitor performance counts on a thread other than the one currently executing. +This is sometimes referred to as third party monitoring. +PAPI_attach connects the specified EventSet to the specified thread; +PAPI_detach breaks that connection and restores the EventSet to the +original executing thread. + +@param EventSet + An integer handle for a PAPI EventSet as created by PAPI_create_eventset. +@param tid + A thread id as obtained from, for example, PAPI_list_threads or PAPI_thread_id. + +@retval PAPI_ECMP + This feature is unsupported on this component. +@retval PAPI_EINVAL + One or more of the arguments is invalid. +@retval PAPI_ENOEVST + The event set specified does not exist. +@retval PAPI_EISRUN + The event set is currently counting events. 
+ +@par Examples: + +.fi +.PP + +.PP +.nf +* int EventSet = PAPI_NULL; +* unsigned long pid; +* pid = fork( ); +* if ( pid <= 0 ) +* exit( 1 ); +* if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) +* exit( 1 ); +* // Add Total Instructions Executed to our EventSet +* if ( PAPI_add_event( EventSet, PAPI_TOT_INS ) != PAPI_OK ) +* exit( 1 ); +* // Attach this EventSet to the forked process +* if ( PAPI_attach( EventSet, pid ) != PAPI_OK ) +* exit( 1 ); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_opt\fP +.PP +\fBPAPI_list_threads\fP +.PP +\fBPAPI_thread_id\fP +.PP +\fBPAPI_thread_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_attach_option_t.3 b/man/man3/PAPI_attach_option_t.3 new file mode 100644 index 0000000..60619ad --- /dev/null +++ b/man/man3/PAPI_attach_option_t.3 @@ -0,0 +1,25 @@ +.TH "PAPI_attach_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_attach_option_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBeventset\fP" +.br +.ti -1c +.RI "unsigned long \fBtid\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_cleanup_eventset.3 b/man/man3/PAPI_cleanup_eventset.3 new file mode 100644 index 0000000..48863c1 --- /dev/null +++ b/man/man3/PAPI_cleanup_eventset.3 @@ -0,0 +1,67 @@ +.TH "PAPI_cleanup_eventset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_cleanup_eventset \- +.PP +Empty and destroy an EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include @n +int PAPI_cleanup_eventset( int EventSet ); + +.fi +.PP +.PP +\fBPAPI_cleanup_eventset\fP removes all events from a PAPI event set and turns off profiling and overflow for all events in the EventSet\&. 
This can not be called if the EventSet is not stopped\&. +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP An integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. Attempting to destroy a non-empty event set or passing in a null pointer to be destroyed\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_EISRUN\fP The EventSet is currently counting events\&. +.br +\fIPAPI_EBUG\fP Internal error, send mail to ptools-perfapi@icl.utk.edu and complain\&. +.RE +.PP +\fBExamples:\fP +.RS 4 + +.PP +.nf +* // Remove all events in the eventset +* if ( PAPI_cleanup_eventset( EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* + +.fi +.PP +.RE +.PP +.PP +.nf +@see PAPI_profil @n +PAPI_create_eventset @n +PAPI_add_event @n +PAPI_stop.fi +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_component_info_t.3 b/man/man3/PAPI_component_info_t.3 new file mode 100644 index 0000000..56020a5 --- /dev/null +++ b/man/man3/PAPI_component_info_t.3 @@ -0,0 +1,199 @@ +.TH "PAPI_component_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_component_info_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "char \fBname\fP [128]" +.br +.ti -1c +.RI "char \fBshort_name\fP [64]" +.br +.ti -1c +.RI "char \fBdescription\fP [128]" +.br +.ti -1c +.RI "char \fBversion\fP [64]" +.br +.ti -1c +.RI "char \fBsupport_version\fP [64]" +.br +.ti -1c +.RI "char \fBkernel_version\fP [64]" +.br +.ti -1c +.RI "char \fBdisabled_reason\fP [128]" +.br +.ti -1c +.RI "int \fBdisabled\fP" +.br +.ti -1c +.RI "int \fBCmpIdx\fP" +.br +.ti -1c +.RI "int \fBnum_cntrs\fP" +.br +.ti -1c +.RI "int \fBnum_mpx_cntrs\fP" +.br +.ti -1c +.RI "int \fBnum_preset_events\fP" +.br +.ti -1c +.RI "int \fBnum_native_events\fP" +.br +.ti -1c +.RI "int 
\fBdefault_domain\fP" +.br +.ti -1c +.RI "int \fBavailable_domains\fP" +.br +.ti -1c +.RI "int \fBdefault_granularity\fP" +.br +.ti -1c +.RI "int \fBavailable_granularities\fP" +.br +.ti -1c +.RI "int \fBhardware_intr_sig\fP" +.br +.ti -1c +.RI "int \fBcomponent_type\fP" +.br +.ti -1c +.RI "char * \fBpmu_names\fP [40]" +.br +.ti -1c +.RI "int \fBreserved\fP [8]" +.br +.ti -1c +.RI "unsigned int \fBhardware_intr\fP:1" +.br +.ti -1c +.RI "unsigned int \fBprecise_intr\fP:1" +.br +.ti -1c +.RI "unsigned int \fBposix1b_timers\fP:1" +.br +.ti -1c +.RI "unsigned int \fBkernel_profile\fP:1" +.br +.ti -1c +.RI "unsigned int \fBkernel_multiplex\fP:1" +.br +.ti -1c +.RI "unsigned int \fBfast_counter_read\fP:1" +.br +.ti -1c +.RI "unsigned int \fBfast_real_timer\fP:1" +.br +.ti -1c +.RI "unsigned int \fBfast_virtual_timer\fP:1" +.br +.ti -1c +.RI "unsigned int \fBattach\fP:1" +.br +.ti -1c +.RI "unsigned int \fBattach_must_ptrace\fP:1" +.br +.ti -1c +.RI "unsigned int \fBcntr_umasks\fP:1" +.br +.ti -1c +.RI "unsigned int \fBcpu\fP:1" +.br +.ti -1c +.RI "unsigned int \fBinherit\fP:1" +.br +.ti -1c +.RI "unsigned int \fBreserved_bits\fP:12" +.br +.in -1c +.SH "Detailed Description" +.PP + +.SH "Field Documentation" +.PP +.SS "unsigned int PAPI_component_info_t::attach" +Supports attach +.SS "unsigned int PAPI_component_info_t::attach_must_ptrace" +Attach must first ptrace and stop the thread/process +.SS "int PAPI_component_info_t::available_domains" +Available domains +.SS "int PAPI_component_info_t::available_granularities" +Available granularities +.SS "int PAPI_component_info_t::CmpIdx" +Index into the vector array for this component; set at init time +.SS "unsigned int PAPI_component_info_t::cntr_umasks" +counters have unit masks +.SS "int PAPI_component_info_t::component_type" +Type of component +.SS "unsigned int PAPI_component_info_t::cpu" +Supports specifying cpu number to use with event set +.SS "int PAPI_component_info_t::default_domain" +The default domain when this 
component is used +.SS "int PAPI_component_info_t::default_granularity" +The default granularity when this component is used +.SS "char PAPI_component_info_t::description[128]" +Description of the component +.SS "int PAPI_component_info_t::disabled" +0 if enabled, otherwise error code from initialization +.SS "char PAPI_component_info_t::disabled_reason[128]" +Reason for failure of initialization +.SS "unsigned int PAPI_component_info_t::fast_counter_read" +Supports a user level PMC read instruction +.SS "unsigned int PAPI_component_info_t::fast_real_timer" +Supports a fast real timer +.SS "unsigned int PAPI_component_info_t::fast_virtual_timer" +Supports a fast virtual timer +.SS "unsigned int PAPI_component_info_t::hardware_intr" +hw overflow intr, does not need to be emulated in software +.SS "int PAPI_component_info_t::hardware_intr_sig" +Signal used by hardware to deliver PMC events +.SS "unsigned int PAPI_component_info_t::inherit" +Supports child processes inheriting parent's counters +.SS "unsigned int PAPI_component_info_t::kernel_multiplex" +In kernel multiplexing +.SS "unsigned int PAPI_component_info_t::kernel_profile" +Has kernel profiling support (buffered interrupts or sprofil-like) +.SS "char PAPI_component_info_t::kernel_version[64]" +Version of the kernel PMC support driver +.SS "char PAPI_component_info_t::name[128]" +Name of the component we're using +.SS "int PAPI_component_info_t::num_cntrs" +Number of hardware counters the component supports +.SS "int PAPI_component_info_t::num_mpx_cntrs" +Number of hardware counters the component or PAPI can multiplex +.SS "int PAPI_component_info_t::num_native_events" +Number of native events the component supports +.SS "int PAPI_component_info_t::num_preset_events" +Number of preset events the component supports +.SS "char* PAPI_component_info_t::pmu_names[40]" +list of pmu names supported by this component +.SS "unsigned int PAPI_component_info_t::posix1b_timers" +Using POSIX 1b interval timers 
(timer_create) instead of setitimer +.SS "unsigned int PAPI_component_info_t::precise_intr" +Performance interrupts happen precisely +.SS "char PAPI_component_info_t::short_name[64]" + +.PP +.nf + Short name of component, + +.fi +.PP + to be prepended to event names +.SS "char PAPI_component_info_t::support_version[64]" +Version of the support library +.SS "char PAPI_component_info_t::version[64]" +Version of this component + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_cpu_option_t.3 b/man/man3/PAPI_cpu_option_t.3 new file mode 100644 index 0000000..1a5bfd8 --- /dev/null +++ b/man/man3/PAPI_cpu_option_t.3 @@ -0,0 +1,25 @@ +.TH "PAPI_cpu_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_cpu_option_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBeventset\fP" +.br +.ti -1c +.RI "unsigned int \fBcpu_num\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_create_eventset.3 b/man/man3/PAPI_create_eventset.3 new file mode 100644 index 0000000..d5945b6 --- /dev/null +++ b/man/man3/PAPI_create_eventset.3 @@ -0,0 +1,75 @@ +.TH "PAPI_create_eventset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_create_eventset \- +.PP +Create a new empty PAPI EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include @n +PAPI_create_eventset( int * EventSet ); + +PAPI_create_eventset creates a new EventSet pointed to by EventSet, +which must be initialized to PAPI_NULL before calling this routine. +The user may then add hardware events to the event set by calling +PAPI_add_event or similar routines. + +@note PAPI-C uses a late binding model to bind EventSets to components. 
+When an EventSet is first created it is not bound to a component. +This will cause some API calls that modify EventSet options to fail. +An EventSet can be bound to a component explicitly by calling +PAPI_assign_eventset_component or implicitly by calling PAPI_add_event +or similar routines. + +@param *EventSet + Address of an integer location to store the new EventSet handle. + +@exception PAPI_EINVAL + The argument handle has not been initialized to PAPI_NULL or the argument is a NULL pointer. + +@exception PAPI_ENOMEM + Insufficient memory to complete the operation. + +@par Examples: + +.fi +.PP + +.PP +.nf +* int EventSet = PAPI_NULL; +* if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* // Add Total Instructions Executed to our EventSet +* if ( PAPI_add_event( EventSet, PAPI_TOT_INS) != PAPI_OK ) +* handle_error( 1 ); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_add_event\fP +.br + \fBPAPI_assign_eventset_component\fP +.br + \fBPAPI_destroy_eventset\fP +.br + \fBPAPI_cleanup_eventset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_debug_option_t.3 b/man/man3/PAPI_debug_option_t.3 new file mode 100644 index 0000000..25c35d6 --- /dev/null +++ b/man/man3/PAPI_debug_option_t.3 @@ -0,0 +1,25 @@ +.TH "PAPI_debug_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_debug_option_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBlevel\fP" +.br +.ti -1c +.RI "PAPI_debug_handler_t \fBhandler\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_destroy_eventset.3 b/man/man3/PAPI_destroy_eventset.3 new file mode 100644 index 0000000..d1a8ca9 --- /dev/null +++ b/man/man3/PAPI_destroy_eventset.3 @@ -0,0 +1,67 @@ +.TH "PAPI_destroy_eventset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_destroy_eventset \- +.PP +Empty and destroy an EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include @n +int PAPI_destroy_eventset( int * EventSet ); + +.fi +.PP +.PP +\fBPAPI_destroy_eventset\fP deallocates the memory associated with an empty PAPI EventSet\&. +.PP +\fBParameters:\fP +.RS 4 +\fI*EventSet\fP A pointer to the integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP\&. The value pointed to by EventSet is then set to PAPI_NULL on success\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. Attempting to destroy a non-empty event set or passing in a null pointer to be destroyed\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_EISRUN\fP The EventSet is currently counting events\&. +.br +\fIPAPI_EBUG\fP Internal error, send mail to ptools-perfapi@icl.utk.edu and complain\&. +.RE +.PP +\fBExamples:\fP +.RS 4 + +.PP +.nf +* // Free all memory and data structures, EventSet must be empty\&. +* if ( PAPI_destroy_eventset( &EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* + +.fi +.PP +.RE +.PP +.PP +.nf +@see PAPI_profil @n +PAPI_create_eventset @n +PAPI_add_event @n +PAPI_stop.fi +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_detach.3 b/man/man3/PAPI_detach.3 new file mode 100644 index 0000000..cbe0f59 --- /dev/null +++ b/man/man3/PAPI_detach.3 @@ -0,0 +1,82 @@ +.TH "PAPI_detach" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_detach \- +.PP +Detach PAPI event set from previously specified thread id and restore to executing thread\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi.h> @n +int PAPI_detach( int EventSet, unsigned long tid ); + +PAPI_detach is a wrapper function that calls PAPI_set_opt to allow PAPI to +monitor performance counts on a thread other than the one currently executing. +This is sometimes referred to as third party monitoring. +PAPI_attach connects the specified EventSet to the specified thread; +PAPI_detach breaks that connection and restores the EventSet to the +original executing thread. + +@param EventSet + An integer handle for a PAPI EventSet as created by PAPI_create_eventset. +@param tid + A thread id as obtained from, for example, PAPI_list_threads or PAPI_thread_id. + +@retval PAPI_ECMP + This feature is unsupported on this component. +@retval PAPI_EINVAL + One or more of the arguments is invalid. +@retval PAPI_ENOEVST + The event set specified does not exist. +@retval PAPI_EISRUN + The event set is currently counting events. 
+ +@par Examples: + +.fi +.PP + +.PP +.nf +* int EventSet = PAPI_NULL; +* unsigned long pid; +* pid = fork( ); +* if ( pid <= 0 ) +* exit( 1 ); +* if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) +* exit( 1 ); +* // Add Total Instructions Executed to our EventSet +* if ( PAPI_add_event( EventSet, PAPI_TOT_INS ) != PAPI_OK ) +* exit( 1 ); +* // Attach this EventSet to the forked process +* if ( PAPI_attach( EventSet, pid ) != PAPI_OK ) +* exit( 1 ); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_opt\fP +.br + \fBPAPI_list_threads\fP +.br + \fBPAPI_thread_id\fP +.br + \fBPAPI_thread_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_disable_component.3 b/man/man3/PAPI_disable_component.3 new file mode 100644 index 0000000..ab6f294 --- /dev/null +++ b/man/man3/PAPI_disable_component.3 @@ -0,0 +1,61 @@ +.TH "PAPI_disable_component" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_disable_component \- +.PP +disables the specified component + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@retval ENOCMP + component does not exist + @retval ENOINIT + cannot disable as PAPI has already been initialized + +@param cidx + component index of component to be disabled +@par Examples: + +.fi +.PP + +.PP +.nf + int cidx, result; + + cidx = PAPI_get_component_index("example"); + + if (cidx>=0) { + result = PAPI_disable_component(cidx); + if (result==PAPI_OK) + printf("The example component is disabled\n"); + } + // \&.\&.\&. + PAPI_library_init(); +* + +.fi +.PP + \fBPAPI_disable_component()\fP allows the user to disable components before \fBPAPI_library_init()\fP time\&. This is useful if the user knows they do not wish to use events from that component and want to reduce the PAPI library overhead\&. +.PP +\fBPAPI_disable_component()\fP must be called before \fBPAPI_library_init()\fP\&. 
+.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_event_component\fP +.PP +\fBPAPI_library_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_disable_component_by_name.3 b/man/man3/PAPI_disable_component_by_name.3 new file mode 100644 index 0000000..65bd018 --- /dev/null +++ b/man/man3/PAPI_disable_component_by_name.3 @@ -0,0 +1,55 @@ +.TH "PAPI_disable_component_by_name" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_disable_component_by_name \- +.PP +disables the named component + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +\retval ENOCMP + component does not exist +\retval ENOINIT + unable to disable the component, the library has already been initialized +\param component_name + name of the component to disable. +\par Example: + +.fi +.PP + +.PP +.nf + int result; + result = PAPI_disable_component_by_name("example"); + if (result==PAPI_OK) + printf("component \"example\" has been disabled\n"); + //\&.\&.\&. + PAPI_library_init(PAPI_VER_CURRENT); +* + +.fi +.PP + \fBPAPI_disable_component_by_name()\fP allows the user to disable a component before \fBPAPI_library_init()\fP time\&. This is useful if the user knows they do not wish to use events from that component and want to reduce the PAPI library overhead\&. +.PP +\fBPAPI_disable_component_by_name()\fP must be called before \fBPAPI_library_init()\fP\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_library_init\fP +.PP +\fBPAPI_disable_component\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_dmem_info_t.3 b/man/man3/PAPI_dmem_info_t.3 new file mode 100644 index 0000000..7e47bc7 --- /dev/null +++ b/man/man3/PAPI_dmem_info_t.3 @@ -0,0 +1,55 @@ +.TH "PAPI_dmem_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_dmem_info_t \- +.PP +A pointer to the following is passed to \fBPAPI_get_dmem_info()\fP + +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "long long \fBpeak\fP" +.br +.ti -1c +.RI "long long \fBsize\fP" +.br +.ti -1c +.RI "long long \fBresident\fP" +.br +.ti -1c +.RI "long long \fBhigh_water_mark\fP" +.br +.ti -1c +.RI "long long \fBshared\fP" +.br +.ti -1c +.RI "long long \fBtext\fP" +.br +.ti -1c +.RI "long long \fBlibrary\fP" +.br +.ti -1c +.RI "long long \fBheap\fP" +.br +.ti -1c +.RI "long long \fBlocked\fP" +.br +.ti -1c +.RI "long long \fBstack\fP" +.br +.ti -1c +.RI "long long \fBpagesize\fP" +.br +.ti -1c +.RI "long long \fBpte\fP" +.br +.in -1c + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_domain_option_t.3 b/man/man3/PAPI_domain_option_t.3 new file mode 100644 index 0000000..da336b6 --- /dev/null +++ b/man/man3/PAPI_domain_option_t.3 @@ -0,0 +1,32 @@ +.TH "PAPI_domain_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_domain_option_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBdef_cidx\fP" +.br +.ti -1c +.RI "int \fBeventset\fP" +.br +.ti -1c +.RI "int \fBdomain\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + +.SH "Field Documentation" +.PP +.SS "int PAPI_domain_option_t::def_cidx" +this structure requires a component index to set default domains + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_enum_cmp_event.3 b/man/man3/PAPI_enum_cmp_event.3 new file mode 100644 index 0000000..c81a5a9 --- /dev/null +++ b/man/man3/PAPI_enum_cmp_event.3 @@ -0,0 +1,173 @@ +.TH "PAPI_enum_cmp_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_enum_cmp_event \- +.PP +Enumerate PAPI preset or native events for a given component\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi.h> @n +int PAPI_enum_cmp_event( int *EventCode, int modifier, int cidx ); + +Given an event code, PAPI_enum_cmp_event replaces the event +code with the next available event. + +The modifier argument affects which events are returned. +For all platforms and event types, a value of PAPI_ENUM_ALL (zero) +directs the function to return all possible events. @n + +For native events, the effect of the modifier argument may be + different on each platform. +See the discussion below for platform-specific definitions. + +@param *EventCode + A defined preset or native event such as PAPI_TOT_INS. +@param modifier + Modifies the search logic. See below for full list. + For native events, each platform behaves differently. + See platform-specific documentation for details. + + @param cidx + Specifies the component to search in + +@retval PAPI_ENOEVNT + The next requested PAPI preset or native event is not available on + the underlying hardware. 
+ +@par Examples: + +.fi +.PP + +.PP +.nf +* // Scan for all supported native events on the first component +* printf( "Name\t\t\t Code\t Description\n" ); +* do { +* retval = PAPI_get_event_info( i, &info ); +* if ( retval == PAPI_OK ) { +* printf( "%-30s %#-10x\n%s\n", info\&.symbol, info\&.event_code, info\&.long_descr ); +* } +* } while ( PAPI_enum_cmp_event( &i, PAPI_ENUM_ALL, 0 ) == PAPI_OK ); +* + +.fi +.PP +.PP +\fBGeneric Modifiers\fP +.RS 4 +The following values are implemented for preset events +.PD 0 + +.IP "\(bu" 2 +PAPI_ENUM_EVENTS -- Enumerate all (default) +.IP "\(bu" 2 +PAPI_ENUM_FIRST -- Enumerate first event (preset or native) preset/native chosen based on type of EventCode +.PP +.RE +.PP +\fBNative Modifiers\fP +.RS 4 +The following values are implemented for native events +.PD 0 + +.IP "\(bu" 2 +PAPI_NTV_ENUM_UMASKS -- Given an event, iterate through possible umasks one at a time +.IP "\(bu" 2 +PAPI_NTV_ENUM_UMASK_COMBOS -- Given an event, iterate through all possible combinations of umasks\&. This is not implemented on libpfm4\&. 
+.PP +.RE +.PP +\fBPreset Modifiers\fP +.RS 4 +The following values are implemented for preset events +.PD 0 + +.IP "\(bu" 2 +PAPI_PRESET_ENUM_AVAIL -- enumerate only available presets +.IP "\(bu" 2 +PAPI_PRESET_ENUM_MSC -- Miscellaneous preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_INS -- Instruction related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_IDL -- Stalled or Idle preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_BR -- Branch related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_CND -- Conditional preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_MEM -- Memory related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_CACH -- Cache related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_L1 -- L1 cache related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_L2 -- L2 cache related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_L3 -- L3 cache related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_TLB -- Translation Lookaside Buffer events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_FP -- Floating Point related preset events +.PP +.RE +.PP +\fBITANIUM Modifiers\fP +.RS 4 +The following values are implemented for modifier on Itanium: +.PD 0 + +.IP "\(bu" 2 +PAPI_NTV_ENUM_IARR - Enumerate IAR (instruction address ranging) events +.IP "\(bu" 2 +PAPI_NTV_ENUM_DARR - Enumerate DAR (data address ranging) events +.IP "\(bu" 2 +PAPI_NTV_ENUM_OPCM - Enumerate OPC (opcode matching) events +.IP "\(bu" 2 +PAPI_NTV_ENUM_IEAR - Enumerate IEAR (instr event address register) events +.IP "\(bu" 2 +PAPI_NTV_ENUM_DEAR - Enumerate DEAR (data event address register) events +.PP +.RE +.PP +\fBPOWER Modifiers\fP +.RS 4 +The following values are implemented for POWER +.PD 0 + +.IP "\(bu" 2 +PAPI_NTV_ENUM_GROUPS - Enumerate groups to which an event belongs +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +PAPI +.br + PAPIF +.br + \fBPAPI_enum_event\fP +.br + \fBPAPI_get_event_info\fP +.br + \fBPAPI_event_name_to_code\fP +.br + PAPI_preset +.br + PAPI_native +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by 
Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_enum_event.3 b/man/man3/PAPI_enum_event.3 new file mode 100644 index 0000000..194e3b2 --- /dev/null +++ b/man/man3/PAPI_enum_event.3 @@ -0,0 +1,171 @@ +.TH "PAPI_enum_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_enum_event \- +.PP +Enumerate PAPI preset or native events\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi.h> @n +int PAPI_enum_event( int * EventCode, int modifier ); + +Given a preset or native event code, PAPI_enum_event replaces the event +code with the next available event in either the preset or native table. +The modifier argument affects which events are returned. +For all platforms and event types, a value of PAPI_ENUM_ALL (zero) +directs the function to return all possible events. @n + +For preset events, a TRUE (non-zero) value currently directs the function +to return event codes only for PAPI preset events available on this platform. +This may change in the future. +For native events, the effect of the modifier argument is different on each platform. +See the discussion below for platform-specific definitions. + +@param *EventCode + A defined preset or native event such as PAPI_TOT_INS. +@param modifier + Modifies the search logic. See below for full list. + For native events, each platform behaves differently. + See platform-specific documentation for details. + +@retval PAPI_ENOEVNT + The next requested PAPI preset or native event is not available on + the underlying hardware. 
+ +@par Examples: + +.fi +.PP + +.PP +.nf +* // Scan for all supported native events on this platform +* printf( "Name\t\t\t Code\t Description\n" ); +* do { +* retval = PAPI_get_event_info( i, &info ); +* if ( retval == PAPI_OK ) { +* printf( "%-30s %#-10x\n%s\n", info\&.symbol, info\&.event_code, info\&.long_descr ); +* } +* } while ( PAPI_enum_event( &i, PAPI_ENUM_ALL ) == PAPI_OK ); +* + +.fi +.PP +.PP +\fBGeneric Modifiers\fP +.RS 4 +The following values are implemented for preset events +.PD 0 + +.IP "\(bu" 2 +PAPI_ENUM_EVENTS -- Enumerate all (default) +.IP "\(bu" 2 +PAPI_ENUM_FIRST -- Enumerate first event (preset or native) preset/native chosen based on type of EventCode +.PP +.RE +.PP +\fBNative Modifiers\fP +.RS 4 +The following values are implemented for native events +.PD 0 + +.IP "\(bu" 2 +PAPI_NTV_ENUM_UMASKS -- Given an event, iterate through possible umasks one at a time +.IP "\(bu" 2 +PAPI_NTV_ENUM_UMASK_COMBOS -- Given an event, iterate through all possible combinations of umasks\&. This is not implemented on libpfm4\&. 
+.PP +.RE +.PP +\fBPreset Modifiers\fP +.RS 4 +The following values are implemented for preset events +.PD 0 + +.IP "\(bu" 2 +PAPI_PRESET_ENUM_AVAIL -- enumerate only available presets +.IP "\(bu" 2 +PAPI_PRESET_ENUM_MSC -- Miscellaneous preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_INS -- Instruction related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_IDL -- Stalled or Idle preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_BR -- Branch related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_CND -- Conditional preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_MEM -- Memory related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_CACH -- Cache related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_L1 -- L1 cache related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_L2 -- L2 cache related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_L3 -- L3 cache related preset events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_TLB -- Translation Lookaside Buffer events +.IP "\(bu" 2 +PAPI_PRESET_ENUM_FP -- Floating Point related preset events +.PP +.RE +.PP +\fBITANIUM Modifiers\fP +.RS 4 +The following values are implemented for modifier on Itanium: +.PD 0 + +.IP "\(bu" 2 +PAPI_NTV_ENUM_IARR - Enumerate IAR (instruction address ranging) events +.IP "\(bu" 2 +PAPI_NTV_ENUM_DARR - Enumerate DAR (data address ranging) events +.IP "\(bu" 2 +PAPI_NTV_ENUM_OPCM - Enumerate OPC (opcode matching) events +.IP "\(bu" 2 +PAPI_NTV_ENUM_IEAR - Enumerate IEAR (instr event address register) events +.IP "\(bu" 2 +PAPI_NTV_ENUM_DEAR - Enumerate DEAR (data event address register) events +.PP +.RE +.PP +\fBPOWER Modifiers\fP +.RS 4 +The following values are implemented for POWER +.PD 0 + +.IP "\(bu" 2 +PAPI_NTV_ENUM_GROUPS - Enumerate groups to which an event belongs +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +PAPI +.br + PAPIF +.br + \fBPAPI_enum_cmp_event\fP +.br + \fBPAPI_get_event_info\fP +.br + \fBPAPI_event_name_to_code\fP +.br + PAPI_preset +.br + PAPI_native +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by 
Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_epc.3 b/man/man3/PAPI_epc.3 new file mode 100644 index 0000000..f70bc4e --- /dev/null +++ b/man/man3/PAPI_epc.3 @@ -0,0 +1,72 @@ +.TH "PAPI_epc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_epc \- +.PP +Simplified call to get arbitrary events per cycle, real and processor time\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface: \fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +int \fBPAPI_epc( int event, float *rtime, float *ptime, long long *ref, long long *core, long long *evt, float *epc )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIevent\fP event code to be measured (0 defaults to PAPI_TOT_INS) +.br +\fI*rtime\fP total realtime since the first call +.br +\fI*ptime\fP total process time since the first call +.br +\fI*ref\fP incremental reference clock cycles since the last call +.br +\fI*core\fP incremental core clock cycles since the last call +.br +\fI*evt\fP total events since the first call +.br +\fI*epc\fP incremental events per cycle since the last call +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_epc()\fP\&. +.br +\fIPAPI_ENOEVNT\fP One of the requested events does not exist\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.RE +.PP +The first call to \fBPAPI_epc()\fP will initialize the PAPI High Level interface, set up the counters to monitor the user specified event, PAPI_TOT_CYC, and PAPI_REF_CYC (if it exists) and start the counters\&. +.PP +Subsequent calls will read the counters and return total real time, total process time, total event counts since the start of the measurement and the core and reference cycle count and EPC rate since the latest call to \fBPAPI_epc()\fP\&. 
+.PP +A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. +.PP +\fBPAPI_epc\fP can provide a more detailed look at algorithm efficiency in light of clock variability in modern cpus\&. MFLOPS is no longer an adequate description of peak performance if clock rates can arbitrarily speed up or slow down\&. By allowing a user specified event and reporting reference cycles, core cycles and real time, \fBPAPI_epc\fP provides the information to compute an accurate effective clock rate, and an accurate measure of computational throughput\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flips()\fP +.PP +\fBPAPI_flops()\fP +.PP +\fBPAPI_ipc()\fP +.PP +\fBPAPI_stop_counters()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_event_code_to_name.3 b/man/man3/PAPI_event_code_to_name.3 new file mode 100644 index 0000000..6d44243 --- /dev/null +++ b/man/man3/PAPI_event_code_to_name.3 @@ -0,0 +1,87 @@ +.TH "PAPI_event_code_to_name" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_event_code_to_name \- +.PP +Convert a numeric hardware event code to a name\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include @n +int PAPI_event_code_to_name( int EventCode, char * EventName ); + +PAPI_event_code_to_name is used to translate a 32-bit integer PAPI event +code into an ASCII PAPI event name. +Either Preset event codes or Native event codes can be passed to this routine. +Native event codes and names differ from platform to platform. + +@param EventCode + The numeric code for the event. +@param *EventName + A string containing the event name as listed in PAPI_presets or discussed in PAPI_native. + +@retval PAPI_EINVAL + One or more of the arguments is invalid. 
+@retval PAPI_ENOTPRESET + The hardware event specified is not a valid PAPI preset. +@retval PAPI_ENOEVNT + The hardware event is not available on the underlying hardware. + +@par Examples: + +.fi +.PP + +.PP +.nf +* int EventCode, EventSet = PAPI_NULL; +* int Event, number; +* char EventCodeStr[PAPI_MAX_STR_LEN]; +* // Create the EventSet +* if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* // Add Total Instructions Executed to our EventSet +* if ( PAPI_add_event( EventSet, PAPI_TOT_INS ) != PAPI_OK ) +* handle_error( 1 ); +* number = 1; +* if ( PAPI_list_events( EventSet, &Event, &number ) != PAPI_OK ) +* handle_error(1); +* // Convert integer code to name string +* if ( PAPI_event_code_to_name( Event, EventCodeStr ) != PAPI_OK ) +* handle_error( 1 ); +* printf( "Event Name: %s\n", EventCodeStr ); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_event_name_to_code\fP +.PP +\fBPAPI_remove_event\fP +.PP +\fBPAPI_get_event_info\fP +.PP +\fBPAPI_enum_event\fP +.PP +\fBPAPI_add_event\fP +.PP +PAPI_presets +.PP +PAPI_native +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_event_info_t.3 b/man/man3/PAPI_event_info_t.3 new file mode 100644 index 0000000..60bcab6 --- /dev/null +++ b/man/man3/PAPI_event_info_t.3 @@ -0,0 +1,144 @@ +.TH "PAPI_event_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_event_info_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "unsigned int \fBevent_code\fP" +.br +.ti -1c +.RI "char \fBsymbol\fP [1024]" +.br +.ti -1c +.RI "char \fBshort_descr\fP [64]" +.br +.ti -1c +.RI "char \fBlong_descr\fP [1024]" +.br +.ti -1c +.RI "int \fBcomponent_index\fP" +.br +.ti -1c +.RI "char \fBunits\fP [64]" +.br +.ti -1c +.RI "int \fBlocation\fP" +.br +.ti -1c +.RI "int \fBdata_type\fP" +.br +.ti -1c +.RI "int \fBvalue_type\fP" +.br +.ti -1c +.RI "int \fBtimescope\fP" +.br +.ti -1c +.RI "int \fBupdate_type\fP" +.br +.ti -1c +.RI "int \fBupdate_freq\fP" +.br +.ti -1c +.RI "unsigned int \fBcount\fP" +.br +.ti -1c +.RI "unsigned int \fBevent_type\fP" +.br +.ti -1c +.RI "char \fBderived\fP [64]" +.br +.ti -1c +.RI "char \fBpostfix\fP [256]" +.br +.ti -1c +.RI "unsigned int \fBcode\fP [12]" +.br +.ti -1c +.RI "char \fBname\fP [12][256]" +.br +.ti -1c +.RI "char \fBnote\fP [1024]" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "unsigned int PAPI_event_info_t::code[12]" +array of values that further describe the event: +.IP "\(bu" 2 +presets: native event_code values +.IP "\(bu" 2 +native: register values(?) 
+.PP + +.SS "int PAPI_event_info_t::component_index" +component this event belongs to +.SS "unsigned int PAPI_event_info_t::count" +number of terms (usually 1) in the code and name fields +.IP "\(bu" 2 +presets: these are native events +.IP "\(bu" 2 +native: these are unused +.PP + +.SS "int PAPI_event_info_t::data_type" +data type returned by PAPI +.SS "char PAPI_event_info_t::derived[64]" +name of the derived type +.IP "\(bu" 2 +presets: usually NOT_DERIVED +.IP "\(bu" 2 +native: empty string +.PP + +.SS "unsigned int PAPI_event_info_t::event_code" +preset (0x8xxxxxxx) or native (0x4xxxxxxx) event code +.SS "unsigned int PAPI_event_info_t::event_type" +event type or category for preset events only +.SS "int PAPI_event_info_t::location" +location event applies to +.SS "char PAPI_event_info_t::long_descr[1024]" +a longer description: typically a sentence for presets, possibly a paragraph from vendor docs for native events +.SS "char PAPI_event_info_t::name[12][256]" +< names of code terms: - presets: native event names, +.IP "\(bu" 2 +native: descriptive strings for each register value(?) +.PP + +.SS "char PAPI_event_info_t::note[1024]" + +.PP +.nf + an optional developer note + supplied with a preset event + to delineate platform specific + +.fi +.PP + anomalies or restrictions +.SS "char PAPI_event_info_t::postfix[256]" +string containing postfix operations; only defined for preset events of derived type DERIVED_POSTFIX +.SS "char PAPI_event_info_t::short_descr[64]" +a short description suitable for use as a label +.SS "char PAPI_event_info_t::symbol[1024]" +name of the event +.SS "int PAPI_event_info_t::timescope" +from start, etc\&. 
+.SS "char PAPI_event_info_t::units[64]" +units event is measured in +.SS "int PAPI_event_info_t::update_freq" +how frequently event is updated +.SS "int PAPI_event_info_t::update_type" +how event is updated +.SS "int PAPI_event_info_t::value_type" +sum or absolute + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_event_name_to_code.3 b/man/man3/PAPI_event_name_to_code.3 new file mode 100644 index 0000000..7a01ffe --- /dev/null +++ b/man/man3/PAPI_event_name_to_code.3 @@ -0,0 +1,83 @@ +.TH "PAPI_event_name_to_code" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_event_name_to_code \- +.PP +Convert a name to a numeric hardware event code\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include @n +int PAPI_event_name_to_code( const char * EventName, int * EventCode ); + +PAPI_event_name_to_code is used to translate an ASCII PAPI event name +into an integer PAPI event code. + +@param *EventCode + The numeric code for the event. +@param *EventName + A string containing the event name as listed in PAPI_presets or discussed in PAPI_native. + +@retval PAPI_EINVAL + One or more of the arguments is invalid. +@retval PAPI_ENOTPRESET + The hardware event specified is not a valid PAPI preset. +@retval PAPI_ENOINIT + The PAPI library has not been initialized. +@retval PAPI_ENOEVNT + The hardware event is not available on the underlying hardware. 
+ +@par Examples: + +.fi +.PP + +.PP +.nf +* int EventCode, EventSet = PAPI_NULL; +* // Convert to integer +* if ( PAPI_event_name_to_code( "PAPI_TOT_INS", &EventCode ) != PAPI_OK ) +* handle_error( 1 ); +* // Create the EventSet +* if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) +* handle_error( 1 ); +* // Add Total Instructions Executed to our EventSet +* if ( PAPI_add_event( EventSet, EventCode ) != PAPI_OK ) +* handle_error( 1 ); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_event_code_to_name\fP +.PP +\fBPAPI_remove_event\fP +.PP +\fBPAPI_get_event_info\fP +.PP +\fBPAPI_enum_event\fP +.PP +\fBPAPI_add_event\fP +.PP +\fBPAPI_add_named_event\fP +.PP +PAPI_presets +.PP +PAPI_native +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_exe_info_t.3 b/man/man3/PAPI_exe_info_t.3 new file mode 100644 index 0000000..899e249 --- /dev/null +++ b/man/man3/PAPI_exe_info_t.3 @@ -0,0 +1,31 @@ +.TH "PAPI_exe_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_exe_info_t \- +.PP +get the executable's info + +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "char \fBfullname\fP [1024]" +.br +.ti -1c +.RI "\fBPAPI_address_map_t\fP \fBaddress_info\fP" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "\fBPAPI_address_map_t\fP PAPI_exe_info_t::address_info" +executable's address space info +.SS "char PAPI_exe_info_t::fullname[1024]" +path + name + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_flips.3 b/man/man3/PAPI_flips.3 new file mode 100644 index 0000000..9db5ecc --- /dev/null +++ b/man/man3/PAPI_flips.3 @@ -0,0 +1,64 @@ +.TH "PAPI_flips" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_flips \- +.PP +Simplified call to get Mflips/s (floating point instruction rate), real and processor time\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface: \fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +int \fBPAPI_flips( float *rtime, float *ptime, long long *flpins, float *mflips )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fI*rtime\fP total realtime since the first call +.br +\fI*ptime\fP total process time since the first call +.br +\fI*flpins\fP total floating point instructions since the first call +.br +\fI*mflips\fP incremental (Mega) floating point instructions per second since the last call +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_flips()\fP\&. +.br +\fIPAPI_ENOEVNT\fP The floating point instructions event does not exist\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.RE +.PP +The first call to \fBPAPI_flips()\fP will initialize the PAPI High Level interface, set up the counters to monitor the PAPI_FP_INS event and start the counters\&. +.PP +Subsequent calls will read the counters and return total real time, total process time, total floating point instructions since the start of the measurement and the Mflip/s rate since the latest call to \fBPAPI_flips()\fP\&. A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. +.PP +\fBPAPI_flips\fP returns information related to floating point instructions using the PAPI_FP_INS event\&. This is intended to measure instruction rate through the floating point pipe with no massaging\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flops()\fP +.PP +\fBPAPI_ipc()\fP +.PP +\fBPAPI_epc()\fP +.PP +\fBPAPI_stop_counters()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_flops.3 b/man/man3/PAPI_flops.3 new file mode 100644 index 0000000..a2601f3 --- /dev/null +++ b/man/man3/PAPI_flops.3 @@ -0,0 +1,64 @@ +.TH "PAPI_flops" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_flops \- +.PP +Simplified call to get Mflops/s (floating point operation rate), real and processor time\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface: \fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +int \fBPAPI_flops( float *rtime, float *ptime, long long *flpops, float *mflops )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fI*rtime\fP total realtime since the first call +.br +\fI*ptime\fP total process time since the first call +.br +\fI*flpops\fP total floating point operations since the first call +.br +\fI*mflops\fP incremental (Mega) floating point operations per second since the last call +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_flops()\fP\&. +.br +\fIPAPI_ENOEVNT\fP The floating point operations event does not exist\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.RE +.PP +The first call to \fBPAPI_flops()\fP will initialize the PAPI High Level interface, set up the counters to monitor the PAPI_FP_OPS event and start the counters\&. +.PP +Subsequent calls will read the counters and return total real time, total process time, total floating point operations since the start of the measurement and the Mflop/s rate since the latest call to \fBPAPI_flops()\fP\&. A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. +.PP +\fBPAPI_flops\fP returns information related to theoretical floating point operations rather than simple instructions\&. 
It uses the PAPI_FP_OPS event which attempts to 'correctly' account for, e\&.g\&., FMA undercounts and FP Store overcounts, etc\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flips()\fP +.PP +\fBPAPI_ipc()\fP +.PP +\fBPAPI_epc()\fP +.PP +\fBPAPI_stop_counters()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_cmp_opt.3 b/man/man3/PAPI_get_cmp_opt.3 new file mode 100644 index 0000000..e4aba73 --- /dev/null +++ b/man/man3/PAPI_get_cmp_opt.3 @@ -0,0 +1,50 @@ +.TH "PAPI_get_cmp_opt" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_cmp_opt \- +.PP +Get component specific PAPI options\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBParameters:\fP +.RS 4 +\fIoption\fP is an input parameter describing the course of action\&. Possible values are defined in \fBpapi\&.h\fP and briefly described in the table below\&. The Fortran calls are implementations of specific options\&. +.br +\fIptr\fP is a pointer to a structure that acts as both an input and output parameter\&. +.br +\fIcidx\fP An integer identifier for a component\&. By convention, component 0 is always the cpu component\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.RE +.PP +\fBPAPI_get_opt()\fP and \fBPAPI_set_opt()\fP query or change the options of the PAPI library or a specific event set created by \fBPAPI_create_eventset\fP \&. Some options may require that the eventset be bound to a component before they can execute successfully\&. This can be done either by adding an event or by explicitly calling \fBPAPI_assign_eventset_component\fP \&. +.PP +The C interface for these functions passes a pointer to the \fBPAPI_option_t\fP structure\&. Not all options require or return information in this structure, and not all options are implemented for both get and set\&. 
Some options require a component index to be provided\&. These options are handled explicitly by the \fBPAPI_get_cmp_opt()\fP call for 'get' and implicitly through the option structure for 'set'\&. The Fortran interface is a series of calls implementing various subsets of the C interface\&. Not all options in C are available in Fortran\&. +.PP +\fBNote:\fP +.RS 4 +Some options, such as PAPI_DOMAIN and PAPI_MULTIPLEX, are also available as separate entry points in both C and Fortran\&. +.RE +.PP +The reader is urged to see the example code in the PAPI distribution for usage of \fBPAPI_get_opt\fP\&. The file \fBpapi\&.h\fP contains definitions for the structures unioned in the \fBPAPI_option_t\fP structure\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_debug\fP \fBPAPI_set_multiplex\fP \fBPAPI_set_domain\fP \fBPAPI_option_t\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_component_index.3 b/man/man3/PAPI_get_component_index.3 new file mode 100644 index 0000000..f4594a2 --- /dev/null +++ b/man/man3/PAPI_get_component_index.3 @@ -0,0 +1,49 @@ +.TH "PAPI_get_component_index" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_component_index \- +.PP +returns the component index for the named component + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@retval PAPI_ENOCMP + component does not exist + +@param name + name of component to find index for +@par Examples: + +.fi +.PP + +.PP +.nf + int cidx; + cidx = PAPI_get_component_index("cuda"); + if (cidx>=0) { + printf("The CUDA component is cidx %d\n",cidx); + } +* + +.fi +.PP + \fBPAPI_get_component_index()\fP returns the component index of the named component\&. This is useful for finding out if a specified component exists\&. 
+.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_event_component\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_component_info.3 b/man/man3/PAPI_get_component_info.3 new file mode 100644 index 0000000..3f2fec2 --- /dev/null +++ b/man/man3/PAPI_get_component_info.3 @@ -0,0 +1,62 @@ +.TH "PAPI_get_component_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_component_info \- +.PP +get information about a specific software component + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@param cidx + Component index + +This function returns a pointer to a structure containing detailed +information about a specific software component in the PAPI library. +This includes versioning information, preset and native event +information, and more. +For full details, see @ref PAPI_component_info_t. + +@par Examples: + +.fi +.PP + +.PP +.nf + const PAPI_component_info_t *cmpinfo = NULL; + if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) + exit(1); + if ((cmpinfo = PAPI_get_component_info(0)) == NULL) + exit(1); + printf("This component supports %d Preset Events and %d Native events\&.\n", + cmpinfo->num_preset_events, cmpinfo->num_native_events); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_executable_info\fP +.PP +\fBPAPI_get_hardware_info\fP +.PP +\fBPAPI_get_dmem_info\fP +.PP +\fBPAPI_get_opt\fP +.PP +\fBPAPI_component_info_t\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_get_dmem_info.3 b/man/man3/PAPI_get_dmem_info.3 new file mode 100644 index 0000000..8a7e87e --- /dev/null +++ b/man/man3/PAPI_get_dmem_info.3 @@ -0,0 +1,51 @@ +.TH "PAPI_get_dmem_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_dmem_info \- +.PP +Get information about the dynamic memory usage of the current program\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Prototype:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_get_dmem_info( PAPI_dmem_info_t *dest )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIdest\fP structure to be filled in \fBPAPI_dmem_info_t\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_ECMP\fP The function is not implemented for the current component\&. +.br +\fIPAPI_EINVAL\fP Any value in the structure or array may be undefined as indicated by this error value\&. +.br +\fIPAPI_ESYS\fP A system error occurred\&. +.RE +.PP +\fBNote:\fP +.RS 4 +This function is only implemented for the Linux operating system\&. This function takes a pointer to a \fBPAPI_dmem_info_t\fP structure and returns with the structure fields filled in\&. A value of PAPI_EINVAL in any field indicates an undefined parameter\&. +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_executable_info\fP \fBPAPI_get_hardware_info\fP \fBPAPI_get_opt\fP \fBPAPI_library_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_get_event_component.3 b/man/man3/PAPI_get_event_component.3 new file mode 100644 index 0000000..10ae0ab --- /dev/null +++ b/man/man3/PAPI_get_event_component.3 @@ -0,0 +1,46 @@ +.TH "PAPI_get_event_component" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_event_component \- +.PP +return component an event belongs to + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@retval ENOCMP + component does not exist + +@param EventCode + EventCode for which we want to know the component index +@par Examples: + +.fi +.PP + +.PP +.nf + int cidx,eventcode; + cidx = PAPI_get_event_component(eventcode); +* + +.fi +.PP + \fBPAPI_get_event_component()\fP returns the component an event belongs to\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_event_info\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_event_info.3 b/man/man3/PAPI_get_event_info.3 new file mode 100644 index 0000000..073b1d8 --- /dev/null +++ b/man/man3/PAPI_get_event_info.3 @@ -0,0 +1,43 @@ +.TH "PAPI_get_event_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_event_info \- +.PP +Get the event's name and description info\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBParameters:\fP +.RS 4 +\fIEventCode\fP event code (preset or native) +.br +\fIinfo\fP structure with the event information \fBPAPI_event_info_t\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ENOTPRESET\fP The PAPI preset mask was set, but the hardware event specified is not a valid PAPI preset\&. +.br +\fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. +.RE +.PP +This function fills the event information into a structure\&. In Fortran, some fields of the structure are returned explicitly\&. 
This function works with existing PAPI preset and native event codes\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_event_name_to_code\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_eventset_component.3 b/man/man3/PAPI_get_eventset_component.3 new file mode 100644 index 0000000..bba9f41 --- /dev/null +++ b/man/man3/PAPI_get_eventset_component.3 @@ -0,0 +1,50 @@ +.TH "PAPI_get_eventset_component" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_eventset_component \- +.PP +return index for component an eventset is assigned to + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@retval PAPI_ENOEVST + eventset does not exist +@retval PAPI_ENOCMP + component is invalid or does not exist +@retval positive value + valid component index + +@param EventSet + EventSet for which we want to know the component index +@par Examples: + +.fi +.PP + +.PP +.nf + int cidx,eventset; + cidx = PAPI_get_eventset_component(eventset); +* + +.fi +.PP + \fBPAPI_get_eventset_component()\fP returns the index of the component an eventset is assigned to\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_event_component\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_executable_info.3 b/man/man3/PAPI_get_executable_info.3 new file mode 100644 index 0000000..4dfc0cd --- /dev/null +++ b/man/man3/PAPI_get_executable_info.3 @@ -0,0 +1,71 @@ +.TH "PAPI_get_executable_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_executable_info \- +.PP +Get the executable's address space info\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include <papi\&.h> @n +const PAPI_exe_info_t *PAPI_get_executable_info( void ); + +This function returns a pointer to a structure containing information +about the current program. 
+ +@param fullname + Fully qualified path + filename of the executable. +@param name + Filename of the executable with no path information. +@param text_start, text_end + Start and End addresses of program text segment. +@param data_start, data_end + Start and End addresses of program data segment. +@param bss_start, bss_end + Start and End addresses of program bss segment. + +@retval PAPI_EINVAL + One or more of the arguments is invalid. + +@par Examples: + +.fi +.PP + +.PP +.nf +* const PAPI_exe_info_t *exeinfo = NULL; +* if ( ( exeinfo = PAPI_get_executable_info( ) ) == NULL ) +* exit( 1 ); +* printf( "Path+Program: %s\n", exeinfo->fullname ); +* printf( "Program: %s\n", exeinfo->address_info\&.name ); +* printf( "Text start: %p, Text end: %p\n", exeinfo->address_info\&.text_start, exeinfo->address_info\&.text_end ); +* printf( "Data start: %p, Data end: %p\n", exeinfo->address_info\&.data_start, exeinfo->address_info\&.data_end ); +* printf( "Bss start: %p, Bss end: %p\n", exeinfo->address_info\&.bss_start, exeinfo->address_info\&.bss_end ); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_opt\fP +.PP +\fBPAPI_get_hardware_info\fP +.PP +\fBPAPI_exe_info_t\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_hardware_info.3 b/man/man3/PAPI_get_hardware_info.3 new file mode 100644 index 0000000..e3a4111 --- /dev/null +++ b/man/man3/PAPI_get_hardware_info.3 @@ -0,0 +1,59 @@ +.TH "PAPI_get_hardware_info" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_hardware_info \- +.PP +get information about the system hardware + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +In C, this function returns a pointer to a structure containing information about the hardware on which the program runs. + In Fortran, the values of the structure are returned explicitly. 
+ +@retval PAPI_EINVAL + One or more of the arguments is invalid. + +.fi +.PP +.PP +.PP +.nf +@note The C structure contains detailed information about cache and TLB sizes. + This information is not available from Fortran. + +@par Examples: +.fi +.PP + +.PP +.nf + const PAPI_hw_info_t *hwinfo = NULL; + if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) + exit(1); + if ((hwinfo = PAPI_get_hardware_info()) == NULL) + exit(1); + printf("%d CPUs at %f Mhz\&.\en",hwinfo->totalcpus,hwinfo->mhz); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_hw_info_t\fP +.PP +\fBPAPI_get_executable_info\fP, \fBPAPI_get_opt\fP, \fBPAPI_get_dmem_info\fP, \fBPAPI_library_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_multiplex.3 b/man/man3/PAPI_get_multiplex.3 new file mode 100644 index 0000000..e7ccc22 --- /dev/null +++ b/man/man3/PAPI_get_multiplex.3 @@ -0,0 +1,97 @@ +.TH "PAPI_get_multiplex" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_multiplex \- +.PP +Get the multiplexing status of specified event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_get_multiplex( int EventSet )\fP; +.RE +.PP +\fBFortran Interface:\fP +.RS 4 +#include fpapi\&.h +.br + \fBPAPIF_get_multiplex( C_INT EventSet, C_INT check )\fP +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid, or the EventSet is already multiplexed\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_EISRUN\fP The EventSet is currently counting events\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. 
+.RE +.PP +\fBPAPI_get_multiplex\fP tests the state of the PAPI_MULTIPLEXING flag in the specified event set, returning \fITRUE\fP if a PAPI event set is multiplexed, or \fIFALSE\fP if not\&. +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +* int EventSet = PAPI_NULL; +* int ret; +* +* // Create an empty EventSet +* ret = PAPI_create_eventset(&EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Bind it to the CPU component +* ret = PAPI_assign_eventset_component(EventSet, 0); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Check current multiplex status +* ret = PAPI_get_multiplex(EventSet); +* if (ret == TRUE) printf("This event set is ready for multiplexing\&.\n"); +* if (ret == FALSE) printf("This event set is not enabled for multiplexing\&.\n"); +* if (ret < 0) handle_error(ret); +* +* // Turn on multiplexing +* ret = PAPI_set_multiplex(EventSet); +* if ((ret == PAPI_EINVAL) && (PAPI_get_multiplex(EventSet) == TRUE)) +* printf("This event set already has multiplexing enabled\n"); +* else if (ret != PAPI_OK) handle_error(ret); +* + +.fi +.PP + +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_multiplex_init\fP +.PP +\fBPAPI_set_opt\fP +.PP +\fBPAPI_create_eventset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_opt.3 b/man/man3/PAPI_get_opt.3 new file mode 100644 index 0000000..3e54e38 --- /dev/null +++ b/man/man3/PAPI_get_opt.3 @@ -0,0 +1,100 @@ +.TH "PAPI_get_opt" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_opt \- +.PP +Get PAPI library or event set options\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_get_opt( int option, PAPI_option_t * ptr )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIoption\fP Defines the option to get\&. Possible values are briefly described in the table below\&. 
+.br +\fIptr\fP Pointer to a structure determined by the selected option\&. See \fBPAPI_option_t\fP for a description of possible structures\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP The specified option or parameter is invalid\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_ECMP\fP The option is not implemented for the current component\&. +.br +\fIPAPI_ENOINIT\fP PAPI has not been initialized\&. +.RE +.PP +\fBPAPI_get_opt()\fP queries the options of the PAPI library or a specific event set created by \fBPAPI_create_eventset\fP\&. Some options may require that the eventset be bound to a component before they can execute successfully\&. This can be done either by adding an event or by explicitly calling \fBPAPI_assign_eventset_component\fP\&. +.PP +Ptr is a pointer to the \fBPAPI_option_t\fP structure, which is actually a union of different structures for different options\&. Not all options require or return information in these structures\&. Each returns different values in the structure\&. Some options require a component index to be provided\&. These options are handled explicitly by the \fBPAPI_get_cmp_opt()\fP call\&. +.PP +\fBNote:\fP +.RS 4 +Some options, such as PAPI_DOMAIN and PAPI_MULTIPLEX are also available as separate entry points in both C and Fortran\&. +.RE +.PP +The reader is encouraged to peruse the ctests code in the PAPI distribution for examples of usage of \fBPAPI_set_opt\fP\&. +.PP +\fBPossible values for the PAPI_get_opt option parameter\fP +.RS 4 + +OPTION DEFINITION +PAPI_DEFDOM Get default counting domain for newly created event sets. Requires a component index. +PAPI_DEFGRN Get default counting granularity. Requires a component index. +PAPI_DEBUG Get the PAPI debug state and the debug handler. The debug state is specified in ptr->debug.level. The debug handler is specified in ptr->debug.handler. 
+ For further information regarding debug states and the behavior of the handler, see PAPI_set_debug. +PAPI_MULTIPLEX Get current multiplexing state for specified EventSet. +PAPI_DEF_ITIMER Get the type of itimer used in software multiplexing, overflowing and profiling. +PAPI_DEF_MPX_NS Get the sampling time slice in nanoseconds for multiplexing and overflow. +PAPI_DEF_ITIMER_NS See PAPI_DEF_MPX_NS. +PAPI_ATTACH Get thread or process id to which event set is attached. Returns TRUE if currently attached. +PAPI_CPU_ATTACH Get ptr->cpu.cpu_num and Attach state for EventSet specified in ptr->cpu.eventset. +PAPI_DETACH Get thread or process id to which event set is attached. Returns TRUE if currently attached. +PAPI_DOMAIN Get domain for EventSet specified in ptr->domain.eventset. Will error if eventset is not bound to a component. +PAPI_GRANUL Get granularity for EventSet specified in ptr->granularity.eventset. Will error if eventset is not bound to a component. +PAPI_INHERIT Get current inheritance state for specified EventSet. +PAPI_PRELOAD Get LD_PRELOAD environment equivalent. +PAPI_CLOCKRATE Get clockrate in MHz. +PAPI_MAX_CPUS Get number of CPUs. +PAPI_EXEINFO Get Executable addresses for text/data/bss. +PAPI_HWINFO Get information about the hardware. +PAPI_LIB_VERSION Get the full PAPI version of the library. +PAPI_MAX_HWCTRS Get number of counters. Requires a component index. +PAPI_MAX_MPX_CTRS Get maximum number of multiplexing counters. Requires a component index. +PAPI_SHLIBINFO Get shared library information used by the program. +PAPI_COMPONENTINFO Get the PAPI features the specified component supports. Requires a component index. + +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_multiplex\fP +.PP +\fBPAPI_get_cmp_opt\fP +.PP +\fBPAPI_set_opt\fP +.PP +\fBPAPI_option_t\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_get_overflow_event_index.3 b/man/man3/PAPI_get_overflow_event_index.3 new file mode 100644 index 0000000..4e405d8 --- /dev/null +++ b/man/man3/PAPI_get_overflow_event_index.3 @@ -0,0 +1,65 @@ +.TH "PAPI_get_overflow_event_index" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_overflow_event_index \- +.PP +converts an overflow vector into an array of indexes to overflowing events + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@param EventSet + an integer handle to a PAPI event set as created by PAPI_create_eventset +@param overflow_vector + a vector with bits set for each counter that overflowed. + This vector is passed by the system to the overflow handler routine. +@param *array + an array of indexes for events in EventSet. + No more than *number indexes will be stored into the array. +@param *number + On input the variable determines the size of the array. + On output the variable contains the number of indexes in the array. + +@retval PAPI_EINVAL + One or more of the arguments is invalid. This could occur if the overflow_vector is empty (zero), if the array or number pointers are NULL, if the value of number is less than one, or if the EventSet is empty. +@retval PAPI_ENOEVST +The EventSet specified does not exist. +@par Examples + +.fi +.PP + +.PP +.nf + void handler(int EventSet, void *address, long_long overflow_vector, void *context){ + int Events[4], number, i; + int total = 0, retval; + printf("Overflow #%d\n Handler(%d) Overflow at %p! vector=%#llx\n", + total, EventSet, address, overflow_vector); + total++; + number = 4; + retval = PAPI_get_overflow_event_index(EventSet, + overflow_vector, Events, &number); + if(retval == PAPI_OK) + for(i=0; i @n + int PAPI_get_thr_specific( int tag, void **ptr ); + +@param tag + An identifier, the value of which is either PAPI_USR1_TLS or + PAPI_USR2_TLS. 
This identifier indicates which of several data + structures associated with this thread is to be accessed. +@param ptr + A pointer to the memory containing the data structure. + +@retval PAPI_OK +@retval PAPI_EINVAL + The @em tag argument is out of range. + +In C, PAPI_get_thr_specific will retrieve the pointer from the array with index @em tag. +There are 2 user available locations and @em tag can be either +PAPI_USR1_TLS or PAPI_USR2_TLS. +The array mentioned above is managed by PAPI and allocated to each +thread which has called PAPI_thread_init. +There is no Fortran equivalent function. + +@par Example: + +.fi +.PP + +.PP +.nf + int ret; + HighLevelInfo *state = NULL; + ret = PAPI_thread_init(pthread_self); + if (ret != PAPI_OK) handle_error(ret); + + // Do we have the thread specific data setup yet? + +ret = PAPI_get_thr_specific(PAPI_USR1_TLS, (void *) &state); +if (ret != PAPI_OK || state == NULL) { + state = (HighLevelInfo *) malloc(sizeof(HighLevelInfo)); + if (state == NULL) return (PAPI_ESYS); + memset(state, 0, sizeof(HighLevelInfo)); + state->EventSet = PAPI_NULL; + ret = PAPI_create_eventset(&state->EventSet); + if (ret != PAPI_OK) return (PAPI_ESYS); + ret = PAPI_set_thr_specific(PAPI_USR1_TLS, state); + if (ret != PAPI_OK) return (ret); +} +* + +.fi +.PP + +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_register_thread\fP \fBPAPI_thread_init\fP \fBPAPI_thread_id\fP \fBPAPI_set_thr_specific\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_get_virt_cyc.3 b/man/man3/PAPI_get_virt_cyc.3 new file mode 100644 index 0000000..ccf28b4 --- /dev/null +++ b/man/man3/PAPI_get_virt_cyc.3 @@ -0,0 +1,55 @@ +.TH "PAPI_get_virt_cyc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_virt_cyc \- +.PP +get virtual time counter value in clock cycles + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@retval PAPI_ECNFLCT + If there is no master event set. + This will happen if the library has not been initialized, or + for threaded applications, if there has been no thread id + function defined by the PAPI_thread_init function. +@retval PAPI_ENOMEM + For threaded applications, if there has not yet been any thread + specific master event created for the current thread, and if + the allocation of such an event set fails, the call will return + PAPI_ENOMEM or PAPI_ESYS . + +This function returns the total number of virtual units from some +arbitrary starting point. +Virtual units accrue every time the process is running in user-mode on +behalf of the process. +Like the real time counters, this count is guaranteed to exist on every platform +PAPI supports. +However on some platforms, the resolution can be as bad as 1/Hz as defined +by the operating system. +@par Examples: + +.fi +.PP + +.PP +.nf + s = PAPI_get_virt_cyc(); + your_slow_code(); + e = PAPI_get_virt_cyc(); + printf("Process has run for cycles: %lld\en",e-s); +* + +.fi +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_virt_nsec.3 b/man/man3/PAPI_get_virt_nsec.3 new file mode 100644 index 0000000..3a7d43f --- /dev/null +++ b/man/man3/PAPI_get_virt_nsec.3 @@ -0,0 +1,27 @@ +.TH "PAPI_get_virt_nsec" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_virt_nsec \- +.PP +Get virtual time counter values in nanoseconds\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_ECNFLCT\fP If there is no master event set\&. This will happen if the library has not been initialized, or for threaded applications, if there has been no thread id function defined by the \fBPAPI_thread_init\fP function\&. +.br +\fIPAPI_ENOMEM\fP For threaded applications, if there has not yet been any thread specific master event created for the current thread, and if the allocation of such an event set fails, the call will return PAPI_ENOMEM or PAPI_ESYS \&. +.RE +.PP +This function returns the total number of virtual units from some arbitrary starting point\&. Virtual units accrue every time the process is running in user-mode on behalf of the process\&. Like the real time counters, this count is guaranteed to exist on every platform PAPI supports\&. However on some platforms, the resolution can be as bad as 1/Hz as defined by the operating system\&. + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_get_virt_usec.3 b/man/man3/PAPI_get_virt_usec.3 new file mode 100644 index 0000000..f139bf1 --- /dev/null +++ b/man/man3/PAPI_get_virt_usec.3 @@ -0,0 +1,68 @@ +.TH "PAPI_get_virt_usec" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_get_virt_usec \- +.PP +get virtual time counter values in microseconds + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@retval PAPI_ECNFLCT + If there is no master event set. + This will happen if the library has not been initialized, or for threaded + applications, if there has been no thread id function defined by the + PAPI_thread_init function. +@retval PAPI_ENOMEM + For threaded applications, if there has not yet been any thread + specific master event created for the current thread, and if the + allocation of such an event set fails, the call will return PAPI_ENOMEM or PAPI_ESYS . 
+ +This function returns the total number of virtual units from some +arbitrary starting point. +Virtual units accrue every time the process is running in user-mode on +behalf of the process. +Like the real time counters, this count is guaranteed to exist on every +platform PAPI supports. However on some platforms, the resolution can be +as bad as 1/Hz as defined by the operating system. +@par Examples: + +.fi +.PP + +.PP +.nf + s = PAPI_get_virt_usec(); + your_slow_code(); + e = PAPI_get_virt_usec(); + printf("Process has run for microseconds: %lld\en",e-s); +* + +.fi +.PP + +.PP +\fBSee Also:\fP +.RS 4 +PAPIF +.PP +PAPI +.PP +PAPI +.PP +\fBPAPI_get_real_cyc\fP +.PP +\fBPAPI_get_virt_cyc\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_granularity_option_t.3 b/man/man3/PAPI_granularity_option_t.3 new file mode 100644 index 0000000..79a5d1a --- /dev/null +++ b/man/man3/PAPI_granularity_option_t.3 @@ -0,0 +1,32 @@ +.TH "PAPI_granularity_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_granularity_option_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBdef_cidx\fP" +.br +.ti -1c +.RI "int \fBeventset\fP" +.br +.ti -1c +.RI "int \fBgranularity\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + +.SH "Field Documentation" +.PP +.SS "int PAPI_granularity_option_t::def_cidx" +this structure requires a component index to set default granularity + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_hw_info_t.3 b/man/man3/PAPI_hw_info_t.3 new file mode 100644 index 0000000..a025bcc --- /dev/null +++ b/man/man3/PAPI_hw_info_t.3 @@ -0,0 +1,134 @@ +.TH "PAPI_hw_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_hw_info_t \- +.PP +Hardware info structure\&. 
+ +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBncpu\fP" +.br +.ti -1c +.RI "int \fBthreads\fP" +.br +.ti -1c +.RI "int \fBcores\fP" +.br +.ti -1c +.RI "int \fBsockets\fP" +.br +.ti -1c +.RI "int \fBnnodes\fP" +.br +.ti -1c +.RI "int \fBtotalcpus\fP" +.br +.ti -1c +.RI "int \fBvendor\fP" +.br +.ti -1c +.RI "char \fBvendor_string\fP [128]" +.br +.ti -1c +.RI "int \fBmodel\fP" +.br +.ti -1c +.RI "char \fBmodel_string\fP [128]" +.br +.ti -1c +.RI "float \fBrevision\fP" +.br +.ti -1c +.RI "int \fBcpuid_family\fP" +.br +.ti -1c +.RI "int \fBcpuid_model\fP" +.br +.ti -1c +.RI "int \fBcpuid_stepping\fP" +.br +.ti -1c +.RI "int \fBcpu_max_mhz\fP" +.br +.ti -1c +.RI "int \fBcpu_min_mhz\fP" +.br +.ti -1c +.RI "\fBPAPI_mh_info_t\fP \fBmem_hierarchy\fP" +.br +.ti -1c +.RI "int \fBvirtualized\fP" +.br +.ti -1c +.RI "char \fBvirtual_vendor_string\fP [128]" +.br +.ti -1c +.RI "char \fBvirtual_vendor_version\fP [128]" +.br +.ti -1c +.RI "float \fBmhz\fP" +.br +.ti -1c +.RI "int \fBclock_mhz\fP" +.br +.ti -1c +.RI "int \fBreserved\fP [8]" +.br +.in -1c +.SH "Field Documentation" +.PP +.SS "int PAPI_hw_info_t::clock_mhz" +Deprecated +.SS "int PAPI_hw_info_t::cores" +Number of cores per socket +.SS "int PAPI_hw_info_t::cpu_max_mhz" +Maximum supported CPU speed +.SS "int PAPI_hw_info_t::cpu_min_mhz" +Minimum supported CPU speed +.SS "int PAPI_hw_info_t::cpuid_family" +cpuid family +.SS "int PAPI_hw_info_t::cpuid_model" +cpuid model +.SS "int PAPI_hw_info_t::cpuid_stepping" +cpuid stepping +.SS "\fBPAPI_mh_info_t\fP PAPI_hw_info_t::mem_hierarchy" +PAPI memory hierarchy description +.SS "float PAPI_hw_info_t::mhz" +Deprecated +.SS "int PAPI_hw_info_t::model" +Model number of CPU +.SS "char PAPI_hw_info_t::model_string[128]" +Model string of CPU +.SS "int PAPI_hw_info_t::ncpu" +Number of CPUs per NUMA Node +.SS "int PAPI_hw_info_t::nnodes" +Total Number of NUMA Nodes +.SS "float PAPI_hw_info_t::revision" +Revision of CPU +.SS "int PAPI_hw_info_t::sockets" 
+Number of sockets +.SS "int PAPI_hw_info_t::threads" +Number of hardware threads per core +.SS "int PAPI_hw_info_t::totalcpus" +Total number of CPUs in the entire system +.SS "int PAPI_hw_info_t::vendor" +Vendor number of CPU +.SS "char PAPI_hw_info_t::vendor_string[128]" +Vendor string of CPU +.SS "char PAPI_hw_info_t::virtual_vendor_string[128]" +Vendor for virtual machine +.SS "char PAPI_hw_info_t::virtual_vendor_version[128]" +Version of virtual machine +.SS "int PAPI_hw_info_t::virtualized" +Running in virtual machine + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_inherit_option_t.3 b/man/man3/PAPI_inherit_option_t.3 new file mode 100644 index 0000000..b3e4ce5 --- /dev/null +++ b/man/man3/PAPI_inherit_option_t.3 @@ -0,0 +1,25 @@ +.TH "PAPI_inherit_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_inherit_option_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBeventset\fP" +.br +.ti -1c +.RI "int \fBinherit\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_ipc.3 b/man/man3/PAPI_ipc.3 new file mode 100644 index 0000000..8362469 --- /dev/null +++ b/man/man3/PAPI_ipc.3 @@ -0,0 +1,66 @@ +.TH "PAPI_ipc" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_ipc \- +.PP +Simplified call to get instructions per cycle, real and processor time\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface: \fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +int \fBPAPI_ipc( float *rtime, float *ptime, long long *ins, float *ipc )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fI*rtime\fP total realtime since the first call +.br +\fI*ptime\fP total process time since the first call +.br +\fI*ins\fP total instructions since the first call +.br +\fI*ipc\fP incremental instructions per cycle since the last call +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP The counters were already started by something other than \fBPAPI_ipc()\fP\&. +.br +\fIPAPI_ENOEVNT\fP The total instructions or total cycles event does not exist\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.RE +.PP +The first call to \fBPAPI_ipc()\fP will initialize the PAPI High Level interface, set up the counters to monitor PAPI_TOT_INS and PAPI_TOT_CYC events and start the counters\&. +.PP +Subsequent calls will read the counters and return total real time, total process time, total instructions since the start of the measurement and the IPC rate since the latest call to \fBPAPI_ipc()\fP\&. +.PP +A call to \fBPAPI_stop_counters()\fP will stop the counters from running and then calls such as \fBPAPI_start_counters()\fP or other rate calls can safely be used\&. +.PP +\fBPAPI_ipc\fP should return a ratio greater than 1\&.0, indicating instruction level parallelism within the chip\&. The larger this ratio the more efficiently the program is running\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_flips()\fP +.PP +\fBPAPI_flops()\fP +.PP +\fBPAPI_epc()\fP +.PP +\fBPAPI_stop_counters()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_is_initialized.3 b/man/man3/PAPI_is_initialized.3 new file mode 100644 index 0000000..9af2c81 --- /dev/null +++ b/man/man3/PAPI_is_initialized.3 @@ -0,0 +1,63 @@ +.TH "PAPI_is_initialized" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_is_initialized \- +.PP +check for initialization + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@retval PAPI_NOT_INITED + Library has not been initialized +@retval PAPI_LOW_LEVEL_INITED + Low level has called library init +@retval PAPI_HIGH_LEVEL_INITED + High level has called library init +@retval PAPI_THREAD_LEVEL_INITED + Threads have been inited + +@param version + upon initialization, PAPI checks the argument against the internal value of PAPI_VER_CURRENT when the library was compiled. +This guards against portability problems when updating the PAPI shared libraries on your system. +@par Examples: + +.fi +.PP + +.PP +.nf + int retval; + retval = PAPI_library_init(PAPI_VER_CURRENT); + if (retval != PAPI_VER_CURRENT && retval > 0) { + fprintf(stderr,"PAPI library version mismatch!\en"); + exit(1); } + if (retval < 0) + handle_error(retval); + retval = PAPI_is_initialized(); + if (retval != PAPI_LOW_LEVEL_INITED) + handle_error(retval); +* + +.fi +.PP + \fBPAPI_is_initialized()\fP returns the status of the PAPI library\&. The PAPI library can be in one of four states, as described under RETURN VALUES\&. +.PP +\fBSee Also:\fP +.RS 4 +PAPI +.PP +\fBPAPI_thread_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_itimer_option_t.3 b/man/man3/PAPI_itimer_option_t.3 new file mode 100644 index 0000000..0bd42e4 --- /dev/null +++ b/man/man3/PAPI_itimer_option_t.3 @@ -0,0 +1,31 @@ +.TH "PAPI_itimer_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_itimer_option_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBitimer_num\fP" +.br +.ti -1c +.RI "int \fBitimer_sig\fP" +.br +.ti -1c +.RI "int \fBns\fP" +.br +.ti -1c +.RI "int \fBflags\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_library_init.3 b/man/man3/PAPI_library_init.3 new file mode 100644 index 0000000..d811f5d --- /dev/null +++ b/man/man3/PAPI_library_init.3 @@ -0,0 +1,69 @@ +.TH "PAPI_library_init" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_library_init \- +.PP +initialize the PAPI library\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@param version + upon initialization, PAPI checks the argument against the internal + value of PAPI_VER_CURRENT when the library was compiled. + This guards against portability problems when updating the PAPI shared + libraries on your system. + +@retval PAPI_EINVAL + papi.h is different from the version used to compile the PAPI library. +@retval PAPI_ENOMEM + Insufficient memory to complete the operation. +@retval PAPI_ECMP + This component does not support the underlying hardware. +@retval PAPI_ESYS + A system or C library call failed inside PAPI, see the errno variable. + +PAPI_library_init() initializes the PAPI library. +PAPI_is_initialized() check for initialization. +It must be called before any low level PAPI functions can be used. 
+If your application is making use of threads PAPI_thread_init must also be +called prior to making any calls to the library other than PAPI_library_init() . +@par Examples: + +.fi +.PP + +.PP +.nf +* int retval; +* retval = PAPI_library_init(PAPI_VER_CURRENT); +* if (retval != PAPI_VER_CURRENT && retval > 0) { +* fprintf(stderr,"PAPI library version mismatch!\en"); +* exit(1); } +* if (retval < 0) +* handle_error(retval); +* retval = PAPI_is_initialized(); +* if (retval != PAPI_LOW_LEVEL_INITED) +* handle_error(retval); +* + +.fi +.PP + +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_thread_init\fP PAPI +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_list_events.3 b/man/man3/PAPI_list_events.3 new file mode 100644 index 0000000..7d770c5 --- /dev/null +++ b/man/man3/PAPI_list_events.3 @@ -0,0 +1,101 @@ +.TH "PAPI_list_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_list_events \- +.PP +list the events in an event set + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP +List the events in an event set\&. +.PP +.PP +.nf +PAPI_list_events() returns an array of events and a count of the +.fi +.PP + total number of events in an event set\&. This call assumes an initialized PAPI library and a successfully created event set\&. +.PP +\fBC Interface\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_list_events(int EventSet, int *Events, int *number )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP An integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP +.br +\fI*Events\fP A pointer to a preallocated array of codes for events, such as PAPI_INT_INS\&. No more than *number codes will be stored into the array\&. +.br +\fI*number\fP On input, the size of the Events array, or maximum number of event codes to be returned\&. A value of 0 can be used to probe an event set\&. 
On output, the number of events actually in the event set\&. This value may be greater than the actually stored number of event codes\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP +.br +\fIPAPI_ENOEVST\fP +.RE +.PP +\fBExamples:\fP +.RS 4 + +.PP +.nf + if (PAPI_event_name_to_code("PAPI_TOT_INS",&EventCode) != PAPI_OK) + exit(1); + if (PAPI_add_event(EventSet, EventCode) != PAPI_OK) + exit(1); + // Convert a second event name to an event code + if (PAPI_event_name_to_code("PAPI_L1_LDM",&EventCode) != PAPI_OK) + exit(1); + if (PAPI_add_event(EventSet, EventCode) != PAPI_OK) + exit(1); + number = 0; + if(PAPI_list_events(EventSet, NULL, &number)) + exit(1); + if(number != 2) + exit(1); + if(PAPI_list_events(EventSet, Events, &number)) + exit(1); +* + +.fi +.PP + +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_event_code_to_name\fP +.PP +\fBPAPI_event_name_to_code\fP +.PP +\fBPAPI_add_event\fP +.PP +\fBPAPI_create_eventset\fP +.RE +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + \fBPAPIF_list_events\fP( C_INT EventSet, C_INT(*) Events, C_INT number, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_list_events\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_list_threads.3 b/man/man3/PAPI_list_threads.3 new file mode 100644 index 0000000..4b11f77 --- /dev/null +++ b/man/man3/PAPI_list_threads.3 @@ -0,0 +1,56 @@ +.TH "PAPI_list_threads" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_list_threads \- +.PP +List the registered thread ids\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP +\fBPAPI_list_threads()\fP returns to the caller a list of all thread IDs known to PAPI\&. +.PP +This call assumes an initialized PAPI library\&. 
+.PP +\fBC Interface\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +int \fBPAPI_list_threads(PAPI_thread_id_t *tids, int * number )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fI*tids\fP -- A pointer to a preallocated array\&. This may be NULL to only return a count of threads\&. No more than *number codes will be stored in the array\&. +.br +\fI*number\fP -- An input and output parameter\&. Input specifies the number of allocated elements in *tids (if non-NULL) and output specifies the number of threads\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP The call returned successfully\&. +.br +\fIPAPI_EINVAL\fP *number has an improper value +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_thr_specific\fP +.PP +\fBPAPI_set_thr_specific\fP +.PP +\fBPAPI_register_thread\fP +.PP +\fBPAPI_unregister_thread\fP +.PP +\fBPAPI_thread_init\fP \fBPAPI_thread_id\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_lock.3 b/man/man3/PAPI_lock.3 new file mode 100644 index 0000000..76e7dbf --- /dev/null +++ b/man/man3/PAPI_lock.3 @@ -0,0 +1,44 @@ +.TH "PAPI_lock" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_lock \- +.PP +Lock one of two mutex variables defined in \fBpapi\&.h\fP\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP +\fBPAPI_lock()\fP grabs access to one of the two PAPI mutex variables\&. This function is provided to the user to have a platform independent call to a (hopefully) efficiently implemented mutex\&. +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +void \fBPAPI_lock(int lock)\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIlock\fP -- an integer value specifying one of the two user locks: PAPI_USR1_LOCK or PAPI_USR2_LOCK +.RE +.PP +\fBReturns:\fP +.RS 4 +There is no return value for this call\&. Upon return from \fBPAPI_lock\fP the current thread has acquired exclusive access to the specified PAPI mutex\&. 
+.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_unlock\fP +.PP +\fBPAPI_thread_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_mh_cache_info_t.3 b/man/man3/PAPI_mh_cache_info_t.3 new file mode 100644 index 0000000..5a139f3 --- /dev/null +++ b/man/man3/PAPI_mh_cache_info_t.3 @@ -0,0 +1,38 @@ +.TH "PAPI_mh_cache_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_mh_cache_info_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBtype\fP" +.br +.ti -1c +.RI "int \fBsize\fP" +.br +.ti -1c +.RI "int \fBline_size\fP" +.br +.ti -1c +.RI "int \fBnum_lines\fP" +.br +.ti -1c +.RI "int \fBassociativity\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + +.SH "Field Documentation" +.PP +.SS "int PAPI_mh_cache_info_t::type" +Empty, instr, data, vector, trace, unified + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_mh_info_t.3 b/man/man3/PAPI_mh_info_t.3 new file mode 100644 index 0000000..afb7240 --- /dev/null +++ b/man/man3/PAPI_mh_info_t.3 @@ -0,0 +1,25 @@ +.TH "PAPI_mh_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_mh_info_t \- +.PP +Memory hierarchy (mh) information\&. + +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBlevels\fP" +.br +.ti -1c +.RI "\fBPAPI_mh_level_t\fP \fBlevel\fP [4]" +.br +.in -1c + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_mh_level_t.3 b/man/man3/PAPI_mh_level_t.3 new file mode 100644 index 0000000..e007d16 --- /dev/null +++ b/man/man3/PAPI_mh_level_t.3 @@ -0,0 +1,25 @@ +.TH "PAPI_mh_level_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_mh_level_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "\fBPAPI_mh_tlb_info_t\fP \fBtlb\fP [6]" +.br +.ti -1c +.RI "\fBPAPI_mh_cache_info_t\fP \fBcache\fP [6]" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_mh_tlb_info_t.3 b/man/man3/PAPI_mh_tlb_info_t.3 new file mode 100644 index 0000000..b8f96cf --- /dev/null +++ b/man/man3/PAPI_mh_tlb_info_t.3 @@ -0,0 +1,35 @@ +.TH "PAPI_mh_tlb_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_mh_tlb_info_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBtype\fP" +.br +.ti -1c +.RI "int \fBnum_entries\fP" +.br +.ti -1c +.RI "int \fBpage_size\fP" +.br +.ti -1c +.RI "int \fBassociativity\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + +.SH "Field Documentation" +.PP +.SS "int PAPI_mh_tlb_info_t::type" +Empty, instr, data, vector, unified + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_mpx_info_t.3 b/man/man3/PAPI_mpx_info_t.3 new file mode 100644 index 0000000..3243b86 --- /dev/null +++ b/man/man3/PAPI_mpx_info_t.3 @@ -0,0 +1,36 @@ +.TH "PAPI_mpx_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_mpx_info_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBtimer_sig\fP" +.br +.ti -1c +.RI "int \fBtimer_num\fP" +.br +.ti -1c +.RI "int \fBtimer_us\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + +.SH "Field Documentation" +.PP +.SS "int PAPI_mpx_info_t::timer_num" +Number of the itimer or POSIX 1 timer used by the multiplex timer: PAPI_ITIMER +.SS "int PAPI_mpx_info_t::timer_sig" +Signal number used by the multiplex timer, 0 if not: PAPI_SIGNAL +.SS "int PAPI_mpx_info_t::timer_us" +uS between switching of sets: PAPI_MPX_DEF_US + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_multiplex_init.3 b/man/man3/PAPI_multiplex_init.3 new file mode 100644 index 0000000..df04006 --- /dev/null +++ b/man/man3/PAPI_multiplex_init.3 @@ -0,0 +1,63 @@ +.TH "PAPI_multiplex_init" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_multiplex_init \- +.PP +Initialize multiplex support in the PAPI library\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +PAPI_multiplex_init() enables and initializes multiplex support in + the PAPI library. +Multiplexing allows a user to count more events than total physical + counters by time sharing the existing counters at some loss in + precision. +Applications that make no use of multiplexing do not need to call + this routine. 
+ +.fi +.PP +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_multiplex_init\fP (void); +.RE +.PP +\fBExamples\fP +.RS 4 + +.PP +.nf +* retval = PAPI_multiplex_init(); +* + +.fi +.PP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP This call always returns PAPI_OK +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_multiplex\fP +.PP +\fBPAPI_get_multiplex\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_multiplex_option_t.3 b/man/man3/PAPI_multiplex_option_t.3 new file mode 100644 index 0000000..5ed44a0 --- /dev/null +++ b/man/man3/PAPI_multiplex_option_t.3 @@ -0,0 +1,28 @@ +.TH "PAPI_multiplex_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_multiplex_option_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "int \fBeventset\fP" +.br +.ti -1c +.RI "int \fBns\fP" +.br +.ti -1c +.RI "int \fBflags\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_num_cmp_hwctrs.3 b/man/man3/PAPI_num_cmp_hwctrs.3 new file mode 100644 index 0000000..fbba417 --- /dev/null +++ b/man/man3/PAPI_num_cmp_hwctrs.3 @@ -0,0 +1,56 @@ +.TH "PAPI_num_cmp_hwctrs" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_num_cmp_hwctrs \- +.PP +Return the number of hardware counters for the specified component\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP +\fBPAPI_num_cmp_hwctrs()\fP returns the number of counters present in the specified component\&. By convention, component 0 is always the cpu\&. +.PP +On some components, especially for CPUs, the value returned is a theoretical maximum for estimation purposes only\&. It might not be possible to easily create an EventSet that contains the full number of events\&. 
This can be due to a variety of reasons: 1)\&. Some CPUs (especially Intel and POWER) have the notion of fixed counters that can only measure one thing, usually cycles\&. 2)\&. Some CPUs have very explicit rules about which event can run in which counter\&. In this case it might not be possible to add a wanted event even if counters are free\&. 3)\&. Some CPUs halve the number of counters available when running with SMT (multiple CPU threads) enabled\&. 4)\&. Some operating systems 'steal' a counter to use for things such as NMI Watchdog timers\&. The only sure way to see if events will fit is to attempt adding events to an EventSet, and do something sensible if an error is generated\&. +.PP +\fBPAPI_library_init()\fP must be called in order for this function to return anything greater than 0\&. +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_num_cmp_hwctrs(int cidx )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIcidx\fP -- An integer identifier for a component\&. By convention, component 0 is always the cpu component\&. +.RE +.PP +\fBExample\fP +.RS 4 + +.PP +.nf +* // Query the cpu component for the number of counters\&. +* printf("%d hardware counters found\&.\\n", PAPI_num_cmp_hwctrs(0)); +* + +.fi +.PP +.RE +.PP +\fBReturns:\fP +.RS 4 +On success, this function returns a value greater than zero\&. +.br + A zero result usually means the library has not been initialized\&. +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_num_components.3 b/man/man3/PAPI_num_components.3 new file mode 100644 index 0000000..b64c196 --- /dev/null +++ b/man/man3/PAPI_num_components.3 @@ -0,0 +1,34 @@ +.TH "PAPI_num_components" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_num_components \- +.PP +Get the number of components available on the system\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +Returns: + Number of components available on the system + +.fi +.PP +.PP +.PP +.nf +// Query the library for a component count\&. +printf("%d components installed\&.", PAPI_num_components() ); + * +.fi +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_num_counters.3 b/man/man3/PAPI_num_counters.3 new file mode 100644 index 0000000..ba36be0 --- /dev/null +++ b/man/man3/PAPI_num_counters.3 @@ -0,0 +1,67 @@ +.TH "PAPI_num_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_num_counters \- +.PP +Get the number of hardware counters available on the system\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +C Interface: +#include <papi\&.h> +int PAPI_num_counters( void ); + +.fi +.PP +.PP +\fBPostcondition:\fP +.RS 4 +Initializes the library to PAPI_HIGH_LEVEL_INITED if necessary\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP \fBpapi\&.h\fP is different from the version used to compile the PAPI library\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.br +\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. +.RE +.PP +\fBExamples:\fP +.RS 4 + +.PP +.nf +* int num_hwcntrs; +* // The installation does not support PAPI +* if ((num_hwcntrs = PAPI_num_counters()) < 0 ) +* handle_error(1); +* // The installation supports PAPI, but has no counters +* if ((num_hwcntrs = PAPI_num_counters()) == 0 ) +* fprintf(stderr,"Info:: This machine does not provide hardware counters\&.\\n"); +* + +.fi +.PP +.RE +.PP +\fBPAPI_num_counters()\fP returns the optimal length of the values array for the high level functions\&. This value corresponds to the number of hardware counters supported by the current CPU component\&. +.PP +\fBNote:\fP +.RS 4 +This function only works for the CPU component\&. 
To determine the number of counters on another component, use the low level \fBPAPI_num_cmp_hwctrs()\fP\&. +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_num_events.3 b/man/man3/PAPI_num_events.3 new file mode 100644 index 0000000..313c9fc --- /dev/null +++ b/man/man3/PAPI_num_events.3 @@ -0,0 +1,63 @@ +.TH "PAPI_num_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_num_events \- +.PP +Return the number of events in an event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP +\fBPAPI_num_events()\fP returns the number of preset and/or native events contained in an event set\&. The event set should be created by \fBPAPI_create_eventset\fP \&. +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_num_events(int EventSet )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP -- an integer handle for a PAPI event set created by \fBPAPI_create_eventset\fP\&. +.br +\fI*count\fP -- (Fortran only) On output the variable contains the number of events in the event set +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIOn\fP success, this function returns the positive number of events in the event set\&. +.br +\fIPAPI_EINVAL\fP The event count is zero; only if code is compiled with debug enabled\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.RE +.PP +\fBExample\fP +.RS 4 + +.PP +.nf +* // Count the events in our EventSet +* printf("%d events found in EventSet\&.\\n", PAPI_num_events(EventSet)); +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_add_event\fP +.PP +\fBPAPI_create_eventset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_num_hwctrs.3 b/man/man3/PAPI_num_hwctrs.3 new file mode 100644 index 0000000..207a8c1 --- /dev/null +++ b/man/man3/PAPI_num_hwctrs.3 @@ -0,0 +1,25 @@ +.TH "PAPI_num_hwctrs" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_num_hwctrs \- +.PP +Return the number of hardware counters on the cpu\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_num_cmp_hwctrs\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_option_t.3 b/man/man3/PAPI_option_t.3 new file mode 100644 index 0000000..6884909 --- /dev/null +++ b/man/man3/PAPI_option_t.3 @@ -0,0 +1,70 @@ +.TH "PAPI_option_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_option_t \- +.PP +A pointer to the following is passed to PAPI_set/get_opt() + +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "\fBPAPI_preload_info_t\fP \fBpreload\fP" +.br +.ti -1c +.RI "\fBPAPI_debug_option_t\fP \fBdebug\fP" +.br +.ti -1c +.RI "\fBPAPI_inherit_option_t\fP \fBinherit\fP" +.br +.ti -1c +.RI "\fBPAPI_granularity_option_t\fP \fBgranularity\fP" +.br +.ti -1c +.RI "\fBPAPI_granularity_option_t\fP \fBdefgranularity\fP" +.br +.ti -1c +.RI "\fBPAPI_domain_option_t\fP \fBdomain\fP" +.br +.ti -1c +.RI "\fBPAPI_domain_option_t\fP \fBdefdomain\fP" +.br +.ti -1c +.RI "\fBPAPI_attach_option_t\fP \fBattach\fP" +.br +.ti -1c +.RI "\fBPAPI_cpu_option_t\fP \fBcpu\fP" +.br +.ti -1c +.RI "\fBPAPI_multiplex_option_t\fP \fBmultiplex\fP" +.br +.ti -1c +.RI "\fBPAPI_itimer_option_t\fP \fBitimer\fP" +.br +.ti -1c +.RI "\fBPAPI_hw_info_t\fP * \fBhw_info\fP" +.br +.ti -1c +.RI "\fBPAPI_shlib_info_t\fP * \fBshlib_info\fP" +.br +.ti -1c +.RI "\fBPAPI_exe_info_t\fP * \fBexe_info\fP" +.br +.ti -1c +.RI "\fBPAPI_component_info_t\fP * \fBcmp_info\fP" +.br +.ti -1c +.RI "\fBPAPI_addr_range_option_t\fP \fBaddr\fP" 
+.br +.ti -1c +.RI "PAPI_user_defined_events_file_t \fBevents_file\fP" +.br +.in -1c + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_overflow.3 b/man/man3/PAPI_overflow.3 new file mode 100644 index 0000000..14a25cd --- /dev/null +++ b/man/man3/PAPI_overflow.3 @@ -0,0 +1,102 @@ +.TH "PAPI_overflow" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_overflow \- +.PP +Set up an event set to begin registering overflows\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP +\fBPAPI_overflow()\fP marks a specific EventCode in an EventSet to generate an overflow signal after every threshold events are counted\&. More than one event in an event set can be used to trigger overflows\&. In such cases, the user must call this function once for each overflowing event\&. To turn off overflow on a specified event, call this function with a threshold value of 0\&. +.PP +Overflows can be implemented in either software or hardware, but the scope is the entire event set\&. PAPI defaults to hardware overflow if it is available\&. In the case of software overflow, a periodic timer interrupt causes PAPI to compare the event counts against the threshold values and call the overflow handler if one or more events have exceeded their threshold\&. In the case of hardware overflow, the counters are typically set to the negative of the threshold value and count up to 0\&. This zero-crossing triggers a hardware interrupt that calls the overflow handler\&. Because of this counter interrupt, the counter values for overflowing counters may be very small or even negative numbers, and cannot be relied upon as accurate\&. In such cases the overflow handler can approximate the counts by supplying the threshold value whenever an overflow occurs\&. +.PP +_papi_overflow_handler() is a placeholder for a user-defined function to process overflow events\&. 
A pointer to this function is passed to the \fBPAPI_overflow\fP routine, where it is invoked whenever a software or hardware overflow occurs\&. This handler receives the EventSet of the overflowing event, the Program Counter address when the interrupt occurred, an overflow_vector that can be processed to determine which event(s) caused the overflow, and a pointer to the machine context, which can be used in a platform-specific manner to extract register information about what was happening when the overflow occurred\&. +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_overflow\fP (int EventSet, int EventCode, int threshold, int flags, PAPI_overflow_handler_t handler ); +.br + +.br + (*PAPI_overflow_handler_t) _papi_overflow_handler (int EventSet, void *address, long_long overflow_vector, void *context ); +.RE +.PP +\fBFortran Interface:\fP +.RS 4 +Not implemented +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP -- an integer handle to a PAPI event set as created by \fBPAPI_create_eventset\fP +.br +\fIEventCode\fP -- the preset or native event code to be set for overflow detection\&. This event must have already been added to the EventSet\&. +.br +\fIthreshold\fP -- the overflow threshold value for this EventCode\&. +.br +\fIflags\fP -- bitmap that controls the overflow mode of operation\&. Set to PAPI_OVERFLOW_FORCE_SW to force software overflowing, even if hardware overflow support is available\&. If hardware overflow support is available on a given system, it will be the default mode of operation\&. There are situations where it is advantageous to use software overflow instead\&. Although software overflow is inherently less accurate, with more latency and processing overhead, it does allow for overflowing on derived events, and for the accurate recording of overflowing event counts\&. These two features are typically not available with hardware overflow\&. 
Only one type of overflow is allowed per event set, so setting one event to hardware overflow and another to forced software overflow will result in an error being returned\&. +.br +\fIhandler\fP -- pointer to the user supplied handler function to call upon overflow +.br +\fIaddress\fP -- the Program Counter address at the time of the overflow +.br +\fIoverflow_vector\fP -- a long long word containing flag bits to indicate which hardware counter(s) caused the overflow +.br +\fI*context\fP -- pointer to a machine specific structure that defines the register context at the time of overflow\&. This parameter is often unused and can be ignored in the user function\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP On success, \fBPAPI_overflow\fP returns PAPI_OK\&. +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. Most likely a bad threshold value\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_EISRUN\fP The EventSet is currently counting events\&. +.br +\fIPAPI_ECNFLCT\fP The underlying counter hardware cannot count this event and other events in the EventSet simultaneously\&. Also can happen if you are trying to overflow both by hardware and by forced software at the same time\&. +.br +\fIPAPI_ENOEVNT\fP The PAPI event is not available on the underlying hardware\&. +.RE +.PP +\fBExample\fP +.RS 4 + +.PP +.nf +* // Define a simple overflow handler: +* void handler(int EventSet, void *address, long_long overflow_vector, void *context) +* { +* fprintf(stderr,"Overflow at %p! bit=%#llx \\n", +* address,overflow_vector); +* } +* +* // Call PAPI_overflow for an EventSet containing PAPI_TOT_INS, +* // setting the threshold to 100000\&. Use the handler defined above\&. 
+* retval = PAPI_overflow(EventSet, PAPI_TOT_INS, 100000, 0, handler); +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_get_overflow_event_index\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_perror.3 b/man/man3/PAPI_perror.3 new file mode 100644 index 0000000..fe47775 --- /dev/null +++ b/man/man3/PAPI_perror.3 @@ -0,0 +1,70 @@ +.TH "PAPI_perror" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_perror \- +.PP +Produces a string on standard error, describing the last library error\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + void \fBPAPI_perror( const char *s )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIs\fP -- Optional message to print before the string describing the last error message\&. +.RE +.PP +The routine \fBPAPI_perror()\fP produces a message on the standard error output, describing the last error encountered during a call to PAPI\&. If s is not NULL, s is printed, followed by a colon and a space\&. Then the error message and a new-line are printed\&. +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +* int ret; +* int EventSet = PAPI_NULL; +* int native = 0x0; +* +* ret = PAPI_create_eventset(&EventSet); +* if (ret != PAPI_OK) +* { +* fprintf(stderr, "PAPI error %d: %s\\n", ret, PAPI_strerror(ret)); +* exit(1); +* } +* // Add Total Instructions Executed to our EventSet +* ret = PAPI_add_event(EventSet, PAPI_TOT_INS); +* if (ret != PAPI_OK) +* { +* PAPI_perror( "PAPI_add_event" ); +* exit(1); +* } +* // Start counting +* ret = PAPI_start(EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_strerror\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_preload_info_t.3 b/man/man3/PAPI_preload_info_t.3 new file mode 100644 index 0000000..9ccf0c4 --- /dev/null +++ b/man/man3/PAPI_preload_info_t.3 @@ -0,0 +1,31 @@ +.TH "PAPI_preload_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_preload_info_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "char \fBlib_preload_env\fP [128]" +.br +.ti -1c +.RI "char \fBlib_preload_sep\fP" +.br +.ti -1c +.RI "char \fBlib_dir_env\fP [128]" +.br +.ti -1c +.RI "char \fBlib_dir_sep\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_profil.3 b/man/man3/PAPI_profil.3 new file mode 100644 index 0000000..1a3c0b8 --- /dev/null +++ b/man/man3/PAPI_profil.3 @@ -0,0 +1,185 @@ +.TH "PAPI_profil" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_profil \- +.PP +Generate a histogram of hardware counter overflows vs\&. PC addresses\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_profil\fP(void *buf, unsigned bufsiz, unsigned long offset, unsigned scale, int EventSet, int EventCode, int threshold, int flags ); +.RE +.PP +\fBFortran Interface\fP +.RS 4 +The profiling routines have no Fortran interface\&. +.RE +.PP +\fBParameters:\fP +.RS 4 +\fI*buf\fP -- pointer to a buffer of bufsiz bytes in which the histogram counts are stored in an array of unsigned short, unsigned int, or unsigned long long values, or 'buckets'\&. The size of the buckets is determined by values in the flags argument\&. +.br +\fIbufsiz\fP -- the size of the histogram buffer in bytes\&. It is computed from the length of the code region to be profiled, the size of the buckets, and the scale factor as discussed above\&. 
+.br +\fIoffset\fP -- the start address of the region to be profiled\&. +.br +\fIscale\fP -- broadly and historically speaking, a contraction factor that indicates how much smaller the histogram buffer is than the region to be profiled\&. More precisely, scale is interpreted as an unsigned 16-bit fixed-point fraction with the decimal point implied on the left\&. Its value is the reciprocal of the number of addresses in a subdivision, per counter of histogram buffer\&. Below is a table of representative values for scale\&. +.br +\fIEventSet\fP -- The PAPI EventSet to profile\&. This EventSet is marked as profiling-ready, but profiling doesn't actually start until a \fBPAPI_start()\fP call is issued\&. +.br +\fIEventCode\fP -- Code of the Event in the EventSet to profile\&. This event must already be a member of the EventSet\&. +.br +\fIthreshold\fP -- minimum number of events that must occur before the PC is sampled\&. If hardware overflow is supported for your component, this threshold will trigger an interrupt when reached\&. Otherwise, the counters will be sampled periodically and the PC will be recorded for the first sample that exceeds the threshold\&. If the value of threshold is 0, profiling will be disabled for this event\&. +.br +\fIflags\fP -- bit pattern to control profiling behavior\&. Defined values are shown in the table above\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_EISRUN\fP The EventSet is currently counting events\&. +.br +\fIPAPI_ECNFLCT\fP The underlying counter hardware can not count this event and other events in the EventSet simultaneously\&. +.br +\fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. 
+.RE +.PP +\fBPAPI_profil()\fP provides hardware event statistics by profiling the occurrence of specified hardware counter events\&. It is designed to mimic the UNIX SVR4 profil call\&. +.PP +The statistics are generated by creating a histogram of hardware counter event overflows vs\&. program counter addresses for the current process\&. The histogram is defined for a specific region of program code to be profiled, and the identified region is logically broken up into a set of equal size subdivisions, each of which corresponds to a count in the histogram\&. +.PP +With each hardware event overflow, the current subdivision is identified and its corresponding histogram count is incremented\&. These counts establish a relative measure of how many hardware counter events are occurring in each code subdivision\&. +.PP +The resulting histogram counts for a profiled region can be used to identify those program addresses that generate a disproportionately high percentage of the event of interest\&. +.PP +Events to be profiled are specified with the EventSet and EventCode parameters\&. More than one event can be simultaneously profiled by calling \fBPAPI_profil()\fP several times with different EventCode values\&. Profiling can be turned off for a given event by calling \fBPAPI_profil()\fP with a threshold value of 0\&. +.PP +\fBRepresentative values for the scale variable\fP +.RS 4 + + HEX DECIMAL DEFINITION + 0x20000 131072 Maps precisely one instruction address to a unique bucket in buf. + 0x10000 65536 Maps precisely two instruction addresses to a unique bucket in buf. + 0x0FFFF 65535 Maps approximately two instruction addresses to a unique bucket in buf. + 0x08000 32768 Maps every four instruction addresses to a bucket in buf. + 0x04000 16384 Maps every eight instruction addresses to a bucket in buf. + 0x00002 2 Maps all instruction addresses to the same bucket in buf. + 0x00001 1 Undefined. + 0x00000 0 Undefined. 
+ +.RE +.PP +Historically, the scale factor was introduced to allow the allocation of buffers smaller than the code size to be profiled\&. Data and instruction sizes were assumed to be multiples of 16-bits\&. These assumptions are no longer necessarily true\&. \fBPAPI_profil()\fP has preserved the traditional definition of scale where appropriate, but deprecated the definitions for 0 and 1 (disable scaling) and extended the range of scale to include 65536 and 131072 to allow for exactly two addresses and exactly one address per profiling bucket\&. +.PP +The value of bufsiz is computed as follows: +.PP +bufsiz = (end - start)*(bucket_size/2)*(scale/65536) where +.PD 0 + +.IP "\(bu" 2 +bufsiz - the size of the buffer in bytes +.IP "\(bu" 2 +end, start - the ending and starting addresses of the profiled region +.IP "\(bu" 2 +bucket_size - the size of each bucket in bytes; 2, 4, or 8 as defined in flags +.PP +\fBDefined bits for the flags variable:\fP +.RS 4 + +.PD 0 + +.IP "\(bu" 2 +PAPI_PROFIL_POSIX Default type of profiling, similar to profil (3)\&. +.br + +.IP "\(bu" 2 +PAPI_PROFIL_RANDOM Drop a random 25% of the samples\&. +.br + +.IP "\(bu" 2 +PAPI_PROFIL_WEIGHTED Weight the samples by their value\&. +.br + +.IP "\(bu" 2 +PAPI_PROFIL_COMPRESS Ignore samples as values in the hash buckets get big\&. +.br + +.IP "\(bu" 2 +PAPI_PROFIL_BUCKET_16 Use unsigned short (16 bit) buckets, This is the default bucket\&. +.br + +.IP "\(bu" 2 +PAPI_PROFIL_BUCKET_32 Use unsigned int (32 bit) buckets\&. +.br + +.IP "\(bu" 2 +PAPI_PROFIL_BUCKET_64 Use unsigned long long (64 bit) buckets\&. +.br + +.IP "\(bu" 2 +PAPI_PROFIL_FORCE_SW Force software overflow in profiling\&. 
+.br + +.PP +.RE +.PP +\fBExample\fP +.RS 4 + +.PP +.nf +* int retval; +* unsigned long length; +* PAPI_exe_info_t *prginfo; +* unsigned short *profbuf; +* +* if ((prginfo = PAPI_get_executable_info()) == NULL) +* handle_error(1); +* +* length = (unsigned long)(prginfo->text_end - prginfo->text_start); +* +* profbuf = (unsigned short *)malloc(length); +* if (profbuf == NULL) +* handle_error(1); +* memset(profbuf,0x00,length); +* +* if ((retval = PAPI_profil(profbuf, length, start, 65536, EventSet, +* PAPI_FP_INS, 1000000, PAPI_PROFIL_POSIX | PAPI_PROFIL_BUCKET_16)) +* != PAPI_OK) +* handle_error(retval); +* + +.fi +.PP +.RE +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_overflow\fP +.PP +\fBPAPI_sprofil\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_query_event.3 b/man/man3/PAPI_query_event.3 new file mode 100644 index 0000000..1bf78ec --- /dev/null +++ b/man/man3/PAPI_query_event.3 @@ -0,0 +1,74 @@ +.TH "PAPI_query_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_query_event \- +.PP +Query if PAPI event exists\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_query_event(int EventCode)\fP; +.RE +.PP +\fBPAPI_query_event()\fP asks the PAPI library if the PAPI Preset event can be counted on this architecture\&. If the event CAN be counted, the function returns PAPI_OK\&. If the event CANNOT be counted, the function returns an error code\&. This function also can be used to check the syntax of native and user events\&. +.PP +\fBParameters:\fP +.RS 4 +\fIEventCode\fP -- a defined event such as PAPI_TOT_INS\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. 
+.RE +.PP +\fBExamples\fP +.RS 4 + +.PP +.nf +* int retval; +* // Initialize the library +* retval = PAPI_library_init(PAPI_VER_CURRENT); +* if (retval != PAPI_VER_CURRENT) { +* fprintf(stderr,"PAPI library init error!\\n"); +* exit(1); +* } +* if (PAPI_query_event(PAPI_TOT_INS) != PAPI_OK) { +* fprintf(stderr,"No instruction counter? How lame\&.\\n"); +* exit(1); +* } +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_remove_event\fP +.PP +\fBPAPI_remove_events\fP +.PP +PAPI_presets +.PP +PAPI_native +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_query_named_event.3 b/man/man3/PAPI_query_named_event.3 new file mode 100644 index 0000000..6e03c10 --- /dev/null +++ b/man/man3/PAPI_query_named_event.3 @@ -0,0 +1,68 @@ +.TH "PAPI_query_named_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_query_named_event \- +.PP +Query if a named PAPI event exists\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_query_named_event(const char *EventName)\fP; +.RE +.PP +\fBPAPI_query_named_event()\fP asks the PAPI library if the PAPI named event can be counted on this architecture\&. If the event CAN be counted, the function returns PAPI_OK\&. If the event CANNOT be counted, the function returns an error code\&. This function also can be used to check the syntax of native and user events\&. +.PP +\fBParameters:\fP +.RS 4 +\fIEventName\fP -- a defined event such as PAPI_TOT_INS\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. 
+.RE +.PP +\fBExamples\fP +.RS 4 + +.PP +.nf +* int retval; +* // Initialize the library +* retval = PAPI_library_init(PAPI_VER_CURRENT); +* if (retval != PAPI_VER_CURRENT) { +* fprintf(stderr,"PAPI library init error!\\n"); +* exit(1); +* } +* if (PAPI_query_named_event("PAPI_TOT_INS") != PAPI_OK) { +* fprintf(stderr,"No instruction counter? How lame\&.\\n"); +* exit(1); +* } +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_query_event\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_read.3 b/man/man3/PAPI_read.3 new file mode 100644 index 0000000..da8c643 --- /dev/null +++ b/man/man3/PAPI_read.3 @@ -0,0 +1,86 @@ +.TH "PAPI_read" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_read \- +.PP +Read hardware counters from an event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_read(int EventSet, long_long * values )\fP; +.RE +.PP +\fBPAPI_read()\fP copies the counters of the indicated event set into the provided array\&. +.PP +The counters continue counting after the read\&. +.PP +Note the differences between \fBPAPI_read()\fP and \fBPAPI_accum()\fP, specifically that \fBPAPI_accum()\fP resets the values array to zero\&. +.PP +\fBPAPI_read()\fP assumes an initialized PAPI library and a properly added event set\&. +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP -- an integer handle for a PAPI Event Set as created by \fBPAPI_create_eventset()\fP +.br +\fI*values\fP -- an array to hold the counter values of the counting events +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_ENOEVST\fP The event set specified does not exist\&. 
+.RE +.PP +\fBExamples\fP +.RS 4 + +.PP +.nf +* do_100events(); +* if (PAPI_read(EventSet, values) != PAPI_OK) +* handle_error(1); +* // values[0] now equals 100 +* do_100events(); +* if (PAPI_accum(EventSet, values) != PAPI_OK) +* handle_error(1); +* // values[0] now equals 200 +* values[0] = -100; +* do_100events(); +* if (PAPI_accum(EventSet, values) != PAPI_OK) +* handle_error(1); +* // values[0] now equals 0 +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_accum\fP +.PP +\fBPAPI_start\fP +.PP +\fBPAPI_stop\fP +.PP +\fBPAPI_reset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_read_counters.3 b/man/man3/PAPI_read_counters.3 new file mode 100644 index 0000000..f7e45fc --- /dev/null +++ b/man/man3/PAPI_read_counters.3 @@ -0,0 +1,90 @@ +.TH "PAPI_read_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_read_counters \- +.PP +Read and reset counters\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +C Interface: +#include <papi\&.h> +int PAPI_read_counters( long long *values, int array_len ); + +.fi +.PP +.PP +\fBParameters:\fP +.RS 4 +\fI*values\fP an array to hold the counter values of the counting events +.br +\fIarray_len\fP the number of items in the *values array +.RE +.PP +\fBPrecondition:\fP +.RS 4 +These calls assume an initialized PAPI library and a properly added event set\&. +.RE +.PP +\fBPostcondition:\fP +.RS 4 +The counters are reset and left running after the call\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. +.RE +.PP +\fBPAPI_read_counters()\fP copies the event counters into the array *values\&. 
+.PP +.PP +.nf +do_100events(); +if ( PAPI_read_counters( values, num_hwcntrs ) != PAPI_OK ) + handle_error(1); +// values[0] now equals 100 +do_100events(); +if ( PAPI_accum_counters( values, num_hwcntrs ) != PAPI_OK ) + handle_error(1); +// values[0] now equals 200 +values[0] = -100; +do_100events(); +if ( PAPI_accum_counters(values, num_hwcntrs ) != PAPI_OK ) + handle_error(1); +// values[0] now equals 0 + * +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_opt()\fP \fBPAPI_start_counters()\fP +.RE +.PP +\fBFortran Interface:\fP +.RS 4 +#include 'fpapi\&.h' +.br + PAPIF_read_counters( C_LONG_LONG(*) values, C_INT array_len, C_INT check ) +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_read_counters\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_read_ts.3 b/man/man3/PAPI_read_ts.3 new file mode 100644 index 0000000..61bda71 --- /dev/null +++ b/man/man3/PAPI_read_ts.3 @@ -0,0 +1,75 @@ +.TH "PAPI_read_ts" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_read_ts \- +.PP +Read hardware counters with a timestamp\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_read_ts(int EventSet, long long *values, long long *cycles )\fP; +.RE +.PP +\fBPAPI_read_ts()\fP copies the counters of the indicated event set into the provided array\&. It also places a real-time cycle timestamp into the cycles array\&. +.PP +The counters continue counting after the read\&. +.PP +\fBPAPI_read_ts()\fP assumes an initialized PAPI library and a properly added event set\&. 
+.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP -- an integer handle for a PAPI Event Set as created by \fBPAPI_create_eventset()\fP +.br +\fI*values\fP -- an array to hold the counter values of the counting events +.br +\fI*cycles\fP -- an array to hold the timestamp values +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_ENOEVST\fP The event set specified does not exist\&. +.RE +.PP +\fBExamples\fP +.RS 4 + +.PP +.nf +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_read\fP +.PP +\fBPAPI_accum\fP +.PP +\fBPAPI_start\fP +.PP +\fBPAPI_stop\fP +.PP +\fBPAPI_reset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_register_thread.3 b/man/man3/PAPI_register_thread.3 new file mode 100644 index 0000000..1321b93 --- /dev/null +++ b/man/man3/PAPI_register_thread.3 @@ -0,0 +1,49 @@ +.TH "PAPI_register_thread" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_register_thread \- +.PP +Notify PAPI that a thread has 'appeared'\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br +int \fBPAPI_register_thread\fP (void); +.RE +.PP +\fBPAPI_register_thread()\fP should be called when the user wants to force PAPI to initialize a thread that PAPI has not seen before\&. +.PP +Usually this is not necessary as PAPI implicitly detects the thread when an eventset is created or other thread local PAPI functions are called\&. However, it can be useful for debugging and performance enhancements in the run-time systems of performance tools\&. +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_ENOMEM\fP Space could not be allocated to store the new thread information\&. 
+.br +\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_ECMP\fP Hardware counters for this thread could not be initialized\&. +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_unregister_thread\fP +.PP +\fBPAPI_thread_id\fP +.PP +\fBPAPI_thread_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_remove_event.3 b/man/man3/PAPI_remove_event.3 new file mode 100644 index 0000000..a005c13 --- /dev/null +++ b/man/man3/PAPI_remove_event.3 @@ -0,0 +1,101 @@ +.TH "PAPI_remove_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_remove_event \- +.PP +removes a hardware event from a PAPI event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +A hardware event can be either a PAPI Preset or a native hardware +event code. For a list of PAPI preset events, see PAPI_presets or +run the papi_avail utility in the PAPI distribution. PAPI Presets +can be passed to PAPI_query_event to see if they exist on the +underlying architecture. For a list of native events available on +the current platform, run papi_native_avail in the PAPI distribution. + +@par C Interface: +\#include @n +int PAPI_remove_event( int EventSet, int EventCode ); + +@param[in] EventSet + -- an integer handle for a PAPI event set as created + by PAPI_create_eventset +@param[in] EventCode + -- a defined event such as PAPI_TOT_INS or a native event. + +@retval PAPI_OK + Everything worked. +@retval PAPI_EINVAL + One or more of the arguments is invalid. +@retval PAPI_ENOEVST + The EventSet specified does not exist. +@retval PAPI_EISRUN + The EventSet is currently counting events. +@retval PAPI_ECNFLCT + The underlying counter hardware can not count this + event and other events in the EventSet simultaneously. +@retval PAPI_ENOEVNT + The PAPI preset is not available on the underlying hardware. 
+ +@par Example: + +.fi +.PP + +.PP +.nf +* int EventSet = PAPI_NULL; +* int ret; +* +* // Create an empty EventSet +* ret = PAPI_create_eventset(&EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Add Total Instructions Executed to our EventSet +* ret = PAPI_add_event(EventSet, PAPI_TOT_INS); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Start counting +* ret = PAPI_start(EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Stop counting, ignore values +* ret = PAPI_stop(EventSet, NULL); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Remove event +* ret = PAPI_remove_event(EventSet, PAPI_TOT_INS); +* if (ret != PAPI_OK) handle_error(ret); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_cleanup_eventset\fP +.PP +\fBPAPI_destroy_eventset\fP +.PP +\fBPAPI_event_name_to_code\fP +.PP +PAPI_presets +.PP +\fBPAPI_add_event\fP +.PP +\fBPAPI_add_events\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_remove_events.3 b/man/man3/PAPI_remove_events.3 new file mode 100644 index 0000000..f5afdc8 --- /dev/null +++ b/man/man3/PAPI_remove_events.3 @@ -0,0 +1,90 @@ +.TH "PAPI_remove_events" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_remove_events \- +.PP +Remove an array of hardware event codes from a PAPI event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP +A hardware event can be either a PAPI Preset or a native hardware event code\&. For a list of PAPI preset events, see PAPI_presets or run the papi_avail utility in the PAPI distribution\&. PAPI Presets can be passed to \fBPAPI_query_event\fP to see if they exist on the underlying architecture\&. For a list of native events available on current platform, run papi_native_avail in the PAPI distribution\&. It should be noted that \fBPAPI_remove_events\fP can partially succeed, exactly like \fBPAPI_add_events\fP\&. 
+.PP +\fBC Prototype:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_remove_events( int EventSet, int * EventCode, int number )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP +.br +\fI*EventCode\fP an array of defined events +.br +\fInumber\fP an integer indicating the number of events in the array *EventCode +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPositive\fP integer The number of consecutive elements that succeeded before the error\&. +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_EISRUN\fP The EventSet is currently counting events\&. +.br +\fIPAPI_ECNFLCT\fP The underlying counter hardware can not count this event and other events in the EventSet simultaneously\&. +.br +\fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. +.RE +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +int EventSet = PAPI_NULL; +int Events[] = {PAPI_TOT_INS, PAPI_FP_OPS}; +int ret; + + // Create an empty EventSet +ret = PAPI_create_eventset(&EventSet); +if (ret != PAPI_OK) handle_error(ret); + +// Add two events to our EventSet +ret = PAPI_add_events(EventSet, Events, 2); +if (ret != PAPI_OK) handle_error(ret); + +// Start counting +ret = PAPI_start(EventSet); +if (ret != PAPI_OK) handle_error(ret); + +// Stop counting, ignore values +ret = PAPI_stop(EventSet, NULL); +if (ret != PAPI_OK) handle_error(ret); + +// Remove event +ret = PAPI_remove_events(EventSet, Events, 2); +if (ret != PAPI_OK) handle_error(ret); + * + +.fi +.PP +.RE +.PP +.PP +.nf +@see PAPI_cleanup_eventset PAPI_destroy_eventset PAPI_event_name_to_code + PAPI_presets PAPI_add_event PAPI_add_events.fi +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_remove_named_event.3 b/man/man3/PAPI_remove_named_event.3 new file mode 100644 index 0000000..325a6ce --- /dev/null +++ b/man/man3/PAPI_remove_named_event.3 @@ -0,0 +1,98 @@ +.TH "PAPI_remove_named_event" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_remove_named_event \- +.PP +removes a named hardware event from a PAPI event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +A hardware event can be either a PAPI Preset or a native hardware +event code. For a list of PAPI preset events, see PAPI_presets or +run the papi_avail utility in the PAPI distribution. PAPI Presets +can be passed to PAPI_query_event to see if they exist on the +underlying architecture. For a list of native events available on +the current platform, run papi_native_avail in the PAPI distribution. + +@par C Interface: +\#include @n +int PAPI_remove_named_event( int EventSet, const char *EventName ); + +@param[in] EventSet + -- an integer handle for a PAPI event set as created + by PAPI_create_eventset +@param[in] EventName + -- a defined event such as PAPI_TOT_INS or a native event. + +@retval PAPI_OK + Everything worked. +@retval PAPI_EINVAL + One or more of the arguments is invalid. +@retval PAPI_ENOINIT + The PAPI library has not been initialized. +@retval PAPI_ENOEVST + The EventSet specified does not exist. +@retval PAPI_EISRUN + The EventSet is currently counting events. +@retval PAPI_ECNFLCT + The underlying counter hardware can not count this + event and other events in the EventSet simultaneously. +@retval PAPI_ENOEVNT + The PAPI preset is not available on the underlying hardware. 
+ +@par Example: + +.fi +.PP + +.PP +.nf +* const char *EventName = "PAPI_TOT_INS"; +* int EventSet = PAPI_NULL; +* int ret; +* +* // Create an empty EventSet +* ret = PAPI_create_eventset(&EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Add Total Instructions Executed to our EventSet +* ret = PAPI_add_named_event(EventSet, EventName); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Start counting +* ret = PAPI_start(EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Stop counting, ignore values +* ret = PAPI_stop(EventSet, NULL); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Remove event +* ret = PAPI_remove_named_event(EventSet, EventName); +* if (ret != PAPI_OK) handle_error(ret); +* + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_remove_event\fP +.br + \fBPAPI_query_named_event\fP +.br + \fBPAPI_add_named_event\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_reset.3 b/man/man3/PAPI_reset.3 new file mode 100644 index 0000000..92fc7e6 --- /dev/null +++ b/man/man3/PAPI_reset.3 @@ -0,0 +1,79 @@ +.TH "PAPI_reset" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_reset \- +.PP +Reset the hardware event counts in an event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Prototype: + \#include @n + int PAPI_reset( int EventSet ); + +@param EventSet + an integer handle for a PAPI event set as created by PAPI_create_eventset + +@retval PAPI_OK +@retval PAPI_ESYS + A system or C library call failed inside PAPI, see the errno variable. +@retval PAPI_ENOEVST + The EventSet specified does not exist. + + + +PAPI_reset() zeroes the values of the counters contained in EventSet. 
+This call assumes an initialized PAPI library and a properly added event set + +@par Example: + +.fi +.PP + +.PP +.nf +int EventSet = PAPI_NULL; +int Events[] = {PAPI_TOT_INS, PAPI_FP_OPS}; +int ret; + +// Create an empty EventSet +ret = PAPI_create_eventset(&EventSet); +if (ret != PAPI_OK) handle_error(ret); + +// Add two events to our EventSet +ret = PAPI_add_events(EventSet, Events, 2); +if (ret != PAPI_OK) handle_error(ret); + +// Start counting +ret = PAPI_start(EventSet); +if (ret != PAPI_OK) handle_error(ret); + +// Stop counting, ignore values +ret = PAPI_stop(EventSet, NULL); +if (ret != PAPI_OK) handle_error(ret); + +// reset the counters in this EventSet +ret = PAPI_reset(EventSet); +if (ret != PAPI_OK) handle_error(ret); + * + +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_create_eventset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_set_cmp_domain.3 b/man/man3/PAPI_set_cmp_domain.3 new file mode 100644 index 0000000..a31b474 --- /dev/null +++ b/man/man3/PAPI_set_cmp_domain.3 @@ -0,0 +1,88 @@ +.TH "PAPI_set_cmp_domain" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_set_cmp_domain \- +.PP +Set the default counting domain for new event sets bound to the specified component\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Prototype: + \#include @n + int PAPI_set_cmp_domain( int domain, int cidx ); + +@param domain one of the following constants as defined in the papi.h header file +@arg PAPI_DOM_USER User context counted +@arg PAPI_DOM_KERNEL Kernel/OS context counted +@arg PAPI_DOM_OTHER Exception/transient mode counted +@arg PAPI_DOM_SUPERVISOR Supervisor/hypervisor context counted +@arg PAPI_DOM_ALL All above contexts counted +@arg PAPI_DOM_MIN The smallest available context +@arg PAPI_DOM_MAX The largest available context +@arg PAPI_DOM_HWSPEC Something other than CPU like stuff. 
Individual components can decode +low order bits for more meaning + +@param cidx + An integer identifier for a component. + By convention, component 0 is always the cpu component. + +.fi +.PP + + +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ENOCMP\fP The argument cidx is not a valid component\&. + +.RE +.PP +\fBPAPI_set_cmp_domain\fP sets the default counting domain for all new event sets in all threads, and requires an explicit component argument\&. Event sets that are already in existence are not affected\&. To change the domain of an existing event set, please see \fBPAPI_set_opt\fP\&. The reader should note that the domain of an event set affects only the mode in which the counter continues to run\&. Counts are still aggregated for the current process, and not for any other processes in the system\&. Thus when requesting PAPI_DOM_KERNEL , the user is asking for events that occur on behalf of the process, inside the kernel\&. +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +int ret; + +// Initialize the library +ret = PAPI_library_init(PAPI_VER_CURRENT); +if (ret > 0 && ret != PAPI_VER_CURRENT) { + fprintf(stderr,"PAPI library version mismatch!\n"); + exit(1); +} +if (ret < 0) handle_error(ret); + +// Set the default domain for the cpu component +ret = PAPI_set_cmp_domain(PAPI_DOM_KERNEL,0); +if (ret != PAPI_OK) handle_error(ret); +ret = PAPI_create_eventset(&EventSet); +if (ret != PAPI_OK) handle_error(ret); + * + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_domain\fP \fBPAPI_set_granularity\fP \fBPAPI_set_opt\fP \fBPAPI_get_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_set_cmp_granularity.3 b/man/man3/PAPI_set_cmp_granularity.3 new file mode 100644 index 0000000..d212db6 --- /dev/null +++ b/man/man3/PAPI_set_cmp_granularity.3 @@ -0,0 +1,88 @@ +.TH "PAPI_set_cmp_granularity" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_set_cmp_granularity \- +.PP +Set the default counting granularity for eventsets bound to the specified component\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Prototype: + \#include @n + int PAPI_set_cmp_granularity( int granularity, int cidx ); + +@param granularity one of the following constants as defined in the papi.h header file +@arg PAPI_GRN_THR Count each individual thread +@arg PAPI_GRN_PROC Count each individual process +@arg PAPI_GRN_PROCG Count each individual process group +@arg PAPI_GRN_SYS Count the current CPU +@arg PAPI_GRN_SYS_CPU Count all CPUs individually +@arg PAPI_GRN_MIN The finest available granularity +@arg PAPI_GRN_MAX The coarsest available granularity + +@param cidx + An integer identifier for a component. + By convention, component 0 is always the cpu component. + +.fi +.PP + + +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ENOCMP\fP The argument cidx is not a valid component\&. + +.RE +.PP +\fBPAPI_set_cmp_granularity\fP sets the default counting granularity for all new event sets, and requires an explicit component argument\&. Event sets that are already in existence are not affected\&. +.PP +To change the granularity of an existing event set, please see \fBPAPI_set_opt\fP\&. The reader should note that the granularity of an event set affects only the mode in which the counter continues to run\&. 
+.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +int ret; + +// Initialize the library +ret = PAPI_library_init(PAPI_VER_CURRENT); +if (ret > 0 && ret != PAPI_VER_CURRENT) { + fprintf(stderr,"PAPI library version mismatch!\n"); + exit(1); +} +if (ret < 0) handle_error(ret); + +// Set the default granularity for the cpu component +ret = PAPI_set_cmp_granularity(PAPI_GRN_PROC, 0); +if (ret != PAPI_OK) handle_error(ret); +ret = PAPI_create_eventset(&EventSet); +if (ret != PAPI_OK) handle_error(ret); + * + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_granularity\fP \fBPAPI_set_domain\fP \fBPAPI_set_opt\fP \fBPAPI_get_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_set_debug.3 b/man/man3/PAPI_set_debug.3 new file mode 100644 index 0000000..6ef79b6 --- /dev/null +++ b/man/man3/PAPI_set_debug.3 @@ -0,0 +1,104 @@ +.TH "PAPI_set_debug" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_set_debug \- +.PP +Set the current debug level for error output from PAPI\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Prototype:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_set_debug( int level )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIlevel\fP one of the constants shown in the table below and defined in the \fBpapi\&.h\fP header file\&. +.br + The possible debug levels for debugging are shown below\&. +.PD 0 + +.IP "\(bu" 2 +PAPI_QUIET Do not print anything, just return the error code +.IP "\(bu" 2 +PAPI_VERB_ECONT Print error message and continue +.IP "\(bu" 2 +PAPI_VERB_ESTOP Print error message and exit +.br + +.PP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP The debug level is invalid\&. +.br + +.br + The current debug level is used by both the internal error and debug message handler subroutines\&. +.br + The debug handler is only used if the library was compiled with -DDEBUG\&. 
+.br + The debug handler is called when there is an error upon a call to the PAPI API\&. +.br + The error handler is always active and its behavior cannot be modified except for whether or not it prints anything\&. +.RE +.PP +The default PAPI debug handler prints out messages in the following form: +.br + PAPI Error: Error Code code, symbol, description +.PP +If the error was caused from a system call and the return code is PAPI_ESYS, the message will have a colon space and the error string as reported by strerror() appended to the end\&. +.PP +The PAPI error handler prints out messages in the following form: +.br + PAPI Error: message\&. +.br + +.PP +\fBNote:\fP +.RS 4 +This is the ONLY function that may be called BEFORE \fBPAPI_library_init()\fP\&. +.br + +.RE +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +int ret; +ret = PAPI_set_debug(PAPI_VERB_ECONT); +if ( ret != PAPI_OK ) handle_error(); +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_library_init\fP +.PP +\fBPAPI_get_opt\fP +.PP +\fBPAPI_set_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_set_domain.3 b/man/man3/PAPI_set_domain.3 new file mode 100644 index 0000000..2be6949 --- /dev/null +++ b/man/man3/PAPI_set_domain.3 @@ -0,0 +1,80 @@ +.TH "PAPI_set_domain" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_set_domain \- +.PP +Set the default counting domain for new event sets bound to the cpu component\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Prototype: + \#include @n + int PAPI_set_domain( int domain ); + +@param domain one of the following constants as defined in the papi.h header file +@arg PAPI_DOM_USER User context counted +@arg PAPI_DOM_KERNEL Kernel/OS context counted +@arg PAPI_DOM_OTHER Exception/transient mode counted +@arg PAPI_DOM_SUPERVISOR Supervisor/hypervisor context counted +@arg PAPI_DOM_ALL All above contexts counted +@arg PAPI_DOM_MIN The smallest available context +@arg PAPI_DOM_MAX The largest available context + +.fi +.PP + + +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. + +.RE +.PP +\fBPAPI_set_domain\fP sets the default counting domain for all new event sets created by \fBPAPI_create_eventset\fP in all threads\&. This call implicitly sets the domain for the cpu component (component 0) and is included to preserve backward compatibility\&. +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +int ret; + +// Initialize the library +ret = PAPI_library_init(PAPI_VER_CURRENT); +if (ret > 0 && ret != PAPI_VER_CURRENT) { + fprintf(stderr,"PAPI library version mismatch!\n"); + exit(1); +} +if (ret < 0) handle_error(ret); + +// Set the default domain for the cpu component +ret = PAPI_set_domain(PAPI_DOM_KERNEL); +if (ret != PAPI_OK) handle_error(ret); +ret = PAPI_create_eventset(&EventSet); +if (ret != PAPI_OK) handle_error(ret); + * + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_cmp_domain\fP \fBPAPI_set_granularity\fP \fBPAPI_set_opt\fP \fBPAPI_get_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_set_granularity.3 b/man/man3/PAPI_set_granularity.3 new file mode 100644 index 0000000..08dfa86 --- /dev/null +++ b/man/man3/PAPI_set_granularity.3 @@ -0,0 +1,80 @@ +.TH "PAPI_set_granularity" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_set_granularity \- +.PP +Set the default counting granularity for eventsets bound to the cpu component\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Prototype: + \#include @n + int PAPI_set_granularity( int granularity ); + +@param -- granularity one of the following constants as defined in the papi.h header file +@arg PAPI_GRN_THR -- Count each individual thread +@arg PAPI_GRN_PROC -- Count each individual process +@arg PAPI_GRN_PROCG -- Count each individual process group +@arg PAPI_GRN_SYS -- Count the current CPU +@arg PAPI_GRN_SYS_CPU -- Count all CPUs individually +@arg PAPI_GRN_MIN -- The finest available granularity +@arg PAPI_GRN_MAX -- The coarsest available granularity + +.fi +.PP + + +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. + +.RE +.PP +\fBPAPI_set_granularity\fP sets the default counting granularity for all new event sets created by \fBPAPI_create_eventset\fP\&. This call implicitly sets the granularity for the cpu component (component 0) and is included to preserve backward compatibility\&. 
+.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +int ret; + +// Initialize the library +ret = PAPI_library_init(PAPI_VER_CURRENT); +if (ret > 0 && ret != PAPI_VER_CURRENT) { + fprintf(stderr,"PAPI library version mismatch!\n"); + exit(1); +} +if (ret < 0) handle_error(ret); + +// Set the default granularity for the cpu component +ret = PAPI_set_granularity(PAPI_GRN_PROC); +if (ret != PAPI_OK) handle_error(ret); +ret = PAPI_create_eventset(&EventSet); +if (ret != PAPI_OK) handle_error(ret); + * + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_cmp_granularity\fP \fBPAPI_set_domain\fP \fBPAPI_set_opt\fP \fBPAPI_get_opt\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_set_multiplex.3 b/man/man3/PAPI_set_multiplex.3 new file mode 100644 index 0000000..8c03d39 --- /dev/null +++ b/man/man3/PAPI_set_multiplex.3 @@ -0,0 +1,100 @@ +.TH "PAPI_set_multiplex" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_set_multiplex \- +.PP +Convert a standard event set to a multiplexed event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_set_multiplex( int EventSet )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP -- One or more of the arguments is invalid, or the EventSet is already multiplexed\&. +.br +\fIPAPI_ENOCMP\fP -- The EventSet specified is not yet bound to a component\&. +.br +\fIPAPI_ENOEVST\fP -- The EventSet specified does not exist\&. +.br +\fIPAPI_EISRUN\fP -- The EventSet is currently counting events\&. +.br +\fIPAPI_ENOMEM\fP -- Insufficient memory to complete the operation\&. 
+.RE +.PP +\fBPAPI_set_multiplex\fP converts a standard PAPI event set created by a call to \fBPAPI_create_eventset\fP into an event set capable of handling multiplexed events\&. This must be done after calling \fBPAPI_multiplex_init\fP, and either \fBPAPI_add_event\fP or \fBPAPI_assign_eventset_component\fP, but prior to calling \fBPAPI_start()\fP\&. +.PP +Events can be added to an event set either before or after converting it into a multiplexed set, but the conversion must be done prior to using it as a multiplexed set\&. +.PP +\fBNote:\fP +.RS 4 +Multiplexing can't be enabled until PAPI knows which component is targeted\&. Due to the late binding nature of PAPI event sets, this only happens after adding an event to an event set or explicitly binding the component with a call to \fBPAPI_assign_eventset_component\fP\&. +.RE +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +* int EventSet = PAPI_NULL; +* int ret; +* +* // Create an empty EventSet +* ret = PAPI_create_eventset(&EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Bind it to the CPU component +* ret = PAPI_assign_eventset_component(EventSet, 0); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Check current multiplex status +* ret = PAPI_get_multiplex(EventSet); +* if (ret == TRUE) printf("This event set is ready for multiplexing\n\&."); +* if (ret == FALSE) printf("This event set is not enabled for multiplexing\n\&."); +* if (ret < 0) handle_error(ret); +* +* // Turn on multiplexing +* ret = PAPI_set_multiplex(EventSet); +* if ((ret == PAPI_EINVAL) && (PAPI_get_multiplex(EventSet) == TRUE)) +* printf("This event set already has multiplexing enabled\n"); +* else if (ret != PAPI_OK) handle_error(ret); +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_multiplex_init\fP +.PP +\fBPAPI_get_multiplex\fP +.PP +\fBPAPI_set_opt\fP +.PP +\fBPAPI_create_eventset\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_set_opt.3 b/man/man3/PAPI_set_opt.3 new file mode 100644 index 0000000..a92c8a6 --- /dev/null +++ b/man/man3/PAPI_set_opt.3 @@ -0,0 +1,109 @@ +.TH "PAPI_set_opt" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_set_opt \- +.PP +Set PAPI library or event set options\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_set_opt( int option, PAPI_option_t * ptr )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIoption\fP Defines the option to be set\&. Possible values are briefly described in the table below\&. +.br +\fIptr\fP Pointer to a structure determined by the selected option\&. See \fBPAPI_option_t\fP for a description of possible structures\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP The specified option or parameter is invalid\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_EISRUN\fP The EventSet is currently counting events\&. +.br +\fIPAPI_ECMP\fP The option is not implemented for the current component\&. +.br +\fIPAPI_ENOINIT\fP PAPI has not been initialized\&. +.br +\fIPAPI_EINVAL_DOM\fP Invalid domain has been requested\&. +.RE +.PP +\fBPAPI_set_opt()\fP changes the options of the PAPI library or a specific EventSet created by \fBPAPI_create_eventset\fP\&. Some options may require that the EventSet be bound to a component before they can execute successfully\&. This can be done either by adding an event or by explicitly calling \fBPAPI_assign_eventset_component\fP\&. +.PP +Ptr is a pointer to the \fBPAPI_option_t\fP structure, which is actually a union of different structures for different options\&. Not all options require or return information in these structures\&. Each requires different values to be set\&. Some options require a component index to be provided\&. 
These options are handled implicitly through the option structures\&. +.PP +\fBNote:\fP +.RS 4 +Some options, such as PAPI_DOMAIN and PAPI_MULTIPLEX are also available as separate entry points in both C and Fortran\&. +.RE +.PP +The reader is encouraged to peruse the ctests code in the PAPI distribution for examples of usage of \fBPAPI_set_opt\fP\&. +.PP +\fBPossible values for the PAPI_set_opt option parameter\fP +.RS 4 + +OPTION DEFINITION +PAPI_DEFDOM Set default counting domain for newly created event sets. Requires a + component index. +PAPI_DEFGRN Set default counting granularity. Requires a component index. +PAPI_DEBUG Set the PAPI debug state and the debug handler. The debug state is + specified in ptr->debug.level. The debug handler is specified in + ptr->debug.handler. For further information regarding debug states and + the behavior of the handler, see PAPI_set_debug. +PAPI_MULTIPLEX Enable specified EventSet for multiplexing. +PAPI_DEF_ITIMER Set the type of itimer used in software multiplexing, overflowing + and profiling. +PAPI_DEF_MPX_NS Set the sampling time slice in nanoseconds for multiplexing and overflow. +PAPI_DEF_ITIMER_NS See PAPI_DEF_MPX_NS. +PAPI_ATTACH Attach EventSet specified in ptr->attach.eventset to thread or process id + specified in ptr->attach.tid. +PAPI_CPU_ATTACH Attach EventSet specified in ptr->cpu.eventset to cpu specified in + ptr->cpu.cpu_num. +PAPI_DETACH Detach EventSet specified in ptr->attach.eventset from any thread + or process id. +PAPI_DOMAIN Set domain for EventSet specified in ptr->domain.eventset. + Will error if eventset is not bound to a component. +PAPI_GRANUL Set granularity for EventSet specified in ptr->granularity.eventset. + Will error if eventset is not bound to a component. +PAPI_INHERIT Enable or disable inheritance for specified EventSet. +PAPI_DATA_ADDRESS Set data address range to restrict event counting for EventSet specified + in ptr->addr.eventset. 
Starting and ending addresses are specified in + ptr->addr.start and ptr->addr.end, respectively. If exact addresses + cannot be instantiated, offsets are returned in ptr->addr.start_off and + ptr->addr.end_off. Currently implemented on Itanium only. +PAPI_INSTR_ADDRESS Set instruction address range as described above. Itanium only. + +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_set_debug\fP +.PP +\fBPAPI_set_multiplex\fP +.PP +\fBPAPI_set_domain\fP +.PP +\fBPAPI_option_t\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_set_thr_specific.3 b/man/man3/PAPI_set_thr_specific.3 new file mode 100644 index 0000000..c9ec61b --- /dev/null +++ b/man/man3/PAPI_set_thr_specific.3 @@ -0,0 +1,79 @@ +.TH "PAPI_set_thr_specific" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_set_thr_specific \- +.PP +Store a pointer to a thread specific data structure\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par Prototype: + \#include @n + int PAPI_set_thr_specific( int tag, void *ptr ); + +@param tag + An identifier, the value of which is either PAPI_USR1_TLS or + PAPI_USR2_TLS. This identifier indicates which of several data + structures associated with this thread is to be accessed. +@param ptr + A pointer to the memory containing the data structure. + +@retval PAPI_OK +@retval PAPI_EINVAL + The @em tag argument is out of range. + +In C, PAPI_set_thr_specific will save @em ptr into an array indexed by @em tag. +There are 2 user available locations and @em tag can be either +PAPI_USR1_TLS or PAPI_USR2_TLS. +The array mentioned above is managed by PAPI and allocated to each +thread which has called PAPI_thread_init. +There is no Fortran equivalent function. 
+ +@par Example: + +.fi +.PP + +.PP +.nf +int ret; +HighLevelInfo *state = NULL; +ret = PAPI_thread_init(pthread_self); +if (ret != PAPI_OK) handle_error(ret); + +// Do we have the thread specific data setup yet? + +ret = PAPI_get_thr_specific(PAPI_USR1_TLS, (void *) &state); +if (ret != PAPI_OK || state == NULL) { + state = (HighLevelInfo *) malloc(sizeof(HighLevelInfo)); + if (state == NULL) return (PAPI_ESYS); + memset(state, 0, sizeof(HighLevelInfo)); + state->EventSet = PAPI_NULL; + ret = PAPI_create_eventset(&state->EventSet); + if (ret != PAPI_OK) return (PAPI_ESYS); + ret = PAPI_set_thr_specific(PAPI_USR1_TLS, state); + if (ret != PAPI_OK) return (ret); +} + * + +.fi +.PP + +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_register_thread\fP \fBPAPI_thread_init\fP \fBPAPI_thread_id\fP \fBPAPI_get_thr_specific\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_shlib_info_t.3 b/man/man3/PAPI_shlib_info_t.3 new file mode 100644 index 0000000..b0ca5f7 --- /dev/null +++ b/man/man3/PAPI_shlib_info_t.3 @@ -0,0 +1,25 @@ +.TH "PAPI_shlib_info_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_shlib_info_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "\fBPAPI_address_map_t\fP * \fBmap\fP" +.br +.ti -1c +.RI "int \fBcount\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_shutdown.3 b/man/man3/PAPI_shutdown.3 new file mode 100644 index 0000000..db4eb19 --- /dev/null +++ b/man/man3/PAPI_shutdown.3 @@ -0,0 +1,34 @@ +.TH "PAPI_shutdown" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_shutdown \- +.PP +Finish using PAPI and free all related resources\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Prototype:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + void \fBPAPI_shutdown( void )\fP; +.RE +.PP +\fBPAPI_shutdown()\fP is an exit function used by the PAPI Library to free resources and shut down when certain error conditions arise\&. It is not necessary for the user to call this function, but doing so allows the user to have the capability to free memory and resources used by the PAPI Library\&. +.PP +\fBSee Also:\fP +.RS 4 +PAPI_init_library +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_sprofil.3 b/man/man3/PAPI_sprofil.3 new file mode 100644 index 0000000..0576546 --- /dev/null +++ b/man/man3/PAPI_sprofil.3 @@ -0,0 +1,120 @@ +.TH "PAPI_sprofil" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_sprofil \- +.PP +Generate PC histogram data from multiple code regions where hardware counter overflow occurs\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_sprofil( PAPI_sprofil_t * prof, int profcnt, int EventSet, int EventCode, int threshold, int flags )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fI*prof\fP pointer to an array of \fBPAPI_sprofil_t\fP structures\&. Each copy of the structure contains the following: +.PD 0 + +.IP "\(bu" 2 +buf -- pointer to a buffer of bufsiz bytes in which the histogram counts are stored in an array of unsigned short, unsigned int, or unsigned long long values, or 'buckets'\&. The size of the buckets is determined by values in the flags argument\&. +.IP "\(bu" 2 +bufsiz -- the size of the histogram buffer in bytes\&. It is computed from the length of the code region to be profiled, the size of the buckets, and the scale factor as discussed below\&. +.IP "\(bu" 2 +offset -- the start address of the region to be profiled\&. 
+.IP "\(bu" 2 +scale -- broadly and historically speaking, a contraction factor that indicates how much smaller the histogram buffer is than the region to be profiled\&. More precisely, scale is interpreted as an unsigned 16-bit fixed-point fraction with the decimal point implied on the left\&. Its value is the reciprocal of the number of addresses in a subdivision, per counter of histogram buffer\&. +.PP +.br +\fIprofcnt\fP number of structures in the prof array for hardware profiling\&. +.br +\fIEventSet\fP The PAPI EventSet to profile\&. This EventSet is marked as profiling-ready, but profiling doesn't actually start until a \fBPAPI_start()\fP call is issued\&. +.br +\fIEventCode\fP Code of the Event in the EventSet to profile\&. This event must already be a member of the EventSet\&. +.br +\fIthreshold\fP minimum number of events that must occur before the PC is sampled\&. If hardware overflow is supported for your component, this threshold will trigger an interrupt when reached\&. Otherwise, the counters will be sampled periodically and the PC will be recorded for the first sample that exceeds the threshold\&. If the value of threshold is 0, profiling will be disabled for this event\&. +.br +\fIflags\fP bit pattern to control profiling behavior\&. Defined values are given in a table in the documentation for PAPI_pofil + + +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIReturn\fP values for \fBPAPI_sprofil()\fP are identical to those for \fBPAPI_profil\fP\&. Please refer to that page for further details\&. + + +.RE +.PP +\fBPAPI_sprofil()\fP is a structure driven profiler that profiles one or more disjoint regions of code in a single call\&. It accepts a pointer to a preinitialized array of sprofil structures, and initiates profiling based on the values contained in the array\&. Each structure in the array defines the profiling parameters that are normally passed to \fBPAPI_profil()\fP\&. 
For more information on profiling, see \fBPAPI_profil\fP\&.
diff --git a/man/man3/PAPI_sprofil_t.3 b/man/man3/PAPI_sprofil_t.3 new file mode 100644 index 0000000..1e05bfd --- /dev/null +++ b/man/man3/PAPI_sprofil_t.3 @@ -0,0 +1,41 @@ +.TH "PAPI_sprofil_t" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_sprofil_t \- +.SH SYNOPSIS +.br +.PP +.SS "Data Fields" + +.in +1c +.ti -1c +.RI "void * \fBpr_base\fP" +.br +.ti -1c +.RI "unsigned \fBpr_size\fP" +.br +.ti -1c +.RI "caddr_t \fBpr_off\fP" +.br +.ti -1c +.RI "unsigned \fBpr_scale\fP" +.br +.in -1c +.SH "Detailed Description" +.PP + +.SH "Field Documentation" +.PP +.SS "void* PAPI_sprofil_t::pr_base" +buffer base +.SS "caddr_t PAPI_sprofil_t::pr_off" +pc start address (offset) +.SS "unsigned PAPI_sprofil_t::pr_scale" +pc scaling factor: fixed point fraction 0xffff ~= 1, 0x8000 == \&.5, 0x4000 == \&.25, etc\&. also, two extensions 0x1000 == 1, 0x2000 == 2 +.SS "unsigned PAPI_sprofil_t::pr_size" +buffer size + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_start.3 b/man/man3/PAPI_start.3 new file mode 100644 index 0000000..001a0f4 --- /dev/null +++ b/man/man3/PAPI_start.3 @@ -0,0 +1,85 @@ +.TH "PAPI_start" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_start \- +.PP +Start counting hardware events in an event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_start( int EventSet )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP -- an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP -- One or more of the arguments is invalid\&. +.br +\fIPAPI_ESYS\fP -- A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_ENOEVST\fP -- The EventSet specified does not exist\&. 
+.br +\fIPAPI_EISRUN\fP -- The EventSet is currently counting events\&. +.br +\fIPAPI_ECNFLCT\fP -- The underlying counter hardware can not count this event and other events in the EventSet simultaneously\&. +.br +\fIPAPI_ENOEVNT\fP -- The PAPI preset is not available on the underlying hardware\&. +.RE +.PP +\fBPAPI_start\fP starts counting all of the hardware events contained in the previously defined EventSet\&. All counters are implicitly set to zero before counting\&. Assumes an initialized PAPI library and a properly added event set\&. +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +* int EventSet = PAPI_NULL; +* long long values[2]; +* int ret; +* +* ret = PAPI_create_eventset(&EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Add Total Instructions Executed to our EventSet +* ret = PAPI_add_event(EventSet, PAPI_TOT_INS); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Start counting +* ret = PAPI_start(EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* poorly_tuned_function(); +* ret = PAPI_stop(EventSet, values); +* if (ret != PAPI_OK) handle_error(ret); +* printf("%lld\\n",values[0]); +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_create_eventset\fP \fBPAPI_add_event\fP \fBPAPI_stop\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_start_counters.3 b/man/man3/PAPI_start_counters.3 new file mode 100644 index 0000000..7f5dccb --- /dev/null +++ b/man/man3/PAPI_start_counters.3 @@ -0,0 +1,65 @@ +.TH "PAPI_start_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_start_counters \- +.PP +Start counting hardware events\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include @n +int PAPI_start_counters( int *events, int array_len ); + +.fi +.PP +.PP +\fBParameters:\fP +.RS 4 +\fI*events\fP an array of codes for events such as PAPI_INT_INS or a native event code +.br +\fIarray_len\fP the number of items in the *events array +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_EISRUN\fP Counters have already been started, you must call \fBPAPI_stop_counters()\fP before you call this function again\&. +.br +\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_ENOMEM\fP Insufficient memory to complete the operation\&. +.br +\fIPAPI_ECNFLCT\fP The underlying counter hardware cannot count this event and other events in the EventSet simultaneously\&. +.br +\fIPAPI_ENOEVNT\fP The PAPI preset is not available on the underlying hardware\&. +.RE +.PP +\fBPAPI_start_counters()\fP starts counting the events named in the *events array\&. This function cannot be called if the counters have already been started\&. The user must call \fBPAPI_stop_counters()\fP to stop the events explicitly if he/she wants to call this function again\&. It is the user's responsibility to choose events that can be counted simultaneously by reading the vendor's documentation\&. The length of the *events array should be no longer than the value returned by \fBPAPI_num_counters()\fP\&. +.PP +.PP +.nf +if( PAPI_start_counters( Events, num_hwcntrs ) != PAPI_OK ) + handle_error(1); + * +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_stop_counters()\fP \fBPAPI_add_event()\fP \fBPAPI_create_eventset()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_state.3 b/man/man3/PAPI_state.3 new file mode 100644 index 0000000..91a51fa --- /dev/null +++ b/man/man3/PAPI_state.3 @@ -0,0 +1,105 @@ +.TH "PAPI_state" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_state \- +.PP +Return the counting state of an EventSet\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_state( int EventSet, int * status )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP -- an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP +.br +\fIstatus\fP -- an integer containing a boolean combination of one or more of the following nonzero constants as defined in the PAPI header file \fBpapi\&.h\fP: +.PD 0 + +.IP "\(bu" 2 +PAPI_STOPPED -- EventSet is stopped +.IP "\(bu" 2 +PAPI_RUNNING -- EventSet is running +.IP "\(bu" 2 +PAPI_PAUSED -- EventSet temporarily disabled by the library +.IP "\(bu" 2 +PAPI_NOT_INIT -- EventSet defined, but not initialized +.IP "\(bu" 2 +PAPI_OVERFLOWING -- EventSet has overflowing enabled +.IP "\(bu" 2 +PAPI_PROFILING -- EventSet has profiling enabled +.IP "\(bu" 2 +PAPI_MULTIPLEXING -- EventSet has multiplexing enabled +.IP "\(bu" 2 +PAPI_ACCUMULATING -- reserved for future use +.IP "\(bu" 2 +PAPI_HWPROFILING -- reserved for future use + +.PP +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. + +.RE +.PP +\fBPAPI_state()\fP returns the counting state of the specified event set\&. 
+ +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +* int EventSet = PAPI_NULL; +* int status = 0; +* int ret; +* +* ret = PAPI_create_eventset(&EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Add Total Instructions Executed to our EventSet +* ret = PAPI_add_event(EventSet, PAPI_TOT_INS); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Start counting +* ret = PAPI_state(EventSet, &status); +* if (ret != PAPI_OK) handle_error(ret); +* printf("State is now %d\n",status); +* ret = PAPI_start(EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* ret = PAPI_state(EventSet, &status); +* if (ret != PAPI_OK) handle_error(ret); +* printf("State is now %d\n",status); +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_stop\fP \fBPAPI_start\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_stop.3 b/man/man3/PAPI_stop.3 new file mode 100644 index 0000000..a0c44b4 --- /dev/null +++ b/man/man3/PAPI_stop.3 @@ -0,0 +1,83 @@ +.TH "PAPI_stop" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_stop \- +.PP +Stop counting hardware events in an event set\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + int \fBPAPI_stop( int EventSet, long long * values )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP -- an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP +.br +\fIvalues\fP -- an array to hold the counter values of the counting events +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_OK\fP +.br +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_ENOTRUN\fP The EventSet is currently not running\&. 
+.RE +.PP +\fBPAPI_stop\fP halts the counting of a previously defined event set and the counter values contained in that EventSet are copied into the values array Assumes an initialized PAPI library and a properly added event set\&. +.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +* int EventSet = PAPI_NULL; +* long long values[2]; +* int ret; +* +* ret = PAPI_create_eventset(&EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Add Total Instructions Executed to our EventSet +* ret = PAPI_add_event(EventSet, PAPI_TOT_INS); +* if (ret != PAPI_OK) handle_error(ret); +* +* // Start counting +* ret = PAPI_start(EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* poorly_tuned_function(); +* ret = PAPI_stop(EventSet, values); +* if (ret != PAPI_OK) handle_error(ret); +* printf("%lld\\n",values[0]); +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_create_eventset\fP \fBPAPI_start\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_stop_counters.3 b/man/man3/PAPI_stop_counters.3 new file mode 100644 index 0000000..d6a0704 --- /dev/null +++ b/man/man3/PAPI_stop_counters.3 @@ -0,0 +1,69 @@ +.TH "PAPI_stop_counters" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_stop_counters \- +.PP +Stop counting hardware events and reset values to zero\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@par C Interface: +\#include @n +int PAPI_stop_counters( long long *values, int array_len ); + +.fi +.PP +.PP +\fBParameters:\fP +.RS 4 +\fI*values\fP an array where to put the counter values +.br +\fIarray_len\fP the number of items in the *values array +.RE +.PP +\fBPostcondition:\fP +.RS 4 +After this function is called, the values are reset to zero\&. +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_EINVAL\fP One or more of the arguments is invalid\&. +.br +\fIPAPI_ENOTRUN\fP The EventSet is not started yet\&. 
+.br +\fIPAPI_ENOEVST\fP The EventSet has not been added yet\&. +.RE +.PP +The \fBPAPI_stop_counters()\fP function stops the counters and copies the counts into the *values array\&. The counters must have been started by a previous call to \fBPAPI_start_counters()\fP\&. +.PP +.PP +.nf +int Events[2] = { PAPI_TOT_CYC, PAPI_TOT_INS }; +long long values[2]; +if ( PAPI_start_counters( Events, 2 ) != PAPI_OK ) + handle_error(1); +your_slow_code(); +if ( PAPI_stop_counters( values, 2 ) != PAPI_OK ) + handle_error(1); + * +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_read_counters()\fP \fBPAPI_start_counters()\fP \fBPAPI_set_opt()\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_strerror.3 b/man/man3/PAPI_strerror.3 new file mode 100644 index 0000000..720adb9 --- /dev/null +++ b/man/man3/PAPI_strerror.3 @@ -0,0 +1,79 @@ +.TH "PAPI_strerror" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_strerror \- +.PP +Returns a string describing the PAPI error code\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBC Interface:\fP +.RS 4 +#include <\fBpapi\&.h\fP> +.br + char * \fBPAPI_strerror( int errorCode )\fP; +.RE +.PP +\fBParameters:\fP +.RS 4 +\fIcode\fP -- the error code to interpret +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fI*error\fP -- a pointer to the error string\&. +.br +\fINULL\fP -- the input error code to \fBPAPI_strerror()\fP is invalid\&. +.RE +.PP +\fBPAPI_strerror()\fP returns a pointer to the error message corresponding to the error code code\&. If the call fails the function returns the NULL pointer\&. This function is not implemented in Fortran\&. 
+.PP +\fBExample:\fP +.RS 4 + +.PP +.nf +* int ret; +* int EventSet = PAPI_NULL; +* int native = 0x0; +* char error_str[PAPI_MAX_STR_LEN]; +* +* ret = PAPI_create_eventset(&EventSet); +* if (ret != PAPI_OK) +* { +* fprintf(stderr, "PAPI error %d: %s\n", ret, PAPI_strerror(retval)); +* exit(1); +* } +* // Add Total Instructions Executed to our EventSet +* ret = PAPI_add_event(EventSet, PAPI_TOT_INS); +* if (ret != PAPI_OK) +* { +* PAPI_perror( "PAPI_add_event"); +* fprintf(stderr,"PAPI_error %d: %s\n", ret, error_str); +* exit(1); +* } +* // Start counting +* ret = PAPI_start(EventSet); +* if (ret != PAPI_OK) handle_error(ret); +* + +.fi +.PP +.RE +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_perror\fP \fBPAPI_set_opt\fP \fBPAPI_get_opt\fP \fBPAPI_shutdown\fP \fBPAPI_set_debug\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_thread_id.3 b/man/man3/PAPI_thread_id.3 new file mode 100644 index 0000000..4738bea --- /dev/null +++ b/man/man3/PAPI_thread_id.3 @@ -0,0 +1,51 @@ +.TH "PAPI_thread_id" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_thread_id \- +.PP +Get the thread identifier of the current thread\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@retval PAPI_EMISC + is returned if there are no threads registered. +@retval -1 + is returned if the thread id function returns an error. + +This function returns a valid thread identifier. +It calls the function registered with PAPI through a call to +PAPI_thread_init(). + +.fi +.PP +.PP +.PP +.nf +unsigned long tid; + +if ((tid = PAPI_thread_id()) == (unsigned long int)-1 ) + exit(1); + +printf("Initial thread id is: %lu\n", tid ); + * +.fi +.PP + +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_thread_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_thread_init.3 b/man/man3/PAPI_thread_init.3 new file mode 100644 index 0000000..f132cbc --- /dev/null +++ b/man/man3/PAPI_thread_init.3 @@ -0,0 +1,50 @@ +.TH "PAPI_thread_init" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_thread_init \- +.PP +Initialize thread support in the PAPI library\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +.nf +@param *id_fn + Pointer to a function that returns current thread ID. + +PAPI_thread_init initializes thread support in the PAPI library. +Applications that make no use of threads do not need to call this routine. +This function MUST return a UNIQUE thread ID for every new thread/LWP created. +The OpenMP call omp_get_thread_num() violates this rule, as the underlying +LWPs may have been killed off by the run-time system or by a call to omp_set_num_threads() . +In that case, it may still possible to use omp_get_thread_num() in +conjunction with PAPI_unregister_thread() when the OpenMP thread has finished. +However it is much better to use the underlying thread subsystem's call, +which is pthread_self() on Linux platforms. + +.fi +.PP +.PP +.PP +.nf +if ( PAPI_thread_init(pthread_self) != PAPI_OK ) + exit(1); + * +.fi +.PP +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_register_thread\fP \fBPAPI_unregister_thread\fP \fBPAPI_get_thr_specific\fP \fBPAPI_set_thr_specific\fP \fBPAPI_thread_id\fP \fBPAPI_list_threads\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_unlock.3 b/man/man3/PAPI_unlock.3 new file mode 100644 index 0000000..6743467 --- /dev/null +++ b/man/man3/PAPI_unlock.3 @@ -0,0 +1,32 @@ +.TH "PAPI_unlock" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_unlock \- +.PP +Unlock one of the mutex variables defined in \fBpapi\&.h\fP\&. 
+ +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBParameters:\fP +.RS 4 +\fIlck\fP an integer value specifying one of the two user locks: PAPI_USR1_LOCK or PAPI_USR2_LOCK +.RE +.PP +\fBPAPI_unlock()\fP unlocks the mutex acquired by a call to \fBPAPI_lock\fP \&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_thread_init\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. diff --git a/man/man3/PAPI_unregister_thread.3 b/man/man3/PAPI_unregister_thread.3 new file mode 100644 index 0000000..f1f579e --- /dev/null +++ b/man/man3/PAPI_unregister_thread.3 @@ -0,0 +1,29 @@ +.TH "PAPI_unregister_thread" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_unregister_thread \- +.PP +Notify PAPI that a thread has 'disappeared'\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_ENOMEM\fP Space could not be allocated to store the new thread information\&. +.br +\fIPAPI_ESYS\fP A system or C library call failed inside PAPI, see the errno variable\&. +.br +\fIPAPI_ECMP\fP Hardware counters for this thread could not be initialized\&. +.RE +.PP +\fBPAPI_unregister_thread\fP should be called when the user wants to shutdown a particular thread and free the associated thread ID\&. THIS IS IMPORTANT IF YOUR THREAD LIBRARY REUSES THE SAME THREAD ID FOR A NEW KERNEL LWP\&. OpenMP does this\&. OpenMP parallel regions, if separated by a call to omp_set_num_threads() will often kill off the underlying kernel LWPs and then start new ones for the next region\&. However, omp_get_thread_id() does not reflect this, as the thread IDs for the new LWPs will be the same as the old LWPs\&. PAPI needs to know that the underlying LWP has changed so it can set up the counters for that new thread\&. This is accomplished by calling this function\&. + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/man/man3/PAPI_write.3 b/man/man3/PAPI_write.3 new file mode 100644 index 0000000..5c46110 --- /dev/null +++ b/man/man3/PAPI_write.3 @@ -0,0 +1,43 @@ +.TH "PAPI_write" 3 "Mon Dec 18 2017" "Version 5.6.0.0" "PAPI" \" -*- nroff -*- +.ad l +.nh +.SH NAME +PAPI_write \- +.PP +Write counter values into counters\&. + +.SH SYNOPSIS +.br +.PP +.SH "Detailed Description" +.PP + +.PP +\fBParameters:\fP +.RS 4 +\fIEventSet\fP an integer handle for a PAPI event set as created by \fBPAPI_create_eventset\fP +.br +\fI*values\fP an array to hold the counter values of the counting events +.RE +.PP +\fBReturn values:\fP +.RS 4 +\fIPAPI_ENOEVST\fP The EventSet specified does not exist\&. +.br +\fIPAPI_ECMP\fP \fBPAPI_write()\fP is not implemented for this architecture\&. +.br +\fIPAPI_ESYS\fP The EventSet is currently counting events and the component could not change the values of the running counters\&. +.RE +.PP +\fBPAPI_write()\fP writes the counter values provided in the array values into the event set EventSet\&. The virtual counters managed by the PAPI library will be set to the values provided\&. If the event set is running, an attempt will be made to write the values to the running counters\&. This operation is not permitted by all components and may result in a run-time error\&. +.PP +\fBSee Also:\fP +.RS 4 +\fBPAPI_read\fP +.RE +.PP + + +.SH "Author" +.PP +Generated automatically by Doxygen for PAPI from the source code\&. 
diff --git a/papi.spec b/papi.spec new file mode 100644 index 0000000..dd45942 --- /dev/null +++ b/papi.spec @@ -0,0 +1,101 @@ +Summary: Performance Application Programming Interface +Name: papi +Version: 5.6.0.0 +Release: 1%{?dist} +License: BSD +Group: Development/System +URL: http://icl.utk.edu/papi/ +Source0: http://icl.utk.edu/projects/papi/downloads/%{name}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root +BuildRequires: ncurses-devel +BuildRequires: gcc-gfortran +BuildRequires: kernel-headers >= 2.6.32 +BuildRequires: chrpath +#Right now libpfm does not know anything about s390 and will fail +ExcludeArch: s390 s390x + +%description +PAPI provides a programmer interface to monitor the performance of +running programs. + +%package devel +Summary: Header files for the compiling programs with PAPI +Group: Development/System +Requires: papi = %{version}-%{release} +%description devel +PAPI-devel includes the C header files that specify the PAPI userspace +libraries and interfaces. This is required for rebuilding any program +that uses PAPI. + +%prep +%setup -q + +%build +cd src +%configure --with-static-lib=no --with-shared-lib=yes --with-shlib +#DBG workaround to make sure libpfm just uses the normal CFLAGS +DBG="" make + +#%check +#cd src +#make fulltest + +%install +rm -rf $RPM_BUILD_ROOT +cd src +make DESTDIR=$RPM_BUILD_ROOT install + +chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so* + +# Remove the static libraries. 
Static libraries are undesirable: +# https://fedoraproject.org/wiki/Packaging/Guidelines#Packaging_Static_Libraries +rm -rf $RPM_BUILD_ROOT%{_libdir}/*.a + +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root,-) +%{_bindir}/* +%{_libdir}/*.so.* +/usr/share/papi +%doc INSTALL.txt README LICENSE.txt RELEASENOTES.txt + +%files devel +%defattr(-,root,root,-) +%{_includedir}/*.h +%{_includedir}/perfmon +%{_libdir}/*.so +%doc %{_mandir}/man3/* +%doc %{_mandir}/man1/* + +%changelog +* Tue Jan 31 2012 Dan Terpstra - 4.2.1 +- Rebase to papi-4.2.1 + +* Wed Dec 8 2010 Dan Terpstra - 4.1.2-1 +- Rebase to papi-4.1.2 + +* Mon Jun 8 2010 William Cohen - 4.1.0-1 +- Rebase to papi-4.1.0 + +* Mon May 17 2010 William Cohen - 4.0.0-5 +- Test run with upstream cvs version. + +* Wed Feb 10 2010 William Cohen - 4.0.0-4 +- Resolves: rhbz562935 Rebase to papi-4.0.0 (correct ExcludeArch). + +* Wed Feb 10 2010 William Cohen - 4.0.0-3 +- Resolves: rhbz562935 Rebase to papi-4.0.0 (bump nvr). + +* Wed Feb 10 2010 William Cohen - 4.0.0-2 +- correct the ctests/shlib test +- have PAPI_set_multiplex() return proper value +- properly handle event unit masks +- correct PAPI_name_to_code() to match events +- Resolves: rhbz562935 Rebase to papi-4.0.0 + +* Wed Jan 13 2010 William Cohen - 4.0.0-1 +- Generate papi.spec file for papi-4.0.0. diff --git a/src/.indent.pro b/src/.indent.pro new file mode 100644 index 0000000..543bcd7 --- /dev/null +++ b/src/.indent.pro @@ -0,0 +1,183 @@ +/** + * PAPI - Indent profile.

+ * + * The purpose of this file is to standardize PAPI's source code style. + * Every new/modified source should be formatted with indent using this + * profile before it is checked in again. + * + * @name .indent.pro + * + * @version $Revision$
+ * $Date$
+ * $Author$ + * + * @author Heike Jagode + */ + + + + +/* use tabs */ +--use-tabs + +/* set tab size to 4 spaces */ +--tab-size4 + +/* set indentation level to 4 spaces, and these will be turned into + * tabs by default */ +--indent-level4 + +/* don't put variables in column 16 */ +//--declaration-indentation16 + + + + +/* maximum length of a line is 80 */ +--line-length80 + +/* breakup the procedure type */ +--procnames-start-lines +// --dont-break-procedure-type + +/* break long lines after the boolean operators && and || */ +--break-after-boolean-operator + +/* if long lines are already broken up, GNU indent won't touch them */ +--honour-newlines + +/* If a line has a left parenthesis which is not closed on that line, + * then continuation lines will be lined up to start at the character + * position just after the left parenthesis */ +--continue-at-parentheses + +/* NO! (see --continue-at-parentheses) */ +--continuation-indentation0 + + + + +/* put braces on line with if, etc.*/ +--braces-on-if-line +//--braces-after-if-line + +/* put braces on the line after struct declaration lines */ +--braces-after-struct-decl-line + +/* put braces on the line after function definition lines */ +--braces-after-func-def-line + +/* indent braces 0 spaces */ +--brace-indent0 + +/* NO extra struct/union brace indentation */ +--struct-brace-indentation0 + +/* NO extra case brace indentation! 
*/ +--case-brace-indentation0 + +/* put a space after and before every parenthesis */ +--space-after-parentheses + +/* NO extra parentheses indentation in broken lines */ +--paren-indentation0 + + + + +/* blank line causes problems with multi parameter function prototypes */ +--no-blank-lines-after-declarations + +/* forces blank line after every procedure body */ +--blank-lines-after-procedures + +/* NO newline is forced after each comma in a declaration */ +--no-blank-lines-after-commas + +/* allow optional blank lines */ +--leave-optional-blank-lines +// --swallow-optional-blank-lines + + + + +/* do not put comment delimiters on blank lines */ +--no-comment-delimiters-on-blank-lines + +/* the maximum comment column is 79 */ +--comment-line-length79 + +/* do not touch comments starting at column 0 */ +--dont-format-first-column-comments + +/* no extra line comment indentation */ +--line-comments-indentation0 + +/* dont star comments */ +--dont-star-comments +// --start-left-side-of-comments + +/* comments to the right of the code start at column 30 */ +--comment-indentation30 + +/* comments after declarations start at column 40 */ +--declaration-comment-column40 + +/* comments after #else #endif start at column 8 */ +--else-endif-column8 + + + + +/* Do not cuddle } and the while of a do {} while; */ +--dont-cuddle-do-while + +/* Do cuddle } and else */ +--cuddle-else +//--dont-cuddle-else + +/* a case label indentation of 0 */ +--case-indentation0 + +/* put no space after a cast operator */ +//--no-space-after-casts + +/* no space after function call names; + * but space after keywords for, if, while */ +--no-space-after-function-call-names +//--no-space-after-for +//--no-space-after-if +//--no-space-after-while + +/* Do not force space between special statements and semicolon */ +--dont-space-special-semicolon +// --space-special-semicolon + +/* put a space between sizeof and its argument :TODO: check */ +--blank-before-sizeof + +/* enable verbose mode */ 
+--verbose +// --no-verbosity + + + + +/* NO space between # and preprocessor directives */ +// --leave-preprocessor-space + +/* format some comments but not all */ +// --dont-format-comments + +/* NO gnu style as default */ +// --gnu-style + +/* K&R default style */ +--k-and-r-style + +/* NO Berkeley default style */ +// --original + +/* read this profile :-) */ +// --ignore-profile + diff --git a/src/CreatePresetTbl.sh b/src/CreatePresetTbl.sh new file mode 100755 index 0000000..a0b84d3 --- /dev/null +++ b/src/CreatePresetTbl.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +# This is a shell script to help create the man page for PAPI_presets + +cat << EOF +.TS +box, tab(&); +lt | lw(50). += +EOF +./tests/avail | grep 'PAPI_' | sed 's/(.*)//g' | sort | \ + awk '{ printf("%s&T{\n", $1); for(i=5;i<=NF;i++) { printf("%s ",$i) } ; printf("\nT}\n_\n") }' +echo ".TE" diff --git a/src/INSTALL b/src/INSTALL new file mode 100644 index 0000000..88bbc9b --- /dev/null +++ b/src/INSTALL @@ -0,0 +1,8 @@ +/* +* File: papi/src/README +* CVS: $Id$ +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +Please see the INSTALL.txt in the root directory. 
diff --git a/src/Makefile.in b/src/Makefile.in new file mode 100644 index 0000000..7ef9e6b --- /dev/null +++ b/src/Makefile.in @@ -0,0 +1,97 @@ +PAPIVER=5 +PAPIREV=6 +PAPIAGE=0 +PAPIINC=0 +PREFIX = @prefix@ +prefix = $(PREFIX) +exec_prefix = $(EPREFIX) +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +ALTIX = @altix@ +AR = @AR@ +ARCH = @arch@ +ARCH_EVENTS = @ARCH_EVENTS@ +ARG64 = @ARG64@ +BGP_SYSDIR = @BGP_SYSDIR@ +BINDIR = @bindir@ +BITFLAGS = @BITFLAGS@ +CC = @CC@ +CC_R = @CC_R@ +CC_SHR = @CC_SHR@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +COMPONENT_RULES = @COMPONENT_RULES@ +COMPONENTS = @COMPONENTS@ +CPU = @CPU@ +CPU_MODEL = @CPU_MODEL@ +cpu_option = @cpu_option@ +CTEST_TARGETS = @CTEST_TARGETS@ +datarootdir = @datarootdir@ +DATADIR = @datadir@/${PACKAGE_TARNAME} +DESCR = @DESCR@ +DOCDIR = @docdir@ +EPREFIX = @exec_prefix@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FLAGS = @FLAGS@ +FILENAME = @FILENAME@ +FTEST_TARGETS = @FTEST_TARGETS@ +INCDIR = @includedir@ +LDFLAGS = @LDFLAGS@ +LIBCFLAGS = @PAPICFLAGS@ +LIBDIR = @libdir@ +LIBRARY = @LIBRARY@ +LIBS = @papiLIBS@ +LINKLIB = @LINKLIB@ +MAKEVER = @MAKEVER@ +MANDIR = @mandir@ +MISCHDRS = @MISCHDRS@ +MISCOBJS = @MISCOBJS@ +MISCSRCS = @MISCSRCS@ +MPICC = @MPICC@ +NOOPT = @NOOPT@ +OMPCFLGS = @OMPCFLGS@ +OPTFLAGS = @OPTFLAGS@ +OSFILESSRC = @OSFILESSRC@ +OSFILESOBJ = @OSFILESOBJ@ +OSFILESHDR = @OSFILESHDR@ +OSLOCK = @OSLOCK@ +OSCONTEXT = @OSCONTEXT@ +PAPI_EVENTS = @PAPI_EVENTS@ +PAPI_EVENTS_CSV = @PAPI_EVENTS_CSV@ +PEPATH = @PEPATH@ +PERFCTR_INC_PATH = @perfctr_incdir@ +PERFCTR_LIB_PATH = @perfctr_libdir@ +PERFCTR_PREFIX = @perfctr_prefix@ +PERFCTR_ROOT = @perfctr_root@ +PFM_INC_PATH = @pfm_incdir@ +PFM_LIB_PATH = @pfm_libdir@ +PFM_OLD_PFMV2 = @old_pfmv2@ +BGPM_INSTALL_DIR = @BGPM_INSTALL_DIR@ +PFM_PREFIX = @pfm_prefix@ +PFM_ROOT = @pfm_root@ +POST_BUILD = @POST_BUILD@ +PMAPI = @PMAPI@ +PMINIT = @PMINIT@ +SETPATH = @SETPATH@ +SHLIB = @SHLIB@ +VLIB = @VLIB@ +SHLIBDEPS = @SHLIBDEPS@ +SHOW_CONF = @SHOW_CONF@ +SMPCFLGS = @SMPCFLGS@ 
+STATIC = @STATIC@ +CPUCOMPONENT_NAME = @CPUCOMPONENT_NAME@ +CPUCOMPONENT_C = @CPUCOMPONENT_C@ +CPUCOMPONENT_OBJ = @CPUCOMPONENT_OBJ@ +TESTS = @tests@ +TOPTFLAGS = @TOPTFLAGS@ +FTOPTFLAGS = @TOPTFLAGS@ +UTIL_TARGETS = @UTIL_TARGETS@ +VERSION = @VERSION@ +LDL = @LDL@ +HAVE_NO_OVERRIDE_INIT = @HAVE_NO_OVERRIDE_INIT@ +CC_COMMON_NAME = @CC_COMMON_NAME@ +MIC = @MIC@ + +include $(FILENAME) + diff --git a/src/Makefile.inc b/src/Makefile.inc new file mode 100644 index 0000000..5f267ad --- /dev/null +++ b/src/Makefile.inc @@ -0,0 +1,363 @@ +PAPI_SRCDIR = $(PWD) +SOURCES = $(MISCSRCS) papi.c papi_internal.c papi_hl.c extras.c sw_multiplex.c \ + papi_fwrappers.c papi_fwrappers_.c papi_fwrappers__.c upper_PAPI_FWRAPPERS.c \ + threads.c cpus.c $(OSFILESSRC) $(CPUCOMPONENT_C) papi_preset.c \ + papi_vector.c papi_memory.c $(COMPSRCS) +OBJECTS = $(MISCOBJS) papi.o papi_internal.o papi_hl.o extras.o sw_multiplex.o \ + papi_fwrappers.o papi_fwrappers_.o papi_fwrappers__.o upper_PAPI_FWRAPPERS.o \ + threads.o cpus.o $(OSFILESOBJ) $(CPUCOMPONENT_OBJ) papi_preset.o \ + papi_vector.o papi_memory.o $(COMPOBJS) +PAPI_EVENTS_TABLE = papi_events_table.h +HEADERS = $(MISCHDRS) $(OSFILESHDR) $(PAPI_EVENTS_TABLE) \ + papi.h papi_internal.h papiStdEventDefs.h \ + papi_preset.h threads.h cpus.h papi_vector.h \ + papi_memory.h config.h \ + extras.h sw_multiplex.h papi_hl.h \ + papi_common_strings.h components_config.h + +LIBCFLAGS += -I. 
$(CFLAGS) -DOSLOCK=\"$(OSLOCK)\" -DOSCONTEXT=\"$(OSCONTEXT)\" +FHEADERS = fpapi.h f77papi.h f90papi.h +# pkgconfig directory +LIBPC = $(LIBDIR)/pkgconfig + +all: $(SHOW_CONF) $(LIBS) utils tests +.PHONY : all test fulltest tests testlib utils ctests ftests comp_tests validation_tests null + +include $(COMPONENT_RULES) + +showconf: + @echo "Host architecture : $(DESCR)"; + @echo "Host CPU component : $(CPUCOMPONENT_NAME)"; + @echo "Installation DESTDIR: $(DESTDIR)"; + @echo "Installation PREFIX : $(PREFIX)"; + @echo "Installation EPREFIX: $(EPREFIX)"; + @echo "Installation INCDIR : $(INCDIR)"; + @echo "Installation LIBDIR : $(LIBDIR)"; + @echo "Installation BINDIR : $(BINDIR)"; + @echo "Installation MANDIR : $(MANDIR)"; + @echo "Installation DOCDIR : $(DOCDIR)"; + @echo "Installation DATADIR: $(DATADIR)"; + @echo + +show_bgp_conf: + @echo; + @echo "BG/P System Path : $(BGP_SYSDIR)"; + @echo "BG/P Install Path : $(BGP_INSTALLDIR)"; + @echo "BG/P GNU/Linux Path: $(BGP_GNU_LINUX_PATH)"; + @echo "BG/P ARCH Path : $(BGP_ARCH_PATH)"; + @echo "BG/P Runtime Path : $(BGP_RUNTIME_PATH)"; + @echo + +static: $(LIBRARY) + +$(LIBRARY): $(OBJECTS) + rm -f $(LIBRARY) + $(AR) $(ARG64) rv $(LIBRARY) $(OBJECTS) + +shared: libpapi.so libpapi.so.$(PAPIVER) + +libpapi.so libpapi.so.$(PAPIVER): $(SHLIB) + ln -sf $(SHLIB) $@ + +$(SHLIB): $(HEADERS) $(SOURCES) $(SHLIBOBJS) + rm -f $(SHLIB) libpapi.so libpapi.so.$(PAPIVER); + $(CC_SHR) $(LIBCFLAGS) $(OPTFLAGS) $(SOURCES) $(SHLIBOBJS) -o $@ $(SHLIBDEPS) $(LDFLAGS) + @if test "$(POST_BUILD)" != "" ; then \ + -$(POST_BUILD) ; \ + fi + +papi_fwrappers_.c: papi_fwrappers.c $(HEADERS) + $(CPP) -DFORTRANUNDERSCORE papi_fwrappers.c > papi_fwrappers_.c + +papi_fwrappers__.c: papi_fwrappers.c $(HEADERS) + $(CPP) -DFORTRANDOUBLEUNDERSCORE papi_fwrappers.c > papi_fwrappers__.c + +upper_PAPI_FWRAPPERS.c: papi_fwrappers.c $(HEADERS) + $(CPP) -DFORTRANCAPS papi_fwrappers.c > upper_PAPI_FWRAPPERS.c + +papi_fwrappers.o: papi_fwrappers.c $(HEADERS) + $(CC) 
$(LIBCFLAGS) $(OPTFLAGS) -c papi_fwrappers.c -o papi_fwrappers.o + +papi_fwrappers_.o: papi_fwrappers_.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_fwrappers_.c -o papi_fwrappers_.o + +papi_fwrappers__.o: papi_fwrappers__.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_fwrappers__.c -o papi_fwrappers__.o + +upper_PAPI_FWRAPPERS.o: upper_PAPI_FWRAPPERS.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c upper_PAPI_FWRAPPERS.c -o upper_PAPI_FWRAPPERS.o + +papi.o: papi.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi.c -o papi.o + +papi_internal.o: papi_internal.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_internal.c -o papi_internal.o + +threads.o: threads.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c threads.c -o threads.o + +cpus.o: cpus.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c cpus.c -o cpus.o + +papi_hl.o: papi_hl.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_hl.c -o papi_hl.o + +aix-memory.o: aix-memory.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c aix-memory.c -o aix-memory.o + +solaris-memory.o: solaris-memory.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c solaris-memory.c -o solaris-memory.o + +solaris-common.o: solaris-common.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c solaris-common.c -o solaris-common.o + +linux-bgp-memory.o: linux-bgp-memory.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c linux-bgp-memory.c -o linux-bgp-memory.o + +linux-bgq-memory.o: linux-bgq-memory.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c linux-bgq-memory.c -o linux-bgq-memory.o + +darwin-memory.o: darwin-memory.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c darwin-memory.c -o darwin-memory.o + +darwin-common.o: darwin-common.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c darwin-common.c -o darwin-common.o + +linux-memory.o: linux-memory.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c linux-memory.c -o linux-memory.o + +linux-timer.o: linux-timer.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c linux-timer.c -o 
linux-timer.o + +linux-common.o: linux-common.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c linux-common.c -o linux-common.o + +extras.o: extras.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c extras.c -o extras.o + +papi_memory.o: papi_memory.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_memory.c -o papi_memory.o + +papi_vector.o: papi_vector.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_vector.c -o papi_vector.o + +papi_preset.o: papi_preset.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_preset.c -o papi_preset.o + +sw_multiplex.o: sw_multiplex.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c sw_multiplex.c -o sw_multiplex.o + +$(CPUCOMPONENT_OBJ): $(CPUCOMPONENT_C) $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c $(CPUCOMPONENT_C) -o $(CPUCOMPONENT_OBJ) + +x86_cpuid_info.o: x86_cpuid_info.c x86_cpuid_info.h $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c x86_cpuid_info.c -o x86_cpuid_info.o + +$(PAPI_EVENTS_TABLE): $(PAPI_EVENTS_CSV) papi_events_table.sh + sh papi_events_table.sh $(PAPI_EVENTS_CSV) > $@ + +$(ARCH_EVENTS)_map.o: $(ARCH_EVENTS)_map.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c $(ARCH_EVENTS)_map.c -o $(ARCH_EVENTS)_map.o + +# Required for BGP +.SUFFIXES: .rts.o +.c.rts.o: + $(CC) $(CFLAGS) -c $< -o $@ + +bgp_tests:$(LIBRARY) null + $(SETPATH) cd ctests/bgp; $(MAKE) CC="$(CC)" CC_R="$(CC_R)" MPICC="$(MPICC)" CFLAGS="-I.. -I../.. 
$(CFLAGS)" TOPTFLAGS="$(TOPTFLAGS)" SMPCFLGS="$(SMPCFLGS)" OMPCFLGS="$(OMPCFLGS)" NOOPT="$(NOOPT)" LDFLAGS="$(LDFLAGS) $(STATIC)" LIBRARY="../../$(LINKLIB)" bgp_tests + +#Required for freebsd +freebsd-memory.o: freebsd-memory.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map.o: freebsd/map.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-unknown.o: freebsd/map-unknown.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-p6.o: freebsd/map-p6.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-p6-m.o: freebsd/map-p6-m.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-p6-3.o: freebsd/map-p6-3.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-p6-2.o: freebsd/map-p6-2.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-p6-c.o: freebsd/map-p6-c.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-k7.o: freebsd/map-k7.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-k8.o: freebsd/map-k8.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-p4.o: freebsd/map-p4.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-atom.o: freebsd/map-atom.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-core.o: freebsd/map-core.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-core2.o: freebsd/map-core2.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-core2-extreme.o: freebsd/map-core2-extreme.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-i7.o: freebsd/map-i7.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +freebsd/map-westmere.o: freebsd/map-westmere.c $(HEADERS) + $(CC) $(LIBCFLAGS) -c $< -o $@ + +test: ctests + $(SETPATH) ctests/zero + +fulltest: tests + $(SETPATH) sh run_tests.sh + +tests: $(TESTS) + +testlib: + $(SETPATH) cd testlib; $(MAKE) + +utils: $(LIBS) testlib + cd utils && $(MAKE) + +validation_tests: $(LIBS) testlib + $(SETPATH) cd validation_tests; $(MAKE) + +ctests: $(LIBS) testlib validation_tests + $(SETPATH) cd ctests; 
$(MAKE) + +ftests: $(LIBS) testlib + $(SETPATH) cd ftests; $(MAKE) + + + + +# compile tests added to components +comp_tests: $(LIBS) testlib +ifneq (${COMPONENTS},) + @for comp in ${COMPONENTS} ; do \ + $$comp_tests: $(LIBS) testlib null ; \ + $(SETPATH) cd components/$$comp/tests ; $(MAKE) ; \ + $(SETPATH) cd ../../.. ; done +endif + +clean: comp_tests_clean native_clean + -rm -rf $(LIBRARY) $(SHLIB) libpapi.so libpapi.so.$(PAPIVER) $(OBJECTS) core rii_files genpapifdef *~ so_locations papi_fwrappers_.c papi_fwrappers__.c upper_PAPI_FWRAPPERS.c + -cd ../doc; $(MAKE) clean + -cd ctests; $(MAKE) clean + -cd ftests; $(MAKE) clean + -cd testlib; $(MAKE) clean + -cd utils; $(MAKE) clean + -cd validation_tests; $(MAKE) clean + +# Component tests cleaning +comp_tests_clean: +ifneq (${COMPONENTS},) + @for comp in ${COMPONENTS} ; do \ + cd components/$$comp/tests ; \ + $(MAKE) clean ; \ + cd ../../.. ; \ + done +endif + +clobber distclean: clean native_clobber + -cd ../doc; $(MAKE) distclean + -cd ctests; $(MAKE) distclean + -cd ftests; $(MAKE) distclean + -cd testlib; $(MAKE) distclean + -cd utils; $(MAKE) distclean + -cd validation_tests; $(MAKE) distclean + -cd components; $(MAKE) -f Makefile_comp_tests distclean + -rm -f $(LIBRARY) $(SHLIB) $(EXTRALIBS) Makefile config.h libpapi.so papi.pc components_config.h $(PAPI_EVENTS_TABLE) + -rm -f config.log config.status f77papi.h f90papi.h fpapi.h + +null: + +dist: + $(MAKE) install-all PREFIX=`pwd`/papi-$(CPUCOMPONENT_NAME) + tar cfv ./papi-$(CPUCOMPONENT_NAME).tar ./papi-$(CPUCOMPONENT_NAME) + gzip ./papi-$(CPUCOMPONENT_NAME).tar + rm -rf ./papi-$(CPUCOMPONENT_NAME) + +install-all: install install-tests + +install: install-lib install-man install-utils install-pkgconf + +install-lib: native_install + @echo "Headers (INCDIR) being installed in: \"$(DESTDIR)$(INCDIR)\""; + -mkdir -p $(DESTDIR)$(INCDIR) + -chmod go+rx $(DESTDIR)$(INCDIR) + cp $(FHEADERS) papi.h papiStdEventDefs.h $(DESTDIR)$(INCDIR) + cd $(DESTDIR)$(INCDIR); chmod 
go+r $(FHEADERS) papi.h papiStdEventDefs.h + @echo "Libraries (LIBDIR) being installed in: \"$(DESTDIR)$(LIBDIR)\""; + -mkdir -p $(DESTDIR)$(LIBDIR) + -chmod go+rx $(DESTDIR)$(LIBDIR) + @if test -r $(LIBRARY) ; then \ + cp $(LIBRARY) $(DESTDIR)$(LIBDIR); \ + chmod go+r $(DESTDIR)$(LIBDIR)/$(LIBRARY); \ + fi + @if test -r $(SHLIB) ; then \ + cp -p $(SHLIB) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC); \ + chmod go+r $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) ; \ + cd $(DESTDIR)$(LIBDIR); ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE); \ + cd $(DESTDIR)$(LIBDIR); ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPIVER); \ + cd $(DESTDIR)$(LIBDIR); ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so; \ + fi + +install-man: + cd ../man && $(MAKE) DOCDIR=$(DESTDIR)$(DOCDIR) MANDIR=$(DESTDIR)$(MANDIR) install + +install-utils: + $(SETPATH) cd utils; $(MAKE) BINDIR="$(DESTDIR)$(BINDIR)" CC="$(CC)" CC_R="$(CC_R)" CFLAGS="-I.. $(CFLAGS)" TOPTFLAGS="$(TOPTFLAGS)" SMPCFLGS="$(SMPCFLGS)" OMPCFLGS="$(OMPCFLGS)" NOOPT="$(NOOPT)" LDFLAGS="$(LDFLAGS) $(STATIC)" LIBRARY="../$(LINKLIB)" install + +install-tests: install-comp_tests + $(SETPATH) cd testlib; $(MAKE) install + $(SETPATH) cd ctests; $(MAKE) install + $(SETPATH) cd ftests; $(MAKE) install + $(SETPATH) cd validation_tests; $(MAKE) install + -cp run_tests.sh $(DESTDIR)$(DATADIR) + -cp run_tests_exclude_cuda.txt $(DESTDIR)$(DATADIR) + -cp run_tests_exclude.txt $(DESTDIR)$(DATADIR) + -chmod go+rx $(DESTDIR)$(DATADIR)/run_tests.sh + -chmod go+r $(DESTDIR)$(DATADIR)/run_tests_exclude_cuda.txt $(DESTDIR)$(DATADIR)/run_tests_exclude.txt + +# Component tests installing +install-comp_tests: +ifneq (${COMPONENTS},) + @for comp in ${COMPONENTS} ; do \ + cd components/$$comp/tests ; \ + $(MAKE) DATADIR="$(DESTDIR)$(DATADIR)/components" install ; \ + cd ../../.. 
; \ + done +endif + +install-pkgconf: + @echo "pkcongfig being installed in: \"$(DESTDIR)$(LIBPC)\""; + -mkdir -p $(DESTDIR)$(LIBPC) + -chmod go+rx $(DESTDIR)$(LIBPC) + cp papi.pc $(DESTDIR)$(LIBPC)/papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc + cd $(DESTDIR)$(LIBPC); ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).pc; + cd $(DESTDIR)$(LIBPC); ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc papi-$(PAPIVER).pc; + cd $(DESTDIR)$(LIBPC); ln -sf papi-$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC).pc papi.pc; + +# +# Dummy targets for configurations that do not also include a Rules file with targets +# +native_clean: + +native_install: + +native_clobber: diff --git a/src/Matlab/FlopsInnerProduct.m b/src/Matlab/FlopsInnerProduct.m new file mode 100644 index 0000000..8ef9297 --- /dev/null +++ b/src/Matlab/FlopsInnerProduct.m @@ -0,0 +1,24 @@ +function FlopsInnerProduct + +% Compute an Inner Product (c = a * x) +% on elements sized from 50 to 500, +% in steps of 50. +% +% Use the PAPI flops call to measure the floating point operations performed. +% For each size, display: +% - number of floating point operations +% - theoretical number of operations +% - difference +% - per cent error +% - mflops/s + +fprintf(1,'\nPAPI Inner Product Test'); +fprintf(1,'\nUsing flops'); +fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n', 'difference', '% error', 'mflops') +for n=50:50:500, + a=rand(1,n);x=rand(n,1); + flops(0); + c=a*x; + [ops, mflops] = flops; + fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,ops,2*n,ops - 2*n, (1.0 - ((2*n) / ops)) * 100,mflops) +end diff --git a/src/Matlab/FlopsMatrixMatrix.m b/src/Matlab/FlopsMatrixMatrix.m new file mode 100644 index 0000000..c63400d --- /dev/null +++ b/src/Matlab/FlopsMatrixMatrix.m @@ -0,0 +1,23 @@ +function FlopsMatrixMatrix + +% Compute a Matrix Matrix multiply +% on square arrays sized from 50 to 500, +% in steps of 50. 
+% +% Use the PAPI flops call to measure the floating point operations performed. +% For each size, display: +% - number of floating point operations +% - theoretical number of operations +% - difference +% - per cent error +% - mflops/s + +fprintf(1,'\nPAPI Matrix Matrix Multiply Test'); +fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^3', 'difference', '% error', 'mflops') +for n=50:50:500, + a=rand(n);b=rand(n);c=rand(n); + flops(0); + c=c+a*b; + [count,mflops]=flops; + fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,count,2*n^3,count - 2*n^3, (1.0 - ((2*n^3) / count)) * 100,mflops) +end \ No newline at end of file diff --git a/src/Matlab/FlopsMatrixVector.m b/src/Matlab/FlopsMatrixVector.m new file mode 100644 index 0000000..ba527ba --- /dev/null +++ b/src/Matlab/FlopsMatrixVector.m @@ -0,0 +1,23 @@ +function FlopsMatrixVector + +% Compute a Matrix Vector multiply +% on arrays and vectors sized from 50 to 500, +% in steps of 50. +% +% Use the PAPI flops call to measure the floating point operations performed. +% For each size, display: +% - number of floating point operations +% - theoretical number of operations +% - difference +% - per cent error +% - mflops/s + +fprintf(1,'\nPAPI Matrix Vector Multiply Test'); +fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^2', 'difference', '% error', 'mflops') +for n=50:50:500, + a=rand(n);x=rand(n,1); + flops(0); + b=a*x; + [count,mflops]=flops; + fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,count,2*n^2,count - 2*n^2, (1.0 - ((2*n^2) / count)) * 100,mflops) +end \ No newline at end of file diff --git a/src/Matlab/FlopsSampler.m b/src/Matlab/FlopsSampler.m new file mode 100644 index 0000000..5812953 --- /dev/null +++ b/src/Matlab/FlopsSampler.m @@ -0,0 +1,135 @@ +function FlopsSampler(n) + +% A Sampler of Matlab functions that consume Floating Point Operations +% in increasing order of floating point intensity. 
+% +% FlopsSampler(n) - where n == array or vector size +% + +fprintf(1,'\nCounts Using PAPI\n'); +fprintf(1,'\n%24s %12s %14s %12s\n', 'Operations', 'n', 'fl pt ops', 'Mflop/s' ) +s1=rand(1,1);s2=rand(1,1); +x=rand(n,1);y=rand(n,1); +a=rand(n); +b=a; +c=a*a'; + +fprintf(1,'%25s', 'calling PAPI flops') +flops(0); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'dot product') +flops(0); +x'*y; +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'matrix vector') +flops(0); +a*x; +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'random matrix') +flops(0); +a=rand(n); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'chol(a)') +flops(0); +chol(c); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'lu(a)') +flops(0); +lu(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'x=a\y') +flops(0); +x=a\y; +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'condest(a)') +flops(0); +condest(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'qr(a)') +flops(0); +qr(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'matrix multiply') +flops(0); +a*b; +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'inv(a)') +flops(0); +inv(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'svd(a)') +flops(0); +svd(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'cond(a)') +flops(0); +cond(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'hess(a)') +flops(0); +hess(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, 
mflops ) + +fprintf(1,'%25s', 'eig(a)') +flops(0); +eig(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', '[u,s,v]=svd(a)') +flops(0); +[u,s,v]=svd(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 'pinv(a)') +flops(0); +pinv(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', 's=gsvd(a)') +flops(0); +s=gsvd(a,b); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', '[x,e]=eig(a)') +flops(0); +[x,e]=eig(a); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + +fprintf(1,'%25s', ' [u,v,x,c,s]=gsvd(a,b)') +flops(0); +[u,v,x,c,s]=gsvd(a,b); +[ops,mflops]=flops; +fprintf(1,'%12d %14d %12.2f\n', n, ops, mflops ) + diff --git a/src/Matlab/PAPI.m b/src/Matlab/PAPI.m new file mode 100644 index 0000000..d6eef09 --- /dev/null +++ b/src/Matlab/PAPI.m @@ -0,0 +1,84 @@ +% PAPI Performance API. +% PAPI provides access to one of 8 Hardware Performance Monitoring functions. +% +% ctrs = PAPI('num') - Return the number of hardware counters. +% PAPI('start', 'event', ...) - +% Begin counting the specified events. +% [val, ...] = PAPI('stop') - Stop counting and return the current values. +% [val, ...] = PAPI('read') - Read the current values of the active counters. +% [val, ...] = PAPI('accum') - Add the current values of the active counters +% to the input values. +% PAPI('ipc') - Begin counting instructions. +% ins = PAPI('ipc') - Return the number of instructions executed +% since the first call. +% [ins, ipc] = PAPI('ipc') - Return both the total number of instructions +% executed since the first call, and the +% incremental rate of instruction execution +% since the last call. +% PAPI('flips') +% PAPI('flops') - Begin counting floating point +% instructions or operations. 
+% ins = PAPI('flips') +% ops = PAPI('flops') - Return the number of floating point instruc- +% tions or operations since the first call. +% [ins, mflips] = PAPI('flips') +% [ops, mflops] = PAPI('flops') - +% Return both the number of floating point +% instructions or operations since the first +% call, and the incremental rate of floating +% point execution since the last call. +% +% DESCRIPTION +% The PAPI function provides access to the PAPI Performance API. +% PAPI takes advantage of the fact that most modern microprocessors +% have built-in hardware support for counting a variety of basic operations +% or events. PAPI uses these counters to track things like instructions +% executed, cycles elapsed, floating point instructions performed and +% a variety of other events. +% +% There are 8 subfunctions within the PAPI call, as described below: +% 'num' - provides information on the number of hardware counters built +% into this platform. The result of this call specifies how many +% events can be counted at once. +% 'start' - programs the counters with the named events and begins +% counting. The names of the events can be found in the PAPI +% documentation. If a named event cannot be found, or cannot +% be mapped, an error message is displayed. +% 'stop' - stops counting and returns the values of the counters in the +% same order as events were specified in the start command. +% 'stop' also can be used to reset the counters for the ipc +% flips and flops subfunctions described below. +% 'read' - return the values of the counters without stopping them. +% 'accum' - adds the values of the counters to the input parameters and +% returns them in the output parameters. Counting is not stopped. +% 'ipc' - returns the total instructions executed since the first call +% to this subfunction, and the rate of execution of instructions +% (as instructions per cycle) since the last call. 
+% 'flips' - returns the total floating point instructions executed since +% the first call to this subfunction, and the rate of execution +% of floating point instructions (as mega-floating point +% instructions per second, or mflips) since the last call. +% A floating point instruction is defined as whatever this cpu +% naturally counts as floating point instructions. +% 'flops' - identical to 'flips', except it measures floating point +% operations rather than instructions. In many cases these two +% counts may be identical. In some cases 'flops' will be a +% derived value that attempts to reproduce that which is +% traditionally considered a floating point operation. For +% example, a fused multiply-add would be counted as two +% operations, even if it was only a single instruction. +% +% In typical usage, the first five subfunctions: 'num', 'start', 'stop', +% 'read', and 'accum' are used together. 'num' establishes the maximum number +% of events that can be supplied to 'start'. After a 'start' is issued, +% 'read' and 'accum' can be intermixed until a 'stop' is issued. +% +% The three rate calls, 'ipc', 'flips', and 'flops' are intended to be used +% independently. They cannot be mixed, because they use the same counter +% resources. They can be used serially if they are separated by a 'stop' +% call, which can also be used to reset the counters. +% +% Copyright 2001 - 2004 The Innovative Computing Laboratory, +% University of Tennessee. +% $Revision$ $Date$ + diff --git a/src/Matlab/PAPIInnerProduct.m b/src/Matlab/PAPIInnerProduct.m new file mode 100644 index 0000000..95c3ee8 --- /dev/null +++ b/src/Matlab/PAPIInnerProduct.m @@ -0,0 +1,40 @@ +function PAPIInnerProduct + +% Compute an Inner Product (c = a * x) +% on elements sized from 50 to 500, +% in steps of 50. 
+% +% Use the PAPI mex function with two different methods: +% - The PAPI High Level flops call +% - PAPI High Level start/stop calls +% +% For each size, display: +% - number of floating point operations +% - theoretical number of operations +% - difference +% - per cent error +% - mflops/s + +fprintf(1,'\n\nPAPI Inner Product Test'); +fprintf(1,'\nUsing the High Level PAPI("flops") call'); +fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n', 'difference', '% error', 'mflops') +for n=50:50:500, + a=rand(1,n);x=rand(n,1); + PAPI('stop'); % reset the counters to zero + PAPI('flops'); % start counting flops + c=a*x; + [ops, mflops] = PAPI('flops'); % read the flops data + fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,ops,2*n,ops - 2*n, (1.0 - ((2*n) / ops)) * 100,mflops) +end +PAPI('stop'); + +fprintf(1,'\n\nPAPI Inner Product Test'); +fprintf(1,'\nUsing PAPI start and stop'); +fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n', 'difference', '% error', 'flops/cycle') +for n=50:50:500, + a=rand(1,n);x=rand(n,1); + PAPI('start', 'PAPI_TOT_CYC', 'PAPI_FP_OPS'); + c=a*x; + [cyc, ops] = PAPI('stop'); + fprintf(1,'%12d %12d %12d %12d %12.2f %12.6f\n',n,ops,2*n,ops - 2*n, (1.0 - ((2*n) / ops)) * 100,ops/cyc) +end \ No newline at end of file diff --git a/src/Matlab/PAPIMatrixMatrix.m b/src/Matlab/PAPIMatrixMatrix.m new file mode 100644 index 0000000..5402c4a --- /dev/null +++ b/src/Matlab/PAPIMatrixMatrix.m @@ -0,0 +1,40 @@ +function PAPIMatrixMatrix + +% Compute a Matrix Matrix multiply +% on square arrays sized from 50 to 500, +% in steps of 50. 
+% +% Use the PAPI mex function with two different methods: +% - The PAPI High Level flops call +% - PAPI High Level start/stop calls +% +% For each size, display: +% - number of floating point operations +% - theoretical number of operations +% - difference +% - per cent error +% - mflops/s + +fprintf(1,'\nPAPI Matrix Matrix Multiply Test'); +fprintf(1,'\nUsing the High Level PAPI("flops") call'); +fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^3', 'difference', '% error', 'mflops') +for n=50:50:500, + a=rand(n);b=rand(n);c=rand(n); + PAPI('stop'); % reset the counters to zero + PAPI('flops'); % start counting flops + c=c+a*b; + [count, mflops] = PAPI('flops'); % read the flops data + fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,count,2*n^3,count - 2*n^3, (1.0 - ((2*n^3) / count)) * 100,mflops) +end +PAPI('stop'); + +fprintf(1,'\nPAPI Matrix Matrix Multiply Test'); +fprintf(1,'\nUsing PAPI start and stop'); +fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^3', 'difference', '% error', 'flops/cycle') +for n=50:50:500, + a=rand(n);b=rand(n);c=rand(n); + PAPI('start', 'PAPI_TOT_CYC', 'PAPI_FP_OPS'); + c=c+a*b; + [cyc, ops] = PAPI('stop'); + fprintf(1,'%12d %12d %12d %12d %12.2f %12.6f\n',n,ops,2*n^3,ops - 2*n^3, (1.0 - ((2*n^3) / ops)) * 100,ops/cyc) +end \ No newline at end of file diff --git a/src/Matlab/PAPIMatrixVector.m b/src/Matlab/PAPIMatrixVector.m new file mode 100644 index 0000000..58a7c74 --- /dev/null +++ b/src/Matlab/PAPIMatrixVector.m @@ -0,0 +1,40 @@ +function PAPIMatrixVector + +% Compute a Matrix Vector multiply +% on arrays and vectors sized from 50 to 500, +% in steps of 50. 
+% +% Use the PAPI mex function with two different methods: +% - The PAPI High Level flops call +% - PAPI High Level start/stop calls +% +% For each size, display: +% - number of floating point operations +% - theoretical number of operations +% - difference +% - per cent error +% - mflops/s + +fprintf(1,'\nPAPI Matrix Vector Multiply Test'); +fprintf(1,'\nUsing the High Level PAPI("flops") call'); +fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^2', 'difference', '% error', 'mflops') +for n=50:50:500, + a=rand(n);x=rand(n,1); + PAPI('stop'); % reset the counters to zero + PAPI('flops'); % start counting flops + b=a*x; + [count, mflops] = PAPI('flops'); % read the flops data + fprintf(1,'%12d %12d %12d %12d %12.2f %12.2f\n',n,count,2*n^2,count - 2*n^2, (1.0 - ((2*n^2) / count)) * 100,mflops) +end +PAPI('stop'); + +fprintf(1,'\nPAPI Matrix Vector Multiply Test'); +fprintf(1,'\nUsing PAPI start and stop'); +fprintf(1,'\n%12s %12s %12s %12s %12s %12s\n', 'n', 'ops', '2n^2', 'difference', '% error', 'flops/cycle') +for n=50:50:500, + a=rand(n);x=rand(n,1); + PAPI('start', 'PAPI_TOT_CYC', 'PAPI_FP_OPS'); + c=a*x; + [cyc, ops] = PAPI('stop'); + fprintf(1,'%12d %12d %12d %12d %12.2f %12.6f\n',n,ops,2*n^2,ops - 2*n^2, (1.0 - ((2*n^2) / ops)) * 100,ops/cyc) +end \ No newline at end of file diff --git a/src/Matlab/PAPI_Matlab.c b/src/Matlab/PAPI_Matlab.c new file mode 100755 index 0000000..8e73bfd --- /dev/null +++ b/src/Matlab/PAPI_Matlab.c @@ -0,0 +1,249 @@ + +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file: PAPI_Matlab.c + * CVS: $Id$ + * @author Joseph Thomas + * + * @brief PAPI Matlab integration. + * See PAPI_Matlab.readme for more information. 
+ */ +#include "mex.h" +#include "matrix.h" +#include "papi.h" + +static long long accum_error = 0; +static long long start_time = 0; + +void mexFunction(int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) { + float real_time, proc_time, rate; + int i; + int number_of_counters; + unsigned int mrows, nchars; + unsigned int *events; + unsigned int flop_events[2]; + long long ins = 0, *values, flop_values[2]; + long long elapsed_time; + int result; + char *input, *temp; + char one_output[] = "This function produces one output per running counter."; + char no_input[] = "This function expects no input."; + char error_reading[] = "Error reading the running counters."; + + /* Check for proper number of arguments. */ + if(nrhs < 1) { + mexErrMsgTxt("This function expects input."); + } + nchars = mxGetNumberOfElements(prhs[0]); + input = (char *)mxCalloc(nchars, sizeof(char) + 1); + input = mxArrayToString(prhs[0]); + + if(!strncmp(input, "num", 3)) { + if(nrhs != 1) { + mexErrMsgTxt(no_input); + } + else if(nlhs != 1) { + mexErrMsgTxt("This function produces one and only one output: counters."); + } + result = PAPI_num_counters(); + if(result < PAPI_OK) { + mexPrintf("Error code: %d\n", result); + mexErrMsgTxt("Error reading counters."); + } + plhs[0] = mxCreateDoubleScalar((double)result); + } + + else if((!strncmp(input, "flip", 4)) || (!strncmp(input, "flop", 4))) { + if(nrhs != 1) { + mexErrMsgTxt(no_input); + } else if(nlhs > 2) { + if (input[2] == 'i') + mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflips]."); + else + mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflops]."); + } + if (input[2] == 'i') { + if(result = PAPI_flips( &real_time, &proc_time, &ins, &rate) 0) { + plhs[0] = mxCreateDoubleScalar((double)(ins - accum_error)); + /* this call adds 7 fp instructions to the total */ + /* but apparently not on Pentium M with Matlab 7.0.4 */ +/* accum_error += 7; */ + if(nlhs == 2) { + plhs[1] = 
mxCreateDoubleScalar((double)rate); + /* the second call adds 4 fp instructions to the total */ + /* but apparently not on Pentium M with Matlab 7.0.4 */ +/* accum_error += 4; */ + } + } + } + + else if(!strncmp(input, "start", 5)) { + if(nlhs != 0) { + mexErrMsgTxt("This function produces no output."); + } + if(nrhs > (PAPI_num_counters() + 1)) { + mexErrMsgTxt(one_output); + } + mrows = mxGetM(prhs[1]); + events = (unsigned int *)mxCalloc(nrhs - 1, sizeof(int) + 1); + for(i = 1; i < nrhs; i++) { + if(mxIsComplex(prhs[i]) || !(mrows == 1) ) { + mexErrMsgTxt("Input must be a list of strings."); + } + if(mxIsChar(prhs[i])) { + nchars = mxGetNumberOfElements(prhs[i]); + temp = (char *)mxCalloc(nchars, sizeof(char) + 1); + temp = mxArrayToString(prhs[i]); + if(result = PAPI_event_name_to_code(temp, &(events[i - 1])) < PAPI_OK) { + mxFree(temp); + mexPrintf("Error code: %d\n", result); + mexErrMsgTxt("Incorrect PAPI code given."); + } + mxFree(temp); + } + else { + events[i - 1] = (unsigned int)mxGetScalar(prhs[i]); + } + } + if((result = PAPI_start_counters(events, nrhs - 1)) < PAPI_OK) { + mxFree(events); + mexPrintf("Error code: %d\n", result); + mexErrMsgTxt("Error initializing counters."); + } + mxFree(events); + } + + else if(!strncmp(input, "stop", 4)) { + if(nrhs != 1) { + mexErrMsgTxt(no_input); + } + number_of_counters = PAPI_num_counters(); + if(nlhs > number_of_counters ) { + mexErrMsgTxt(one_output); + } + if (nlhs == 0) + values = (long long*)mxCalloc(number_of_counters, sizeof(long long)); + else + values = (long long *)mxCalloc(nlhs, sizeof(long long) + 1); + + if (start_time == 0) { + if (nlhs == 0) + result = PAPI_stop_counters(values, number_of_counters); + else + result = PAPI_stop_counters(values, nlhs); + } else { + start_time = 0; + result = PAPI_stop_counters(flop_values, 2); + } + + if(result < PAPI_OK) { + if(result != PAPI_ENOTRUN) { + mexPrintf("Error code: %d\n", result); + mexErrMsgTxt("Error stopping the running counters."); + } + } + 
accum_error = 0; + for(i = 0; i < nlhs; i++) { + plhs[i] = mxCreateDoubleScalar((double)values[i]); + } + mxFree(values); + } + + else if(!strncmp(input, "read", 4)) { + if(nrhs != 1) { + mexErrMsgTxt(no_input); + } + if(nlhs > PAPI_num_counters()) { + mexErrMsgTxt(one_output); + } + values = (long long *)mxCalloc(nlhs, sizeof(long long) + 1); + if((result = PAPI_read_counters(values, nlhs)) < PAPI_OK) { + mexPrintf("%d\n", result); + mexErrMsgTxt(error_reading); + } + for(i = 0; i < nlhs; i++) { + plhs[i] = mxCreateDoubleScalar((double)values[i]); + } + mxFree(values); + } + + else if(!strncmp(input, "accum", 5)) { + if(nrhs > PAPI_num_counters() + 1) { + mexErrMsgTxt(no_input); + } + if(nlhs > PAPI_num_counters()) { + mexErrMsgTxt(one_output); + } + values = (long long *)mxCalloc(nlhs, sizeof(long long) + 1); + for(i = 0; i < nrhs - 1; i++) { + values[i] = (long long)(*(mxGetPr(prhs[i + 1]))); + } + if(result = PAPI_accum_counters(values, nlhs) < PAPI_OK) { + mexPrintf("Error code: %d\n", result); + mexErrMsgTxt(error_reading); + } + for(i = 0; i < nlhs; i++) { + plhs[i] = mxCreateDoubleScalar((double)values[i]); + } + mxFree(values); + } + + else if(!strncmp(input, "ipc", 3)) { + if(nrhs != 1) { + mexErrMsgTxt(no_input); + } else if(nlhs > 2) { + mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, ipc]."); + } + if(PAPI_ipc(&real_time, &proc_time, &ins, &rate) 0) { + plhs[0] = mxCreateDoubleScalar((double)ins); + if(nlhs == 2) { + plhs[1] = mxCreateDoubleScalar((double)rate); + } + } + } + + else { + mexPrintf("Cannot find the command you specified.\n"); + mexErrMsgTxt("See the included readme file."); + } +} diff --git a/src/Matlab/PAPI_Matlab.dsw b/src/Matlab/PAPI_Matlab.dsw new file mode 100644 index 0000000..79f4172 --- /dev/null +++ b/src/Matlab/PAPI_Matlab.dsw @@ -0,0 +1,41 @@ +Microsoft Developer Studio Workspace File, Format Version 6.00 +# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE! 
+ +############################################################################### + +Project: "PAPI_Matlab"=.\PAPI_Matlab.dsp - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ +}}} + +############################################################################### + +Project: "PAPI_flops"=.\PAPI_flops.dsp - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ +}}} + +############################################################################### + +Global: + +Package=<5> +{{{ +}}} + +Package=<3> +{{{ +}}} + +############################################################################### + diff --git a/src/Matlab/PAPI_Matlab.readme b/src/Matlab/PAPI_Matlab.readme new file mode 100755 index 0000000..b935ad8 --- /dev/null +++ b/src/Matlab/PAPI_Matlab.readme @@ -0,0 +1,184 @@ +Running PAPI's High Level API in the MATLAB Environment + +If you have the desire to do this, you most likely already +know why you want to make calls to PAPI inside of a +MATLAB environment. + +If you don't know much about what composes PAPI's high +level API, you should probably take a look at this: + +http://icl.cs.utk.edu/projects/papi/files/documentation/PAPI_USER_GUIDE_23.htm#WHAT_IS_HIGH_LEVEL_API + +This section of the PAPI user guide covers C and FORTRAN +calls, but at the moment, you can only make C calls +from the MATLAB environment. + +There is one overall function to call from Matlab; from there, +you specify which of the 6 specific functions you want to call, +and then the arguments to each. Here are some examples: + +PAPI_num_counters - Returns the number of available + hardware counters on the system. 
+ +Ex: num_counters = PAPI('num') + + +PAPI_flips - Has 3 possibilities: + Initialize FLIP counting with: + PAPI('flips') + Record the number of floating point instructions since + initialization: + ops = PAPI('flips') + Record the number of floating point instructions and + the incremental rate of floating point execution + since initialization: + [ops, mflips] = PAPI('flips') + Use PAPI_stop_counters to stop counting flips and reset the counters. + +PAPI_flops - Identical to PAPI_flips, + but counts floating point *operations* rather than instructions. + In most cases, these two are identical, but some instructions + (e.g. FMA) might contain multiple operations or vice versa. + +PAPI_ipc - Has 3 possibilities: + Initialize instruction per cycle counting with: + PAPI('ipc') + Record the number of instructions since + initialization: + ins = PAPI('ipc') + Record the number of instructions and + the incremental rate of instructions per cycle + since initialization: + [ins, ipc] = PAPI('ipc') + + +PAPI_start_counters - Specify the events to count + (in text form or the actual numeric code; NOTE: make sure + to not confuse normal decimal and hexadecimal.) You cannot + specify more events than there are hardware counters. + + To begin counting cycles and instructions: + PAPI('start', 'PAPI_TOT_CYC', 'PAPI_TOT_INS'); + + +PAPI_read_counters - Simply specify the variables to read + the values into. You cannot specify more variables + than there are hardware counters. This will reset the + counters. + + To read the above events you just started: + [cycles, instructions] = PAPI('read'); + + +PAPI_accum_counters - This function adds the value you + pass to the readings in the hardware counter. You + cannot specify more variables than there are hardware + counters. This function will reset the counters. 
+ + To add the values currently in the counters to the + previously read values: + [cycles, instructions] = PAPI('accum', cycles, instructions); + + +PAPI_stop_counters - This function reads the value of + the running hardware counters into the variables + you specify. You cannot specify more variables than + there are hardware counters. + + To stop the running counters you previously started + and record their values: + [cycles, instructions] = PAPI('stop'); + + +PAPI_Matlab.c, when compiled, functions simply as a +wrapper. In order to use the calls, you need to know a little +about mex. mex is simply the compiler you use to make your +code run in the MATLAB environment. If you don't know +how to use mex, you might want to acquaint yourself a bit. + +"mex -setup" might be needed if you encounter problems, but +the simplest explanation might be to substitute "mex" for +"gcc" and you are on your way. + +All the other rules for compiling PAPI are the same. mex +compilations can be done inside or outside of the Matlab +environment, but in this case, it is recommended that you +compile outside of Matlab. For some reason, compiling +inside does not work on some systems. + +So far, the Linux environment and the Windows environment +have been tested, but _in theory_ this code should work anywhere +PAPI and Matlab both work. + +The following instructions are for a Linux/Unix environment: + +Assuming papi.h is present in /usr/local/include and libpapi.so +is present in /usr/local/lib, the below should work. If not, +you may need to alter the compile strings and/or the #include +statement in PAPI_Matlab.c. Also, the compile string will be +different for different platforms. + +For instance, if you want to compile and run on a linux +machine assuming PAPI_Matlab.c is in your current working +directory (you'll have a different compile string on a +different architecture): + +1. 
Compile the wrapper: +mex -I/usr/local/include PAPI_Matlab.c /usr/local/lib/libpapi.so -output PAPI + +2. Start Matlab: +matlab + +3. Run the code: +a. Find the number of hardware counters on your system: +num_counters = PAPI('num') + + +b. Play with flips - the first makes sure the counters are stopped and clear; + the second initializes the counting; + the third returns the number of floating point instructions + since the first call, and the fourth line does the same as the + third AND reports the incremental rate of floating point + execution since the last call: +PAPI('stop') +PAPI('flips') +ins = PAPI('flips') +[ins, mflips] = PAPI('flips') + +c. Play with instructions per cycle - the first makes sure the + counters are stopped and clear; the second initializes counting; + the third returns the number of instructions + since the first call, and the fourth line does the same as the + third AND reports the incremental rate of instructions per + cycle since the last call: +PAPI('stop') +PAPI('ipc') +ins = PAPI('ipc') +[ins, ipc] = PAPI('ipc') + +d. Try the example m files included with the distribution: +PAPIInnerProduct.m +PAPIMatrixVector.m +PAPIMatrixMatrix.m + +e. Start counting: +PAPI('start', 'PAPI_TOT_CYC', 'PAPI_TOT_INS') + +f. Read the counters and reset: +[cycles, instr] = PAPI('read') + +g. Add the current value of the counters to a previous read + and reset: +[cycles, instr] = PAPI('accum', cycles, instr) + +h. Read the counters and stop them: +[cycles, instr] = PAPI('stop') + +You can pass as many events as you like to be counted or +recorded, as long as that number does not exceed the number +of available hardware counters. + +Contact ralph@eecs.utk.edu or/and ptools-perfapi@icl.utk.edu +with any questions regarding PAPI calls in Matlab - either errors or questions. +Also, this has just been implemented, so changes could +be coming.......... 
diff --git a/src/Matlab/PAPI_Matlab.vcproj b/src/Matlab/PAPI_Matlab.vcproj new file mode 100644 index 0000000..7400538 --- /dev/null +++ b/src/Matlab/PAPI_Matlab.vcproj @@ -0,0 +1,220 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/Matlab/PAPI_flops.c b/src/Matlab/PAPI_flops.c new file mode 100644 index 0000000..6437299 --- /dev/null +++ b/src/Matlab/PAPI_flops.c @@ -0,0 +1,88 @@ +#include "mex.h" +#include "matrix.h" +#include "papi.h" + +static long long accum_error = 0; +static long long start_time = 0; + +void mexFunction(int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) { + float real_time, proc_time, rate; + double *x; + unsigned int mrows, ncols; + int result; + unsigned int flop_events[2]; + long long ins = 0, flop_values[2]; + long long elapsed_time; + + /* Check for proper number of arguments. */ + if(nrhs > 1) { + mexErrMsgTxt("This function expects one optional input."); + } else if(nlhs > 2) { + mexErrMsgTxt("This function produces 1 or 2 outputs: [ops, mflops]."); + } + /* The input must be a noncomplex scalar double.*/ + if(nrhs == 1) { + mrows = mxGetM(prhs[0]); + ncols = mxGetN(prhs[0]); + if(!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]) || !(mrows == 1 && ncols == 1)) { + mexErrMsgTxt("Input must be a noncomplex scalar double."); + } + /* Assign a pointer to the input. 
*/ + x = mxGetPr(prhs[0]); + + /* if input is 0, reset the counters by calling PAPI_stop_counters with 0 values */ + if(*x == 0) { + if (start_time == 0) { + PAPI_stop_counters(NULL, 0); + accum_error = 0; + } else { + start_time = 0; + PAPI_stop_counters(flop_values, 2); + } + } + } + if(result = PAPI_event_name_to_code("EMON_SSE_SSE2_COMP_INST_RETIRED_PACKED_DOUBLE", &(flop_events[0])) < PAPI_OK) { + if(result = PAPI_flops( &real_time, &proc_time, &ins, &rate) 0) { + plhs[0] = mxCreateScalarDouble((double)(ins - accum_error)); + /* this call adds 7 fp instructions to the total */ + /* but apparently not on Pentium M with Matlab 7.0.4 */ +// accum_error += 7; + if(nlhs == 2) { + plhs[1] = mxCreateScalarDouble((double)rate); + /* the second call adds 4 fp instructions to the total */ + /* but apparently not on Pentium M with Matlab 7.0.4 */ +// accum_error += 4; + } + } +} diff --git a/src/Matlab/PAPI_flops.vcproj b/src/Matlab/PAPI_flops.vcproj new file mode 100644 index 0000000..827c75c --- /dev/null +++ b/src/Matlab/PAPI_flops.vcproj @@ -0,0 +1,208 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/Matlab/flops.m b/src/Matlab/flops.m new file mode 100644 index 0000000..fa620f6 --- /dev/null +++ b/src/Matlab/flops.m @@ -0,0 +1,46 @@ +% FLOPS Floating point operation count. +% FLOPS returns the cumulative number of floating point operations. +% +% FLOPS(0) - Initialize PAPI library, reset counters +% to zero and begin counting. +% ops = FLOPS - Return the number of floating point +% operations since the first call or last reset. +% [ops, mflops] = FLOPS - Return both the number of floating point +% operations since the first call or last reset, +% and the incremental rate of floating point +% execution since the last call. +% +% DESCRIPTION +% The PAPI flops function uses the PAPI Performance API to do the heavy +% lifting. 
PAPI takes advantage of the fact that most modern microprocessors +% have built-in hardware support for counting a variety of basic operations +% or events. PAPI uses these counters to track things like instructions +% executed, cycles elapsed, floating point instructions performed and +% a variety of other events. +% The first call to flops will initialize PAPI, set up the counters to +% monitor floating point instructions and total cpu cycles, and start +% the counters. Subsequent calls will return one or two values. The first +% value is the number of floating point operations since the first call or +% last reset. The second optional value, the execution rate in mflops, can +% also be returned. The mflops rate is computed by dividing the operations +% since the last call by the cycles since the last call and multiplying by +% cycles per second: +% mflops = ((ops/cycles)*(cycles/second))/10^6 +% The cycles per second value is a derived number determined empirically +% by counting cycles for a fixed amount of system time during the +% initialization of the PAPI library. Because of the way it is determined, +% this value can be a small but consistent source of systematic error, +% and can introduce differences between rates measured by PAPI and those +% determined by other time measurements, for example, tic and toc. Also +% note that PAPI on Windows counts events on a system level rather than +% a process or thread level. This can lead to an over-reporting of cycles, +% and typically an under-reporting of mflops. +% The flops function continues counting after any call. A call with an +% input of 0 resets the counters and returns 0. + +% Copyright 2001 - 2004 The Innovative Computing Laboratory, +% University of Tennessee. 
+% $Revision$ $Date$ + + + diff --git a/src/Matlab/flops.readme b/src/Matlab/flops.readme new file mode 100644 index 0000000..492e2dc --- /dev/null +++ b/src/Matlab/flops.readme @@ -0,0 +1,83 @@ +Running PAPI FLOPS in the MATLAB Environment + +If you want to measure the rate of floating point operations +in a MATLAB program, PAPI FLOPS is a good way to do it. +PAPI FLOPS uses the PAPI library and underlying hardware counters +to accurately compute both the total number of floating point operations +and the rate of floating point execution in a section of +MATLAB code. + +For more information on the flops function, +you can type 'help flops' inside MATLAB. + +FLOPS - Has 3 execution possibilities: + Initialize FLOP counting or reset the counters with: + FLOPS(0) + Record the number of floating point operations since + initialization: + ops = FLOPS + Record the number of floating point operations and + the incremental rate of floating point execution + since initialization: + [ops, mflops] = FLOPS + +PAPI_flops.c, when compiled, turns into a callable function in MATLAB. + In order to use this function, you need to know a little +about mex. mex is simply the compiler you use to make your +code run in the MATLAB environment. If you don't know +how to use mex, you might want to acquaint yourself a bit. + +"mex -setup" might be needed if you encounter problems, but +the simplest explanation might be to substitute "mex" for +"gcc" and you are on your way. + +All the other rules for compiling PAPI are the same. mex +compilations can be done inside or outside of the Matlab +environment, but in this case, it is recommended that you +compile outside of Matlab. For some reason, compiling +inside does not work on some systems. + +So far, the Linux environment and the Windows environment +have been tested, but _in theory_ this code should work anywhere +PAPI and Matlab both work. 
+ +The following instructions are for a Linux/Unix environment: + +Assuming papi.h is present in /usr/local/include and libpapi.a +is present in /usr/local/lib, the below should work. If not, +you may need to alter the compile strings and/or the #include +statement in PAPI_flops.c. Also, the compile string will be +different for different platforms. + +For instance, if you want to compile and run on a linux +machine assuming PAPI_flops.c is in your current working +directory (you'll have a different compile string on a +different architecture): + +1. Compile the wrapper: +mex -I/usr/local/include PAPI_flops.c /usr/local/lib/libpapi.a -output flops + +2. Start Matlab: +matlab + +3. Run the code: + +a. Play with FLOPS - the first initializes the counting; + the second returns the number of floating point instructions + since the first call, and the third line does the same as the + second AND reports the incremental rate of floating point + execution since the last call: +FLOPS(0) +ins = FLOPS +[ins, mflips] = FLOPS + +b. Try the example m files included with the distribution: +FlopsInnerProduct.m +FlopsMatrixVector.m +FlopsMatrixMatrix.m +FlopsSampler.m + +Contact jthomas@cs.utk.edu with any questions regarding +PAPI calls in Matlab - either errors or questions. +Also, this has just been implemented, so changes could +be coming.......... diff --git a/src/README b/src/README new file mode 100644 index 0000000..ea4cea4 --- /dev/null +++ b/src/README @@ -0,0 +1,8 @@ +/* +* File: papi/src/README +* CVS: $Id$ +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +Please see the README in the root directory. 
diff --git a/src/Rules.bgpm b/src/Rules.bgpm new file mode 100644 index 0000000..2d609db --- /dev/null +++ b/src/Rules.bgpm @@ -0,0 +1,23 @@ +# $Id: Rules.bgpm,v 1.1 2011/03/11 23:06:54 jagode Exp $ + +ifneq ($(USE_DEBUG),) + BGPM_LIBNAME = bgpm_debug + DEBUG_BGPM = "-DDEBUG_BGPM" +else + BGPM_LIBNAME = bgpm +endif + + +BGPM_OBJS=$(shell $(AR) t $(BGPM_INSTALL_DIR)/bgpm/lib/lib$(BGPM_LIBNAME).a && $(AR) 2>/dev/null) + +MISCOBJS = $(BGPM_OBJS) $(MISCSRCS:.c=.o) + +include Makefile.inc + +CFLAGS += -I$(BGPM_INSTALL_DIR) -I$(BGPM_INSTALL_DIR)/spi/include/kernel/cnk $(DEBUG_BGPM) +LDFLAGS += $(BGPM_INSTALL_DIR)/bgpm/lib/lib$(BGPM_LIBNAME).a -lrt -lstdc++ + + +$(BGPM_OBJS): + $(AR) xv $(BGPM_INSTALL_DIR)/bgpm/lib/lib$(BGPM_LIBNAME).a + diff --git a/src/Rules.perfctr b/src/Rules.perfctr new file mode 100644 index 0000000..d713c4c --- /dev/null +++ b/src/Rules.perfctr @@ -0,0 +1,110 @@ +DESCR = "Linux with PerfCtr $(VERSION) patch and library" + +ifneq (/usr,$(PERFCTR_PREFIX)) +PWD = $(shell pwd) +ifeq (,$(PERFCTR_LIB_PATH)) +ifeq (,$(PERFCTR_ROOT)) + PERFCTR_ROOT := $(PWD)/perfctr-$(VERSION) +endif + PERFCTR_LIB_PATH := $(PERFCTR_ROOT)/usr.lib + CC_SHR += -Wl,-rpath-link -Wl,$(PERFCTR_LIB_PATH) +endif +ifeq (,$(PERFCTR_INC_PATH)) +ifeq (,$(PERFCTR_ROOT)) + PERFCTR_ROOT := $(PWD)/perfctr-$(VERSION) +endif + PERFCTR_INC_PATH := $(PERFCTR_ROOT)/usr.lib + PERFCTR_KINC_PATH := $(PERFCTR_ROOT)/linux/include +else + PERFCTR_KINC_PATH := $(PERFCTR_INC_PATH) +endif +ifneq (/usr/include,$(PERFCTR_INC_PATH)) +CFLAGS += -I$(PERFCTR_INC_PATH) +endif +ifneq (/usr/include/linux/include,$(PERFCTR_KINC_PATH)) +CFLAGS += -I$(PERFCTR_KINC_PATH) +endif +endif + +CFLAGS-2.6.x := -DPERFCTR26 +CFLAGS-2.7.x := $(CFLAGS-2.6.x) +CFLAGS += $(CFLAGS-$(VERSION)) +MISCHDRS += linux-lock.h mb.h +MISCSRCS += $(ARCH_SPEC_EVTS) +SHLIBDEPS = -Bdynamic -L$(PERFCTR_LIB_PATH) -lperfctr +PERFCTR_OBJS = $(shell $(AR) t $(PERFCTR_LIB_PATH)/libperfctr.a 2> /dev/null) +MISCOBJS = $(PERFCTR_OBJS) $(MISCSRCS:.c=.o) 
+ +ifeq (,$(PERFCTR_OBJS)) +$(PERFCTR_LIB_PATH)/libperfctr.a: +ifneq (,${PERFCTR_ROOT}) + ifeq (ppc64,${ARCH}) + $(MAKE) CC='$(CC) $(BITFLAGS)' -C $(PERFCTR_ROOT) + endif + ifeq (x86_64,${ARCH}) + ifeq (-m32,${BITFLAGS}) + $(MAKE) CC='$(CC) $(BITFLAGS)' LD_FLAGS="-melf_i386" ARCH="i386" -C $(PERFCTR_ROOT) + else + $(MAKE) -C $(PERFCTR_ROOT) + endif + else + $(MAKE) -C $(PERFCTR_ROOT) + endif +else + @echo '$@ not installed!'; exit 1 +endif + $(MAKE) +endif + +#Dynamic linking loader needs correct version of libperfctr +ifeq ($(VERSION),2.7.x) + SO_ABIVER=6 +else +ifeq ($(VERSION),2.6.x) + i386_ABIVER=5 + x86_64_ABIVER=6 + ppc_ABIVER=5 + SO_ABIVER=$($(ARCH)_ABIVER) +endif +endif + +POST_BUILD = cp $(PERFCTR_LIB_PATH)/libperfctr.so .;ln -fs libperfctr.so libperfctr.so.$(SO_ABIVER) + +include Makefile.inc + +config.h: + @echo 'Please clobber your build and run ./configure. Direct Makefile access has been deprecated.' + +$(PERFCTR_OBJS): $(PERFCTR_LIB_PATH)/libperfctr.a + $(AR) xv $< + +$(CPU)_events.o: $(CPU)_events.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c $(CPU)_events.c -o $@ + +#perfctr.o: perfctr.c $(HEADERS) +# $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c perfctr.c -o $@ + +native_clean: + -rm -f $(MISCOBJS) libperfctr.so* +ifneq (,${PERFCTR_ROOT}) + $(MAKE) -C $(PERFCTR_ROOT) clean +endif + +native_install: +ifeq ($(ARCH),ppc64) + -mkdir -p $(DESTDIR)$(DATADIR) + -chmod go+rx $(DESTDIR)$(DATADIR) + cp -Rf ./event_data $(DESTDIR)$(DATADIR) +endif +ifneq (,${PERFCTR_ROOT}) +ifeq (-m32,${BITFLAGS}) + $(MAKE) -C $(PERFCTR_ROOT) PREFIX=$(DESTDIR)$(PREFIX) DESTDIR=$(DESTDIR) BINDIR=$(DESTDIR)$(BINDIR) LIBDIR=$(DESTDIR)$(LIBDIR) INCLDIR=$(DESTDIR)$(INCDIR) ARCH="i386" install +else + $(MAKE) -C $(PERFCTR_ROOT) PREFIX=$(DESTDIR)$(PREFIX) DESTDIR=$(DESTDIR) BINDIR=$(DESTDIR)$(BINDIR) LIBDIR=$(DESTDIR)$(LIBDIR) INCLDIR=$(DESTDIR)$(INCDIR) install +endif +endif + +native_clobber: +ifneq (,${PERFCTR_ROOT}) + $(MAKE) -C $(PERFCTR_ROOT) distclean +endif diff --git 
a/src/Rules.perfctr-pfm b/src/Rules.perfctr-pfm new file mode 100644 index 0000000..afe25a3 --- /dev/null +++ b/src/Rules.perfctr-pfm @@ -0,0 +1,157 @@ +DESCR = "Linux with PerfCtr $(VERSION) patch and library, and perfmon2 libpfm library" + +ifneq (/usr,$(PERFCTR_PREFIX)) +PWD = $(shell pwd) +ifeq (,$(PERFCTR_LIB_PATH)) +ifeq (,$(PERFCTR_ROOT)) + PERFCTR_ROOT := $(PWD)/perfctr-$(VERSION) +endif + PERFCTR_LIB_PATH := $(PERFCTR_ROOT)/usr.lib + CC_SHR += -Wl,-rpath-link -Wl,$(PERFCTR_LIB_PATH) +endif +ifeq (,$(PERFCTR_INC_PATH)) +ifeq (,$(PERFCTR_ROOT)) + PERFCTR_ROOT := $(PWD)/perfctr-$(VERSION) +endif + PERFCTR_INC_PATH := $(PERFCTR_ROOT)/usr.lib + PERFCTR_KINC_PATH := $(PERFCTR_ROOT)/linux/include +else + PERFCTR_KINC_PATH := $(PERFCTR_INC_PATH) +endif +ifneq (/usr/include,$(PERFCTR_INC_PATH)) +CFLAGS += -I$(PERFCTR_INC_PATH) +endif +ifneq (/usr/include/linux/include,$(PERFCTR_KINC_PATH)) +CFLAGS += -I$(PERFCTR_KINC_PATH) +endif +endif + +CFLAGS-2.6.x := -DPERFCTR26 +CFLAGS-2.7.x := $(CFLAGS-2.6.x) +CFLAGS += $(CFLAGS-$(VERSION)) +MISCHDRS += linux-lock.h mb.h +MISCSRCS += $(ARCH_SPEC_EVTS) +PERFCTR_OBJS = $(shell $(AR) t $(PERFCTR_LIB_PATH)/libperfctr.a 2> /dev/null) + +ifeq (,$(PERFCTR_OBJS)) +$(PERFCTR_LIB_PATH)/libperfctr.a: +ifneq (,${PERFCTR_ROOT}) + ifeq (ppc64,${ARCH}) + $(MAKE) CC='$(CC) $(BITFLAGS)' -C $(PERFCTR_ROOT) + endif + ifeq (x86_64,${ARCH}) + ifeq (-m32,${BITFLAGS}) + $(MAKE) CC='$(CC) $(BITFLAGS)' LD_FLAGS="-melf_i386" ARCH="i386" -C $(PERFCTR_ROOT) + else + $(MAKE) CC="$(CC)" -C $(PERFCTR_ROOT) + endif + else + $(MAKE) CC="$(CC)" -C $(PERFCTR_ROOT) + endif +else + @echo '$@ not installed!'; exit 1 +endif + $(MAKE) +endif + +#Dynamic linking loader needs correct version of libperfctr +ifeq ($(VERSION),2.7.x) + SO_ABIVER=6 +else +ifeq ($(VERSION),2.6.x) + i386_ABIVER=5 + x86_64_ABIVER=6 + ppc_ABIVER=5 + SO_ABIVER=$($(ARCH)_ABIVER) +endif +endif + +POST_BUILD = cp $(PERFCTR_LIB_PATH)/libperfctr.so .;ln -fs libperfctr.so libperfctr.so.$(SO_ABIVER) 
+ +#################################################################### +# This section builds libpfm to get at its support for native events +ifneq (/usr,$(PFM_PREFIX)) +PWD = $(shell pwd) +ifeq (,$(PFM_LIB_PATH)) +ifeq (,$(PFM_ROOT)) + PFM_ROOT := $(PWD)/libpfm-3.y +endif + PFM_LIB_PATH := $(PFM_ROOT)/lib + CC_SHR += -Wl,-rpath-link -Wl,$(PFM_LIB_PATH) +endif +ifeq (,$(PFM_INC_PATH)) +ifeq (,$(PFM_ROOT)) + PFM_ROOT := $(PWD)/libpfm-3.y +endif + PFM_INC_PATH := $(PFM_ROOT)/include +endif +ifneq (/usr/include,$(PFM_INC_PATH)) +CFLAGS += -I$(PFM_INC_PATH) +endif +endif + +CFLAGS += -DPERFCTR_PFM_EVENTS +PFM_OBJS=$(shell $(AR) t $(PFM_LIB_PATH)/libpfm.a 2>/dev/null) +PFM_LIBS=-L$(PFM_LIB_PATH) -lpfm +MISCSRCS += papi_libpfm3_events.c +MISCOBJS += $(PERFCTR_OBJS) $(PFM_OBJS) $(MISCSRCS:.c=.o) +MISCHDRS += papi_libpfm_events.h +SHLIBDEPS = -Bdynamic -L$(PERFCTR_LIB_PATH) -lperfctr $(PFM_LIBS) + +ifeq (,$(PFM_OBJS)) +$(PFM_LIB_PATH)/libpfm.a: + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" CC="$(CC)" lib + $(MAKE) +endif +#################################################################### + +include Makefile.inc + +config.h: + @echo 'Please clobber your build and run ./configure. Direct Makefile access has been deprecated.' 
+ +$(PERFCTR_OBJS): $(PERFCTR_LIB_PATH)/libperfctr.a + $(AR) xv $< + +$(PFM_OBJS): $(PFM_LIB_PATH)/libpfm.a + $(AR) xv $< + +#perfctr.o: perfctr.c $(HEADERS) +# $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c perfctr.c -o $@ + +papi_libpfm3_events.o: papi_libpfm3_events.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_libpfm3_events.c -o $@ + +native_clean: + -rm -f $(MISCOBJS) libperfctr.so* +ifneq (,${PERFCTR_ROOT}) + $(MAKE) -C $(PERFCTR_ROOT) clean +endif +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" clean +endif + +native_install: + -install -d $(DESTDIR)$(DATADIR) + -cp -f ./papi_events.csv $(DESTDIR)$(DATADIR) +ifeq ($(ARCH),ppc64) + cp -Rf ./event_data $(DESTDIR)$(DATADIR) +endif +ifneq (,${PFM_ROOT}) + -$(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" CC=$(CC) DESTDIR=$(DESTDIR) PREFIX=$(PREFIX) install_prefix=$(PREFIX) LIBDIR=$(LIBDIR) INCDIR=$(INCDIR) MANDIR=$(MANDIR) install +endif +ifneq (,${PERFCTR_ROOT}) +ifeq (-m32,${BITFLAGS}) + $(MAKE) -C $(PERFCTR_ROOT) PREFIX=$(DESTDIR)$(PREFIX) BINDIR=$(DESTDIR)$(BINDIR) LIBDIR=$(DESTDIR)$(LIBDIR) INCLDIR=$(DESTDIR)$(INCDIR) ARCH="i386" install +else + $(MAKE) -C $(PERFCTR_ROOT) PREFIX=$(DESTDIR)$(PREFIX) BINDIR=$(DESTDIR)$(BINDIR) LIBDIR=$(DESTDIR)$(LIBDIR) INCLDIR=$(DESTDIR)$(INCDIR) install +endif +endif + +native_clobber: +ifneq (,${PERFCTR_ROOT}) + $(MAKE) -C $(PERFCTR_ROOT) distclean +endif +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" distclean +endif diff --git a/src/Rules.perfmon2 b/src/Rules.perfmon2 new file mode 100644 index 0000000..4f86b6c --- /dev/null +++ b/src/Rules.perfmon2 @@ -0,0 +1,72 @@ +DESCR = "Linux with perfmon2 kernel support and library" + +ifneq (/usr,$(PFM_PREFIX)) +PWD = $(shell pwd) +ifeq (,$(PFM_LIB_PATH)) +ifeq (,$(PFM_ROOT)) + PFM_ROOT := $(PWD)/libpfm-3.y +endif + PFM_LIB_PATH := $(PFM_ROOT)/lib + CC_SHR += -Wl,-rpath-link -Wl,$(PFM_LIB_PATH) +endif +ifeq (,$(PFM_INC_PATH)) +ifeq (,$(PFM_ROOT)) + PFM_ROOT := $(PWD)/libpfm-3.y +endif + PFM_INC_PATH := 
$(PFM_ROOT)/include +endif +ifneq (/usr/include,$(PFM_INC_PATH)) +CFLAGS += -I$(PFM_INC_PATH) +endif +endif + +MISCHDRS += linux-lock.h mb.h papi_libpfm_events.h +MISCSRCS += papi_libpfm3_events.c +SHLIBDEPS = -Bdynamic -L$(PFM_LIB_PATH) -lpfm +PFM_OBJS=$(shell $(AR) t $(PFM_LIB_PATH)/libpfm.a 2>/dev/null) +MISCOBJS = $(PFM_OBJS) $(MISCSRCS:.c=.o) + +ifeq (,$(PFM_OBJS)) +$(PFM_LIB_PATH)/libpfm.a: +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" CC="$(CC) $(BITFLAGS)" CONFIG_PFMLIB_OLD_PFMV2="$(PFM_OLD_PFMV2)" lib +else + @echo '$@ not installed!'; exit 1 +endif + $(MAKE) +endif + +include Makefile.inc + +config.h: + @echo 'Please clobber your build and run ./configure.' + +$(PFM_OBJS): $(PFM_LIB_PATH)/libpfm.a + $(AR) xv $< + +papi_libpfm3_events.o: papi_libpfm3_events.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_libpfm3_events.c -o $@ + +native_clean: + -rm -f $(MISCOBJS) +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" clean +endif + +native_install: +ifneq (,${PFM_ROOT}) + -$(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" CONFIG_PFMLIB_OLD_PFMV2="$(PFM_OLD_PFMV2)" DESTDIR=$(DESTDIR) PREFIX=$(PREFIX) install_prefix=$(PREFIX) LIBDIR=$(LIBDIR) INCDIR=$(INCDIR) MANDIR=$(MANDIR) install +endif + -install -d $(DESTDIR)$(LIBDIR) +ifneq (,$(findstring shared,$(LIBS))) + cp -p $(SHLIB) $(DESTDIR)$(LIBDIR)/libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) + cd $(DESTDIR)$(LIBDIR); ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so.$(PAPIVER) + cd $(DESTDIR)$(LIBDIR); ln -sf libpapi.so.$(PAPIVER).$(PAPIREV).$(PAPIAGE).$(PAPIINC) libpapi.so +endif + -install -d $(DESTDIR)$(DATADIR) + cp -f ./papi_events.csv $(DESTDIR)$(DATADIR) + +native_clobber: +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" distclean +endif diff --git a/src/Rules.pfm b/src/Rules.pfm new file mode 100644 index 0000000..5a9a7d5 --- /dev/null +++ b/src/Rules.pfm @@ -0,0 +1,67 @@ +# $Id$ + +DESCR = "Linux with PFM $(VERSION) kernel support and 
library" + +ifneq (,$(wildcard /etc/sgi-release)) + PFM_PREFIX ?= /usr + ALTIX ?= -DALTIX +endif + +ifeq (,$(PFM_LIB_PATH)) +ifeq (,$(PFM_ROOT)) + PFM_ROOT := ./libpfm-$(VERSION) +endif + PFM_LIB_PATH := $(PFM_ROOT)/lib +endif +ifeq (,$(PFM_INC_PATH)) +ifeq (,$(PFM_ROOT)) + PFM_ROOT := ./libpfm-$(VERSION) +endif + PFM_INC_PATH := $(PFM_ROOT)/include +endif + +OPTIM := $(CFLAGS) +CFLAGS-3.y := -DPFM30 +CFLAGS += -I$(PFM_INC_PATH) $(ALTIX) $(CFLAGS-$(VERSION)) +MISCHDRS += linux-lock.h mb.h +SHLIBDEPS = -Bdynamic -L$(PFM_LIB_PATH) -lpfm +PFM_OBJS = $(shell $(AR) t $(PFM_LIB_PATH)/libpfm.a 2>/dev/null) +MISCOBJS = $(PFM_OBJS) $(MISCSRCS:.c=.o) + +ifeq (,$(PFM_OBJS)) +$(PFM_LIB_PATH)/libpfm.a: +ifneq (,${PFM_ROOT}) +ifeq (1, $(HAVE_NO_OVERRIDE_INIT)) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" CC="$(CC) -Wno-override-init" OPTIM="$(OPTIM)" CONFIG_PFMLIB_OLD_PFMV2="$(PFM_OLD_PFMV2)" lib +else + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" CC="$(CC)" OPTIM="$(OPTIM)" CONFIG_PFMLIB_OLD_PFMV2="$(PFM_OLD_PFMV2)" lib +endif +else + @echo '$@ not installed!'; exit 1 +endif + $(MAKE) +endif + +include Makefile.inc + +config.h: + @echo 'Please clobber your build and run ./configure.' 
+ +$(PFM_OBJS): + $(AR) xv $(PFM_LIB_PATH)/libpfm.a + +native_clean: + -rm -f $(MISCOBJS) +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) clean +endif + +native_install: +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) CONFIG_PFMLIB_OLD_PFMV2="$(PFM_OLD_PFMV2)" DESTDIR=$(DESTDIR) PREFIX=$(PREFIX) install_prefix=$(PREFIX) LIBDIR=$(LIBDIR) INCDIR=$(INCDIR) MANDIR=$(MANDIR) install +endif + +native_clobber: +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) distclean +endif diff --git a/src/Rules.pfm4_pe b/src/Rules.pfm4_pe new file mode 100644 index 0000000..61eedc8 --- /dev/null +++ b/src/Rules.pfm4_pe @@ -0,0 +1,83 @@ + +DESCR = "Linux with perf_event kernel support and libpfm4" + +ifneq (/usr,$(PFM_PREFIX)) + PWD = $(shell pwd) + ifeq (,$(PFM_LIB_PATH)) + ifeq (,$(PFM_ROOT)) + PFM_ROOT := $(PWD)/libpfm4 + endif + PFM_LIB_PATH := $(PFM_ROOT)/lib + CC_SHR += -Wl,-rpath-link -Wl,$(PFM_LIB_PATH) + endif + ifeq (,$(PFM_INC_PATH)) + ifeq (,$(PFM_ROOT)) + PFM_ROOT := $(PWD)/libpfm4 + endif + PFM_INC_PATH := $(PFM_ROOT)/include + endif + ifneq (/usr/include,$(PFM_INC_PATH)) + LIBCFLAGS += -I$(PFM_INC_PATH) + endif +endif + +LIBCFLAGS += -fvisibility=hidden +MISCHDRS += linux-lock.h mb.h papi_libpfm4_events.h +MISCSRCS += papi_libpfm4_events.c +SHLIBDEPS = -Bdynamic -L$(PFM_LIB_PATH) -lpfm +PFM_OBJS=$(shell $(AR) t $(PFM_LIB_PATH)/libpfm.a 2>/dev/null) +MISCOBJS = $(PFM_OBJS) $(MISCSRCS:.c=.o) + +ifeq (yes,$(MIC)) + FORCE_PFM_ARCH="CONFIG_PFMLIB_ARCH_X86=y" +endif + +ifeq (,$(PFM_OBJS)) +$(PFM_LIB_PATH)/libpfm.a: +ifneq (,${PFM_ROOT}) +ifeq ("$(CC_COMMON_NAME)","icc") + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" CC="$(CC) $(BITFLAGS)" DBG="-g -Wall -Werror" $(FORCE_PFM_ARCH) lib +else +ifeq (1,$(HAVE_NO_OVERRIDE_INIT)) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" CC="$(CC) $(BITFLAGS) -Wno-override-init" $(FORCE_PFM_ARCH) lib +else + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" CC="$(CC) $(BITFLAGS)" $(FORCE_PFM_ARCH) lib +endif +endif +else + @echo '$@ not installed!'; exit 1 +endif + $(MAKE) 
+endif + +include Makefile.inc + +config.h: + @echo 'Please clobber your build and run ./configure.' + +$(PFM_OBJS): $(PFM_LIB_PATH)/libpfm.a + $(AR) xv $< + +papi_libpfm4_events.o: papi_libpfm4_events.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c papi_libpfm4_events.c -o $@ + +native_clean: + -rm -f $(MISCOBJS) +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" clean +endif + +native_install: +ifneq (,${PFM_ROOT}) + -$(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" DESTDIR=$(DESTDIR) PREFIX=$(PREFIX) install_prefix=$(PREFIX) LIBDIR=$(LIBDIR) INCDIR=$(INCDIR) MANDIR=$(MANDIR) install +endif + -install -d $(DESTDIR)$(LIBDIR) +# Makefile.inc already has installation of shared libraries so +# there is no need to do it here + -install -d $(DESTDIR)$(DATADIR) + cp -f ./papi_events.csv $(DESTDIR)$(DATADIR) + +native_clobber: +ifneq (,${PFM_ROOT}) + $(MAKE) -C $(PFM_ROOT) ARCH="$(ARCH)" distclean +endif diff --git a/src/aix-context.h b/src/aix-context.h new file mode 100644 index 0000000..c88e3f3 --- /dev/null +++ b/src/aix-context.h @@ -0,0 +1,15 @@ +#ifndef _PAPI_AIX_CONTEXT_H +#define _PAPI_AIX_CONTEXT_H + +/* overflow */ +/* Override void* definitions from PAPI framework layer */ +/* with typedefs to conform to PAPI component layer code. 
*/ +#undef hwd_siginfo_t +#undef hwd_ucontext_t +typedef siginfo_t hwd_siginfo_t; +typedef struct sigcontext hwd_ucontext_t; + +#define GET_OVERFLOW_ADDRESS(ctx) (void *)(((hwd_ucontext_t *)(ctx->ucontext))->sc_jmpbuf.jmp_context.iar) + +#endif /* _PAPI_AIX_CONTEXT_H */ + diff --git a/src/aix-lock.h b/src/aix-lock.h new file mode 100644 index 0000000..1c0c13f --- /dev/null +++ b/src/aix-lock.h @@ -0,0 +1,15 @@ +#include <sys/atomic_op.h> + +/* Locks */ +extern atomic_p lock[]; + +#define _papi_hwd_lock(lck) \ +{ \ + while(_check_lock(lock[lck],0,1) == TRUE) { ; } \ +} + +#define _papi_hwd_unlock(lck) \ +{ \ + _clear_lock(lock[lck], 0); \ +} + diff --git a/src/aix-memory.c b/src/aix-memory.c new file mode 100644 index 0000000..f7d4d80 --- /dev/null +++ b/src/aix-memory.c @@ -0,0 +1,102 @@ +/* +* File: aix-memory.c +* Author: Kevin London +* london@cs.utk.edu +* +* Mods: +* +*/ + +#include "papi.h" +#include "papi_internal.h" + +#include "aix.h" + +int +_aix_get_memory_info( PAPI_hw_info_t * mem_info, int type ) +{ + PAPI_mh_level_t *L = mem_info->mem_hierarchy.level; + + /* Not quite sure what bit 30 indicates. + I'm assuming it flags a unified tlb */ + if ( _system_configuration.tlb_attrib & ( 1 << 30 ) ) { + L[0].tlb[0].type = PAPI_MH_TYPE_UNIFIED; + L[0].tlb[0].num_entries = _system_configuration.itlb_size; + L[0].tlb[0].associativity = _system_configuration.itlb_asc; + } else { + L[0].tlb[0].type = PAPI_MH_TYPE_INST; + L[0].tlb[0].num_entries = _system_configuration.itlb_size; + L[0].tlb[0].associativity = _system_configuration.itlb_asc; + L[0].tlb[1].type = PAPI_MH_TYPE_DATA; + L[0].tlb[1].num_entries = _system_configuration.dtlb_size; + L[0].tlb[1].associativity = _system_configuration.dtlb_asc; + } + /* Not quite sure what bit 30 indicates. 
+ I'm assuming it flags a unified cache */ + if ( _system_configuration.cache_attrib & ( 1 << 30 ) ) { + L[0].cache[0].type = PAPI_MH_TYPE_UNIFIED; + L[0].cache[0].size = _system_configuration.icache_size; + L[0].cache[0].associativity = _system_configuration.icache_asc; + L[0].cache[0].line_size = _system_configuration.icache_line; + } else { + L[0].cache[0].type = PAPI_MH_TYPE_INST; + L[0].cache[0].size = _system_configuration.icache_size; + L[0].cache[0].associativity = _system_configuration.icache_asc; + L[0].cache[0].line_size = _system_configuration.icache_line; + L[0].cache[1].type = PAPI_MH_TYPE_DATA; + L[0].cache[1].size = _system_configuration.dcache_size; + L[0].cache[1].associativity = _system_configuration.dcache_asc; + L[0].cache[1].line_size = _system_configuration.dcache_line; + } + L[1].cache[0].type = PAPI_MH_TYPE_UNIFIED; + L[1].cache[0].size = _system_configuration.L2_cache_size; + L[1].cache[0].associativity = _system_configuration.L2_cache_asc; + /* is there a line size for Level 2 cache? */ + + /* it looks like we've always got at least 2 levels of info */ + /* what about level 3 cache? */ + mem_info->mem_hierarchy.levels = 2; + + return PAPI_OK; +} + +int +_aix_get_dmem_info( PAPI_dmem_info_t * d ) +{ + /* This function has been reimplemented + to conform to current interface. + It has not been tested. + Nor has it been confirmed for completeness. 
+ dkt 05-10-06 + */ + + struct procsinfo pi; + pid_t mypid = getpid( ); + pid_t pid; + int found = 0; + + pid = 0; + while ( 1 ) { + if ( getprocs( &pi, sizeof ( pi ), 0, 0, &pid, 1 ) != 1 ) + break; + if ( mypid == pi.pi_pid ) { + found = 1; + break; + } + } + if ( !found ) + return ( PAPI_ESYS ); + + d->size = pi.pi_size; + d->resident = pi.pi_drss + pi.pi_trss; + d->high_water_mark = PAPI_EINVAL; + d->shared = PAPI_EINVAL; + d->text = pi.pi_trss; /* this is a guess */ + d->library = PAPI_EINVAL; + d->heap = PAPI_EINVAL; + d->locked = PAPI_EINVAL; + d->stack = PAPI_EINVAL; + d->pagesize = getpagesize( ); + + return ( PAPI_OK ); +} diff --git a/src/aix.c b/src/aix.c new file mode 100644 index 0000000..a44d29f --- /dev/null +++ b/src/aix.c @@ -0,0 +1,1296 @@ +/* This file handles the OS dependent part of the POWER5 and POWER6 architectures. + It supports both AIX 4 and AIX 5. The switch between AIX 4 and 5 is driven by the + system defined value _AIXVERSION_510. + Other routines also include minor conditionally compiled differences. +*/ + +#include <sys/utsname.h> + +#include "papi.h" +#include "papi_internal.h" +#include "papi_lock.h" + +#include "papi_memory.h" + +#include "extras.h" + +#include "aix.h" +#include "papi_vector.h" + +/* Advance declarations */ +papi_vector_t _aix_vector; + +/* Locking variables */ +volatile int lock_var[PAPI_MAX_LOCK] = { 0 }; +atomic_p lock[PAPI_MAX_LOCK]; + +/* + some heap information, start_of_text, start_of_data ..... 
+ ref: http://publibn.boulder.ibm.com/doc_link/en_US/a_doc_lib/aixprggd/genprogc/sys_mem_alloc.htm#HDRA9E4A4C9921SYLV +*/ +#define START_OF_TEXT &_text +#define END_OF_TEXT &_etext +#define START_OF_DATA &_data +#define END_OF_DATA &_edata +#define START_OF_BSS &_edata +#define END_OF_BSS &_end + +static int maxgroups = 0; +struct utsname AixVer; + +native_event_entry_t native_table[PAPI_MAX_NATIVE_EVENTS]; + +hwd_pminfo_t pminfo; +pm_groups_info_t pmgroups; +native_event_entry_t native_table[PAPI_MAX_NATIVE_EVENTS]; +PPC64_native_map_t native_name_map[PAPI_MAX_NATIVE_EVENTS]; +hwd_groups_t group_map[MAX_GROUPS] = { 0 }; + +/* to initialize the native_table */ +void +aix_initialize_native_table( ) +{ + int i, j; + + memset( native_table, 0, + PAPI_MAX_NATIVE_EVENTS * sizeof ( native_event_entry_t ) ); + memset( native_name_map, 0, + PAPI_MAX_NATIVE_EVENTS * sizeof ( PPC64_native_map_t ) ); + for ( i = 0; i < PAPI_MAX_NATIVE_EVENTS; i++ ) { + native_name_map[i].index = -1; + for ( j = 0; j < MAX_COUNTERS; j++ ) + native_table[i].resources.counter_cmd[j] = -1; + } +} + +/* to setup native_table group value */ +static void +aix_ppc64_setup_gps( int total ) +{ + int i, j, gnum; + + for ( i = 0; i < total; i++ ) { + for ( j = 0; j < MAX_COUNTERS; j++ ) { + /* native_table[i].resources.rgg[j]=-1; */ + if ( native_table[i].resources.selector & ( 1 << j ) ) { + for ( gnum = 0; gnum < pmgroups.maxgroups; gnum++ ) { + if ( native_table[i].resources.counter_cmd[j] == + pmgroups.event_groups[gnum].events[j] ) { + /* could use gnum instead of pmgroups.event_groups[gnum].group_id */ + native_table[i].resources.group[pmgroups. + event_groups[gnum]. 
+ group_id / 32] |= + 1 << ( pmgroups.event_groups[gnum].group_id % 32 ); + } + } + } + } + } + + for ( gnum = 0; gnum < pmgroups.maxgroups; gnum++ ) { + for ( i = 0; i < MAX_COUNTERS; i++ ) { + /*group_map[gnum].counter_cmd[i] = pmgroups.event_groups[gnum].events[i]; */ + if (pmgroups.event_groups[gnum].group_id >=MAX_GROUPS) { + fprintf(stderr,"ERROR, group number trying to go past MAX GROUPS\n"); + continue; + } + + group_map[pmgroups.event_groups[gnum].group_id].counter_cmd[i] = + pmgroups.event_groups[gnum].events[i]; + } + } +} + +/* to setup native_table values, and return number of entries */ +int +aix_ppc64_setup_native_table( ) +{ + hwd_pmevents_t *wevp; + hwd_pminfo_t *info; + int pmc, ev, i, j, index; + + info = &pminfo; + index = 0; + aix_initialize_native_table( ); + for ( pmc = 0; pmc < info->maxpmcs; pmc++ ) { + wevp = info->list_events[pmc]; + for ( ev = 0; ev < info->maxevents[pmc]; ev++, wevp++ ) { + for ( i = 0; i < index; i++ ) { + if ( strcmp( wevp->short_name, native_table[i].name ) == 0 ) { + native_table[i].resources.selector |= 1 << pmc; + native_table[i].resources.counter_cmd[pmc] = wevp->event_id; + break; + } + } + if ( i == index ) { + /*native_table[i].index=i; */ + native_table[i].resources.selector |= 1 << pmc; + native_table[i].resources.counter_cmd[pmc] = wevp->event_id; + native_table[i].name = wevp->short_name; + native_table[i].description = wevp->description; + native_name_map[i].name = native_table[i].name; + native_name_map[i].index = i; + index++; + } + } + } + aix_ppc64_setup_gps( index ); + + return index; +} + +/* Reports the elements of the hwd_register_t struct as an array of names and a matching array of values. + Maximum string length is name_len; Maximum number of values is count. 
+*/ +static void +copy_value( unsigned int val, char *nam, char *names, unsigned int *values, + int len ) +{ + *values = val; + strncpy( names, nam, len ); + names[len - 1] = '\0'; +} + + +/* this function recusively does Modified Bipartite Graph counter allocation + success return 1 + fail return 0 +*/ +static int +do_counter_allocation( ppc64_reg_alloc_t * event_list, int size ) +{ + int i, j, group = -1; + unsigned int map[GROUP_INTS]; + + for ( i = 0; i < GROUP_INTS; i++ ) + map[i] = event_list[0].ra_group[i]; + + for ( i = 1; i < size; i++ ) { + for ( j = 0; j < GROUP_INTS; j++ ) + map[j] &= event_list[i].ra_group[j]; + } + + for ( i = 0; i < GROUP_INTS; i++ ) { + if ( map[i] ) { + group = ffs( map[i] ) - 1 + i * 32; + break; + } + } + + if ( group < 0 ) + return group; /* allocation fail */ + else { + for ( i = 0; i < size; i++ ) { + for ( j = 0; j < MAX_COUNTERS; j++ ) { + if ( event_list[i].ra_counter_cmd[j] >= 0 + && event_list[i].ra_counter_cmd[j] == + group_map[group].counter_cmd[j] ) + event_list[i].ra_position = j; + } + } + return group; + } +} + + +/* this function will be called when there are counters available + success return 1 + fail return 0 +*/ +int +_aix_allocate_registers( EventSetInfo_t * ESI ) +{ + hwd_control_state_t *this_state = ESI->ctl_state; + unsigned char selector; + int i, j, natNum, index; + ppc64_reg_alloc_t event_list[MAX_COUNTERS]; + int position, group; + + + /* not yet successfully mapped, but have enough slots for events */ + + /* Initialize the local structure needed + for counter allocation and optimization. 
*/ + natNum = ESI->NativeCount; + for ( i = 0; i < natNum; i++ ) { + /* CAUTION: Since this is in the hardware layer, it's ok + to access the native table directly, but in general this is a bad idea */ + event_list[i].ra_position = -1; + /* calculate native event rank, which is number of counters it can live on, this is power3 specific */ + for ( j = 0; j < MAX_COUNTERS; j++ ) { + if ( ( index = + native_name_map[ESI->NativeInfoArray[i]. + ni_event & PAPI_NATIVE_AND_MASK].index ) < + 0 ) + return PAPI_ECNFLCT; + event_list[i].ra_counter_cmd[j] = + native_table[index].resources.counter_cmd[j]; + } + for ( j = 0; j < GROUP_INTS; j++ ) { + if ( ( index = + native_name_map[ESI->NativeInfoArray[i]. + ni_event & PAPI_NATIVE_AND_MASK].index ) < + 0 ) + return PAPI_ECNFLCT; + event_list[i].ra_group[j] = native_table[index].resources.group[j]; + } + /*event_list[i].ra_mod = -1; */ + } + + if ( ( group = do_counter_allocation( event_list, natNum ) ) >= 0 ) { /* successfully mapped */ + /* copy counter allocations info back into NativeInfoArray */ + this_state->group_id = group; + for ( i = 0; i < natNum; i++ ) + ESI->NativeInfoArray[i].ni_position = event_list[i].ra_position; + /* update the control structure based on the NativeInfoArray */ + /*_papi_hwd_update_control_state(this_state, ESI->NativeInfoArray, natNum);*/ + return PAPI_OK; + } else { + return PAPI_ECNFLCT; + } +} + +int +_aix_init_control_state( hwd_control_state_t * ptr ) +{ + int i; + + for ( i = 0; i < _aix_vector.cmp_info.num_cntrs; i++ ) { + ptr->counter_cmd.events[i] = COUNT_NOTHING; + } + ptr->counter_cmd.mode.b.is_group = 1; + + _aix_vector.set_domain( ptr, _aix_vector.cmp_info.default_domain ); + _aix_set_granularity( ptr, _aix_vector.cmp_info.default_granularity ); + /*setup_native_table(); */ + return ( PAPI_OK ); +} + + +/* This function updates the control structure with whatever resources are allocated + for all the native events in the native info structure array. 
*/ +int +_aix_update_control_state( hwd_control_state_t * this_state, + NativeInfo_t * native, int count, + hwd_context_t * context ) +{ + + this_state->counter_cmd.events[0] = this_state->group_id; + return PAPI_OK; +} + + +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ + /* The following is for any POWER hardware */ + +/* Trims trailing blank space and line endings from a string (in place). + Returns pointer to start address */ +static char * +trim_string( char *in ) +{ + int len, i = 0; + char *start = in; + + if ( in == NULL ) + return ( in ); + len = strlen( in ); + if ( len == 0 ) + return ( in ); + /* Trim right */ + i = strlen( start ) - 1; + while ( i >= 0 ) { + if ( isblank( start[i] ) || ( start[i] == '\r' ) || + ( start[i] == '\n' ) ) + start[i] = '\0'; + else + break; + i--; + } + return ( start ); +} + + +/* Routines to support an opaque native event table */ +int +_aix_ntv_code_to_name( unsigned int EventCode, char *ntv_name, int len ) +{ + if ( ( EventCode & PAPI_NATIVE_AND_MASK ) >= + _aix_vector.cmp_info.num_native_events ) + return ( PAPI_ENOEVNT ); + strncpy( ntv_name, + native_name_map[EventCode & PAPI_NATIVE_AND_MASK].name, len ); + trim_string( ntv_name ); + if ( strlen( native_name_map[EventCode & PAPI_NATIVE_AND_MASK].name ) > + len - 1 ) + return ( PAPI_EBUF ); + return ( PAPI_OK ); +} + +int +_aix_ntv_code_to_descr( unsigned int EventCode, char *ntv_descr, int len ) +{ + if ( ( EventCode & PAPI_NATIVE_AND_MASK ) >= + _aix_vector.cmp_info.num_native_events ) + return ( PAPI_ENOEVNT ); + strncpy( ntv_descr, + native_table[native_name_map[EventCode & PAPI_NATIVE_AND_MASK]. + index].description, len ); + trim_string( ntv_descr ); + if ( strlen + ( native_table + [native_name_map[EventCode & PAPI_NATIVE_AND_MASK].index]. 
+ description ) > len - 1 ) + return ( PAPI_EBUF ); + return ( PAPI_OK ); +} + +int +_aix_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) +{ + bits = &native_table[EventCode & PAPI_NATIVE_AND_MASK].resources; /* it is not right, different type */ + return ( PAPI_OK ); +} + +/* this function return the next native event code. + modifier = PAPI_ENUM_FIRST returns first native event code + modifier = PAPI_ENUM_EVENTS returns next native event code + modifier = PAPI_NTV_ENUM_GROUPS return groups in which this + native event lives, in bits 16 - 23 of event code + terminating with PAPI_ENOEVNT at the end of the list. + function return value: + PAPI_OK successful, event code is valid + PAPI_EINVAL bad modifier + PAPI_ENOEVNT end of list or fail, event code is invalid +*/ +int +_aix_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + if ( modifier == PAPI_ENUM_FIRST ) { + *EventCode = PAPI_NATIVE_MASK; + return ( PAPI_OK ); + } + if ( modifier == PAPI_ENUM_EVENTS ) { + int index = *EventCode & PAPI_NATIVE_AND_MASK; + + if ( native_table[index + 1].resources.selector ) { + *EventCode = *EventCode + 1; + return ( PAPI_OK ); + } else + return ( PAPI_ENOEVNT ); + } else if ( modifier == PAPI_NTV_ENUM_GROUPS ) { +#if defined(_POWER5) || defined(_POWER6) + unsigned int group = + ( *EventCode & PAPI_NTV_GROUP_AND_MASK ) >> PAPI_NTV_GROUP_SHIFT; + int index = *EventCode & 0x000000FF; + int i; + unsigned int tmpg; + + *EventCode = *EventCode & ( ~PAPI_NTV_GROUP_AND_MASK ); + for ( i = 0; i < GROUP_INTS; i++ ) { + tmpg = native_table[index].resources.group[i]; + if ( group != 0 ) { + while ( ( ffs( tmpg ) + i * 32 ) <= group && tmpg != 0 ) + tmpg = tmpg ^ ( 1 << ( ffs( tmpg ) - 1 ) ); + } + if ( tmpg != 0 ) { + group = ffs( tmpg ) + i * 32; + *EventCode = *EventCode | ( group << PAPI_NTV_GROUP_SHIFT ); + return ( PAPI_OK ); + } + } +#endif + return ( PAPI_ENOEVNT ); + } else + return ( PAPI_EINVAL ); +} + +static void +set_config( hwd_control_state_t * ptr, 
int arg1, int arg2 ) +{ + ptr->counter_cmd.events[arg1] = arg2; +} + +static void +unset_config( hwd_control_state_t * ptr, int arg1 ) +{ + ptr->counter_cmd.events[arg1] = 0; +} + +int +init_domain( ) +{ + int domain = 0; + + domain = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_OTHER; +#ifdef PM_INITIALIZE +#ifdef _AIXVERSION_510 + if ( pminfo.proc_feature.b.hypervisor ) { + domain |= PAPI_DOM_SUPERVISOR; + } +#endif +#endif + return ( domain ); +} + +static int +_aix_set_domain( hwd_control_state_t * this_state, int domain ) +{ + pm_mode_t *mode = &( this_state->counter_cmd.mode ); + int did = 0; + + mode->b.user = 0; + mode->b.kernel = 0; + if ( domain & PAPI_DOM_USER ) { + did++; + mode->b.user = 1; + } + if ( domain & PAPI_DOM_KERNEL ) { + did++; + mode->b.kernel = 1; + } +#ifdef PM_INITIALIZE +#ifdef _AIXVERSION_510 + if ( ( domain & PAPI_DOM_SUPERVISOR ) && pminfo.proc_feature.b.hypervisor ) { + did++; + mode->b.hypervisor = 1; + } +#endif +#endif + if ( did ) + return ( PAPI_OK ); + else + return ( PAPI_EINVAL ); +/* + switch (domain) + { + case PAPI_DOM_USER: + mode->b.user = 1; + mode->b.kernel = 0; + break; + case PAPI_DOM_KERNEL: + mode->b.user = 0; + mode->b.kernel = 1; + break; + case PAPI_DOM_ALL: + mode->b.user = 1; + mode->b.kernel = 1; + break; + default: + return(PAPI_EINVAL); + } + return(PAPI_OK); +*/ +} + +int +_aix_set_granularity( hwd_control_state_t * this_state, int domain ) +{ + pm_mode_t *mode = &( this_state->counter_cmd.mode ); + + switch ( domain ) { + case PAPI_GRN_THR: + mode->b.process = 0; + mode->b.proctree = 0; + break; + /* case PAPI_GRN_PROC: + mode->b.process = 1; + mode->b.proctree = 0; + break; + case PAPI_GRN_PROCG: + mode->b.process = 0; + mode->b.proctree = 1; + break; */ + default: + return ( PAPI_EINVAL ); + } + return ( PAPI_OK ); +} + +static int +set_default_domain( EventSetInfo_t * zero, int domain ) +{ + hwd_control_state_t *current_state = zero->ctl_state; + return ( _aix_set_domain( current_state, domain ) ); +} + 
+static int +set_default_granularity( EventSetInfo_t * zero, int granularity ) +{ + hwd_control_state_t *current_state = zero->ctl_state; + return ( _aix_set_granularity( current_state, granularity ) ); +} + +/* Initialize the system-specific settings */ +/* Machine info structure. -1 is unused. */ +int +_aix_mdi_init( ) +{ + int retval; + + if ( ( retval = uname( &AixVer ) ) < 0 ) + return ( PAPI_ESYS ); + if ( AixVer.version[0] == '4' ) { + _papi_hwi_system_info.exe_info.address_info.text_start = + ( caddr_t ) START_OF_TEXT; + _papi_hwi_system_info.exe_info.address_info.text_end = + ( caddr_t ) END_OF_TEXT; + _papi_hwi_system_info.exe_info.address_info.data_start = + ( caddr_t ) START_OF_DATA; + _papi_hwi_system_info.exe_info.address_info.data_end = + ( caddr_t ) END_OF_DATA; + _papi_hwi_system_info.exe_info.address_info.bss_start = + ( caddr_t ) START_OF_BSS; + _papi_hwi_system_info.exe_info.address_info.bss_end = + ( caddr_t ) END_OF_BSS; + } else { + _aix_update_shlib_info( &_papi_hwi_system_info ); + } + +/* _papi_hwi_system_info.supports_64bit_counters = 1; + _papi_hwi_system_info.supports_real_usec = 1; + _papi_hwi_system_info.sub_info.fast_real_timer = 1; + _papi_hwi_system_info.sub_info->available_domains = init_domain();*/ + + + return ( PAPI_OK ); +} + + +static int +_aix_get_system_info( papi_mdi_t *mdi ) +{ + int retval; + /* pm_info_t pminfo; */ + struct procsinfo psi = { 0 }; + pid_t pid; + char maxargs[PAPI_HUGE_STR_LEN]; + char pname[PAPI_HUGE_STR_LEN]; + + pid = getpid( ); + if ( pid == -1 ) + return ( PAPI_ESYS ); + _papi_hwi_system_info.pid = pid; + psi.pi_pid = pid; + retval = getargs( &psi, sizeof ( psi ), maxargs, PAPI_HUGE_STR_LEN ); + if ( retval == -1 ) + return ( PAPI_ESYS ); + + if ( realpath( maxargs, pname ) ) + strncpy( _papi_hwi_system_info.exe_info.fullname, pname, + PAPI_HUGE_STR_LEN ); + else + strncpy( _papi_hwi_system_info.exe_info.fullname, maxargs, + PAPI_HUGE_STR_LEN ); + + strcpy( 
_papi_hwi_system_info.exe_info.address_info.name, + basename( maxargs ) ); + +#ifdef _POWER7 + /* we pass PM_POWER7 for the same reasons as below (power6 case) */ + retval = pm_initialize( PM_INIT_FLAGS , &pminfo, &pmgroups, PM_POWER7); +#elif defined(_POWER6) + /* problem with pm_initialize(): it cannot be called multiple times with + PM_CURRENT; use instead the actual proc type - here PM_POWER6 - + and multiple invocations are no longer a problem */ + retval = pm_initialize( PM_INIT_FLAGS, &pminfo, &pmgroups, PM_POWER6 ); +#else +#ifdef _AIXVERSION_510 +#ifdef PM_INITIALIZE + SUBDBG( "Calling AIX 5 version of pm_initialize...\n" ); +/*#if defined(_POWER5) + retval = pm_initialize(PM_INIT_FLAGS, &pminfo, &pmgroups, PM_POWER5); +#endif*/ + retval = pm_initialize( PM_INIT_FLAGS, &pminfo, &pmgroups, PM_CURRENT ); +#else + SUBDBG( "Calling AIX 5 version of pm_init...\n" ); + retval = pm_init( PM_INIT_FLAGS, &pminfo, &pmgroups ); +#endif + +#else + SUBDBG( "Calling AIX 4 version of pm_init...\n" ); + retval = pm_init( PM_INIT_FLAGS, &pminfo ); +#endif +#endif + SUBDBG( "...Back from pm_init\n" ); + + if ( retval > 0 ) + return ( retval ); + + _aix_mdi_init( ); + + _papi_hwi_system_info.hw_info.nnodes = 1; + _papi_hwi_system_info.hw_info.ncpu = _system_configuration.ncpus; + _papi_hwi_system_info.hw_info.totalcpus = + _papi_hwi_system_info.hw_info.ncpu * + _papi_hwi_system_info.hw_info.nnodes; + _papi_hwi_system_info.hw_info.vendor = -1; + strcpy( _papi_hwi_system_info.hw_info.vendor_string, "IBM" ); + _papi_hwi_system_info.hw_info.model = _system_configuration.implementation; + strcpy( _papi_hwi_system_info.hw_info.model_string, pminfo.proc_name ); + _papi_hwi_system_info.hw_info.revision = + ( float ) _system_configuration.version; + _papi_hwi_system_info.hw_info.mhz = ( float ) ( pm_cycles( ) / 1000000.0 ); + _papi_hwi_system_info.hw_info.cpu_max_mhz=_papi_hwi_system_info.hw_info.mhz; + _papi_hwi_system_info.hw_info.cpu_min_mhz=_papi_hwi_system_info.hw_info.mhz; + 
+/* _papi_hwi_system_info.num_gp_cntrs = pminfo.maxpmcs;*/ + _aix_vector.cmp_info.num_cntrs = pminfo.maxpmcs; + _aix_vector.cmp_info.num_mpx_cntrs = MAX_MPX_COUNTERS; // pminfo.maxpmcs, + + _aix_vector.cmp_info.available_granularities = PAPI_GRN_THR; +/* This field doesn't appear to exist in the PAPI 3.0 structure + _papi_hwi_system_info.cpunum = mycpu(); +*/ + _aix_vector.cmp_info.available_domains = init_domain( ); + return PAPI_OK; +} + +/* Low level functions, should not handle errors, just return codes. */ + +/* At init time, the higher level library should always allocate and + reserve EventSet zero. */ + +long long +_aix_get_real_usec( void ) +{ + timebasestruct_t t; + long long retval; + + read_real_time( &t, TIMEBASE_SZ ); + time_base_to_time( &t, TIMEBASE_SZ ); + retval = ( ( long long ) t.tb_high * 1000000 ) + t.tb_low / 1000; + return ( retval ); +} + +long long +_aix_get_real_cycles( void ) +{ + return ( _aix_get_real_usec( ) * + ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz ); +} + +long long +_aix_get_virt_usec( void ) +{ + long long retval; + struct tms buffer; + + times( &buffer ); + SUBDBG( "user %d system %d\n", ( int ) buffer.tms_utime, + ( int ) buffer.tms_stime ); + retval = + ( long long ) ( buffer.tms_utime + buffer.tms_stime ) * + ( 1000000 / CLK_TCK ); + return ( retval ); +} + +static void +_aix_lock_init( void ) +{ + int i; + for ( i = 0; i < PAPI_MAX_LOCK; i++ ) + lock[i] = ( int * ) ( lock_var + i ); +} + +int +_aix_shutdown_thread( hwd_context_t * ctx ) +{ + return ( PAPI_OK ); +} + +int +_aix_init_component( int cidx ) +{ + int retval = PAPI_OK, procidx; + + /* Fill in what we can of the papi_system_info. 
*/ + retval = _papi_os_vector.get_system_info( &_papi_hwi_system_info ); + if ( retval ) + return ( retval ); + + /* Setup memory info */ + retval = _papi_os_vector.get_memory_info( &_papi_hwi_system_info.hw_info, 0 ); + if ( retval ) + return ( retval ); + + SUBDBG( "Found %d %s %s CPUs at %d Mhz.\n", + _papi_hwi_system_info.hw_info.totalcpus, + _papi_hwi_system_info.hw_info.vendor_string, + _papi_hwi_system_info.hw_info.model_string, + _papi_hwi_system_info.hw_info.cpu_max_mhz ); + + _aix_vector.cmp_info.CmpIdx = cidx; + _aix_vector.cmp_info.num_native_events = aix_ppc64_setup_native_table( ); + + procidx = pm_get_procindex( ); + switch ( procidx ) { + case PM_POWER5: + _papi_load_preset_table( "POWER5", 0, cidx ); + break; + case PM_POWER5_II: + _papi_load_preset_table( "POWER5+", 0, cidx ); + break; + case PM_POWER6: + _papi_load_preset_table( "POWER6", 0, cidx ); + break; + case PM_PowerPC970: + _papi_load_preset_table( "PPC970", 0, cidx ); + break; + case PM_POWER7: + _papi_load_preset_table( "POWER7", 0, cidx ); + break; + default: + fprintf( stderr, "%s is not supported!\n", pminfo.proc_name ); + return PAPI_ENOIMPL; + } + + _aix_lock_init( ); + + return ( retval ); +} + + +int +_aix_init_thread( hwd_context_t * context ) +{ + int retval; + /* Initialize our global control state. */ + retval = _aix_init_control_state( &context->cntrl ); + return ( retval ); +} + +/* Go from highest counter to lowest counter. Why? Because there are usually + more counters on #1, so we try the least probable first. 
*/ + +static int +get_avail_hwcntr_bits( int cntr_avail_bits ) +{ + int tmp = 0, i = 1 << ( POWER_MAX_COUNTERS - 1 ); + + while ( i ) { + tmp = i & cntr_avail_bits; + if ( tmp ) + return ( tmp ); + i = i >> 1; + } + return ( 0 ); +} + +static void +set_hwcntr_codes( int selector, unsigned char *from, int *to ) +{ + int useme, i; + + for ( i = 0; i < _aix_vector.cmp_info.num_cntrs; i++ ) { + useme = ( 1 << i ) & selector; + if ( useme ) { + to[i] = from[i]; + } + } +} + + +#ifdef DEBUG +void +dump_cmd( pm_prog_t * t ) +{ + SUBDBG( "mode.b.threshold %d\n", t->mode.b.threshold ); + SUBDBG( "mode.b.spare %d\n", t->mode.b.spare ); + SUBDBG( "mode.b.process %d\n", t->mode.b.process ); + SUBDBG( "mode.b.kernel %d\n", t->mode.b.kernel ); + SUBDBG( "mode.b.user %d\n", t->mode.b.user ); + SUBDBG( "mode.b.count %d\n", t->mode.b.count ); + SUBDBG( "mode.b.proctree %d\n", t->mode.b.proctree ); + SUBDBG( "events[0] %d\n", t->events[0] ); + SUBDBG( "events[1] %d\n", t->events[1] ); + SUBDBG( "events[2] %d\n", t->events[2] ); + SUBDBG( "events[3] %d\n", t->events[3] ); + SUBDBG( "events[4] %d\n", t->events[4] ); + SUBDBG( "events[5] %d\n", t->events[5] ); + SUBDBG( "events[6] %d\n", t->events[6] ); + SUBDBG( "events[7] %d\n", t->events[7] ); + SUBDBG( "reserved %d\n", t->reserved ); +} + +void +dump_data( long long *vals ) +{ + int i; + + for ( i = 0; i < MAX_COUNTERS; i++ ) { + SUBDBG( "counter[%d] = %lld\n", i, vals[i] ); + } +} +#endif + +int +_aix_reset( hwd_context_t * ESI, hwd_control_state_t * zero ) +{ + int retval = pm_reset_data_mythread( ); + if ( retval > 0 ) { + if ( _papi_hwi_error_level != PAPI_QUIET ) + pm_error( "PAPI Error: pm_reset_data_mythread", retval ); + return ( retval ); + } + return ( PAPI_OK ); +} + + +int +_aix_read( hwd_context_t * ctx, hwd_control_state_t * spc, + long long **vals, int flags ) +{ + int retval; + + retval = pm_get_data_mythread( &spc->state ); + if ( retval > 0 ) { + if ( _papi_hwi_error_level != PAPI_QUIET ) + pm_error( "PAPI Error: 
pm_get_data_mythread", retval ); + return ( retval ); + } + + *vals = spc->state.accu; + +#ifdef DEBUG + if ( ISLEVEL( DEBUG_SUBSTRATE ) ) + dump_data( *vals ); +#endif + + return ( PAPI_OK ); +} + +static int +round_requested_ns( int ns ) +{ + if ( ns <= _papi_os_info.itimer_res_ns ) { + return _papi_os_info.itimer_res_ns; + } else { + int leftover_ns = ns % _papi_os_info.itimer_res_ns; + return ( ns - leftover_ns + _papi_os_info.itimer_res_ns ); + } +} + +int +_aix_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ + switch ( code ) { +/* I don't understand what it means to set the default domain + case PAPI_DEFDOM: + return(set_default_domain(zero, option->domain.domain)); +*/ + case PAPI_DOMAIN: + return ( _aix_set_domain + ( option->domain.ESI->ctl_state, option->domain.domain ) ); +/* I don't understand what it means to set the default granularity + case PAPI_DEFGRN: + return(set_default_granularity(zero, option->granularity.granularity)); +*/ + case PAPI_GRANUL: + return ( _aix_set_granularity + ( option->domain.ESI->ctl_state, + option->granularity.granularity ) ); +#if 0 + case PAPI_INHERIT: + return ( set_inherit( option->inherit.inherit ) ); +#endif + case PAPI_DEF_ITIMER: + { + /* flags are currently ignored, eventually the flags will be able + to specify whether or not we use POSIX itimers (clock_gettimer) */ + if ( ( option->itimer.itimer_num == ITIMER_REAL ) && + ( option->itimer.itimer_sig != SIGALRM ) ) + return PAPI_EINVAL; + if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) && + ( option->itimer.itimer_sig != SIGVTALRM ) ) + return PAPI_EINVAL; + if ( ( option->itimer.itimer_num == ITIMER_PROF ) && + ( option->itimer.itimer_sig != SIGPROF ) ) + return PAPI_EINVAL; + if ( option->itimer.ns > 0 ) + option->itimer.ns = round_requested_ns( option->itimer.ns ); + /* At this point, we assume the user knows what he or + she is doing, they maybe doing something arch specific */ + return PAPI_OK; + } + case PAPI_DEF_MPX_NS: + { + 
option->multiplex.ns = round_requested_ns( option->multiplex.ns ); + return ( PAPI_OK ); + } + case PAPI_DEF_ITIMER_NS: + { + option->itimer.ns = round_requested_ns( option->itimer.ns ); + return ( PAPI_OK ); + } + default: + return ( PAPI_ENOSUPP ); + } +} + +void +_aix_dispatch_timer( int signal, siginfo_t * si, void *i ) +{ + _papi_hwi_context_t ctx; + ThreadInfo_t *t = NULL; + caddr_t address; + + ctx.si = si; + ctx.ucontext = ( hwd_ucontext_t * ) i; + + address = ( caddr_t ) GET_OVERFLOW_ADDRESS( ( &ctx ) ); + _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, NULL, 0, 0, + &t, _aix_vector.cmp_info.CmpIdx ); +} + +int +_aix_set_overflow( EventSetInfo_t * ESI, int EventIndex, int threshold ) +{ + hwd_control_state_t *this_state = ESI->ctl_state; + + return ( PAPI_OK ); +} + +void * +_aix_get_overflow_address( void *context ) +{ + void *location; + struct sigcontext *info = ( struct sigcontext * ) context; + location = ( void * ) info->sc_jmpbuf.jmp_context.iar; + + return ( location ); +} + + +/* Copy the current control_state into the new thread context */ +/*int _papi_hwd_start(EventSetInfo_t *ESI, EventSetInfo_t *zero)*/ +int +_aix_start( hwd_context_t * ctx, hwd_control_state_t * cntrl ) +{ + int i, retval; + hwd_control_state_t *current_state = &ctx->cntrl; + + /* If we are nested, merge the global counter structure + with the current eventset */ + + SUBDBG( "Start\n" ); + + /* Copy the global counter structure to the current eventset */ + + SUBDBG( "Copying states\n" ); + memcpy( current_state, cntrl, sizeof ( hwd_control_state_t ) ); + + retval = pm_set_program_mythread( &current_state->counter_cmd ); + if ( retval > 0 ) { + if ( retval == 13 ) { + retval = pm_delete_program_mythread( ); + if ( retval > 0 ) { + if ( _papi_hwi_error_level != PAPI_QUIET ) + pm_error( "PAPI Error: pm_delete_program_mythread", + retval ); + return ( retval ); + } + retval = pm_set_program_mythread( &current_state->counter_cmd ); + if ( retval > 0 ) { + if (
_papi_hwi_error_level != PAPI_QUIET ) + pm_error( "PAPI Error: pm_set_program_mythread", retval ); + return ( retval ); + } + } else { + if ( _papi_hwi_error_level != PAPI_QUIET ) + pm_error( "PAPI Error: pm_set_program_mythread", retval ); + return ( retval ); + } + } + + /* Set up the new merged control structure */ + +#if 0 + dump_cmd( &current_state->counter_cmd ); +#endif + + /* Start the counters */ + + retval = pm_start_mythread( ); + if ( retval > 0 ) { + if ( _papi_hwi_error_level != PAPI_QUIET ) + pm_error( "pm_start_mythread()", retval ); + return ( retval ); + } + + return ( PAPI_OK ); +} + +int +_aix_stop( hwd_context_t * ctx, hwd_control_state_t * cntrl ) +{ + int retval; + + retval = pm_stop_mythread( ); + if ( retval > 0 ) { + if ( _papi_hwi_error_level != PAPI_QUIET ) + pm_error( "pm_stop_mythread()", retval ); + return ( retval ); + } + + retval = pm_delete_program_mythread( ); + if ( retval > 0 ) { + if ( _papi_hwi_error_level != PAPI_QUIET ) + pm_error( "pm_delete_program_mythread()", retval ); + return ( retval ); + } + + return ( PAPI_OK ); +} + +int +_aix_update_shlib_info( papi_mdi_t *mdi ) +{ +#if ( ( defined( _AIXVERSION_510) || defined(_AIXVERSION_520))) + struct ma_msg_s + { + long flag; + char *name; + } ma_msgs[] = { + { + MA_MAINEXEC, "MAINEXEC"}, { + MA_KERNTEXT, "KERNTEXT"}, { + MA_READ, "READ"}, { + MA_WRITE, "WRITE"}, { + MA_EXEC, "EXEC"}, { + MA_SHARED, "SHARED"}, { + MA_BREAK, "BREAK"}, { + MA_STACK, "STACK"},}; + + char fname[80], name[PAPI_HUGE_STR_LEN]; + prmap_t newp; + int count, t_index, retval, i, j, not_first_flag_bit; + FILE *map_f; + void *vaddr; + prmap_t *tmp1 = NULL; + PAPI_address_map_t *tmp2 = NULL; + + sprintf( fname, "/proc/%d/map", getpid( ) ); + map_f = fopen( fname, "r" ); + if ( !map_f ) { + PAPIERROR( "fopen(%s) returned < 0", fname ); + return ( PAPI_OK ); + } + + /* count the entries we need */ + count = 0; + t_index = 0; + while ( ( retval = fread( &newp, sizeof ( prmap_t ), 1, map_f ) ) > 0 ) { + if (
newp.pr_pathoff > 0 && newp.pr_mapname[0] != '\0' ) { + if ( newp.pr_mflags & MA_STACK ) + continue; + + count++; + SUBDBG( "count=%d offset=%ld map=%s\n", count, + newp.pr_pathoff, newp.pr_mapname ); + + if ( ( newp.pr_mflags & MA_READ ) && ( newp.pr_mflags & MA_EXEC ) ) + t_index++; + } + } + rewind( map_f ); + tmp1 = ( prmap_t * ) papi_calloc( ( count + 1 ), sizeof ( prmap_t ) ); + if ( tmp1 == NULL ) + return ( PAPI_ENOMEM ); + + tmp2 = + ( PAPI_address_map_t * ) papi_calloc( t_index, + sizeof ( PAPI_address_map_t ) ); + if ( tmp2 == NULL ) + return ( PAPI_ENOMEM ); + + i = 0; + t_index = -1; + while ( ( retval = fread( &tmp1[i], sizeof ( prmap_t ), 1, map_f ) ) > 0 ) { + if ( tmp1[i].pr_pathoff > 0 && tmp1[i].pr_mapname[0] != '\0' ) + if ( !( tmp1[i].pr_mflags & MA_STACK ) ) + i++; + } + for ( i = 0; i < count; i++ ) { + char c; + int cc = 0; + + retval = fseek( map_f, tmp1[i].pr_pathoff, SEEK_SET ); + if ( retval != 0 ) + return ( PAPI_ESYS ); + while ( fscanf( map_f, "%c", &c ) != EOF ) { + name[cc] = c; + /* how many char are hold in /proc/xxxx/map */ + cc++; + if ( c == '\0' ) + break; + } + + + /* currently /proc/xxxx/map file holds only 33 char per line (incl NULL char); + * if executable name > 32 char, compare first 32 char only */ + if ( strncmp( _papi_hwi_system_info.exe_info.address_info.name, + basename( name ), cc - 1 ) == 0 ) { + if ( strlen( _papi_hwi_system_info.exe_info.address_info.name ) != + cc - 1 ) + PAPIERROR + ( "executable name too long (%d char). Match of first %d char only", + strlen( _papi_hwi_system_info.exe_info.address_info. + name ), cc - 1 ); + + if ( tmp1[i].pr_mflags & MA_READ ) { + if ( tmp1[i].pr_mflags & MA_EXEC ) { + _papi_hwi_system_info.exe_info.address_info. + text_start = ( caddr_t ) tmp1[i].pr_vaddr; + _papi_hwi_system_info.exe_info.address_info. + text_end = + ( caddr_t ) ( tmp1[i].pr_vaddr + tmp1[i].pr_size ); + } else if ( tmp1[i].pr_mflags & MA_WRITE ) { + _papi_hwi_system_info.exe_info.address_info. 
+ data_start = ( caddr_t ) tmp1[i].pr_vaddr; + _papi_hwi_system_info.exe_info.address_info. + data_end = + ( caddr_t ) ( tmp1[i].pr_vaddr + tmp1[i].pr_size ); + } + } + + } else { + if ( ( _papi_hwi_system_info.exe_info.address_info.text_start == 0 ) + && ( _papi_hwi_system_info.exe_info.address_info.text_end == + 0 ) && + ( _papi_hwi_system_info.exe_info.address_info.data_start == 0 ) + && ( _papi_hwi_system_info.exe_info.address_info.data_end == + 0 ) ) + PAPIERROR( "executable name not recognized" ); + + if ( tmp1[i].pr_mflags & MA_READ ) { + if ( tmp1[i].pr_mflags & MA_EXEC ) { + t_index++; + tmp2[t_index].text_start = ( caddr_t ) tmp1[i].pr_vaddr; + tmp2[t_index].text_end = + ( caddr_t ) ( tmp1[i].pr_vaddr + tmp1[i].pr_size ); + strncpy( tmp2[t_index].name, name, PAPI_MAX_STR_LEN ); + } else if ( tmp1[i].pr_mflags & MA_WRITE ) { + tmp2[t_index].data_start = ( caddr_t ) tmp1[i].pr_vaddr; + tmp2[t_index].data_end = + ( caddr_t ) ( tmp1[i].pr_vaddr + tmp1[i].pr_size ); + } + } + + } + } + fclose( map_f ); + + if ( _papi_hwi_system_info.shlib_info.map ) + papi_free( _papi_hwi_system_info.shlib_info.map ); + _papi_hwi_system_info.shlib_info.map = tmp2; + _papi_hwi_system_info.shlib_info.count = t_index + 1; + papi_free( tmp1 ); + + return PAPI_OK; +#else + return PAPI_ENOIMPL; +#endif +} + +int +_aix_ntv_name_to_code( const char *name, unsigned int *evtcode ) +{ + int i; + + for ( i = 0; i < PAPI_MAX_NATIVE_EVENTS; i++ ) + if ( strcmp( name, native_name_map[i].name ) == 0 ) { + *evtcode = native_name_map[i].index | PAPI_NATIVE_MASK; + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + + +PAPI_os_info_t _papi_os_info; + +int +_papi_hwi_init_os(void) { + + struct utsname uname_buffer; + + uname(&uname_buffer); + + strncpy(_papi_os_info.name,uname_buffer.sysname,PAPI_MAX_STR_LEN); + + strncpy(_papi_os_info.version,uname_buffer.release,PAPI_MAX_STR_LEN); + + _papi_os_info.itimer_sig = PAPI_INT_MPX_SIGNAL; + _papi_os_info.itimer_num = PAPI_INT_ITIMER; + 
_papi_os_info.itimer_res_ns = 1; + _papi_os_info.itimer_ns = 1000 * PAPI_INT_MPX_DEF_US; + + return PAPI_OK; + +} + + +papi_vector_t _aix_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + + .name = "aix", + .description = "AIX pmapi CPU counters", + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 1, + .fast_virtual_timer = 1, + .attach = 1, + .attach_must_ptrace = 1, + .cntr_umasks = 1, + } + , + + /* sizes of framework-opaque component-private structures + these are remapped in pmapi_ppc64.h, ppc64_events.h */ + .size = { + .context = sizeof ( hwd_context_t ), + .control_state = sizeof ( hwd_control_state_t ), + .reg_value = sizeof ( hwd_register_t ), + .reg_alloc = sizeof ( hwd_reg_alloc_t ), + } + , + + /* function pointers in this component */ + .init_control_state = _aix_init_control_state, + .start = _aix_start, + .stop = _aix_stop, + .read = _aix_read, + .allocate_registers = _aix_allocate_registers, + .update_control_state = _aix_update_control_state, + .set_domain = _aix_set_domain, + .reset = _aix_reset, + .set_overflow = _aix_set_overflow, +/* .stop_profiling = _aix_stop_profiling, */ + .ntv_enum_events = _aix_ntv_enum_events, + .ntv_name_to_code = _aix_ntv_name_to_code, + .ntv_code_to_name = _aix_ntv_code_to_name, + .ntv_code_to_descr = _aix_ntv_code_to_descr, + .ntv_code_to_bits = _aix_ntv_code_to_bits, + + .init_component = _aix_init_component, + .ctl = _aix_ctl, + .dispatch_timer = _aix_dispatch_timer, + .init_thread = _aix_init_thread, + .shutdown_thread = _aix_shutdown_thread, +}; + +papi_os_vector_t _papi_os_vector = { + .get_memory_info = _aix_get_memory_info, + .get_dmem_info = _aix_get_dmem_info, + .get_real_usec = _aix_get_real_usec, + .get_real_cycles 
= _aix_get_real_cycles, + .get_virt_usec = _aix_get_virt_usec, + .update_shlib_info = _aix_update_shlib_info, + .get_system_info = _aix_get_system_info, +}; diff --git a/src/aix.h b/src/aix.h new file mode 100644 index 0000000..545b301 --- /dev/null +++ b/src/aix.h @@ -0,0 +1,138 @@ +#ifndef _PAPI_AIX_H /* _PAPI_AIX */ +#define _PAPI_AIX_H + +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: pmapi-ppc64.h +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +*/ + + + +#include +#include +#include +#include +#include +#include +#include +#include +#if defined( _AIXVERSION_510) || defined(_AIXVERSION_520) +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "pmapi.h" + +#define ANY_THREAD_GETS_SIGNAL +#define POWER_MAX_COUNTERS MAX_COUNTERS +#define MAX_COUNTER_TERMS MAX_COUNTERS +#define MAX_MPX_COUNTERS 32 +#define INVALID_EVENT -2 +#define POWER_MAX_COUNTERS_MAPPING 8 + +extern _text; +extern _etext; +extern _edata; +extern _end; +extern _data; + +/* globals */ +#ifdef PM_INITIALIZE +#ifdef _AIXVERSION_510 +#define PMINFO_T pm_info2_t +#define PMEVENTS_T pm_events2_t +#else +#define PMINFO_T pm_info_t +#define PMEVENTS_T pm_events_t +#endif +PMINFO_T pminfo; +#else +#define PMINFO_T pm_info_t +#define PMEVENTS_T pm_events_t +/*pm_info_t pminfo;*/ +#endif + +#include "aix-context.h" + +/* define the vector structure at the bottom of this file */ + +#define PM_INIT_FLAGS PM_VERIFIED|PM_UNVERIFIED|PM_CAVEAT|PM_GET_GROUPS + +#ifdef PM_INITIALIZE +typedef pm_info2_t hwd_pminfo_t; +typedef pm_events2_t hwd_pmevents_t; +#else +typedef pm_info_t hwd_pminfo_t; +typedef pm_events_t hwd_pmevents_t; +#endif + +#include "ppc64_events.h" + +typedef struct ppc64_pmapi_control +{ + /* Buffer to pass to the kernel to control the counters */ + pm_prog_t counter_cmd; + int group_id; + /* Space to read the counters */ + pm_data_t state; +} 
ppc64_pmapi_control_t; + +typedef struct ppc64_reg_alloc +{ + int ra_position; + unsigned int ra_group[GROUP_INTS]; + int ra_counter_cmd[MAX_COUNTERS]; +} ppc64_reg_alloc_t; + +typedef struct ppc64_pmapi_context +{ + /* this structure is a work in progress */ + ppc64_pmapi_control_t cntrl; +} ppc64_pmapi_context_t; + +/* Override void* definitions from PAPI framework layer */ +/* typedefs to conform to hardware independent PAPI code. */ +#undef hwd_control_state_t +#undef hwd_reg_alloc_t +#undef hwd_context_t +typedef ppc64_pmapi_control_t hwd_control_state_t; +typedef ppc64_reg_alloc_t hwd_reg_alloc_t; +typedef ppc64_pmapi_context_t hwd_context_t; + +/* +typedef struct hwd_groups { + // group number from the pmapi pm_groups_t struct + //int group_id; + // Buffer containing counter cmds for this group + unsigned char counter_cmd[POWER_MAX_COUNTERS]; +} hwd_groups_t; +*/ + +/* prototypes */ +extern int _aix_set_granularity( hwd_control_state_t * this_state, int domain ); +extern int _papi_hwd_init_preset_search_map( hwd_pminfo_t * info ); + +extern int _aix_get_memory_info( PAPI_hw_info_t * mem_info, int type ); +extern int _aix_get_dmem_info( PAPI_dmem_info_t * d ); + +/* Machine dependent info structure */ +extern pm_groups_info_t pmgroups; + +#endif /* _PAPI_AIX */ + diff --git a/src/components/Makefile_comp_tests b/src/components/Makefile_comp_tests new file mode 100644 index 0000000..7c8e627 --- /dev/null +++ b/src/components/Makefile_comp_tests @@ -0,0 +1,23 @@ +UTILOBJS= ../../../testlib/libtestlib.a +DOLOOPS= ../../../testlib/do_loops.o +INCLUDE = -I../../../testlib -I../../.. -I. +LIBRARY = -L../../../ -lpapi +PAPILIB = $(LIBRARY) + +tests: $(NAME)_tests + +install: + @echo "$(NAME) tests (DATADIR) being installed in: \"$(DATADIR)\""; + -mkdir -p $(DATADIR)/$(NAME)/tests + -chmod go+rx $(DATADIR) + -chmod go+rx $(DATADIR)/$(NAME)/tests + -find . -perm -100 -type f -exec cp {} $(DATADIR)/$(NAME)/tests \; + -chmod go+rx $(DATADIR)/$(NAME)/* + -find . 
-name "*.[ch]" -type f -exec cp {} $(DATADIR)/$(NAME)/tests \; + -cp Makefile $(DATADIR)/$(NAME)/tests + -cp ../../Makefile_comp_tests.target $(DATADIR)/Makefile_comp_tests + +clean: + +distclean clobber: clean + rm -f Makefile_comp_tests.target diff --git a/src/components/Makefile_comp_tests.target.in b/src/components/Makefile_comp_tests.target.in new file mode 100644 index 0000000..9a369ad --- /dev/null +++ b/src/components/Makefile_comp_tests.target.in @@ -0,0 +1,35 @@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +exec_prefix = @exec_prefix@ +prefix = @prefix@ +datarootdir = @datarootdir@ +datadir = ../../.. +testlibdir = $(datadir)/testlib +validationlibdir = $(datadir)/validation_tests +INCLUDE = -I. -I@includedir@ -I$(datadir) -I$(testlibdir) -I$(validationlibdir) +LIBDIR = @libdir@ +PAPILIB = $(datadir)/@LIBRARY@ +TESTLIB = $(testlibdir)/libtestlib.a +LDFLAGS = @LDL@ +CC = @CC@ +F77 = @F77@ +CC_R = @CC_R@ +CFLAGS = @CFLAGS@ +OPTFLAGS= @OPTFLAGS@ +TOPTFLAGS= @TOPTFLAGS@ +OMPCFLGS = @OMPCFLGS@ +UTILOBJS = $(TESTLIB) + +tests: $(NAME)_tests + +install: + @echo "$(NAME) tests (DATADIR) being installed in: \"$(DATADIR)\""; + -mkdir -p $(DATADIR)/$(NAME)/tests + -chmod go+rx $(DATADIR) + -chmod go+rx $(DATADIR)/$(NAME)/tests + -find . -perm -100 -type f -exec cp {} $(DATADIR)/$(NAME)/tests \; + -chmod go+rx $(DATADIR)/$(NAME)/* + -find . 
-name "*.[ch]" -type f -exec cp {} $(DATADIR)/$(NAME)/tests \; + -cp Makefile $(DATADIR)/$(NAME)/tests + -cp ../../Makefile_comp_tests $(DATADIR) + + diff --git a/src/components/README b/src/components/README new file mode 100644 index 0000000..aa30107 --- /dev/null +++ b/src/components/README @@ -0,0 +1,45 @@ +/** +* @file: README +* @author: Brian Sheely +* bsheely@eecs.utk.edu +* @defgroup papi_components Components +* @brief Component Readme file +*/ + +/** @page component_readme Component Readme +@section Creating New Components + +The first step in creating a new component is to create a new directory inside the components directory. The naming convention is to use lower case letters for the directory name. At a minimum, this directory will contain all header files and source code required to build the component along with a Rules file which contains the build rules and compiler settings specific to that component. The Rules file must be named using the format Rules.x where x is the name of the directory. There are no restrictions on the naming of header or source files. + +If the component requires user input for any of the compiler settings, then the component directory will also contain the files required to generate a configure script using autoconf. The configure script can be used to generate a Makefile which the Rules file will include. The file configure.in is required in order to generate configure. It should specify that the Makefile that gets generated is named Makefile.x where x is the name of the component directory. Finally, configure also needs an input file to create the Makefile. That file must be named Makefile.x.in where x is the name of the component directory. + +The following comments apply to components that are under source control. Although configure is generated, it requires the correct version of autoconf. For that reason, configure should be placed under source control. 
The generated Makefile should not be placed under source control. + +In summary, the additional files required for configuration based on user input are: configure.in, configure (generated by autoconf), Makefile.x.in, and Makefile.x (generated by configure) where x is the name of the component directory. + +There is one final very important naming convention that applies to components. The array of function pointers that the component defines must use the naming convention papi_vector_t _x_vector where x is the name of the component directory. + + +Adding tests to the components: +------------------------------- + +In order to add tests to a component that will be compiled together with PAPI when typing 'make' (as well as cleaned up when 'make clean' or 'make clobber' is typed and installed when 'make install-all' or 'make install-tests' is called), the following steps need to be carried out: + + 1. create a directory with name 'tests' in the specific component directory + 2. add your test files and a Makefile to the 'tests' directory (see the example test and Makefile in components/example/tests) + 3. The components/< component >/tests/Makefile has to have a rule with the name '< component >_tests'; e.g. for tests added to the example component, the name of the rule would be 'example_tests'. See: + TESTS = HelloWorld + example_tests: $(TESTS) + 4. Include components/Makefile_comp_tests in your component test Makefile + (see components/example/tests/Makefile for more details) + 5. You may also define 'clean' and/or 'install' targets (as shown in the example) which will be called during those parts of the build. If these targets are missing, the build will just print a message reporting the missing target and continue. + +NOTE: there is no need to modify any PAPI code other than adding your tests and a Makefile to your component and following steps 1 to 4 listed above.
+ + + +@section Component Specific Information + +Some components under source control have additional information specific to their build process or operation. That information can be found in a README file inside the component directory. If the README doesn't exist, no special information is necessary. + +*/ diff --git a/src/components/Rules.components b/src/components/Rules.components new file mode 100644 index 0000000..7a2ef62 --- /dev/null +++ b/src/components/Rules.components @@ -0,0 +1,3 @@ +# $Id$ +# This file is intended to prevent an empty include compile error in Makefile.inc + diff --git a/src/components/appio/CHANGES b/src/components/appio/CHANGES new file mode 100644 index 0000000..4208f9a --- /dev/null +++ b/src/components/appio/CHANGES @@ -0,0 +1,12 @@ +AppIO component changelog: + +2012-01-19 Tushar Mohan + * Support for read/write/fread/fwrite added + * Test cases added + * Static and dynamic linkage tested + * Thread support enabled + +2011-12-01 Phil Mucci + + * Initial skeleton + diff --git a/src/components/appio/README b/src/components/appio/README new file mode 100644 index 0000000..d359909 --- /dev/null +++ b/src/components/appio/README @@ -0,0 +1,79 @@ +COMPONENT + + appio + +SUMMARY + + Application I/O component + +DESCRIPTION + + This application I/O component enables PAPI-C to determine + I/O used by the application. This is to be distinguished + from system-wide I/O statistics. The goal of this component + is to help the programmer attribute the I/O (read/write) to + files and sockets, to the source code. 
+ + Listed below are the events measured by the component: + + Event names + ----------- + READ_BYTES READ_CALLS READ_ERR READ_INTERRUPTED READ_WOULD_BLOCK READ_SHORT READ_EOF READ_BLOCK_SIZE READ_USEC + WRITE_BYTES WRITE_CALLS WRITE_ERR WRITE_INTERRUPTED WRITE_WOULD_BLOCK WRITE_SHORT WRITE_BLOCK_SIZE WRITE_USEC + OPEN_CALLS OPEN_ERR OPEN_FDS + SELECT_USEC + RECV_BYTES RECV_CALLS RECV_ERR RECV_INTERRUPTED RECV_WOULD_BLOCK RECV_SHORT RECV_EOF RECV_BLOCK_SIZE RECV_USEC + + SOCK_READ_BYTES SOCK_READ_CALLS SOCK_READ_ERR SOCK_READ_SHORT SOCK_READ_WOULD_BLOCK SOCK_READ_USEC + SOCK_WRITE_BYTES SOCK_WRITE_CALLS SOCK_WRITE_ERR SOCK_WRITE_SHORT SOCK_WRITE_WOULD_BLOCK SOCK_WRITE_USEC + + SEEK_CALLS SEEK_ABS_STRIDE_SIZE SEEK_USEC + + The component works by intercepting I/O system calls on Linux. At present, + the code uses features available in libc on Linux, and is unlikely to + work on other platforms without modifications. The code works for static + and shared executables. + + The component has been tested on 32 and 64-bit Linux. It's also been tested + to work for multithreaded programs. + + Limitations and future work: + --------------------------- + The most important aspect to note is that the code is likely to only work on + Linux, given the low-level dependencies on libc features. + + At present the component intercepts the open(), close(), read(), write(), + fread() and fwrite(). In the future it's expected that these will be expanded + to cover lseek(), select(), other I/O calls. + + While READ_* and WRITE_* calls will not distinguish between file and network + I/O, the user can explicitly determine network statistics using SOCK_* calls. + + Threads are handled using thread-specific structures in the backend. However, no + aggregation is currently performed across threads. There is also NO global structure + that has the statistics of all the threads. This means the user can call + a PAPI read to get statistics for a running thread.
However, if the thread has + joined, then its statistics can no longer be queried. + + TESTING: + ------- + Tests lie in the tests/ sub-directory. All but one test take no argument. + + The iozone test (appio_test_iozone) needs arguments just like iozone does. + It is not built by default as part of the PAPI tests. To build it: + cd appio/tests; make appio_test_iozone + An example run for the iozone test could be: + ./appio_test_iozone -s 100m -r 64 -i 0 -i 1 -t 1 + + +AUTHOR + + The code is written by Tushar Mohan and + Philip Mucci . The component leverages code + written by Jose Pedro Oliveira for the PAPI + net component. + +SEE ALSO + + +# vim:set ai ts=4 sw=4 sts=4 et: diff --git a/src/components/appio/Rules.appio b/src/components/appio/Rules.appio new file mode 100644 index 0000000..8899ec4 --- /dev/null +++ b/src/components/appio/Rules.appio @@ -0,0 +1,9 @@ +# $Id$ + +COMPSRCS += components/appio/appio.c +COMPOBJS += appio.o +SHLIBDEPS += -ldl + +appio.o: components/appio/appio.h components/appio/appio.c components/appio/appio.h $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/appio/appio.c -o $@ + diff --git a/src/components/appio/appio.c b/src/components/appio/appio.c new file mode 100644 index 0000000..7a3d571 --- /dev/null +++ b/src/components/appio/appio.c @@ -0,0 +1,782 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file appio.c + * + * @author Philip Mucci + * phil.mucci@samaratechnologygroup.com + * + * @author Tushar Mohan + * tusharmohan@gmail.com + * + * Credit to: + * Jose Pedro Oliveira + * jpo@di.uminho.pt + * whose code in the linux net component was used as a template for + * many sections of code in this component. + * + * @ingroup papi_components + * + * @brief appio component + * This file contains the source code for a component that enables + * PAPI to access application level file and socket I/O information.
+ * It does this through function replacement in the first person and + * by trapping syscalls in the third person. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#include "appio.h" + +// The PIC test implies it's built for shared linkage +#ifdef PIC +# include "dlfcn.h" +#endif + +/* +#pragma weak dlerror +static void *_dlsym_fake(void *handle, const char* symbol) { (void) handle; (void) symbol; return NULL; } +void *dlsym(void *handle, const char* symbol) __attribute__ ((weak, alias ("_dlsym_fake"))); +*/ + +papi_vector_t _appio_vector; + +/********************************************************************* + * Private + ********************************************************************/ + +//#define APPIO_FOO 1 + +static APPIO_native_event_entry_t * _appio_native_events; + + +/* If you modify the appio_stats_t below, you MUST update APPIO_MAX_COUNTERS */ +static __thread long long _appio_register_current[APPIO_MAX_COUNTERS]; +typedef enum { + READ_BYTES = 0, + READ_CALLS, + READ_ERR, + READ_INTERRUPTED, + READ_WOULD_BLOCK, + READ_SHORT, + READ_EOF, + READ_BLOCK_SIZE, + READ_USEC, + WRITE_BYTES, + WRITE_CALLS, + WRITE_ERR, + WRITE_SHORT, + WRITE_INTERRUPTED, + WRITE_WOULD_BLOCK, + WRITE_BLOCK_SIZE, + WRITE_USEC, + OPEN_CALLS, + OPEN_ERR, + OPEN_FDS, + SELECT_USEC, + RECV_BYTES, + RECV_CALLS, + RECV_ERR, + RECV_INTERRUPTED, + RECV_WOULD_BLOCK, + RECV_SHORT, + RECV_EOF, + RECV_BLOCK_SIZE, + RECV_USEC, + SOCK_READ_BYTES, + SOCK_READ_CALLS, + SOCK_READ_ERR, + SOCK_READ_SHORT, + SOCK_READ_WOULD_BLOCK, + SOCK_READ_USEC, + SOCK_WRITE_BYTES, + SOCK_WRITE_CALLS, + SOCK_WRITE_ERR, + SOCK_WRITE_SHORT, + SOCK_WRITE_WOULD_BLOCK, + SOCK_WRITE_USEC, + SEEK_CALLS, + SEEK_ABS_STRIDE_SIZE, + SEEK_USEC +} _appio_stats_t ; + +static const struct appio_counters { + const char *name; + const char 
*description; +} _appio_counter_info[APPIO_MAX_COUNTERS] = { + { "READ_BYTES", "Bytes read"}, + { "READ_CALLS", "Number of read calls"}, + { "READ_ERR", "Number of read calls that resulted in an error"}, + { "READ_INTERRUPTED","Number of read calls that timed out or were interruped"}, + { "READ_WOULD_BLOCK","Number of read calls that would have blocked"}, + { "READ_SHORT", "Number of read calls that returned less bytes than requested"}, + { "READ_EOF", "Number of read calls that returned an EOF"}, + { "READ_BLOCK_SIZE", "Average block size of reads"}, + { "READ_USEC", "Real microseconds spent in reads"}, + { "WRITE_BYTES", "Bytes written"}, + { "WRITE_CALLS", "Number of write calls"}, + { "WRITE_ERR", "Number of write calls that resulted in an error"}, + { "WRITE_SHORT", "Number of write calls that wrote less bytes than requested"}, + { "WRITE_INTERRUPTED","Number of write calls that timed out or were interrupted"}, + { "WRITE_WOULD_BLOCK","Number of write calls that would have blocked"}, + { "WRITE_BLOCK_SIZE","Mean block size of writes"}, + { "WRITE_USEC", "Real microseconds spent in writes"}, + { "OPEN_CALLS", "Number of open calls"}, + { "OPEN_ERR", "Number of open calls that resulted in an error"}, + { "OPEN_FDS", "Number of currently open descriptors"}, + { "SELECT_USEC", "Real microseconds spent in select calls"}, + { "RECV_BYTES", "Bytes read in recv/recvmsg/recvfrom"}, + { "RECV_CALLS", "Number of recv/recvmsg/recvfrom calls"}, + { "RECV_ERR", "Number of recv/recvmsg/recvfrom calls that resulted in an error"}, + { "RECV_INTERRUPTED","Number of recv/recvmsg/recvfrom calls that timed out or were interruped"}, + { "RECV_WOULD_BLOCK","Number of recv/recvmsg/recvfrom calls that would have blocked"}, + { "RECV_SHORT", "Number of recv/recvmsg/recvfrom calls that returned less bytes than requested"}, + { "RECV_EOF", "Number of recv/recvmsg/recvfrom calls that returned an EOF"}, + { "RECV_BLOCK_SIZE", "Average block size of recv/recvmsg/recvfrom"}, + { "RECV_USEC", 
"Real microseconds spent in recv/recvmsg/recvfrom"}, + { "SOCK_READ_BYTES", "Bytes read from socket"}, + { "SOCK_READ_CALLS", "Number of read calls on socket"}, + { "SOCK_READ_ERR", "Number of read calls on socket that resulted in an error"}, + { "SOCK_READ_SHORT", "Number of read calls on socket that returned less bytes than requested"}, + { "SOCK_READ_WOULD_BLOCK", "Number of read calls on socket that would have blocked"}, + { "SOCK_READ_USEC", "Real microseconds spent in read(s) on socket(s)"}, + { "SOCK_WRITE_BYTES","Bytes written to socket"}, + { "SOCK_WRITE_CALLS","Number of write calls to socket"}, + { "SOCK_WRITE_ERR", "Number of write calls to socket that resulted in an error"}, + { "SOCK_WRITE_SHORT","Number of write calls to socket that wrote less bytes than requested"}, + { "SOCK_WRITE_WOULD_BLOCK","Number of write calls to socket that would have blocked"}, + { "SOCK_WRITE_USEC", "Real microseconds spent in write(s) to socket(s)"}, + { "SEEK_CALLS", "Number of seek calls"}, + { "SEEK_ABS_STRIDE_SIZE", "Average absolute stride size of seeks"}, + { "SEEK_USEC", "Real microseconds spent in seek calls"} +}; + + +/********************************************************************* + *** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT **** + ********************************************************************/ + +int __close(int fd); +int close(int fd) { + int retval; + SUBDBG("appio: intercepted close(%d)\n", fd); + retval = __close(fd); + if ((retval == 0) && (_appio_register_current[OPEN_FDS]>0)) _appio_register_current[OPEN_FDS]--; + return retval; +} + +int __open(const char *pathname, int flags, mode_t mode); +int open(const char *pathname, int flags, mode_t mode) { + int retval; + SUBDBG("appio: intercepted open(%s,%d,%d)\n", pathname, flags, mode); + retval = __open(pathname,flags,mode); + _appio_register_current[OPEN_CALLS]++; + if (retval < 0) _appio_register_current[OPEN_ERR]++; + else _appio_register_current[OPEN_FDS]++; + return 
retval; +} + +/* we use timeval as a zero value timeout to select in read/write + for polling if the operation would block */ +struct timeval zerotv; /* this has to be zero, so define it here */ + +int __select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); +int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) { + int retval; + SUBDBG("appio: intercepted select(%d,%p,%p,%p,%p)\n", nfds,readfds,writefds,exceptfds,timeout); + long long start_ts = PAPI_get_real_usec(); + retval = __select(nfds,readfds,writefds,exceptfds,timeout); + long long duration = PAPI_get_real_usec() - start_ts; + _appio_register_current[SELECT_USEC] += duration; + return retval; +} + +off_t __lseek(int fd, off_t offset, int whence); +off_t lseek(int fd, off_t offset, int whence) { + off_t retval; + SUBDBG("appio: intercepted lseek(%d,%ld,%d)\n", fd, offset, whence); + long long start_ts = PAPI_get_real_usec(); + retval = __lseek(fd, offset, whence); + long long duration = PAPI_get_real_usec() - start_ts; + int n = _appio_register_current[SEEK_CALLS]++; + _appio_register_current[SEEK_USEC] += duration; + if (offset < 0) offset = -offset; // get abs offset + _appio_register_current[SEEK_ABS_STRIDE_SIZE]= (n * _appio_register_current[SEEK_ABS_STRIDE_SIZE] + offset)/(n+1); // mean absolute stride size + return retval; +} + +extern int errno; +ssize_t __read(int fd, void *buf, size_t count); +ssize_t read(int fd, void *buf, size_t count) { + int retval; + SUBDBG("appio: intercepted read(%d,%p,%lu)\n", fd, buf, (unsigned long)count); + + struct stat st; + int issocket = 0; + if (fstat(fd, &st) == 0) { + if ((st.st_mode & S_IFMT) == S_IFSOCK) issocket = 1; + } + // check if read would block on descriptor + fd_set readfds; + FD_ZERO(&readfds); + FD_SET(fd, &readfds); + int ready = __select(fd+1, &readfds, NULL, NULL, &zerotv); + if (ready == 0) { + _appio_register_current[READ_WOULD_BLOCK]++; + if (issocket) 
_appio_register_current[SOCK_READ_WOULD_BLOCK]++; + } + + long long start_ts = PAPI_get_real_usec(); + retval = __read(fd,buf, count); + long long duration = PAPI_get_real_usec() - start_ts; + int n = _appio_register_current[READ_CALLS]++; // read calls + if (issocket) _appio_register_current[SOCK_READ_CALLS]++; // read calls + if (retval > 0) { + _appio_register_current[READ_BLOCK_SIZE]= (n * _appio_register_current[READ_BLOCK_SIZE] + count)/(n+1); // mean size + _appio_register_current[READ_BYTES] += retval; // read bytes + if (issocket) _appio_register_current[SOCK_READ_BYTES] += retval; + if (retval < (int)count) { + _appio_register_current[READ_SHORT]++; // read short + if (issocket) _appio_register_current[SOCK_READ_SHORT]++; // read short + } + _appio_register_current[READ_USEC] += duration; + if (issocket) _appio_register_current[SOCK_READ_USEC] += duration; + } + if (retval < 0) { + _appio_register_current[READ_ERR]++; // read err + if (issocket) _appio_register_current[SOCK_READ_ERR]++; // read err + if (EINTR == errno) + _appio_register_current[READ_INTERRUPTED]++; // signal interrupted the read + //if ((EAGAIN == errno) || (EWOULDBLOCK == errno)) { + // _appio_register_current[READ_WOULD_BLOCK]++; //read would block on descriptor marked as non-blocking + // if (issocket) _appio_register_current[SOCK_READ_WOULD_BLOCK]++; //read would block on descriptor marked as non-blocking + //} + } + if (retval == 0) _appio_register_current[READ_EOF]++; // read eof + return retval; +} + +size_t _IO_fread(void *ptr, size_t size, size_t nmemb, FILE *stream); +size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { + size_t retval; + SUBDBG("appio: intercepted fread(%p,%lu,%lu,%p)\n", ptr, (unsigned long) size, (unsigned long) nmemb, (void*) stream); + long long start_ts = PAPI_get_real_usec(); + retval = _IO_fread(ptr,size,nmemb,stream); + long long duration = PAPI_get_real_usec() - start_ts; + int n = _appio_register_current[READ_CALLS]++; // read calls + 
if (retval > 0) { + _appio_register_current[READ_BLOCK_SIZE]= (n * _appio_register_current[READ_BLOCK_SIZE]+ size*nmemb)/(n+1);//mean size + _appio_register_current[READ_BYTES]+= retval * size; // read bytes + if (retval < nmemb) _appio_register_current[READ_SHORT]++; // read short + _appio_register_current[READ_USEC] += duration; + } + + /* A value of zero returned means one of two things..*/ + if (retval == 0) { + if (feof(stream)) _appio_register_current[READ_EOF]++; // read eof + else _appio_register_current[READ_ERR]++; // read err + } + return retval; +} + +ssize_t __write(int fd, const void *buf, size_t count); +ssize_t write(int fd, const void *buf, size_t count) { + int retval; + SUBDBG("appio: intercepted write(%d,%p,%lu)\n", fd, buf, (unsigned long)count); + struct stat st; + int issocket = 0; + if (fstat(fd, &st) == 0) { + if ((st.st_mode & S_IFMT) == S_IFSOCK) issocket = 1; + } + + // check if write would block on descriptor + fd_set writefds; + FD_ZERO(&writefds); + FD_SET(fd, &writefds); + int ready = __select(fd+1, NULL, &writefds, NULL, &zerotv); + if (ready == 0) { + _appio_register_current[WRITE_WOULD_BLOCK]++; + if (issocket) _appio_register_current[SOCK_WRITE_WOULD_BLOCK]++; + } + + long long start_ts = PAPI_get_real_usec(); + retval = __write(fd,buf, count); + long long duration = PAPI_get_real_usec() - start_ts; + int n = _appio_register_current[WRITE_CALLS]++; // write calls + if (issocket) _appio_register_current[SOCK_WRITE_CALLS]++; // socket write + if (retval >= 0) { + _appio_register_current[WRITE_BLOCK_SIZE]= (n * _appio_register_current[WRITE_BLOCK_SIZE] + count)/(n+1); // mean size + _appio_register_current[WRITE_BYTES]+= retval; // write bytes + if (issocket) _appio_register_current[SOCK_WRITE_BYTES] += retval; + if (retval < (int)count) { + _appio_register_current[WRITE_SHORT]++; // short write + if (issocket) _appio_register_current[SOCK_WRITE_SHORT]++; + } + _appio_register_current[WRITE_USEC] += duration; + if (issocket) 
_appio_register_current[SOCK_WRITE_USEC] += duration; + } + if (retval < 0) { + _appio_register_current[WRITE_ERR]++; // err + if (issocket) _appio_register_current[SOCK_WRITE_ERR]++; + if (EINTR == errno) + _appio_register_current[WRITE_INTERRUPTED]++; // signal interrupted the op + //if ((EAGAIN == errno) || (EWOULDBLOCK == errno)) { + // _appio_register_current[WRITE_WOULD_BLOCK]++; //op would block on descriptor marked as non-blocking + // if (issocket) _appio_register_current[SOCK_WRITE_WOULD_BLOCK]++; + //} + } + return retval; +} + +// The PIC test implies it's built for shared linkage +#ifdef PIC +static ssize_t (*__recv)(int sockfd, void *buf, size_t len, int flags) = NULL; +ssize_t recv(int sockfd, void *buf, size_t len, int flags) { + int retval; + SUBDBG("appio: intercepted recv(%d,%p,%lu,%d)\n", sockfd, buf, (unsigned long)len, flags); + if (!__recv) __recv = dlsym(RTLD_NEXT, "recv"); + if (!__recv) { + fprintf(stderr, "appio,c Internal Error: Could not obtain handle for real recv\n"); + exit(1); + } + // check if recv would block on descriptor + fd_set readfds; + FD_ZERO(&readfds); + FD_SET(sockfd, &readfds); + int ready = __select(sockfd+1, &readfds, NULL, NULL, &zerotv); + if (ready == 0) _appio_register_current[RECV_WOULD_BLOCK]++; + + long long start_ts = PAPI_get_real_usec(); + retval = __recv(sockfd, buf, len, flags); + long long duration = PAPI_get_real_usec() - start_ts; + int n = _appio_register_current[RECV_CALLS]++; // read calls + if (retval > 0) { + _appio_register_current[RECV_BLOCK_SIZE]= (n * _appio_register_current[RECV_BLOCK_SIZE] + len)/(n+1); // mean size + _appio_register_current[RECV_BYTES] += retval; // read bytes + if (retval < (int)len) _appio_register_current[RECV_SHORT]++; // read short + _appio_register_current[RECV_USEC] += duration; + } + if (retval < 0) { + _appio_register_current[RECV_ERR]++; // read err + if (EINTR == errno) + _appio_register_current[RECV_INTERRUPTED]++; // signal interrupted the read + if ((EAGAIN == 
errno) || (EWOULDBLOCK == errno)) + _appio_register_current[RECV_WOULD_BLOCK]++; //read would block on descriptor marked as non-blocking + } + if (retval == 0) _appio_register_current[RECV_EOF]++; // read eof + return retval; +} +#endif /* PIC */ + +size_t _IO_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); +size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { + size_t retval; + SUBDBG("appio: intercepted fwrite(%p,%lu,%lu,%p)\n", ptr, (unsigned long) size, (unsigned long) nmemb, (void*) stream); + long long start_ts = PAPI_get_real_usec(); + retval = _IO_fwrite(ptr,size,nmemb,stream); + long long duration = PAPI_get_real_usec() - start_ts; + int n = _appio_register_current[WRITE_CALLS]++; // write calls + if (retval > 0) { + _appio_register_current[WRITE_BLOCK_SIZE]= (n * _appio_register_current[WRITE_BLOCK_SIZE] + size*nmemb)/(n+1); // mean block size + _appio_register_current[WRITE_BYTES]+= retval * size; // write bytes + if (retval < nmemb) _appio_register_current[WRITE_SHORT]++; // short write + _appio_register_current[WRITE_USEC] += duration; + } + if (retval == 0) _appio_register_current[WRITE_ERR]++; // err + return retval; +} + + +/********************************************************************* + *************** BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ********* + *********************************************************************/ + +/* + * This is called whenever a thread is initialized + */ +static int +_appio_init_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + SUBDBG("_appio_init_thread %p\n", ctx); + return PAPI_OK; +} + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +static int +_appio_init_component( int cidx ) +{ + + SUBDBG("_appio_component %d\n", cidx); + _appio_native_events = (APPIO_native_event_entry_t *) papi_calloc(APPIO_MAX_COUNTERS, 
sizeof(APPIO_native_event_entry_t)); + + if (_appio_native_events == NULL ) { + PAPIERROR( "malloc():Could not get memory for events table" ); + return PAPI_ENOMEM; + } + int i; + for (i=0; ivalues, 0, APPIO_MAX_COUNTERS*sizeof(appio_ctl->values[0])); + + return PAPI_OK; +} + + +static int +_appio_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long ** events, int flags ) +{ + (void) flags; + (void) ctx; + + SUBDBG("_appio_read %p %p\n", ctx, ctl); + APPIO_control_state_t *appio_ctl = (APPIO_control_state_t *) ctl; + int i; + + for ( i=0; inum_events; i++ ) { + int index = appio_ctl->counter_bits[i]; + SUBDBG("event=%d, index=%d, val=%lld\n", i, index, _appio_register_current[index]); + appio_ctl->values[index] = _appio_register_current[index]; + } + *events = appio_ctl->values; + + return PAPI_OK; +} + + +static int +_appio_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + + SUBDBG("_appio_stop ctx=%p ctl=%p\n", ctx, ctl); + APPIO_control_state_t *appio_ctl = (APPIO_control_state_t *) ctl; + int i; + for ( i=0; inum_events; i++ ) { + int index = appio_ctl->counter_bits[i]; + SUBDBG("event=%d, index=%d, val=%lld\n", i, index, _appio_register_current[index]); + appio_ctl->values[i] = _appio_register_current[index]; + } + + return PAPI_OK; +} + + +/* + * Thread shutdown + */ +static int +_appio_shutdown_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + + return PAPI_OK; +} + + +/* + * Clean up what was setup in appio_init_component(). 
+ */ +static int +_appio_shutdown_component( void ) +{ + papi_free( _appio_native_events ); + return PAPI_OK; +} + + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and + * PAPI_SET_INHERIT + */ +static int +_appio_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + ( void ) ctx; + ( void ) code; + ( void ) option; + + return PAPI_OK; +} + + +static int +_appio_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, int count, hwd_context_t *ctx ) +{ + ( void ) ctx; + ( void ) ctl; + + SUBDBG("_appio_update_control_state ctx=%p ctl=%p num_events=%d\n", ctx, ctl, count); + int i, index; + APPIO_control_state_t *appio_ctl = (APPIO_control_state_t *) ctl; + (void) ctx; + + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event; + appio_ctl->counter_bits[i] = index; + native[i].ni_position = index; + } + appio_ctl->num_events = count; + + return PAPI_OK; +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +static int +_appio_set_domain( hwd_control_state_t *ctl, int domain ) +{ + ( void ) ctl; + + int found = 0; + + if ( PAPI_DOM_USER == domain ) found = 1; + + if ( !found ) + return PAPI_EINVAL; + + return PAPI_OK; +} + + +static int +_appio_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + + +/* + * Native Event functions + */ +static int +_appio_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + int index; + + switch ( modifier ) { 
+ case PAPI_ENUM_FIRST: + *EventCode = 0; + return PAPI_OK; + break; + + case PAPI_ENUM_EVENTS: + index = *EventCode; + if ( index < APPIO_MAX_COUNTERS - 1 ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + + default: + return PAPI_EINVAL; + break; + } + return PAPI_EINVAL; +} + + +/* + * + */ +static int +_appio_ntv_name_to_code( const char *name, unsigned int *EventCode ) +{ + int i; + + for ( i=0; i= 0 && index < APPIO_MAX_COUNTERS ) { + strncpy( name, _appio_counter_info[index].name, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + + +/* + * + */ +static int +_appio_ntv_code_to_descr( unsigned int EventCode, char *desc, int len ) +{ + int index = EventCode; + + if ( index >= 0 && index < APPIO_MAX_COUNTERS ) { + strncpy(desc, _appio_counter_info[index].description, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + + +/* + * + */ +static int +_appio_ntv_code_to_bits( unsigned int EventCode, hwd_register_t *bits ) +{ + int index = EventCode; + + if ( index >= 0 && index < APPIO_MAX_COUNTERS ) { + memcpy( ( APPIO_register_t * ) bits, + &( _appio_native_events[index].resources ), + sizeof ( APPIO_register_t ) ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + + +/* + * + */ +papi_vector_t _appio_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "appio", + .short_name = "appio", + .version = "1.1.2.4", + .CmpIdx = 0, /* set by init_component */ + .num_mpx_cntrs = APPIO_MAX_COUNTERS, + .num_cntrs = APPIO_MAX_COUNTERS, + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + }, + + /* sizes of framework-opaque component-private structures */ + 
.size = { + .context = sizeof ( APPIO_context_t ), + .control_state = sizeof ( APPIO_control_state_t ), + .reg_value = sizeof ( APPIO_register_t ), + .reg_alloc = sizeof ( APPIO_reg_alloc_t ), + }, + + /* function pointers in this component */ + .init_thread = _appio_init_thread, + .init_component = _appio_init_component, + .init_control_state = _appio_init_control_state, + .start = _appio_start, + .stop = _appio_stop, + .read = _appio_read, + .shutdown_thread = _appio_shutdown_thread, + .shutdown_component = _appio_shutdown_component, + .ctl = _appio_ctl, + + .update_control_state = _appio_update_control_state, + .set_domain = _appio_set_domain, + .reset = _appio_reset, + + .ntv_enum_events = _appio_ntv_enum_events, + .ntv_name_to_code = _appio_ntv_name_to_code, + .ntv_code_to_name = _appio_ntv_code_to_name, + .ntv_code_to_descr = _appio_ntv_code_to_descr, + .ntv_code_to_bits = _appio_ntv_code_to_bits + /* .ntv_bits_to_info = NULL, */ +}; + +/* vim:set ts=4 sw=4 sts=4 et: */ diff --git a/src/components/appio/appio.h b/src/components/appio/appio.h new file mode 100644 index 0000000..5eaef3a --- /dev/null +++ b/src/components/appio/appio.h @@ -0,0 +1,84 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file appio.h + * CVS: $Id: appio.h,v 1.1.2.4 2012/02/01 05:01:00 tmohan Exp $ + * + * @author Philip Mucci + * phil.mucci@samaratechnologygroup.com + * + * @author Tushar Mohan + * tushar.mohan@samaratechnologygroup.com + * + * @ingroup papi_components + * + * @brief appio component + * This file contains the source code for a component that enables + * PAPI to access application level file and socket I/O information. + * It does this through function replacement in the first person and + * by trapping syscalls in the third person. 
+ */ + +#ifndef _PAPI_APPIO_H +#define _PAPI_APPIO_H + +#include + +/************************* DEFINES SECTION ***********************************/ + +/* Set this equal to the number of elements in _appio_counter_info array */ +#define APPIO_MAX_COUNTERS 45 + +/** Structure that stores private information of each event */ +typedef struct APPIO_register +{ + /* This is used by the framework. It likes it to be !=0 to do something */ + unsigned int selector; +} APPIO_register_t; + + +/* + * The following structures mimic the ones used by other components. It is more + * convenient to use them like that as programming with PAPI makes specific + * assumptions for them. + */ + + +/* This structure is used to build the table of events */ + +typedef struct APPIO_native_event_entry +{ + APPIO_register_t resources; + const char* name; + const char* description; +} APPIO_native_event_entry_t; + + +typedef struct APPIO_reg_alloc +{ + APPIO_register_t ra_bits; +} APPIO_reg_alloc_t; + + +typedef struct APPIO_control_state +{ + int num_events; + int counter_bits[APPIO_MAX_COUNTERS]; + long long values[APPIO_MAX_COUNTERS]; // used for caching +} APPIO_control_state_t; + + +typedef struct APPIO_context +{ + APPIO_control_state_t state; +} APPIO_context_t; + + +/************************* GLOBALS SECTION *********************************** + *******************************************************************************/ + +#endif /* _PAPI_APPIO_H */ + +/* vim:set ts=4 sw=4 sts=4 et: */ diff --git a/src/components/appio/tests/Makefile b/src/components/appio/tests/Makefile new file mode 100644 index 0000000..b1cf7ab --- /dev/null +++ b/src/components/appio/tests/Makefile @@ -0,0 +1,71 @@ +NAME=appio +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = appio_list_events appio_values_by_code appio_values_by_name appio_test_read_write appio_test_pthreads appio_test_fread_fwrite appio_test_seek + +ALL_TESTS = $(TESTS) 
appio_test_blocking appio_test_select appio_test_recv appio_test_socket + +appio_tests: $(TESTS) + +all: $(ALL_TESTS) + +ARCH=$(shell uname -m) + +ifeq (x86_64,$(ARCH)) + ARCH_SUFFIX="-AMD64" +endif + + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +appio_list_events: appio_list_events.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_list_events.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +appio_values_by_code: appio_values_by_code.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_values_by_code.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +appio_values_by_name: appio_values_by_name.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_values_by_name.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +appio_test_read_write: appio_test_read_write.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_test_read_write.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +appio_test_seek: appio_test_seek.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_test_seek.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +appio_test_blocking: appio_test_blocking.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_test_blocking.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +appio_test_socket: appio_test_socket.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_test_socket.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +appio_test_recv: appio_test_recv.o $(UTILOBJS) ../../../libpapi.so + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_test_recv.o $(UTILOBJS) -Wl,-rpath ../../.. 
../../../libpapi.so $(LDFLAGS) + +appio_test_select: appio_test_select.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_test_select.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +appio_test_fread_fwrite: appio_test_fread_fwrite.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_test_fread_fwrite.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +appio_test_pthreads: appio_test_pthreads.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ appio_test_pthreads.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) -lpthread + +iozone/iozone_linux$(ARCH_SUFFIX).o iozone/libasync.o iozone/libbif.o: + cd iozone; $(MAKE) iozone_linux$(ARCH_SUFFIX).o libasync.o libbif.o + +init_fini.o: init_fini.c + $(CC) $(CFLAGS) $(INCLUDE) -o $@ -c $^ + +# to test, try: +# ./appio_test_iozone -s 100m -r 64 -i 0 -i 1 -t 1 +appio_test_iozone: iozone/iozone_linux$(ARCH_SUFFIX).o iozone/libasync.o iozone/libbif.o init_fini.o $(UTILOBJS) $(PAPILIB) + $(CC) -g -O2 -o $@ $(LDFLAGS) $^ -lpthread -lrt + +clean: + rm -f $(ALL_TESTS) appio_test_iozone *.o + diff --git a/src/components/appio/tests/appio_list_events.c b/src/components/appio/tests/appio_list_events.c new file mode 100644 index 0000000..7bad423 --- /dev/null +++ b/src/components/appio/tests/appio_list_events.c @@ -0,0 +1,92 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Tushar Mohan + * (adapted for appio from original linux-net code) + * + * test case for the appio component + * + * @brief + * List all appio events codes and names + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int main (int argc, char **argv) +{ + int retval,cid,numcmp; + int total_events=0; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != 
PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Listing all appio events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname, "appio") == NULL) { + continue; + } + + if (!TESTS_QUIET) { + printf("Component %d (%d) - %d events - %s\n", + cid, cmpinfo->CmpIdx, + cmpinfo->num_native_events, cmpinfo->name); + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + if (!TESTS_QUIET) { + printf("%#x %s\n", code, event_name); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No appio events found", 0); + } + + test_pass( __FILE__ ); + + return 0; +} + +// vim:set ai ts=4 sw=4 sts=4 et: diff --git a/src/components/appio/tests/appio_test_blocking.c b/src/components/appio/tests/appio_test_blocking.c new file mode 100644 index 0000000..dd5ca62 --- /dev/null +++ b/src/components/appio/tests/appio_test_blocking.c @@ -0,0 +1,80 @@ +/* + * Test case for appio + * Author: Tushar Mohan + * tusharmohan@gmail.com + * + * Description: This test case reads from standard linux /etc/group + * and writes the output to stdout. 
+ * Statistics are printed at the end of the run., + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 12 + +int main(int argc, char** argv) { + int Events[NUM_EVENTS]; + const char* names[NUM_EVENTS] = {"OPEN_CALLS", "OPEN_FDS", "READ_CALLS", "READ_BYTES", "READ_USEC", "READ_ERR", "READ_INTERRUPTED", "READ_WOULD_BLOCK", "WRITE_CALLS","WRITE_BYTES","WRITE_USEC", "WRITE_WOULD_BLOCK"}; + long long values[NUM_EVENTS]; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + int version = PAPI_library_init (PAPI_VER_CURRENT); + if (version != PAPI_VER_CURRENT) { + fprintf(stderr, "PAPI_library_init version mismatch\n"); + exit(1); + } + + if (!TESTS_QUIET) fprintf(stderr, "This program will read from stdin and echo it to stdout\n"); + int retval; + int e; + for (e=0; e 0) { + write(1, buf, bytes); + } + + + /* Stop counting events */ + if (PAPI_stop_counters(values, NUM_EVENTS) != PAPI_OK) { + fprintf(stderr, "Error in PAPI_stop_counters\n"); + } + + if (!TESTS_QUIET) { + printf("----\n"); + for (e=0; e +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + + +#define NUM_EVENTS 8 + +int main(int argc, char** argv) { + int Events[NUM_EVENTS]; + const char* names[NUM_EVENTS] = {"READ_CALLS", "READ_BYTES","READ_USEC","READ_ERR", "READ_EOF", "WRITE_CALLS","WRITE_BYTES","WRITE_USEC"}; + long long values[NUM_EVENTS]; + + char *infile = "/etc/group"; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + int version = PAPI_library_init (PAPI_VER_CURRENT); + if (version != PAPI_VER_CURRENT) { + fprintf(stderr, "PAPI_library_init version mismatch\n"); + exit(1); + } + if (!TESTS_QUIET) printf("This program will read %s and write it to /dev/null\n", infile); + FILE* fdin=fopen(infile, "r"); + if (fdin == NULL) perror("Could not open file for reading: \n"); + FILE* fout=fopen("/dev/null", "w"); + 
if (fout == NULL) perror("Could not open file for writing: \n"); + int bytes = 0; + char buf[1024]; + + int retval; + int e; + for (e=0; e 0) { + fwrite(buf, 1, bytes, fout); + } + + fclose(fdin); + fclose(fout); + + /* Stop counting events */ + if (PAPI_stop_counters(values, NUM_EVENTS) != PAPI_OK) { + fprintf(stderr, "Error in PAPI_stop_counters\n"); + } + + if (!TESTS_QUIET) { + printf("----\n"); + for (e=0; e +#include +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 6 +const char* names[NUM_EVENTS] = {"READ_CALLS", "READ_BYTES","READ_USEC","WRITE_CALLS","WRITE_BYTES","WRITE_USEC"}; + +#define NUM_INFILES 4 +static const char* files[NUM_INFILES] = {"/etc/passwd", "/etc/group", "/etc/protocols", "/etc/nsswitch.conf"}; + +void *ThreadIO(void *arg) { + unsigned long tid = (unsigned long)pthread_self(); + if (!TESTS_QUIET) printf("\nThread %#lx: will read %s and write it to /dev/null\n", tid,(const char*) arg); + int Events[NUM_EVENTS]; + long long values[NUM_EVENTS]; + int retval; + int e; + for (e=0; e 0) { + write(fdout, buf, bytes); + } + close(fdout); + + /* Stop counting events */ + if (PAPI_stop_counters(values, NUM_EVENTS) != PAPI_OK) { + fprintf(stderr, "Error in PAPI_stop_counters\n"); + } + + if (!TESTS_QUIET) { + for (e=0; e +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 12 + +int main(int argc, char** argv) { + int Events[NUM_EVENTS]; + const char* names[NUM_EVENTS] = {"OPEN_CALLS", "OPEN_FDS", "READ_CALLS", "READ_BYTES", "READ_USEC", "READ_ERR", "READ_INTERRUPTED", "READ_WOULD_BLOCK", "WRITE_CALLS","WRITE_BYTES","WRITE_USEC","WRITE_WOULD_BLOCK"}; + long long values[NUM_EVENTS]; + + char *infile = "/etc/group"; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + int version = PAPI_library_init (PAPI_VER_CURRENT); + if (version != PAPI_VER_CURRENT) { + 
fprintf(stderr, "PAPI_library_init version mismatch\n"); + exit(1); + } + + int fdin; + if (!TESTS_QUIET) printf("This program will read %s and write it to /dev/null\n", infile); + int retval; + int e; + for (e=0; e 0) { + write(fdout, buf, bytes); + } + + /* Closing the descriptors before doing the PAPI_stop + means, OPEN_FDS will be reported as zero, which is + right, since at the time of PAPI_stop, the descriptors + we opened have been closed */ + close (fdin); + close (fdout); + + /* Stop counting events */ + if (PAPI_stop_counters(values, NUM_EVENTS) != PAPI_OK) { + fprintf(stderr, "Error in PAPI_stop_counters\n"); + } + + if (!TESTS_QUIET) { + printf("----\n"); + for (e=0; e +#include +#include /* exit() */ +#include /* herror() */ +#include /* gethostbyname() */ +#include /* bind() accept() */ +#include /* bind() accept() */ +#include + +#include "papi.h" +#include "papi_test.h" + +#define PORT 3490 +#define NUM_EVENTS 6 + +main(int argc, char *argv[]) { + int Events[NUM_EVENTS]; + const char* names[NUM_EVENTS] = {"RECV_CALLS", "RECV_BYTES", "RECV_USEC", "RECV_ERR", "RECV_INTERRUPTED", "RECV_WOULD_BLOCK"}; + long long values[NUM_EVENTS]; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + int version = PAPI_library_init (PAPI_VER_CURRENT); + if (version != PAPI_VER_CURRENT) { + fprintf(stderr, "PAPI_library_init version mismatch\n"); + exit(1); + } + + if (!TESTS_QUIET) printf("This program will listen on port 3490, and write data received to standard output\n"); + int retval; + int e; + for (e=0; e 0) { + write(1, buf, bytes); + } + + close(n_sockfd); + + /* Stop counting events */ + if (PAPI_stop_counters(values, NUM_EVENTS) != PAPI_OK) { + fprintf(stderr, "Error in PAPI_stop_counters\n"); + } + + if (!TESTS_QUIET) { + printf("----\n"); + for (e=0; e +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 7 + +int main(int argc, char** argv) { + int 
Events[NUM_EVENTS]; + const char* names[NUM_EVENTS] = {"READ_CALLS", "READ_BYTES", "READ_BLOCK_SIZE", "READ_USEC", "SEEK_CALLS", "SEEK_USEC", "SEEK_ABS_STRIDE_SIZE"}; + long long values[NUM_EVENTS]; + + char *infile = "/etc/group"; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + int version = PAPI_library_init (PAPI_VER_CURRENT); + if (version != PAPI_VER_CURRENT) { + fprintf(stderr, "PAPI_library_init version mismatch\n"); + exit(1); + } + + int fdin; + if (!TESTS_QUIET) printf("This program will do a strided read %s and write it to stdout\n", infile); + int retval; + int e; + for (e=0; e 0) { + write(1, buf, bytes); + lseek(fdin, 16, SEEK_CUR); + } + + /* Closing the descriptors before doing the PAPI_stop + means, OPEN_FDS will be reported as zero, which is + right, since at the time of PAPI_stop, the descriptors + we opened have been closed */ + close (fdin); + + /* Stop counting events */ + if (PAPI_stop_counters(values, NUM_EVENTS) != PAPI_OK) { + fprintf(stderr, "Error in PAPI_stop_counters\n"); + } + + if (!TESTS_QUIET) { + printf("----\n"); + for (e=0; e +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 + +int main(int argc, char** argv) { + int Events[NUM_EVENTS]; + const char* names[NUM_EVENTS] = {"SELECT_USEC"}; + long long values[NUM_EVENTS]; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + int version = PAPI_library_init (PAPI_VER_CURRENT); + if (version != PAPI_VER_CURRENT) { + fprintf(stderr, "PAPI_library_init version mismatch\n"); + exit(1); + } + + if (!TESTS_QUIET) printf("This program will read from stdin and echo it to stdout\n"); + int retval; + int e; + for (e=0; e 0) write(1, buf, bytes); + if (bytes == 0) break; + } + + + /* Stop counting events */ + if (PAPI_stop_counters(values, NUM_EVENTS) != PAPI_OK) { + fprintf(stderr, "Error in PAPI_stop_counters\n"); + } + + if (!TESTS_QUIET) { + printf("----\n"); + for 
(e=0; e +#include +#include /* exit() */ +#include /* herror() */ +#include /* gethostbyname() */ +#include /* bind() accept() */ +#include /* bind() accept() */ +#include + +#include "papi.h" +#include "papi_test.h" + +#define PORT 3490 +#define NUM_EVENTS 15 + +main(int argc, char *argv[]) { + int Events[NUM_EVENTS]; + const char* names[NUM_EVENTS] = {"READ_CALLS", "READ_BYTES", "READ_USEC", "READ_WOULD_BLOCK", "SOCK_READ_CALLS", "SOCK_READ_BYTES", "SOCK_READ_USEC", "SOCK_READ_WOULD_BLOCK", "WRITE_BYTES", "WRITE_CALLS", "WRITE_WOULD_BLOCK", "WRITE_USEC", "SOCK_WRITE_BYTES", "SOCK_WRITE_CALLS", "SOCK_WRITE_USEC"}; + long long values[NUM_EVENTS]; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + int version = PAPI_library_init (PAPI_VER_CURRENT); + if (version != PAPI_VER_CURRENT) { + fprintf(stderr, "PAPI_library_init version mismatch\n"); + exit(1); + } + + if (!TESTS_QUIET) + printf("This program will listen on port 3490, and write data received to standard output AND socket\n" + "In the output ensure that the following identities hold:\n" + "READ_* == SOCK_READ_*\n" + "WRITE_{CALLS,BYTES} = 2 * SOCK_WRITE_{CALLS,BYTES}\n" + "SOCK_READ_BYTES == SOCK_WRITE_BYTES\n"); + int retval; + int e; + for (e=0; e 0) { + write(1, buf, bytes); + write(n_sockfd, buf, bytes); + } + + close(n_sockfd); + + /* Stop counting events */ + if (PAPI_stop_counters(values, NUM_EVENTS) != PAPI_OK) { + fprintf(stderr, "Error in PAPI_stop_counters\n"); + } + + if (!TESTS_QUIET) { + printf("----\n"); + for (e=0; e +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define MAX_EVENTS 48 + +int main (int argc, char **argv) +{ + int retval,cid,numcmp; + int EventSet = PAPI_NULL; + int code; + char event_names[MAX_EVENTS][PAPI_MAX_STR_LEN]; + int event_codes[MAX_EVENTS]; + long long event_values[MAX_EVENTS]; + int total_events=0; /* events added so far */ + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET 
variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Trying all appio events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidnum_native_events, cmpinfo->name); + } + + if ( strstr(cmpinfo->name, "appio") == NULL) { + continue; + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + /* Create and populate the EventSet */ + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset()", retval); + } + + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_names[total_events] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + if (!TESTS_QUIET) { + printf("Added event %s (code=%#x)\n", event_names[total_events], code); + } + event_codes[total_events++] = code; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + + } + + int fdin,fdout; + const char* infile = "/etc/group"; + printf("This program will read %s and write it to /dev/null\n", infile); + int bytes = 0; + char buf[1024]; + + retval = PAPI_add_events( EventSet, event_codes, total_events); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_add_events()", retval); + } + + retval = PAPI_start( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()", retval); + } + + fdin=open(infile, O_RDONLY); + if (fdin < 0) perror("Could not open file for reading: \n"); + fdout = open("/dev/null", O_WRONLY); + if (fdout < 0) perror("Could not open /dev/null for writing: \n"); + + while ((bytes = read(fdin, buf, 1024)) > 0) { + write(fdout, buf, bytes); + } + + retval = PAPI_stop( EventSet, event_values ); + if (retval != PAPI_OK) { + 
test_fail(__FILE__, __LINE__, "PAPI_stop()", retval); + } + close(fdin); + close(fdout); + + int i; + if (!TESTS_QUIET) { + for ( i=0; i +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + + +#define NUM_EVENTS 11 + +int main (int argc, char **argv) +{ + int i, retval; + int EventSet = PAPI_NULL; + char *event_name[NUM_EVENTS] = { + "READ_BYTES", + "READ_CALLS", + "READ_USEC", + "READ_EOF", + "READ_SHORT", + "READ_ERR", + "WRITE_BYTES", + "WRITE_CALLS", + "WRITE_USEC", + "WRITE_ERR", + "WRITE_SHORT" + }; + int event_code[NUM_EVENTS] = { 0, 0, 0, 0, 0, 0, 0, 0, 0}; + long long event_value[NUM_EVENTS]; + int total_events=0; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Appio events by name\n"); + } + + /* Map names to codes */ + for ( i=0; i 0) { + write(fdout, buf, bytes); + } + close(fdin); + close(fdout); + + retval = PAPI_stop( EventSet, event_value ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()", retval); + } + + if (!TESTS_QUIET) { + for ( i=0; i +#include +#include +#include +#include "papi.h" + +#define NUM_EVENTS 6 +static int Events[NUM_EVENTS]; +static const char* names[NUM_EVENTS] = {"READ_CALLS", "READ_BYTES","READ_USEC","WRITE_CALLS","WRITE_BYTES","WRITE_USEC"}; +static long long values[NUM_EVENTS]; + +__attribute__ ((constructor)) void my_init(void) { + //fprintf(stderr, "appio: constructor started\n"); + int version = PAPI_library_init (PAPI_VER_CURRENT); + if (version != PAPI_VER_CURRENT) { + fprintf(stderr, "PAPI_library_init version mismatch\n"); + exit(1); + } + else { + fprintf(stderr, "appio: PAPI library initialized\n"); + } + int retval; + int e; + for (e=0; efilesize are skipped. 
+ + Modified preadv code to vary the number of buffers as + necessary such that they will fit in min(MAXBUFFERSIZE,filesize). + This fixes problems where the number of buffers in + the i/o vector exceeded the size of mainbuffer. + + Added bzero for buffer when it is first malloc'd. This + ensures that it is initialized before use. + + Created a script (profile.fs) that runs a series of tests + to generate a "box" around common application variables + such as filesize, buffer size, buffer encachement, and + number of concurrent processes. This is intended to serve + as the "standard" filesystem profile. + + buffer reset to mainbuffer before each test loop + +V2.3 (kcollins): + + added -F option to write to specify pathnames for throughput + tests (allowing throughput tests to multiple filesystems). + +V2.4 (capps): + Changed preadv/pwritev to use a non-sequential access pattern. + Changed the version number. + Moved all user interface values to KB. This simplifies + the user interface. (consistent scaling) and it also + allows one to start with 512kb file. This is very important + since the first indirect block causes a significant + slowdown in the initial write cases. + +V2.5 (capps): + Re-structure and cleanup. + +V2.6 (kcollins) + Bug fix for the throughput tests. + +V2.7 (capps): + Added -o flag. This makes all file opens for writes + have the O_SYNC flag set. This makes all writes go + to disk before completion. This is useful for seeing + what the media can do without the buffer cache helping. + +V2.8 (capps): + Added -V flag. This turns on pattern verification. If + the user were to type: + -V 165 + Then bit pattern 0xa5 would be placed in every byte in the + buffer and when read back from buffer cache, or disk, + it will be verified to be correct. If it fails then + the error handler will specify the byte location of the + miscompare. + +V2.9 (capps): + Added fread/re-fread, fwrite/re-fwrite to list of tests. 
+ Added -E to allow the user to run pread and friends as an option. + +V2.10 (capps): + Added -R. This will generate Excel compatible files that + can then be imported into Excel and graphed. + Added support for 5 targets to the makefile. + Added -M This prints out the uname -a stuff about a machine. + Added -O This gives all results in operations/sec instead of KB/sec. + More code cleanup. Update comments. + +V2.11 (kcollins) + added -A. Auto mode with no crossover and read/write tests only + changed default record size to 64KB (from 512 bytes) + +V2.12 (capps) + Added shared memory barrier sync for throughput mode. This + provides much finer control over the actual timing + of the children. + Added mmap() for BSD (Convex) machines that do not + have System V shared memory. + Added two ways of showing throughput results. The second + method takes into consideration children that lag behind + due to slow devices, and gives results that are more accurate. + Cleanup of some tab problems in throughput results. + Cleanup of floating point output taking too much space. + Added -d to allow a variable delay coming out of the barrier + in the throughput tests. +V2.12 (kcollins) + added declaration for create_list to make ansi c compiles work + several fixes to some of the SPPUX 5.x make targets + added date run to banner (hope this doesn't break your scripts $-) + +V2.13 (capps) + Added "stone walling". During throughput tests, if one process + finishes then all others are sent a signal to tell them + to stop. (The parallel region has finished). This provides + better numbers for throughput. + Only bzero or fill min(reclen,CACHE_SIZE) this saves a bunch + of paging on workstations with small memory systems. + Fixed broken target in the makefile. + Note: use of -d is not advised. It makes the children not run + in parallel. +V2.14 (capps) + Bug fix to avoid anomaly in SPP-UX. In SPP-UX the filesystem + code preallocates meta-data to improve initial file writes. 
+ The first indirect block allocation was causing a block + of zeros to be written synchronously. In SPP-UX the filesystem + code preallocates zero filled blocks when the first writer + touches a filesystem after a sync. A pool of on disk zero'd + blocks are created asynchronously and handed out to writers + when they cross the boundary into the first level indirect + and would have had to stop and wait for the zero filled + block to be written. Iozone's testing methodology was not + allowing the OS to have any time to complete the async + pre-allocation and was not showing the speed up that real + applications would see. + +V2.15 (capps) + Improve throughput testing mode. + +V2.16 (capps) + Added -U option. This allows the filesystem to be unmounted + and remounted between tests. This guarantees that the buffer + cache is cold. + +V2.17 (capps) + Added -T option. This makes the throughput tests use + threads instead of processes. Currently using pthread_create(), + pthread_self(), and pthread_exit(). + Cleaned up file cleanup mechanism. Control C will now cause + all temp files to be deleted. Removed all signals used to + control sub-processes. + +V2.18 (capps) + Cleanup. Added read stride, read backwards to the throughput + tests. Various bug fixes + +V2.19 (capps) + Removed all calls to malloc() and all use of system V shared + memory. mmap() is much easier to deal with. As for malloc() + HP programs are very limited on the amount of malloc() space + and not nearly so constrained on mmap() memory. It was necessary + to move to mmap() since multiple threads all need buffers in + the processes address space. + Removed dependency on first thread being number 2. Iozone now + probes to find out what the thread library will return for + the first thread. This makes the switching thread libraries + much easier. + +V2.20 (capps) + Children now set stop_flag and shutdown all other children.There + is no further need to tell the parent to distribute the stop_flag. 
+ verify, purge, and osync are now supported in the throughput + tests. Fixed bug where pthreads stack size was causing + segmentation violation when purgeit() was called for buffers + that were greater than 256kb. + +V2.21 (capps) + Enhanced throughput reporting. Now provides: + Child throughput, Parent throughput, Minimum throughput for + any child in the group, Maximum throughput for any child in the + group, and Minimum transfer count. Due to stone walling + not all children write the full requested size. This + minimum transfer count provides the user with knowledge of + how much work was performed by the slowest child. + Added -C flag. This allows the user to see all of the transfer + counts for each child. Had to add system 5 shared memory back. + Linux does not support mmap(MAP_ANONYMOUS|MAP_SHARED). So it + must use SYSV shared memory to get sharing working. + +V2.22 (capps) + Made changes to make iozone work correctly on Linux on a + PC. Changes are just scaling down the test to fit on a + pc, and scaling down shared segments to < 16 Meg so it + can run on an Intel 386 class machine. + Added: -L # Set the processor cache line size in bytes. + Added: -S # Set the processor cache size in kbytes. + Removed spin wait in parent waiting for threads to + finish each throughput test. Code now uses thread_join(). + Fixed -O (operations/sec) mode to work in throughput tests. + +V2.23 (capps) + Close small timing hole where thread/process has set stop flag + and others are in a system call. The hole allowed threads/processes + to continue to increment work done after one had finished and + told the others to stop. The result was that the children would + report slightly high numbers as they were not truly parallel + at the finish line. Added random read throughput test. + Fixes for VxFS small extents being created by prime_zb() functions. + Provides more details about the throughput run. 
+ +V2.24 (capps) + Added support for -R (Excel chart generation) to the throughput + tests. Also added support for the -O (ops/sec) to the throughput + Excel chart. + +V2.25 (capps) + Added support for selecting which test to run. -i # + -i 0 -i 3 + will run write and read-backwards tests only. For a list + of the test numbers type iozone -h. +V2.26 (capps) + Added support for LARGE_FILES for the hpux-11.0 target. + + +V2.27 (capps) + All tests now verify one long word of data from each page + written/read to/from the file. This is to level the + playing field with systems that do not move data + when "read" or "write" is called, but instead just + map the file and perform the I/O when the address space + is touched. Benchmarks that do not validate the data + ,at least touch each page, do not measure the read/write + times just the map times. + Note: The -V option still verifies each byte of the buffer, + the default is now to verify one long from each page. + +V2.28 (capps) + Added support for benchmarking mmap() files. + Added more command line options. -B -G -D + B = Use mmap() files for the benchmark. + G = Use msync(MS_SYNC) for mmap files. + D = Use msync(MS_ASYNC) for mmap files. + +V2.29 (capps) + Bug fixes for: + Combination of running individual tests and mmap() files support. + Stride read bug that caused only portions of the total file to be + examined. + +V2.30 (capps) + Fixups for build under SPP-UX + +V2.31 (capps) + Fixups for build under Linux. + Added -j ### to support user setting the stride size for the + stride read benchmark. + +V2.32 (capps) + Add support for IRIX and IRIX64. + +V2.33 (capps) + Add support for POSIX async I/O benchmarking. Uses a library + to interface to POSIX async I/O model. The library provides + an extended async_read() interface. It takes the standard + calling options of read() but also allows the application to + perform read-ahead with a stride. 
(positive or negative) + and allows the user to specify how much read ahead to + perform. + Tested on HP-UX 11.0, Linux, SGI Origin. + +V2.34 (capps) + Added -k. This allows POSIX async I/O to utilize the buffer + specified and not to perform any bcopys. Fixes to make + multi-threadedness work on SGI Origin. + +V2.34 (capps) + Added [-k #]. This allows POSIX async I/O to utilize the buffer + specified and not to perform any bcopys. Fixes to make + multi-threadedness work on SGI Origin. + +V2.36 (capps) + Iozone is now a 64 bit application. It may be compiled for either + 64 bit or 32 bit machines. The makefile supports 64 and 32 bit + targets for machines that support 32 & 64 bit targets. + All version numbers are now automatically generated by + RCS. This is the last time we have to bump the version + number by hand. + + +----------------------------------------------------------------------------------- +Changed over to RCS source control here: +Version Numbers are reset at this point back to Version 1.1. +----------------------------------------------------------------------------------- + + +RCS file: iozone.c,v; Working file: iozone.c +head: 1.94 +locks: ; strict +access list: +symbolic names: +comment leader: " * " +total revisions: 94; selected revisions: 94 +description: +Initial rcs version of Iozone +---------------------------- +Revision 1.94 +date: 99/01/18 13:02:57; author: capps; state: Exp; lines added/del: 7/2 +Call msync if writer wants sync in timing and terminates early in multi thread test case. +---------------------------- +Revision 1.93 +date: 99/01/18 11:46:11; author: capps; state: Exp; lines added/del: 309/126 +Cleanup for include_flush and include_close for single and multi threaded operations. 
+---------------------------- +Revision 1.92 +date: 99/01/15 10:53:58; author: capps; state: Exp; lines added/del: 40/11 +Add include_close support for throughput testing +---------------------------- +Revision 1.91 +date: 98/12/07 09:26:22; author: capps; state: Exp; lines added/del: 43/24 +For Windows: Use the high resolution timers instead of timeofday(); +Fix a few casting problems. +---------------------------- +Revision 1.90 +date: 98/11/30 14:49:46; author: capps; state: Exp; lines added/del: 24/17 +Update the copyright and names and places +---------------------------- +Revision 1.89 +date: 98/10/30 09:04:51; author: capps; state: Exp; lines added/del: 1/2 +An extra close(fd) causes HP-UX to fail future unmounts... +---------------------------- +Revision 1.88 +date: 98/10/29 09:47:25; author: capps; state: Exp; lines added/del: 17/17 +Cleanup the help screen +---------------------------- +Revision 1.87 +date: 98/10/28 23:31:11; author: capps; state: Exp; lines added/del: 7/6 +Spelling error fix. +---------------------------- +Revision 1.86 +date: 98/10/14 11:21:50; author: capps; state: Exp; lines added/del: 23/68 +Unified the time method to only have 2 ways to get time. +---------------------------- +Revision 1.85 +date: 98/10/14 09:22:09; author: capps; state: Exp; lines added/del: 91/91 +Added code to remove the latency of gettimeofday() from the file performance measurements. +---------------------------- +Revision 1.84 +date: 98/10/12 11:44:50; author: capps; state: Exp; lines added/del: 107/8 +Add time resolution output, and fix the divide by zero when the time in +a system call turns out to be Zero. This will introduce distortion for machines +that have very fast system calls and very poor time resolution. Windows +has a 50 Milli second resolution on gettimeofday(). So... to fix it +all calls that take less than 50 Milli seconds will be rounded up to +cost 50 milliseconds. 
+---------------------------- +Revision 1.83 +date: 98/10/06 09:58:16; author: capps; state: Exp; lines added/del: 46/2 +Add support for Windows build +---------------------------- +Revision 1.82 +date: 98/09/23 09:48:02; author: capps; state: Exp; lines added/del: 2/2 +Fix bug where -i # was leaving tmp files after throughput test. +---------------------------- +Revision 1.81 +date: 98/09/23 09:41:12; author: capps; state: Exp; lines added/del: 1/3 +Remove debug printf +---------------------------- +Revision 1.80 +date: 98/09/23 09:29:01; author: capps; state: Exp; lines added/del: 23/1 +Add my_nap(). This allows the threads to switch processors to their +new bound processor before performing any work. +---------------------------- +Revision 1.79 +date: 98/09/22 11:57:20; author: capps; state: Exp; lines added/del: 8/8 +Change xx back into an int so the modulo will work better. +---------------------------- +Revision 1.78 +date: 98/09/18 16:27:05; author: capps; state: Exp; lines added/del: 18/15 +Remove create in rewrite path. +---------------------------- +Revision 1.77 +date: 98/08/17 16:44:06; author: capps; state: Exp; lines added/del: 23/1 +Fixes for Solaris and the new processor bind feature. +---------------------------- +Revision 1.76 +date: 98/08/17 16:17:45; author: capps; state: Exp; lines added/del: 1/2 +Remove debug code. +---------------------------- +Revision 1.75 +date: 98/08/17 16:16:15; author: capps; state: Exp; lines added/del: 92/5 +Add support for binding procs/threads to cpus. +---------------------------- +Revision 1.74 +date: 98/08/07 16:51:41; author: capps; state: Exp; lines added/del: 4/3 +Add fsync to the fwrite test case when the user specifies -e +---------------------------- +Revision 1.73 +date: 98/08/07 16:47:38; author: capps; state: Exp; lines added/del: 178/208 +Add -c and -e to allow closes and fsyncs to be inside the timing calculations. 
+---------------------------- +Revision 1.72 +date: 98/08/06 22:40:15; author: capps; state: Exp; lines added/del: 9/1 +Add setvbuf to fwrite and fread tests so that the internal fwrite and fread +buffer size is the same as the record size. This is what a well tuned application +would do. +---------------------------- +Revision 1.71 +date: 98/08/06 09:03:06; author: capps; state: Exp; lines added/del: 2/3 +Fix fsync filename problem in fwrite_perf_test +---------------------------- +Revision 1.70 +date: 98/08/05 18:06:41; author: capps; state: Exp; lines added/del: 6/2 +Add fsync after fwrite test case so the fread will start with a +clean buffer cache and no writes in progress. +---------------------------- +Revision 1.69 +date: 98/08/03 10:45:49; author: capps; state: Exp; lines added/del: 3/3 +Bug fix for -V option not filling the entire buffer. +---------------------------- +Revision 1.68 +date: 98/07/30 22:11:11; author: capps; state: Exp; lines added/del: 2/3 +Fix for solaris +---------------------------- +Revision 1.67 +date: 98/07/30 22:08:19; author: capps; state: Exp; lines added/del: 2/2 +Fix for solaris +---------------------------- +Revision 1.66 +date: 98/07/30 22:05:02; author: capps; state: Exp; lines added/del: 43/15 +Add support for Solaris +---------------------------- +Revision 1.65 +date: 98/07/01 14:19:19; author: capps; state: Exp; lines added/del: 80/82 +Move end_async inside the timing loops as in async I/O it counts. +---------------------------- +Revision 1.64 +date: 98/06/16 17:04:36; author: capps; state: Exp; lines added/del: 13/2 +Correct problem where user specifies pread tests on hpux... which does not +support these operations. The test now prints an error message and exits. +---------------------------- +Revision 1.63 +date: 98/06/16 16:54:22; author: capps; state: Exp; lines added/del: 1/2 +Remove exit from auto_test. This allows the message "iozone test complete" to +be printed when in auto test mode. 
+---------------------------- +Revision 1.62 +date: 98/06/10 10:54:28; author: capps; state: Exp; lines added/del: 175/173 +All exit()s now have a unique exit value. +---------------------------- +Revision 1.61 +date: 98/05/18 13:34:03; author: capps; state: Exp; lines added/del: 17/18 +Move .dat file descriptors to global data. Needed to prevent re-opens. +---------------------------- +Revision 1.60 +date: 98/05/18 13:24:22; author: capps; state: Exp; lines added/del: 6/3 +Bug fix. Prevents re-opening .dat files when in auto mode. +---------------------------- +Revision 1.59 +date: 98/05/08 13:03:02; author: capps; state: Exp; lines added/del: 21/3 +Enhance throughput tests to follow the -i test number to run options. +---------------------------- +Revision 1.58 +date: 98/05/07 14:15:49; author: capps; state: Exp; lines added/del: 109/39 +Make VXFS a define in the make command. This makes moving to other targets +easier. It removes the binding of HPUX and VXFS. +Also, Added -Q to support offset/latency file generation for later use +as inputs to plot program. +---------------------------- +Revision 1.57 +date: 98/05/06 15:09:43; author: capps; state: Exp; lines added/del: 100/27 +Add -N to provide results in microseconds per operation. +---------------------------- +Revision 1.56 +date: 98/05/05 13:23:29; author: capps; state: Exp; lines added/del: 3/10 +If the user specifies -i 0 then run both write and rewrite tests. +---------------------------- +Revision 1.55 +date: 98/04/30 15:19:02; author: capps; state: Exp; lines added/del: 1/1 +No change +---------------------------- +Revision 1.54 +date: 98/04/30 15:09:58; author: capps; state: Exp; lines added/del: 2/2 +Unlink the vxfstest when the test fails. +---------------------------- +Revision 1.53 +date: 98/04/30 13:07:21; author: capps; state: Exp; lines added/del: 7/5 +Cleanup help output. 
+---------------------------- +Revision 1.52 +date: 98/04/30 12:58:29; author: capps; state: Exp; lines added/del: 21/4 +Add async I/O with no bcopy to throughput tests. +---------------------------- +Revision 1.51 +date: 98/04/29 15:29:29; author: capps; state: Exp; lines added/del: 5/1 +Fixes so it will compile on the SGI Origin. +---------------------------- +Revision 1.50 +date: 98/04/29 11:57:58; author: capps; state: Exp; lines added/del: 5/1 +Do not need to limit async ops. Fix is in libasync.c +---------------------------- +Revision 1.49 +date: 98/04/29 10:45:19; author: capps; state: Exp; lines added/del: 61/3 +Add async I/O to throughput testing for writes +---------------------------- +Revision 1.48 +date: 98/04/28 11:57:13; author: capps; state: Exp; lines added/del: 5/1 +Limit max async operations to 60. Beyond this there be dragons. +---------------------------- +Revision 1.47 +date: 98/04/28 10:16:09; author: capps; state: Exp; lines added/del: 108/21 +Completed support for no_bcopy POSIX async I/O in the async_write_no_copy path. +This allows write tests to perform async I/O with buffers released when +the write is completed. +---------------------------- +Revision 1.46 +date: 98/04/27 16:58:38; author: capps; state: Exp; lines added/del: 43/10 +Add aio_write() to the write and re-write tests. This provides +POSIX async I/O for the those tests. +---------------------------- +Revision 1.45 +date: 98/04/25 09:53:39; author: capps; state: Exp; lines added/del: 3/2 +direct_flag is an int. Was a char in one place and +an int in another. +---------------------------- +Revision 1.44 +date: 98/04/25 09:17:42; author: capps; state: Exp; lines added/del: 27/15 +More support for vx_direct support in the write path +---------------------------- +Revision 1.43 +date: 98/04/24 16:33:44; author: capps; state: Exp; lines added/del: 115/77 +Move VX_DIRECT to libasync. But keep the VX_DIRECT support also +in iozone. 
So one can use VX_DIRECT with and without async I/O +---------------------------- +Revision 1.42 +date: 98/04/24 16:20:34; author: capps; state: Exp; lines added/del: 127/60 +Move VX_DIRECT to the libasync module. +---------------------------- +Revision 1.41 +date: 98/04/24 15:50:54; author: capps; state: Exp; lines added/del: 190/7 +Add support for VxFS VX_DIRECT +Idea is to use VX_DIRECT and POSIX async I/O together +---------------------------- +Revision 1.40 +date: 98/04/22 16:38:25; author: capps; state: Exp; lines added/del: 5/5 +Sppux wants ail_gettimeofday variables to be unsigned int. +---------------------------- +Revision 1.39 +date: 98/04/22 16:19:50; author: capps; state: Exp; lines added/del: 7/3 +Fix -M option not printing cleanly +Fix -R in 32 bit mode printing garbage. +---------------------------- +Revision 1.38 +date: 98/04/22 15:56:02; author: capps; state: Exp; lines added/del: 1/1 +Change to only display revision not full header. +---------------------------- +Revision 1.37 +date: 98/04/22 15:52:19; author: capps; state: Exp; lines added/del: 1/1 +Add RCS Header to support versioning. +---------------------------- +Revision 1.36 +date: 98/04/22 15:38:26; author: capps; state: Exp; lines added/del: 1/1 +fix to bcopy() third arg needs to be size_t for 32 bit mode. +---------------------------- +Revision 1.35 +date: 98/04/22 09:09:24; author: capps; state: Exp; lines added/del: 17/17 +Bug fixes for 64 bit mode on IRIX, and addition +on the internal inuse queue to ensure that the +internal struct_cache_ent structures are not released +too early when doing direct I/O (async_read_no_copy). +---------------------------- +Revision 1.34 +date: 98/04/21 09:31:02; author: capps; state: Exp; lines added/del: 4/0 +Fix to eliminate hidden (dot) files that iozone was creating +in throughput mode. All files are now visible with ls. 
+---------------------------- +Revision 1.33 +date: 98/04/21 08:30:35; author: capps; state: Exp; lines added/del: 7/1 +Have Iozone print the compile model used. +---------------------------- +Revision 1.32 +date: 98/04/20 18:46:02; author: capps; state: Exp; lines added/del: 49/20 +Fixes for 32 bit mode. +---------------------------- +Revision 1.31 +date: 98/04/20 16:57:29; author: capps; state: Exp; lines added/del: 8/8 +make sure malloc is called with (size_t) parameter. +---------------------------- +Revision 1.30 +date: 98/04/20 16:05:08; author: capps; state: Exp; lines added/del: 933/757 +Iozone now 64 bit application +---------------------------- +Revision 1.29 +date: 98/04/20 12:32:25; author: capps; state: Exp; lines added/del: 4/4 +Move msync to before munmap so file gets written. +---------------------------- +Revision 1.28 +date: 98/04/20 10:21:30; author: capps; state: Exp; lines added/del: 2/2 +Minor fix for -O flag and -B not working smoothly together. +---------------------------- +Revision 1.27 +date: 98/04/20 10:17:19; author: capps; state: Exp; lines added/del: 0/0 +No change + +---------------------------- +Revision 1.26 +date: 98/04/19 15:11:07; author: capps; state: Exp; lines added/del: 5/5 +Remove prime_zbfill. It causes problems with mmap files. +---------------------------- +Revision 1.25 +date: 98/04/16 15:24:50; author: capps; state: Exp; lines added/del: 228/70 +-H is Nastran async I/O with bcopy +-k is async I/O without any bcopys +---------------------------- +Revision 1.24 +date: 98/04/15 16:48:30; author: capps; state: Exp; lines added/del: 22/4 +fix to make build on 9.05 and 10.1 +---------------------------- +Revision 1.23 +date: 98/04/15 15:36:55; author: capps; state: Exp; lines added/del: 9/9 +Cleanup some compiler warnings about un-initialized variables. They +are not really un-initialized and used but it does generate +compiler warnings on some machines. 
+---------------------------- +Revision 1.22 +date: 98/04/15 15:32:56; author: capps; state: Exp; lines added/del: 7/7 +Need to free the dummyname space a bit later. +---------------------------- +Revision 1.21 +date: 98/04/15 14:37:05; author: capps; state: Exp; lines added/del: 27/13 +Fix to use smaller stack size in thread_ routines. It was causing +the SGI to drop core in throughput tests. +---------------------------- +Revision 1.20 +date: 98/04/14 17:01:19; author: capps; state: Exp; lines added/del: 27/16 +Fix a memory leak. In multi_throughput testing shmalloc was getting called +for each iteration. This is not needed and causes much too much +shm to be allocated. Not broken but definitely a pig. +---------------------------- +Revision 1.19 +date: 98/04/14 15:19:15; author: capps; state: Exp; lines added/del: 2/0 +When -k is specified alone this will turn on the POSIX async I/O and +set depth to 0. +---------------------------- +Revision 1.18 +date: 98/04/14 15:00:18; author: capps; state: Exp; lines added/del: 21/20 +Fixes to make multi-threaded version run on the SGI Origin. +---------------------------- +Revision 1.17 +date: 98/04/14 11:55:44; author: capps; state: Exp; lines added/del: 17/11 +Add support for -k. When using POSIX async I/O use the +buffer specified and do not perform any bcopys. +---------------------------- +Revision 1.16 +date: 98/04/13 10:22:18; author: capps; state: Exp; lines added/del: 27/380 +Add libasync library support +---------------------------- +Revision 1.15 +date: 98/04/11 12:09:25; author: capps; state: Exp; lines added/del: 1/0 +Fix memory leak. Now calls del_cache when ever any calls to async_end happen. +This will ensure that there are no outstanding I/Os on the cache that +have not been canceled.
+---------------------------- +Revision 1.14 +date: 98/04/11 11:57:10; author: capps; state: Exp; lines added/del: 632/47 +Add support for POSIX async I/O testing +---------------------------- +Revision 1.13 +date: 98/03/31 14:30:15; author: capps; state: Exp; lines added/del: 44/6 +Fix support for bsd4_2 and ConvexOS +---------------------------- +Revision 1.12 +date: 98/03/31 11:26:34; author: capps; state: Exp; lines added/del: 2/2 +Bump version number to 2.32 +---------------------------- +Revision 1.11 +date: 98/03/31 11:20:51; author: capps; state: Exp; lines added/del: 70/6 +Add support for SGI IRIX and SGI IRIX64 +---------------------------- +Revision 1.10 +date: 98/03/27 14:00:47; author: capps; state: Exp; lines added/del: 15/20 +Put the bcopy back. It is more representative +of what the real application will do. +---------------------------- +Revision 1.9 +date: 98/03/27 13:25:02; author: capps; state: Exp; lines added/del: 40/14 +Improved mmap file support. Now only have 1 long word from +each page touched. This eliminates the overhead of bcopy +dominating the results. It also is performing the same +work that the non-mmap version does with verify(). +---------------------------- +Revision 1.8 +date: 98/03/27 10:41:13; author: capps; state: Exp; lines added/del: 10/4 +Bug fix. Frewrite was truncating the file. This fix +ensures that the Frewrite test opens without trunc. +---------------------------- +Revision 1.7 +date: 98/03/27 10:16:41; author: capps; state: Exp; lines added/del: 3/3 +Fix report to specify stride size as a function of reclen. +It did not make sense to output kbytes as the value changes +when in auto mode to match the current record length. +---------------------------- +Revision 1.6 +date: 98/03/26 15:28:15; author: capps; state: Exp; lines added/del: 16/8 +Add support for -j option. This +allows the user to specify the stride size for +the strided file access benchmark.
+---------------------------- +Revision 1.5 +date: 98/03/25 15:27:01; author: capps; state: Exp; lines added/del: 1/1 +Fixup help screen to reflect new options +---------------------------- +Revision 1.4 +date: 98/03/25 15:21:23; author: capps; state: Exp; lines added/del: 1/1 +Change the revision number +---------------------------- +Revision 1.3 +date: 98/03/25 15:20:28; author: capps; state: Exp; lines added/del: 16/1 +Fixup support for Linux +---------------------------- +Revision 1.2 +date: 98/03/25 13:58:05; author: capps; state: Exp; lines added/del: 16/3 +Bug fixes for SPP-UX +---------------------------- +Revision 1.1 +date: 98/03/25 10:43:45; author: capps; state: Exp; +Initial revision +============================================================================= + +RCS file: libasync.c,v; Working file: libasync.c +head: 1.39 +locks: ; strict +access list: +symbolic names: +comment leader: " * " +total revisions: 39; selected revisions: 39 +description: +Initial version of POSIX async I/O library interface. +---------------------------- +Revision 1.39 +date: 98/07/30 22:05:21; author: capps; state: Exp; lines added/del: 3/1 +Add support for Solaris +---------------------------- +Revision 1.38 +date: 98/07/07 13:00:39; author: capps; state: Exp; lines added/del: 1/11 +Remove extra bcopy in the async_write_no_bcopy path. +---------------------------- +Revision 1.37 +date: 98/06/11 09:47:58; author: capps; state: Exp; lines added/del: 3/3 +Fix syntax error for IRIX +---------------------------- +Revision 1.36 +date: 98/06/10 10:56:55; author: capps; state: Exp; lines added/del: 10/10 +All exit()s now have a unique exit value. +---------------------------- +Revision 1.35 +date: 98/05/07 14:17:20; author: capps; state: Exp; lines added/del: 2/2 +Make VXFS a define in the make command. This makes moving to other targets +easier. It removes the binding of HPUX and VXFS. 
+Also, Added -Q to support offset/latency file generation for later use +as inputs to plot program. +---------------------------- +Revision 1.34 +date: 98/04/30 15:19:54; author: capps; state: Exp; lines added/del: 1/3 +Remove debug code that breaks 64 bit mode compiled code. +---------------------------- +Revision 1.33 +date: 98/04/30 13:09:13; author: capps; state: Exp; lines added/del: 2/2 +Make retval an int so it can be checked for less than zero. +---------------------------- +Revision 1.32 +date: 98/04/29 16:49:34; author: capps; state: Exp; lines added/del: 5/11 +If overshooting on number of asyncs then terminate the loop and +let the next time through pick up the I/O. +---------------------------- +Revision 1.31 +date: 98/04/29 16:37:49; author: capps; state: Exp; lines added/del: 3/3 +Remove debug code +---------------------------- +Revision 1.30 +date: 98/04/29 15:29:48; author: capps; state: Exp; lines added/del: 3/1 +Fixes so it will compile on the SGI Origin. +---------------------------- +Revision 1.29 +date: 98/04/29 11:56:27; author: capps; state: Exp; lines added/del: 36/10 +Work around for bug in POSIX async I/O library +---------------------------- +Revision 1.28 +date: 98/04/29 11:04:26; author: capps; state: Exp; lines added/del: 1/2 +Remove debug code +---------------------------- +Revision 1.27 +date: 98/04/29 11:02:54; author: capps; state: Exp; lines added/del: 54/27 +Added resource shortage paths. +---------------------------- +Revision 1.26 +date: 98/04/28 18:12:51; author: capps; state: Exp; lines added/del: 1/3 +Add async I/O to the throughput tests +---------------------------- +Revision 1.25 +date: 98/04/28 17:12:40; author: capps; state: Exp; lines added/del: 3/1 +fix wait_for_ routine to reset w_tail if item being removed is also the tail. +---------------------------- +Revision 1.24 +date: 98/04/28 16:14:06; author: capps; state: Exp; lines added/del: 1/3 +bug fix. 2 calls to malloc for aligned memory. 
+---------------------------- +Revision 1.23 +date: 98/04/28 11:57:39; author: capps; state: Exp; lines added/del: 37/13 +Limit max async operations to 60. Beyond this there be dragons. +---------------------------- +Revision 1.22 +date: 98/04/28 10:17:22; author: capps; state: Exp; lines added/del: 127/42 +Completed support for no_bcopy POSIX async I/O in the async_write_no_copy path. +This allows write tests to perform async I/O with buffers released when +the write is completed. +---------------------------- +Revision 1.21 +date: 98/04/27 16:59:14; author: capps; state: Exp; lines added/del: 246/9 +Add aio_write() to the write and re-write tests. This provides +POSIX async I/O for the those tests. +---------------------------- +Revision 1.20 +date: 98/04/24 16:20:55; author: capps; state: Exp; lines added/del: 15/3 +Move VX_DIRECT to the libasync module. +---------------------------- +Revision 1.19 +date: 98/04/24 15:50:13; author: capps; state: Exp; lines added/del: 42/11 +Add support for VxFS VX_DIRECT +Idea is to use VX_DIRECT and POSIX async I/O together +---------------------------- +Revision 1.18 +date: 98/04/24 12:36:42; author: capps; state: Exp; lines added/del: 13/5 +Fix some error printfs to match the size of the off_t. +---------------------------- +Revision 1.17 +date: 98/04/24 12:18:11; author: capps; state: Exp; lines added/del: 7/7 +Fixes for LP64 mode. off_t changed to off64_t +---------------------------- +Revision 1.16 +date: 98/04/24 09:33:32; author: capps; state: Exp; lines added/del: 275/35 +Add comments and fix for LP64 model on hpux. +---------------------------- +Revision 1.15 +date: 98/04/23 16:58:06; author: capps; state: Exp; lines added/del: 167/13 +Make libasync large file aware. +---------------------------- +Revision 1.14 +date: 98/04/22 15:58:45; author: capps; state: Exp; lines added/del: 1/1 +Change version to only display rcs version id. 
+---------------------------- +Revision 1.13 +date: 98/04/22 15:52:54; author: capps; state: Exp; lines added/del: 1/2 +Add RCS version support +---------------------------- +Revision 1.12 +date: 98/04/22 11:39:35; author: capps; state: Exp; lines added/del: 52/8 +Add firewall to prevent in flight changes to the aiocb structure. +---------------------------- +Revision 1.11 +date: 98/04/22 09:10:36; author: capps; state: Exp; lines added/del: 57/19 +Bug fixes for 64 bit mode on IRIX, and addition +on the internal inuse queue to insure that the +internal struct_cache_ent structures are not released +too early when doing direct I/O (async_read_no_copy). +---------------------------- +Revision 1.10 +date: 98/04/21 09:34:14; author: capps; state: Exp; lines added/del: 18/10 +Improve error messages. +---------------------------- +Revision 1.9 +date: 98/04/20 16:06:21; author: capps; state: Exp; lines added/del: 53/50 +Iozone now 64 bit application +---------------------------- +Revision 1.8 +date: 98/04/20 10:17:59; author: capps; state: Exp; lines added/del: 0/0 +no change +---------------------------- +Revision 1.7 +date: 98/04/17 08:49:16; author: capps; state: Exp; lines added/del: 15/2 +Optimization on async operations. Just add one to the end +of the list if the list already has more than one item. +---------------------------- +Revision 1.6 +date: 98/04/17 00:00:30; author: capps; state: Exp; lines added/del: 10/2 +Make cancel keep trying until it succeeds. Otherwise transfers after the buffer +is freed can occur. +---------------------------- +Revision 1.5 +date: 98/04/16 16:49:28; author: capps; state: Exp; lines added/del: 49/4 +Improve error handling when running machine out of memory. +---------------------------- +Revision 1.4 +date: 98/04/16 15:26:41; author: capps; state: Exp; lines added/del: 118/28 +added async_read_no_copy(). 
This allows the application to let the +library specify the destination buffer and perform the async I/O +without unwanted bcopys. +---------------------------- +Revision 1.3 +date: 98/04/14 11:56:23; author: capps; state: Exp; lines added/del: 36/10 +Add support for -k. When using POSIX async I/O use +the buffer specified and do not perform any bcopys. +---------------------------- +Revision 1.2 +date: 98/04/13 10:35:20; author: capps; state: Exp; lines added/del: 5/7 +Fixup for error path to propagate any small transfers. +---------------------------- +Revision 1.1 +date: 98/04/13 10:21:23; author: capps; state: Exp; +Initial revision +============================================================================= + +RCS file: makefile,v; Working file: makefile +head: 1.20 +locks: ; strict +access list: +symbolic names: +comment leader: "# " +total revisions: 20; selected revisions: 20 +description: +Initial version of makefile +---------------------------- +Revision 1.20 +date: 98/10/06 10:36:22; author: capps; state: Exp; lines added/del: 87/28 +Add comments to describe each targets capabilities. +---------------------------- +Revision 1.19 +date: 98/10/06 09:59:18; author: capps; state: Exp; lines added/del: 3/3 +Fix spelling error +---------------------------- +Revision 1.18 +date: 98/10/06 09:58:29; author: capps; state: Exp; lines added/del: 18/3 +Add support for Windows build +---------------------------- +Revision 1.17 +date: 98/08/17 16:44:56; author: capps; state: Exp; lines added/del: 2/2 +Fixes for Solaris +---------------------------- +Revision 1.16 +date: 98/07/30 22:05:33; author: capps; state: Exp; lines added/del: 20/1 +Add support for Solaris +---------------------------- +Revision 1.15 +date: 98/05/07 14:17:26; author: capps; state: Exp; lines added/del: 13/13 +Make VXFS a define in the make command. This makes moving to other targets +easier. It removes the binding of HPUX and VXFS.
+Also, Added -Q to support offset/latency file generation for later use +as inputs to plot program. +---------------------------- +Revision 1.14 +date: 98/04/22 16:02:42; author: capps; state: Exp; lines added/del: 2/0 +Add RCS version ids. +---------------------------- +Revision 1.13 +date: 98/04/22 13:58:54; author: capps; state: Exp; lines added/del: 6/6 +For now only build the SGI targets in 32 bit mode. +There is some problem with POSIX async I/O and 64 bit apps. +---------------------------- +Revision 1.12 +date: 98/04/22 12:08:25; author: capps; state: Exp; lines added/del: 3/3 +Let the IRIX64 target default to its default compile mode. +---------------------------- +Revision 1.11 +date: 98/04/22 09:10:54; author: capps; state: Exp; lines added/del: 3/3 +Bug fixes for 64 bit mode on IRIX, and addition +on the internal inuse queue to insure that the +internal struct_cache_ent structures are not released +too early when doing direct I/O (async_read_no_copy). +---------------------------- +Revision 1.10 +date: 98/04/21 09:29:57; author: capps; state: Exp; lines added/del: 17/17 +Improve dependencies +---------------------------- +Revision 1.9 +date: 98/04/20 16:05:48; author: capps; state: Exp; lines added/del: 58/29 +Iozone now 64 bit application +---------------------------- +Revision 1.8 +date: 98/04/20 10:17:44; author: capps; state: Exp; lines added/del: 0/0 +*** empty log message *** +---------------------------- +Revision 1.7 +date: 98/04/16 16:50:11; author: capps; state: Exp; lines added/del: 6/6 +Have the SGI build 32 bit app too. +---------------------------- +Revision 1.6 +date: 98/04/15 16:48:09; author: capps; state: Exp; lines added/del: 5/5 +Fix to make build on 9.05 and 10.1 +---------------------------- +Revision 1.5 +date: 98/04/13 10:22:34; author: capps; state: Exp; lines added/del: 14/6 +Add support for libasync library. 
+---------------------------- +Revision 1.4 +date: 98/04/11 11:57:34; author: capps; state: Exp; lines added/del: 10/10 +Add support for POSIX async I/O testing +---------------------------- +Revision 1.3 +date: 98/03/31 11:21:34; author: capps; state: Exp; lines added/del: 24/0 +Add support for SGI IRIX and SGI IRIX64 +---------------------------- +Revision 1.2 +date: 98/03/25 13:59:18; author: capps; state: Exp; lines added/del: 21/9 +Fixes for SPP-UX +---------------------------- +Revision 1.1 +date: 98/03/25 10:48:21; author: capps; state: Exp; +Initial revision +============================================================================= +Added support for BIFF file output. Iozone can now write Excel spreadsheet +format. This allows one to directly access the Excel spreadsheet without +needing to import with tab and space delimited method. + +Added support for large files and threads for Solaris. + +Add support for FreeBSD + +Change default stride value to avoid nodalization with various spindle counts. +============================================================================= +Version 3.3: +Changed name of processor_bind to ioz_processor_bind to avoid collision +with SVR5.4.MP shared library. +Removed leading tab on an #ifdef that caused some compilers to get sick. +============================================================================= +Version 3.4: +Add support for OpenBSD +============================================================================= +Version 3.6: +Lots of code cleanup. +Added support for OSF1 on the DEC Alpha. +============================================================================= +Version 3.7: +Add support for OSF Version 4. +Add timer resolution problem detection. + +============================================================================= +Add support for OSF Version 5. +============================================================================= +Version 3.13: +Add support for Linux to use pthreads.
+ +============================================================================= +Version 3.16: +============================================================================= +Add support for Netbsd +Add support for Largefiles and Async I/O to Linux target +============================================================================= +Version 3.17: +============================================================================= +Removed small model for Linux. In the past Iozone was forced to +use a small model for testing Linux as the normal load caused +Redhat to panic. Redhat users have told me that the system now +works fine with the normal load. They have tested Redhat 6.1 and +it no longer panics. +============================================================================= +Version 3.18: +============================================================================= +Add support for BSDI. Base, largefiles, pthread. No async I/O +============================================================================= +Revision 3.19 +============================================================================= +date: 2000/03/08 14:47:21; author: capps; state: Exp; lines added/del: 4/1 +Add support for getpagesize. This is used when available. +============================================================================= +Revision 3.20 +============================================================================= +date: 00/04/01 11:04:59; author: capps; state: Exp; lines added/del: 2/2 +Fix for multiple filenames and range of threads being used. +============================================================================= +Revision 3.21 +============================================================================= +date: 00/04/01 11:10:54; author: capps; state: Exp; lines added/del: 3/1 +SPPUX does not have getpagesize... 
+============================================================================= +Revision 3.22 +============================================================================= +Add support for Linux-ia64 +Add support for mmap & normal file I/O mixing. +============================================================================= +Revision 3.23 +Fixups for IBM AIX. +============================================================================= +Revision 3.24 +Fixups for BSD 2.7 (New release of BSD that supports O_SYNC) +============================================================================= +Revision 3.27 +Fixups for Cygnus compiler changes. (Windows targets). With this +change Iozone will compile with at least 2 versions of the Cygnus +compilers. +============================================================================= +Revision 3.28 +============================================================================= +Add support for reading and writing while holding lockf() on the file. +This turns out to be important aspect of NFS benchmarking. +============================================================================= +Revision 3.29 +============================================================================= +Change calls to lockf() to calls to fcntl(). This is more portable. +============================================================================= +Revision 3.30 +============================================================================= +Add support for variable compute cycle time before each I/O +operation. This allows one to more accurately represent +a specific application that is doing compute/read/compute/read +style operations. +============================================================================= +Revision 3.30 through 3.37 +============================================================================= +Add support for read and write telemetry files.
+============================================================================= +Revision 3.40 +============================================================================= +Code cleanup for popen() usage in -M path. +============================================================================= +Revision 3.41 +============================================================================= +Bug fix for ops/sec in rewrite throughput testing. +Added average throughput to output in throughput mode. +============================================================================= +Revision 3.42 +============================================================================= +Bug fix for read and re-read. Usage of un-initialized variable that +caused results to be wrong. +============================================================================= +Revision 3.43 +============================================================================= +Add support for latency plot data for throughput testing. +Each child thread/process gets its own data file. +============================================================================= +Revision 3.44 +============================================================================= +Enhance compatibility of multi-thread/proc latency offsets with +telemetry file support. +============================================================================= +Revision 3.45 through 3.48 +============================================================================= +Added latency/offset plot data files for all throughput tests. +============================================================================= +Revision 3.49 +============================================================================= +Fixed compile warning for Linux off64_t redefinition. +Add Solaris2.6 target with simple build. 
+============================================================================= +Revision 3.50 +============================================================================= +Added support for openbsd-threads +Cleanup for page size foo. +============================================================================= +Revision 3.51, 3.52, 3.53 +============================================================================= +Cleanup for new random write testing in throughput mode. +Improve perror handling. +============================================================================= +Revision 3.54 +============================================================================= +Add -g maxfilesize so people will not have to edit the source to +test files bigger than 512 Mbytes. +============================================================================= +Revision 3.55 +============================================================================= +Supports -n and -g to set the min and max file sizes to be used for +an auto mode run. +============================================================================= +Revision 3.56 +============================================================================= +Added support for SCO Unixware SVR5 with gcc compiler +============================================================================= +Revision 3.57 +============================================================================= +Fixed bug where file locking was not being used when +_LARGE_FILE64_SOURCE was defined in read_perf_test. +============================================================================= +Revision 3.58 +============================================================================= +Added -z option. This is to be used with the -a option. It +provides more complete testing for small record sizes +when the file sizes are very large. +Fixed -a so that the cross-over mechanism works correctly. 
+============================================================================= +Revision 3.59 +============================================================================= +Fix a bug where the user specified -R -s but did not specify +-a or -r. This caused the Excel report to print a bunch +of zeros. +============================================================================= +Revision 3.60 +============================================================================= +Fix headers in the Excel output when cross over kicks in. +============================================================================= +Revision 3.61 +============================================================================= +Added -y and -q to set record size range +Added command line to output +============================================================================= +Revision 3.62 +============================================================================= +Put auto cross over back to 16 Meg +============================================================================= +Revision 3.63 +============================================================================= +Minor code cleanups for error messages +============================================================================= +Revision 3.64 +============================================================================= +Re-organize the help listing. +============================================================================= +Revision 3.65 +============================================================================= +Add labels to the latency/offset output files. +============================================================================= +Revision 3.66 +============================================================================= +Added Randy Dunlap to the list of contributors. Thanks Randy !! 
+============================================================================= +Revision 3.67 +============================================================================= +Fix labels when using -R and -i options together. +============================================================================= +Revision 3.68 +============================================================================= +Code cleanup. No functionality changes. +============================================================================= +Revision 3.69 +============================================================================= +Prevent mixed modes. Auto and throughput. +Added support for the Plus extended options. +============================================================================= +Revision 3.70 +============================================================================= +Added support for -+u option. Cpu utilization. +============================================================================= +Revision 3.71 +============================================================================= +Added comment for the support for -+u option. Cpu utilization. +============================================================================= +Revision 3.72 +============================================================================= +Added network testing mode. -+m (Experimental) Tested: Linux, HP-UX +============================================================================= +Revision 3.73 +============================================================================= +Added -xflag support for distributed mode. +Handle interrupts when in distributed mode. 
+============================================================================= +Revision 3.74 +============================================================================= +Add default for REMOTE_SHELL +============================================================================= +Revision 3.75 +============================================================================= +Code cleanup. +============================================================================= +Revision 3.76 +============================================================================= +Portability change for shmat(). +Added an example of client_list file to the distribution. +============================================================================= +Revision 3.77 +============================================================================= +Disable CPU utilization in distributed mode. +Bug fix for CPU utilization in normal mode. +============================================================================= +Revision 3.78 +============================================================================= +Fix compatibility with AIX for shmat() +============================================================================= +Revision 3.79 +============================================================================= +Fix throughput labels when user is selecting specific tests with -i option. +============================================================================= +Revision 3.80 +============================================================================= +Remove dependency on min() and max(). They are not portable. +============================================================================= +Revision 3.81 +============================================================================= +Changes for 64bit architectures. Brad Smith. OpenBSD.
+============================================================================= +Revision 3.83 +============================================================================= +Add -+m cluster option to the help list and the list of options. +============================================================================= +Revision 3.84 -> 3.88 +============================================================================= +Fix file descriptor leak in cluster mode. +============================================================================= +Revision 3.89 -> 3.91 +============================================================================= +Support for heterogeneous clusters, bug fix for -C +============================================================================= +Revision 3.92 +============================================================================= +Add a small sleep in the client so the master's terminate message +can arrive before the client exits and closes the channel. +============================================================================= +Revision 3.93 +============================================================================= +Add support for UWIN (Unix for Windows) +============================================================================= +Revision 3.94 +============================================================================= +Bug fix for client's working dir in cluster mode. +============================================================================= +Revision 3.95 +============================================================================= +Enable more options in Cluster mode. +============================================================================= +Revision 3.96 +============================================================================= +Add support for Solaris 8 in 64-bit mode. 
+============================================================================= +Revision 3.97 +============================================================================= +Linux demands a function proto for functions that take floats as args. +============================================================================= +Revision 3.98 +============================================================================= +Changes for Solaris to make their silly compiler eat reasonable +function prototypes. (yech !!) +============================================================================= +Revision 3.99 +============================================================================= +Add protocol version checking for distributed messages. +Add support for AIX 5.2 +============================================================================= +Revision 3.100 +============================================================================= +Fixes for socket ports. Needed to be in network format. +============================================================================= +Revision 3.101 +============================================================================= +Add support for RSH environment override. +============================================================================= +Revision 3.102 +============================================================================= +Improve O_DIRECT and VX_DIRECT so that testing is done +on the correct file on the correct client. +============================================================================= +Revision 3.103 +============================================================================= +Code cleanup. +============================================================================= +Revision 3.104 +============================================================================= +Code cleanup. Bug fix for O_DIRECT in read_perf_test. 
+============================================================================= +Revision 3.105 +============================================================================= +Bug fix for TRU64 and OSF where reclen was not getting displayed. +============================================================================= +Revision 3.106 +============================================================================= +Add -+d file I/O diagnostic mode. +============================================================================= +Revision 3.107 +============================================================================= +Fixes for the awesome Diagnostics mode. +============================================================================= +Revision 3.108 +============================================================================= +turn off cdebug +Switch child comm to SOCK_STREAM. Avoid UDP fragment problems. +============================================================================= +Revision 3.109 +============================================================================= +Fix for "disrupt" and Direct I/O. Needs to be page size and aligned. +============================================================================= +Revision 3.110 +============================================================================= +Cleanup for -Wall to all source files. +============================================================================= +Revision 3.111 +============================================================================= +Fixes for UWIN compile warnings. +============================================================================= +Revision 3.112 +============================================================================= +Fixes for Windows compile warnings. do_compute() proto. 
+============================================================================= +Revision 3.113 +============================================================================= +Add definition char *dumb for Solaris to alloc_mem() +============================================================================= +Revision 3.114 +============================================================================= +Code cleanup for AIX. No async support caused warnings. +============================================================================= +Revision 3.115 +============================================================================= +Fix for Solaris returning short reads() from socket to child_listen. +============================================================================= +Revision 3.116 +============================================================================= +Add support for Mac OS X +============================================================================= +Revision 3.117 +============================================================================= +Add code to set the socket buffer window size. Solaris needs this. +============================================================================= +Revision 3.118 +============================================================================= +Add O_Direct for AIX +============================================================================= +Revision 3.119-> 3.120 +============================================================================= +Fix some compiler warnings and implement the -+x option for +setting the multiplier used for file and record size incrementing. +============================================================================= +Revision 3.121 +============================================================================= +Add changes from Debian. Add powerpc and sparc. 
+Add changes to fix warning on Irix and Irix64 +============================================================================= +Revision 3.122 +============================================================================= +Bug fix for cluster mode. Need to bzero buffers before sprintf or sscanf +============================================================================= +Revision 3.123 +============================================================================= +Bug fix for handling all chars that are transported over messaging. +============================================================================= +Revision 3.124 +============================================================================= +Simplify the child's debug output mechanism. +============================================================================= +Revision 3.125 +============================================================================= +Fix for stonewall in cluster mode. +============================================================================= +Revision 3.126 +============================================================================= +Shrink the client_neutral_command structure so it fits in a single +UDP packet. +============================================================================= +Revision 3.127 +============================================================================= +Improve debug code for cluster mode. +============================================================================= +Revision 3.128 +============================================================================= +Reduce the message traffic due to master's distribution of STOP. Only +one STOP distribution is needed. More can lead to socket buffer overflows. +============================================================================= +Revision 3.129 +============================================================================= +Bzero structures on the stack before using. 
No problem seen but it +is a possible hole. +============================================================================= +Revision 3.130 +============================================================================= +Add error checking for the client file contents. +============================================================================= +Revision 3.131 +============================================================================= +Use prealloc() for HP-UX to create file for use with mmap. +============================================================================= +Revision 3.132 +============================================================================= +Add random mix mode. +============================================================================= +Revision 3.133 +============================================================================= +Make a better 32 bit random offset from calling rand()<<16||rand() +============================================================================= +Revision 3.134 +============================================================================= +Add -+p percentage read option. +============================================================================= +Revision 3.135 +============================================================================= +Improve the mixed mode distribution algorithm. +============================================================================= +Revision 3.136 +============================================================================= +Fix auto bug introduced by mixed mode testing. +Introduce -+r for O_RSYNC. +============================================================================= +Revision 3.137 +============================================================================= +Code cleanup for some warnings on IA-64 systems. 
+============================================================================= +Revision 3.138 +============================================================================= +Fixes for FreeBSD +============================================================================= +Revision 3.139 +============================================================================= +Add support for multiple -r and -s options. +============================================================================= +Revision 3.140 +============================================================================= +Code cleanup for non-ansi builds +Add target build to output. +============================================================================= +Revision 3.141 +============================================================================= +Add speed check code. +============================================================================= +Revision 3.142 +============================================================================= +Increase maximum threads/procs to 256 +============================================================================= +Revision 3.143 +============================================================================= +Add contribs and -+t to help splash screen. +============================================================================= +Revision 3.144 +============================================================================= +Bug fix for Redhat. +============================================================================= +Revision 3.145 +============================================================================= +Bug fix for when user used -l but failed to use -u too. +============================================================================= +Revision 3.146 +============================================================================= +Add void to speed_main() for non-ansi compiles. 
+============================================================================= +Revision 3.147 +============================================================================= +Add "Test running" So users will know the test is running +and not to hit control 'c' too soon. +Bug fix in libbif.c do_float() +============================================================================= +Revision 3.148 +============================================================================= +Turn off some child debug code. +============================================================================= +Revision 3.149 +============================================================================= +Disable fread and fwrite testing if mmap or async is in use. +============================================================================= +Revision 3.150 +============================================================================= +Add pread/pwrite to Linux +============================================================================= +Revision 3.151 +============================================================================= +Handle -EB +============================================================================= +Revision 3.152 +============================================================================= +Add pread/pwrite throughput testing +============================================================================= +Revision 3.153 +============================================================================= +Changed second parameter to mmap() to be size_t. AIX needs this. +============================================================================= +Revision 3.154 +============================================================================= +Add support for madvise(). +============================================================================= +Revision 3.155 +============================================================================= +Code cleanup. 
+============================================================================= +Revision 3.156 +============================================================================= +Fixes for -w -t -R from Veritas +============================================================================= +Revision 3.157 +============================================================================= +Make madvise() go away for windows. +============================================================================= +Revision 3.158 +============================================================================= +Permit smaller values for -n and -g +============================================================================= +Revision 3.159 +============================================================================= +Make initial write in initfile() a page size request. +============================================================================= +Revision 3.160 +============================================================================= +Stop test if file can not be written. +============================================================================= +Revision 3.161 +============================================================================= +Special handling for mmap of a file that is opened (O_DIRECT) +============================================================================= +Revision 3.162 +============================================================================= +Fixup for systems that do not have O_DIRECT. +============================================================================= +Revision 3.163 +============================================================================= +Simplify the prototype for do_compute() +============================================================================= +Revision 3.164 +============================================================================= +Zero compute_val inside of loops. 
+============================================================================= +Revision 3.165 +============================================================================= +Add support for O_DIRECT for IRIX and IRIX64 +============================================================================= +Revision 3.166 +============================================================================= +Improve macros and add prototypes. +============================================================================= +Revision 3.167 +============================================================================= +Improve resolution of get_resolution(). +============================================================================= +Revision 3.168 +============================================================================= +Changes to support RedHat 9.0. +============================================================================= +Revision 3.169 +============================================================================= +Special handling of NAME for broken frontend in Cygwin/Windows env. +============================================================================= +Revision 3.170 +============================================================================= +Add support for the CrayX1 +============================================================================= +Revision 3.171 +============================================================================= +Remove reference to PAGE_SIZE for linux. This causes problems +with SuSe 8. +============================================================================= +Revision 3.172 +============================================================================= +Fixup for SCO build. +============================================================================= +Revision 3.173 +============================================================================= +Add -DHAVE_PREAD for Solaris8-64 target. 
+============================================================================= +Revision 3.174 +============================================================================= +Code cleanup for Linux +============================================================================= +Revision 3.177 +============================================================================= +Improve -+d so that each byte is more unique. +Improve byte level validation. +============================================================================= +Revision 3.178 +============================================================================= +Provide byte level error detection with Found char and Expecting Char in + -+d mode. +============================================================================= +Revision 3.179 +============================================================================= +Improve speed of -+d without losing uniqueness of bytes. +============================================================================= +Revision 3.180 +============================================================================= +Fix so that Windows can use multiple processes. Needed mmap like SCO. +============================================================================= +Revision 3.181 +============================================================================= +Use malloc() instead of mmap() for threads memory. +============================================================================= +Revision 3.182 +============================================================================= +Make CPU utilization use doubles everywhere. +============================================================================= +Revision 3.183 +============================================================================= +Add support for CPU utilization while in distributed mode.
+============================================================================= +Revision 3.184 +============================================================================= +Make all times relative so multi node can do CPU usage. +============================================================================= +Revision 3.185 +============================================================================= +Remove unused variables. +============================================================================= +Revision 3.186 +============================================================================= +Add -+n option to disable re-testing. +============================================================================= +Revision 3.187 +============================================================================= +Fixup -+n for throughput mode. +============================================================================= +Revision 3.188 +============================================================================= +Fix Excel output when -+n is used. +============================================================================= +Revision 3.189 +============================================================================= +Add support for the IBM S390 running Linux. +============================================================================= +Revision 3.190 +============================================================================= +Cleanup naming conventions for the S390 and fixup a #define. +============================================================================= +Revision 3.191 +============================================================================= +Add 64 bit compiles for s390x +Move BIG_ENDIAN to ZBIG_ENDIAN to avoid header conflicts. 
+============================================================================= +Revision 3.192 +============================================================================= +Make random offsets always based on 48 bit random values. +============================================================================= +Revision 3.193 +============================================================================= +Addition for make random offsets always based on 48 bit random values. +============================================================================= +Revision 3.194 +============================================================================= +Make rands long longs. +============================================================================= +Revision 3.195 +============================================================================= +Bug fix for 48 bit rands in bsd4_2 and Windows. +============================================================================= +Revision 3.196 +============================================================================= +Make big_rand a long long. +============================================================================= +Revision 3.197 +============================================================================= +Inject Erik's changes for Multi-client Windows. +============================================================================= +Revision 3.198 +============================================================================= +Change proto version due to changes in Windows -+m support. +Add Eric to the contributors list. +============================================================================= +Revision 3.199 +============================================================================= +Add more Windows support. +============================================================================= +Revision 3.200 +============================================================================= +Spelling error. 
+============================================================================= +Revision 3.201 +============================================================================= +Bug fixes from Erik H. +============================================================================= +Revision 3.202 +============================================================================= +Reduce usage of shared memory. +============================================================================= +Revision 3.203 +============================================================================= +Eliminate STUPID warning from the silly compiler. +============================================================================= +Revision 3.204 +============================================================================= +Changes to remove warnings on BSD. Thanks to Christian Weisgerber +============================================================================= +Revision 3.205 +============================================================================= +Support for the AMD64 +============================================================================= +Revision 3.206 +============================================================================= +Add -+k for constant aggregate data set size in throughput mode. +============================================================================= +Revision 3.207 +============================================================================= +Add pread support for the TRU64 target. Department of Defense in Canada. +Add -+q for delay in seconds between tests. 
+============================================================================= +Revision 3.208 +============================================================================= +Move variable up, GCC on Solaris was getting a bogus parse error +============================================================================= +Revision 3.209 +============================================================================= +Add support for -+D (O_DSYNC) mode testing. +============================================================================= +Revision 3.210 +============================================================================= +Make O_DSYNC conditional. +============================================================================= +Revision 3.211 +============================================================================= +Add telemetry support for pread/pwrite +============================================================================= +Revision 3.212 +============================================================================= +Add record locking +Add single file, file sharing. +============================================================================= +Revision 3.213 +============================================================================= +Enhance fill/verify (diag mode) for shared file. +============================================================================= +Revision 3.214 +============================================================================= +Remove warnings. 
+============================================================================= +Revision 3.215 +============================================================================= +Add prototype for mylockr() +============================================================================= +Revision 3.216 +============================================================================= +Fix prototype for mylockr +============================================================================= +Revision 3.217 +============================================================================= +Enable options for Windows systems. +============================================================================= +Revision 3.218 +============================================================================= +Add label to Excel spreadsheet that describes the rows and columns. +Add support for Solaris64 with VxFS. +Add support for Linux-arm +============================================================================= +Revision 3.219 +============================================================================= +Add sleep to permit child to get connection up before master does connect. +============================================================================= +Revision 3.220 +============================================================================= +Improve master connect to child, without delays. +============================================================================= +Revision 3.221 +============================================================================= +Add -+B Mixed sequential testing. BlueArc request. 
+============================================================================= +Revision 3.222 +============================================================================= +Workaround for bug in Cygwin's sscanf +============================================================================= +Revision 3.223 +============================================================================= +Add transfer size to the output from -Q +============================================================================= +Revision 3.224 +============================================================================= +Work around for TCP_WAIT in Windows. +============================================================================= +Revision 3.225 +============================================================================= +Fix for broken rsh on Windows. +============================================================================= +Revision 3.226 +============================================================================= +Workaround for gcc 3.4. From the folks at Gentoo.org. +============================================================================= +Revision 3.227 +============================================================================= +Enable -+m and telemetry files. +============================================================================= +Revision 3.228 +============================================================================= +Make more unique file names for mmap files. +============================================================================= +Revision 3.229 +============================================================================= +Add -+T time stamps. +============================================================================= +Revision 3.230 +============================================================================= +Bug fix for -m and validation code. 
+============================================================================= +Revision 3.231 +============================================================================= +Add a space to the throughput output dump. +============================================================================= +Revision 3.232 +============================================================================= +Add another space to the throughput output dump. +============================================================================= +Revision 3.233 +============================================================================= +Enable shared file with no locking +============================================================================= +Revision 3.234 +============================================================================= +Add sanity check to validate that open(name, O_CREAT | O_WRONLY | O_TRUNC, 0) +does work correctly. This is an NFS client test that +detects if the NFS server's local filesystem is broken and +fails to support the sequence above correctly. +============================================================================= +Revision 3.235 +============================================================================= +add a close(fd) to the sanity test. +============================================================================= +Revision 3.237 +============================================================================= +Transport the -o flag to remote clients. +============================================================================= +Revision 3.238 +============================================================================= +Fix hang when using HP-UX master, Linux client, ssh buildup. +============================================================================= +Revision 3.239 +============================================================================= +Add -+h hostname. Permits one to manually set the hostname. 
For systems +with multiple names/NICs. +============================================================================= +Revision 3.241 +============================================================================= +Add -+h, set hostname, and fix Solaris hang. +============================================================================= +Revision 3.242 +============================================================================= +Remove the side effect of no-rereads when using -w. Now use -+n for +consistency. +============================================================================= +Revision 3.243 +============================================================================= +Bug fix for -+k option. +============================================================================= +Revision 3.246 +============================================================================= +Add the -+U for WIN32 API calls .. Unbuffered I/O. Sony studios. +============================================================================= +Revision 3.247 +============================================================================= +Add support for -+U with -K (WIN32API calls + Jitter) +============================================================================= +Revision 3.248 +============================================================================= +Bug fix. -J with -+m not passing compute delay correctly. +============================================================================= +Revision 3.249 +============================================================================= +Add support for -i 8 when used with -+B (sequential mix) +============================================================================= +Revision 3.250 +============================================================================= +Change the default pattern. Samba is trying to cheat by +special casing IOZONE.tmp, and the pattern of 0xA5.
+============================================================================= +Revision 3.251 +============================================================================= +Make the default pattern random, and based on Iozone version. +This is to prevent the hack from Richard Sharpe (in Samba) +from special casing Iozone, and lying to the user. +============================================================================= +Revision 3.252 +============================================================================= +bug fix in pattern gen. +============================================================================= +Revision 3.253 +============================================================================= +Add -+Z old data set mode. +Add -+X constant data for short circuit testing only. +============================================================================= +Revision 3.254 +============================================================================= + Multi-node changes for new options. (-+Z and -+X) +============================================================================= +Revision 3.255 +============================================================================= +Add -+K flag for Sony. +============================================================================= +Revision 3.256 +============================================================================= +Move -+K outside of Windows only. +============================================================================= +Revision 3.257 +============================================================================= +Simplify percentage calculation +============================================================================= +Revision 3.258 +============================================================================= +Add error checking for -f and -F in the wrong modes. 
+============================================================================= +Revision 3.259 +============================================================================= +Bug fix for pbuffer allocation on remote clients. +============================================================================= +Revision 3.260 +============================================================================= +Check for max_rec_size when using ranges. -r -r -r +============================================================================= +Revision 3.261 +============================================================================= +Fix for Debian user bug. -r 1m -n 1m -g 2m gave bogus error. +============================================================================= +Revision 3.262 +============================================================================= +Bug fix for -k used in conjunction with -t and content validation. +============================================================================= +Revision 3.263 +============================================================================= +Bug fix for -k used in conjunction with -t and content validation. +============================================================================= +Revision 3.264 +============================================================================= +Add DragonFly target. 
+============================================================================= +Revision 3.265 +============================================================================= +Put PER_VECTOR_OFFSET in for HP-UX +============================================================================= +Revision 3.266 +============================================================================= +Fix compiler warning messages +============================================================================= +Revision 3.267 +============================================================================= +Enforce minimum file size of page_size +============================================================================= +Revision 3.268 +============================================================================= +Minor fixes. +============================================================================= +Revision 3.269 +============================================================================= +Check fsync and close for errors. +============================================================================= +Revision 3.270 +============================================================================= +Adding support for testing block devices. Will be done in phases. This is +phase 1. (Single threaded mode only) +============================================================================= +Revision 3.271 +============================================================================= +Adding 4 token support to client_list. Each entry may now contain 4 tokens +and the new one is the absolute path to the temp file for testing. +============================================================================= +Revision 3.272 +Editorial change. +============================================================================= +Revision 3.273 +Add support for external monitor start & stop for throughput tests. +IMON_START and IMON_STOP environmental variables used. 
+============================================================================= +Revision 3.274 +============================================================================= +minor change. +============================================================================= +Revision 3.275 +Bug fix for systems without O_DIRECT. Fall through in switch statement. +============================================================================= +Revision 3.276 +Fix for -c -t over NFS and initial writer close() when told by another to stop +============================================================================= +Revision 3.277 +Add Benny Halevy to contributors list. +============================================================================= +Revision 3.278 +Fix for Cygwin environment. +============================================================================= +Revision 3.279 +Code cleanup, and add arg to external trigger. +============================================================================= +Revision 3.280 +Code fixes for macosx +============================================================================= +============================================================================= +Revision 3.281 +Add support for building with Sun's Studio 11 compiler +============================================================================= +Revision 3.283 +Bug fix for fread/fwrite with > 2Gig files. +============================================================================= +Revision 3.287 +Add O_DIRECT for Windows +============================================================================= +Revision 3.288 +Add -+w dedup testing mode. +============================================================================= +Revision 3.289 +Make remaining non-dedup data unique. +============================================================================= +Revision 3.290 +Make non-dedupable more unique. 
+============================================================================= +Revision 3.291 +Bug fix for non-dedup. +============================================================================= +Revision 3.292 +Make random offsets unique, using Knuth shuffle. +============================================================================= +Revision 3.292 +free memory used for random offset uniqueness. +============================================================================= +Revision 3.294 +Make unique/random offsets 64bits. +============================================================================= +Revision 3.295 +Add fallback for random/unique. +============================================================================= +Revision 3.296 +Make non-dedup region more unique +============================================================================= +Revision 3.297 +Add -+y ## to set percentage of interior dedup. +============================================================================= +Revision 3.298 +Add -+y ## to set percentage of interior dedup. +============================================================================= +Revision 3.299 +Bug fixes for -+w and -+y +============================================================================= +Revision 3.300 +Minor fix for dedup +============================================================================= +Revision 3.302 +Adding -+C to set percent of dedupable within a file. +============================================================================= +Revision 3.303 +bug fix +============================================================================= +Revision 3.304 +Add solaris to read sync O_RSYNC +============================================================================= +Revision 3.305 +Add space to avoid field output touching each other. +============================================================================= +Revision 3.306 +Add check for config file exceeding MAXSTREAMS. 
+============================================================================= +Revision 3.307 +Add new contributor's name. +============================================================================= +Revision 3.308 +Fix type-oh +============================================================================= +Revision 3.309 +Bug fix. rewrite_rec needed to fill entire buffer, or later stride read will +fail. +============================================================================= +Revision 3.310 +============================================================================= +Add ability for remote clients to return errors to the master and +have the master display on output. +============================================================================= +Revision 3.311 +============================================================================= +fix double reporting of client errors +============================================================================= +Revision 3.312 +============================================================================= +Eliminate extra file descriptor in fwrite test. +============================================================================= +Revision 3.312 +============================================================================= +bug fix for barray allocation in -T mode +============================================================================= +Revision 3.313 +Revision 3.314 +Revision 3.315 +============================================================================= +Changes from Debian: Retry umount, add fileop for linux-sparc, and +fix column width in fileop for faster boxes. 
+============================================================================= +Revision 3.316 +Add O_DIRECT support to FreeBSD +============================================================================= +Revision 3.317 +Fix for defines in FreeBSD +============================================================================= +Revision 3.318 +Add IMON_SYNC to enable monitor scripts to be run sync. +============================================================================= +Revision 3.319 +Add directio() for Solaris +============================================================================= +Revision 3.320 +Add fixes for unresolved references in directio() for Solaris +============================================================================= +Revision 3.321 +Fix type oh. +============================================================================= +Revision 3.322 +Fix c++ style comment back to 'C' style comment. +============================================================================= +Revision 3.323 +Bug fix for check_filenames and large files +============================================================================= +Revision 3.324 +Replace triple rand() calls with 64 bit Mersenne twister. +============================================================================= +Revision 3.325 +Add read-only, external file, with no-verify. -+E +============================================================================= +Revision 3.325 +Permit -+E on dedup files. +============================================================================= +Revision 3.327 +Permit -+E on random read only testing, on existing file. 
+============================================================================= +Revision 3.328 +Add passing master listener's port to remote children if it is not +HOST_LIST_PORT +============================================================================= +Revision 3.329 +Adding Dave Boone's notruncate option -+N +============================================================================= +Revision 3.330 +Bug fix for Dave's code. +============================================================================= +Revision 3.331 +Add multi -t ops. Fabrice +============================================================================= +Revision 3.332 +Added Li Qin's multi dedup set support. -+S # +============================================================================= +Revision 3.333 +Bug fix for -+S dedup_mseed needs to be an integer +============================================================================= +Revision 3.334 +Make -+S generate more uniqueness +============================================================================= +Revision 3.335 +Make -+S generate more uniqueness +============================================================================= +Revision 3.336 +Make -+S generate more uniqueness +============================================================================= +Revision 3.337 +Bug fix for -+S +============================================================================= +Revision 3.338 +Make umount/remount more robust, in the face of server errors. +============================================================================= +Revision 3.339 +Improve the help string for the -+S option. +============================================================================= +Revision 3.340 +Add new contributor name. +============================================================================= +Revision 3.342 +Add support for the programmable interdimensional timer. 
+============================================================================= +Revision 3.343 +Bug fix for PIT on remote clients. +============================================================================= +Revision 3.344 +Bug fix for PIT on remote clients. +============================================================================= +Revision 3.345 +Have children re-acquire get_resolution. +============================================================================= +Revision 3.346 +Bug fix for t_range addition. +============================================================================= +Revision 3.347 +Get rid of a warning. ( An invalid warning, but none the less ) +============================================================================= +Revision 3.348 +Add more words to the usage warnings and license +============================================================================= +Revision 3.349 +Remove Ascii dependency for IBM's Z/OS that speaks EBCDIC. +============================================================================= +Revision 3.353 +Add support for SUA +============================================================================= +Revision 3.354 +Remove Sanity check so that SMB on Windows, under SUA, works +============================================================================= +Revision 3.355 +Cache the getaddrinfo call. +============================================================================= +Revision 3.356 +delete optimization.. bad...Cache the getaddrinfo call. 
+============================================================================= +Revision 3.358 +Change pit to use unsigned long longs +============================================================================= +Revision 3.359 +Add Linux processor affinity +============================================================================= +Revision 3.360 +Remove UDP usage +============================================================================= +Revision 3.361 +Increment protocol_version to catch incompat versions. +============================================================================= +Revision 3.362 +Fixup for new include needed by Solaris10 +============================================================================= +Revision 3.363 +Patch for Mac errno +============================================================================= +Revision 3.364 +Patch for Mac printf's +============================================================================= +Revision 3.365 +Fix Josh's introduction of new Linux warnings. +============================================================================= +Revision 3.366 +Take sleep(1) out put path, deal with it in error/retry path +============================================================================= +Revision 3.367 +Add -+z latency histogram logging. +============================================================================= +Revision 3.368 +Format change for the -+z latency histogram logging. +============================================================================= +Revision 3.369 +Added -+O Op_rate control. +============================================================================= +Revision 3.370 +Close race condition with master closing socket to child async reader +============================================================================= +Revision 3.371 +Add "mygen" generation to the remote children protocol. +This prevents zombies from coming back to life and killing future masters. 
+============================================================================= +Revision 3.372 +Set Listen(s,100) to Listen(s,MAXSTREAMS) +============================================================================= +Revision 3.373 +Move label "again" to outside of cdebug. +============================================================================= +Revision 3.374 +More fixes for busted crap in Solaris !!! +============================================================================= +Revision 3.376 +AIX update. They now have errno.h +============================================================================= +Revision 3.377 +Need errno.h for FreeBSD +============================================================================= +Revision 3.379 +Need to include errno.h for Cygwin +============================================================================= +Revision 3.381 +Add SO_LINGER for master_listen and child_listen, so that wind-blows +will work like all other systems on the planet. +============================================================================= +Revision 3.382 +Fix for linger addition +============================================================================= +Revision 3.383 +Fix for linger addition +============================================================================= +Revision 3.384 +Fix for linger addition +============================================================================= +Revision 3.385 +Make linger for all +============================================================================= +Revision 3.387 +Change sleep() calls, that help connect() to nanosleep() calls. 
+============================================================================= +Revision 3.388 +Fixup remainder for nanosleep() +============================================================================= +Revision 3.389 +Fixup remainder for nanosleep() +============================================================================= +Revision 3.390 +Add code for pread/pwrite from Ben England (Redhat) +============================================================================= +Revision 3.391 +Add code for MDEBUG and CDEBUG from Bob England (Redhat) +============================================================================= +Revision 3.392 +Add code for building HPUX. Errno.h +============================================================================= +Revision 3.393 +Fixes for Windows (nanosleep doesn't always work ) +============================================================================= +Revision 3.394 +Fixes for preadv and pwritev from RedHat (Ben England) +============================================================================= +Revision 3.395 +Add warnings for default switch cases, and exit with value for unknowns. +============================================================================= +Revision 3.396 +Fix warnings from RedHat patches +============================================================================= +Revision 3.397 +Bug fix for getopt default case, with bad parameter handed in. +============================================================================= +Revision 3.398 +Adding thread_read_test and thread_write_test. +============================================================================= diff --git a/src/components/appio/tests/iozone/Generate_Graphs b/src/components/appio/tests/iozone/Generate_Graphs new file mode 100755 index 0000000..7c14a9a --- /dev/null +++ b/src/components/appio/tests/iozone/Generate_Graphs @@ -0,0 +1,32 @@ +# +# This script will create the Iozone graphs using +# gnuplot. 
+# +# +# +# ------------------------------------------------ +# YOU MUST PROVIDE A FILE NAME FOR IT TO PROCESS. +# This filename is the name of the file where you +# sent Iozone's output. +# ------------------------------------------------ + +# Generate data base for all of the operation types. + +./gengnuplot.sh $1 write +./gengnuplot.sh $1 rewrite +./gengnuplot.sh $1 read +./gengnuplot.sh $1 reread +./gengnuplot.sh $1 randread +./gengnuplot.sh $1 randwrite +./gengnuplot.sh $1 bkwdread +./gengnuplot.sh $1 recrewrite +./gengnuplot.sh $1 strideread +./gengnuplot.sh $1 fwrite +./gengnuplot.sh $1 frewrite +./gengnuplot.sh $1 fread +./gengnuplot.sh $1 freread + +# Produce graphs and postscript results. +gnuplot gnu3d.dem + + diff --git a/src/components/appio/tests/iozone/Gnuplot.txt b/src/components/appio/tests/iozone/Gnuplot.txt new file mode 100644 index 0000000..5ea63d8 --- /dev/null +++ b/src/components/appio/tests/iozone/Gnuplot.txt @@ -0,0 +1,23 @@ +The script Generate_Graphs will create the 3D surface plots +and display them. It will also produce postscript outputs +for each test and leave them in their respective sub-directory. + +It processes the output from an Iozone run. The output from +Iozone that it is expecting is the text output from +the iozone default behavior. (iozone -a, or iozone -az) + +How to produce graphs: + + Generate_Graphs iozone.out + +The Generate_Graphs script will: +1. Create the databases for each type of operation + and then process them with Gnuplot. +2. It will display each result on the X11 screen, and + also save a copy in postscript in the test sub-directory. + + +Thanks to Yves Rougy for providing the nifty scripts to help +with the plots. + + diff --git a/src/components/appio/tests/iozone/client_list b/src/components/appio/tests/iozone/client_list new file mode 100644 index 0000000..c3f043b --- /dev/null +++ b/src/components/appio/tests/iozone/client_list @@ -0,0 +1,36 @@ +# +# Lines that start with # in column 0 are comments. 
+# +# There are now two formats supported. +# Format: 3 fields, space delimited. +# Format: 4 fields, space delimited. +# +# Format: 3 fields, space delimited. +# client_name working_dir_on_client path_to_iozone_on_client +# Format: 4 fields, space delimited. +# client_name working_dir_on_client path_to_iozone_on_client path_to_testfile +# +# Example: With two clients (format 3 fields) +# +# client1 /home/user/tmp /home/user/tmp/iozone +# client2 /home/user/tmp /home/user/tmp/iozone +# +# +# Example: With two copies of Iozone on each of the two clients +# (format 3 fields) +# +# client1 /home/user/tmp /home/user/tmp/iozone +# client1 /home/user/tmp /home/user/tmp/iozone +# client2 /home/user/tmp /home/user/tmp/iozone +# client2 /home/user/tmp /home/user/tmp/iozone +# +# Example: With two clients (format 4 fields) +# client1 /home/user/tmp /home/user/tmp/iozone /tmp/foo1 +# client2 /home/user/tmp /home/user/tmp/iozone /tmp/foo2 +# +# Example: With two copies of Iozone on each of the two clients +# (format 4 fields) +# client1 /home/user/tmp /home/user/tmp/iozone /tmp/foo1 +# client1 /home/user/tmp /home/user/tmp/iozone /tmp/foo2 +# client2 /home/user/tmp /home/user/tmp/iozone /tmp/foo3 +# client2 /home/user/tmp /home/user/tmp/iozone /tmp/foo4 diff --git a/src/components/appio/tests/iozone/fileop.c b/src/components/appio/tests/iozone/fileop.c new file mode 100644 index 0000000..588a2d2 --- /dev/null +++ b/src/components/appio/tests/iozone/fileop.c @@ -0,0 +1,1389 @@ +/* + * Author: Don Capps + * 3/13/2006 + * + * Author: Don Capps (capps@iozone.org) + * 7417 Crenshaw + * Plano, TX 75025 + * + * Copyright 2006, 2007, 2008, 2009 Don Capps. + * + * License to freely use and distribute this software is hereby granted + * by the author, subject to the condition that this copyright notice + * remains intact. 
The author retains the exclusive right to publish + * derivative works based on this work, including, but not limited to, + * revised versions of this work", + * + * + fileop [-f X ]|[-l # -u #] [-s Y] [-e] [-b] [-w] [-d

] [-t] [-v] [-h] + -f # Force factor. X^3 files will be created and removed. + -l # Lower limit on the value of the Force factor. + -u # Upper limit on the value of the Force factor. + -s # Optional. Sets filesize for the create/write. May use suffix 'K' or 'M'. + -e Excel importable format. + -b Output best case. + -w Output worst case. + -d Specify starting directory. + -U Mount point to remount between tests. + -t Verbose output option. + -v Version information. + -h Help text. + * + * X is a force factor. The total number of files will + * be X * X * X ( X ^ 3 ) + * The structure of the file tree is: + * X number of Level 1 directories, with X number of + * level 2 directories, with X number of files in each + * of the level 2 directories. + * + * Example: fileop 2 + * + * dir_1 dir_2 + * / \ / \ + * sdir_1 sdir_2 sdir_1 sdir_2 + * / \ / \ / \ / \ + * file_1 file_2 file_1 file_2 file_1 file_2 file_1 file_2 + * + * Each file will be created, and then 1 byte is written to the file. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if defined(Windows) +#include +#endif +#if !defined(PATH_MAX) +#define PATH_MAX 255 +#endif + +#if defined(_SUA_) +extern char *optarg; +extern char *opterr; +int fsync(); +int getopt(); +#endif +int junk, *junkp; +int x,excel; +int verbose = 0; +int sz = 1; +char *mbuffer; +int incr = 1; +#define _STAT_CREATE 0 +#define _STAT_WRITE 1 +#define _STAT_CLOSE 2 +#define _STAT_LINK 3 +#define _STAT_UNLINK 4 +#define _STAT_DELETE 5 +#define _STAT_STAT 6 +#define _STAT_ACCESS 7 +#define _STAT_CHMOD 8 +#define _STAT_READDIR 9 +#define _STAT_DIR_CREATE 10 +#define _STAT_DIR_DELETE 11 +#define _STAT_READ 12 +#define _STAT_OPEN 13 +#define _STAT_DIR_TRAVERSE 14 +#define _NUM_STATS 15 +struct stat_struct { + double starttime; + double endtime; + double speed; + double best; + double worst; + double dummy; + double total_time; + double dummy1; + long long counter; +} 
volatile stats[_NUM_STATS]; + + +static double time_so_far(void); +void dir_create(int); +void dir_traverse(int); +void dir_delete(int); +void file_create(int); +void file_stat(int); +void file_access(int); +void file_chmod(int); +void file_readdir(int); +void file_delete(int); +void file_link(int); +void file_unlink(int); +void file_read(int); +void splash(void); +void usage(void); +void bzero(); +void clear_stats(); +int validate(char *, int , char ); + +#define THISVERSION " $Revision$" +/*#define NULL 0*/ + +char version[]=THISVERSION; +char thedir[PATH_MAX]="."; /* Default is to use the current directory */ +const char *mountname=NULL; /* Default is not to unmount anything between the tests */ + +int cret; +int lower, upper,range; +int i; +int best, worst; +int dirlen; + +/************************************************************************/ +/* Routine to purge the buffer cache by unmounting drive. */ +/************************************************************************/ +void purge_buffer_cache() +{ + if (!mountname) + return; + + char cwd[PATH_MAX]; + char command[1024]; + int ret,i; + + junkp=(int *)getcwd(cwd, sizeof(cwd)); + junk=chdir("/"); + strcpy(command,"umount "); + strcat(command, mountname); + /* + umount might fail if the device is still busy, so + retry unmounting several times with increasing delays + */ + for (i = 1; i < 10; ++i) { + ret = system(command); + if (ret == 0) + break; + sleep(i); /* seconds */ + } + strcpy(command,"mount "); + strcat(command, mountname); + junk=system(command); + junk=chdir(cwd); +} + +int main(int argc, char **argv) +{ + if(argc == 1) + { + usage(); + exit(1); + } + while((cret = getopt(argc,argv,"hbwetvf:s:l:u:d:U:i: ")) != EOF){ + switch(cret){ + case 'h': + usage(); + exit(0); + break; + case 'd' : + dirlen=strlen(optarg); + if (optarg[dirlen-1]=='/') + --dirlen; + strncpy(thedir, optarg, dirlen); + thedir[dirlen] = 0; + break; + case 'U': + mountname = optarg; + break; + case 'i': /* Increment force 
by */ + incr=atoi(optarg); + if(incr < 0) + incr=1; + break; + case 'f': /* Force factor */ + x=atoi(optarg); + if(x < 0) + x=1; + break; + case 's': /* Size of files */ + sz=atoi(optarg); + if(optarg[strlen(optarg)-1]=='k' || + optarg[strlen(optarg)-1]=='K'){ + sz = (1024 * atoi(optarg)); + } + if(optarg[strlen(optarg)-1]=='m' || + optarg[strlen(optarg)-1]=='M'){ + sz = (1024 * 1024 * atoi(optarg)); + } + if(sz < 0) + sz=1; + break; + case 'l': /* lower force value */ + lower=atoi(optarg); + range=1; + if(lower < 0) + lower=1; + break; + case 'v': /* version */ + splash(); + exit(0); + break; + case 'u': /* upper force value */ + upper=atoi(optarg); + range=1; + if(upper < 0) + upper=1; + break; + case 't': /* verbose */ + verbose=1; + break; + case 'e': /* Excel */ + excel=1; + break; + case 'b': /* Best */ + best=1; + break; + case 'w': /* Worst */ + worst=1; + break; + } + } + mbuffer=(char *)malloc(sz); + memset(mbuffer,'a',sz); + if(!excel) + printf("\nFileop: Working in %s, File size is %d, Output is in Ops/sec. (A=Avg, B=Best, W=Worst)\n", thedir, sz); + if(!verbose) + { +#ifdef Windows + printf(" . %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %10s\n", + "mkdir","chdir","rmdir","create","open","read","write","close","stat", + "access","chmod","readdir","delete"," Total_files"); +#else + + printf(" . 
%7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %7s %10s\n", + "mkdir","chdir","rmdir","create","open", "read","write","close","stat", + "access","chmod","readdir","link ","unlink","delete", + " Total_files"); +#endif + } + junk=chdir(thedir); /* change starting point */ + if(x==0) + x=1; + if(range==0) + lower=upper=x; + for(i=lower;i<=upper;i+=incr) + { + clear_stats(); + x=i; + /* + * Dir Create test + */ + purge_buffer_cache(); + dir_create(x); + + if(verbose) + { + printf("mkdir: Dirs = %9lld ",stats[_STAT_DIR_CREATE].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_DIR_CREATE].total_time); + printf(" Avg mkdir(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_DIR_CREATE].counter/stats[_STAT_DIR_CREATE].total_time, + stats[_STAT_DIR_CREATE].total_time/stats[_STAT_DIR_CREATE].counter); + printf(" Best mkdir(s)/sec = %12.2f (%12.9f seconds/op)\n",1/stats[_STAT_DIR_CREATE].best,stats[_STAT_DIR_CREATE].best); + printf(" Worst mkdir(s)/sec = %12.2f (%12.9f seconds/op)\n\n",1/stats[_STAT_DIR_CREATE].worst,stats[_STAT_DIR_CREATE].worst); + } + + /* + * Dir Traverse test + */ + purge_buffer_cache(); + dir_traverse(x); + + if(verbose) + { + printf("chdir: Dirs = %9lld ",stats[_STAT_DIR_TRAVERSE].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_DIR_TRAVERSE].total_time); + printf(" Avg chdir(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_DIR_TRAVERSE].counter/stats[_STAT_DIR_TRAVERSE].total_time, + stats[_STAT_DIR_TRAVERSE].total_time/stats[_STAT_DIR_TRAVERSE].counter); + printf(" Best chdir(s)/sec = %12.2f (%12.9f seconds/op)\n",1/stats[_STAT_DIR_TRAVERSE].best,stats[_STAT_DIR_TRAVERSE].best); + printf(" Worst chdir(s)/sec = %12.2f (%12.9f seconds/op)\n\n",1/stats[_STAT_DIR_TRAVERSE].worst,stats[_STAT_DIR_TRAVERSE].worst); + } + + /* + * Dir delete test + */ + purge_buffer_cache(); + dir_delete(x); + + if(verbose) + { + printf("rmdir: Dirs = %9lld ",stats[_STAT_DIR_DELETE].counter); + printf("Total Time = %12.9f 
seconds\n",stats[_STAT_DIR_DELETE].total_time); + printf(" Avg rmdir(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_DIR_DELETE].counter/stats[_STAT_DIR_DELETE].total_time, + stats[_STAT_DIR_DELETE].total_time/stats[_STAT_DIR_DELETE].counter); + printf(" Best rmdir(s)/sec = %12.2f (%12.9f seconds/op)\n",1/stats[_STAT_DIR_DELETE].best,stats[_STAT_DIR_DELETE].best); + printf(" Worst rmdir(s)/sec = %12.2f (%12.9f seconds/op)\n\n",1/stats[_STAT_DIR_DELETE].worst,stats[_STAT_DIR_DELETE].worst); + } + + /* + * Create test + */ + purge_buffer_cache(); + file_create(x); + if(verbose) + { + printf("create: Files = %9lld ",stats[_STAT_CREATE].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_CREATE].total_time); + printf(" Avg create(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_CREATE].counter/stats[_STAT_CREATE].total_time, + stats[_STAT_CREATE].total_time/stats[_STAT_CREATE].counter); + printf(" Best create(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_CREATE].best,stats[_STAT_CREATE].best); + printf(" Worst create(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_CREATE].worst,stats[_STAT_CREATE].worst); + printf("write: Files = %9lld ",stats[_STAT_WRITE].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_WRITE].total_time); + printf(" Avg write(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_WRITE].counter/stats[_STAT_WRITE].total_time, + stats[_STAT_WRITE].total_time/stats[_STAT_WRITE].counter); + printf(" Best write(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_WRITE].best,stats[_STAT_WRITE].best); + printf(" Worst write(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_WRITE].worst,stats[_STAT_WRITE].worst); + printf("close: Files = %9lld ",stats[_STAT_CLOSE].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_CLOSE].total_time); + printf(" Avg close(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_CLOSE].counter/stats[_STAT_CLOSE].total_time, + 
stats[_STAT_CLOSE].total_time/stats[_STAT_CLOSE].counter); + printf(" Best close(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_CLOSE].best,stats[_STAT_CLOSE].best); + printf(" Worst close(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_CLOSE].worst,stats[_STAT_CLOSE].worst); + } + + /* + * Stat test + */ + purge_buffer_cache(); + file_stat(x); + + if(verbose) + { + printf("stat: Files = %9lld ",stats[_STAT_STAT].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_STAT].total_time); + printf(" Avg stat(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_STAT].counter/stats[_STAT_STAT].total_time, + stats[_STAT_STAT].total_time/stats[_STAT_STAT].counter); + printf(" Best stat(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_STAT].best,stats[_STAT_STAT].best); + printf(" Worst stat(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_STAT].worst,stats[_STAT_STAT].worst); + } + /* + * Read test + */ + purge_buffer_cache(); + file_read(x); + + if(verbose) + { + printf("open: Files = %9lld ",stats[_STAT_OPEN].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_OPEN].total_time); + printf(" Avg open(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_OPEN].counter/stats[_STAT_OPEN].total_time, + stats[_STAT_OPEN].total_time/stats[_STAT_OPEN].counter); + printf(" Best open(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_OPEN].best,stats[_STAT_OPEN].best); + printf(" Worst open(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_OPEN].worst,stats[_STAT_OPEN].worst); + + printf("read: Files = %9lld ",stats[_STAT_READ].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_READ].total_time); + printf(" Avg read(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_READ].counter/stats[_STAT_READ].total_time, + stats[_STAT_READ].total_time/stats[_STAT_READ].counter); + printf(" Best read(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_READ].best,stats[_STAT_READ].best); + printf(" Worst 
read(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_READ].worst,stats[_STAT_READ].worst); + } + + /* + * Access test + */ + purge_buffer_cache(); + file_access(x); + if(verbose) + { + printf("access: Files = %9lld ",stats[_STAT_ACCESS].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_ACCESS].total_time); + printf(" Avg access(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_ACCESS].counter/stats[_STAT_ACCESS].total_time, + stats[_STAT_ACCESS].total_time/stats[_STAT_ACCESS].counter); + printf(" Best access(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_ACCESS].best,stats[_STAT_ACCESS].best); + printf(" Worst access(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_ACCESS].worst,stats[_STAT_ACCESS].worst); + } + /* + * Chmod test + */ + purge_buffer_cache(); + file_chmod(x); + + if(verbose) + { + printf("chmod: Files = %9lld ",stats[_STAT_CHMOD].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_CHMOD].total_time); + printf(" Avg chmod(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_CHMOD].counter/stats[_STAT_CHMOD].total_time, + stats[_STAT_CHMOD].total_time/stats[_STAT_CHMOD].counter); + printf(" Best chmod(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_CHMOD].best,stats[_STAT_CHMOD].best); + printf(" Worst chmod(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_CHMOD].worst,stats[_STAT_CHMOD].worst); + } + /* + * readdir test + */ + purge_buffer_cache(); + file_readdir(x); + + if(verbose) + { + printf("readdir: Files = %9lld ",stats[_STAT_READDIR].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_READDIR].total_time); + printf(" Avg readdir(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_READDIR].counter/stats[_STAT_READDIR].total_time, + stats[_STAT_READDIR].total_time/stats[_STAT_READDIR].counter); + printf(" Best readdir(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_READDIR].best,stats[_STAT_READDIR].best); + printf(" Worst readdir(s)/sec = %12.2f 
(%12.9f seconds/op)\n\n", + 1/stats[_STAT_READDIR].worst,stats[_STAT_READDIR].worst); + } +#if !defined(Windows) + /* + * link test + */ + purge_buffer_cache(); + file_link(x); + if(verbose) + { + printf("link: Files = %9lld ",stats[_STAT_LINK].counter); + printf("Total Time = %12.9f seconds\n",stats[_STAT_LINK].total_time); + printf(" Avg link(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_LINK].counter/stats[_STAT_LINK].total_time, + stats[_STAT_LINK].total_time/stats[_STAT_LINK].counter); + printf(" Best link(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_LINK].best,stats[_STAT_LINK].best); + printf(" Worst link(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_LINK].worst,stats[_STAT_LINK].worst); + } + /* + * unlink test + */ + purge_buffer_cache(); + file_unlink(x); + if(verbose) + { + printf("unlink: Files = %9lld ",stats[_STAT_UNLINK].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_UNLINK].total_time); + printf(" Avg unlink(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_UNLINK].counter/stats[_STAT_UNLINK].total_time, + stats[_STAT_UNLINK].total_time/stats[_STAT_UNLINK].counter); + printf(" Best unlink(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_UNLINK].best,stats[_STAT_UNLINK].best); + printf(" Worst unlink(s)/sec = %12.2f (%12.9f seconds/op)\n\n", + 1/stats[_STAT_UNLINK].worst,stats[_STAT_UNLINK].worst); + } +#endif + /* + * Delete test + */ + purge_buffer_cache(); + file_delete(x); + if(verbose) + { + printf("delete: Files = %9lld ",stats[_STAT_DELETE].counter); + printf("Total Time = %12.9f seconds\n", stats[_STAT_DELETE].total_time); + printf(" Avg delete(s)/sec = %12.2f (%12.9f seconds/op)\n", + stats[_STAT_DELETE].counter/stats[_STAT_DELETE].total_time, + stats[_STAT_DELETE].total_time/stats[_STAT_DELETE].counter); + printf(" Best delete(s)/sec = %12.2f (%12.9f seconds/op)\n", + 1/stats[_STAT_DELETE].best,stats[_STAT_DELETE].best); + printf(" Worst delete(s)/sec = %12.2f (%12.9f 
seconds/op)\n\n", + 1/stats[_STAT_DELETE].worst,stats[_STAT_DELETE].worst); + } + if(!verbose) + { + printf("%c %4d %7.0f ",'A',x,stats[_STAT_DIR_CREATE].counter/stats[_STAT_DIR_CREATE].total_time); + printf("%7.0f ",stats[_STAT_DIR_TRAVERSE].counter/stats[_STAT_DIR_TRAVERSE].total_time); + printf("%7.0f ",stats[_STAT_DIR_DELETE].counter/stats[_STAT_DIR_DELETE].total_time); + printf("%7.0f ",stats[_STAT_CREATE].counter/stats[_STAT_CREATE].total_time); + printf("%7.0f ",stats[_STAT_OPEN].counter/stats[_STAT_OPEN].total_time); + printf("%7.0f ",stats[_STAT_READ].counter/stats[_STAT_READ].total_time); + printf("%7.0f ",stats[_STAT_WRITE].counter/stats[_STAT_WRITE].total_time); + printf("%7.0f ",stats[_STAT_CLOSE].counter/stats[_STAT_CLOSE].total_time); + printf("%7.0f ",stats[_STAT_STAT].counter/stats[_STAT_STAT].total_time); + printf("%7.0f ",stats[_STAT_ACCESS].counter/stats[_STAT_ACCESS].total_time); + printf("%7.0f ",stats[_STAT_CHMOD].counter/stats[_STAT_CHMOD].total_time); + printf("%7.0f ",stats[_STAT_READDIR].counter/stats[_STAT_READDIR].total_time); +#ifndef Windows + printf("%7.0f ",stats[_STAT_LINK].counter/stats[_STAT_LINK].total_time); + printf("%7.0f ",stats[_STAT_UNLINK].counter/stats[_STAT_UNLINK].total_time); +#endif + printf("%7.0f ",stats[_STAT_DELETE].counter/stats[_STAT_DELETE].total_time); + printf("%10d ",x*x*x); + printf("\n"); + fflush(stdout); + + if(best) + { + printf("%c %4d %7.0f ",'B',x, 1/stats[_STAT_DIR_CREATE].best); + printf("%7.0f ",1/stats[_STAT_DIR_TRAVERSE].best); + printf("%7.0f ",1/stats[_STAT_DIR_DELETE].best); + printf("%7.0f ",1/stats[_STAT_CREATE].best); + printf("%7.0f ",1/stats[_STAT_OPEN].best); + printf("%7.0f ",1/stats[_STAT_READ].best); + printf("%7.0f ",1/stats[_STAT_WRITE].best); + printf("%7.0f ",1/stats[_STAT_CLOSE].best); + printf("%7.0f ",1/stats[_STAT_STAT].best); + printf("%7.0f ",1/stats[_STAT_ACCESS].best); + printf("%7.0f ",1/stats[_STAT_CHMOD].best); + printf("%7.0f ",1/stats[_STAT_READDIR].best); +#ifndef 
Windows + printf("%7.0f ",1/stats[_STAT_LINK].best); + printf("%7.0f ",1/stats[_STAT_UNLINK].best); +#endif + printf("%7.0f ",1/stats[_STAT_DELETE].best); + printf("%10d ",x*x*x); + printf("\n"); + fflush(stdout); + } + if(worst) + { + printf("%c %4d %7.0f ",'W',x, 1/stats[_STAT_DIR_CREATE].worst); + printf("%7.0f ",1/stats[_STAT_DIR_TRAVERSE].worst); + printf("%7.0f ",1/stats[_STAT_DIR_DELETE].worst); + printf("%7.0f ",1/stats[_STAT_CREATE].worst); + printf("%7.0f ",1/stats[_STAT_OPEN].worst); + printf("%7.0f ",1/stats[_STAT_READ].worst); + printf("%7.0f ",1/stats[_STAT_WRITE].worst); + printf("%7.0f ",1/stats[_STAT_CLOSE].worst); + printf("%7.0f ",1/stats[_STAT_STAT].worst); + printf("%7.0f ",1/stats[_STAT_ACCESS].worst); + printf("%7.0f ",1/stats[_STAT_CHMOD].worst); + printf("%7.0f ",1/stats[_STAT_READDIR].worst); +#ifndef Windows + printf("%7.0f ",1/stats[_STAT_LINK].worst); + printf("%7.0f ",1/stats[_STAT_UNLINK].worst); +#endif + printf("%7.0f ",1/stats[_STAT_DELETE].worst); + printf("%10d ",x*x*x); + printf("\n"); + fflush(stdout); + } + } + } + return(0); +} + +void +dir_create(int x) +{ + int i,j,k; + int ret; + char buf[100]; + stats[_STAT_DIR_CREATE].best=(double)99999.9; + stats[_STAT_DIR_CREATE].worst=(double)0.00000000; + for(i=0;i stats[_STAT_DIR_CREATE].worst) + stats[_STAT_DIR_CREATE].worst=stats[_STAT_DIR_CREATE].speed; + junk=chdir(buf); + for(j=0;j stats[_STAT_DIR_CREATE].worst) + stats[_STAT_DIR_CREATE].worst=stats[_STAT_DIR_CREATE].speed; + junk=chdir(buf); + for(k=0;k stats[_STAT_DIR_CREATE].worst) + stats[_STAT_DIR_CREATE].worst=stats[_STAT_DIR_CREATE].speed; + junk=chdir(buf); + junk=chdir(".."); + } + junk=chdir(".."); + } + junk=chdir(".."); + } +} + +void +dir_traverse(int x) +{ + int i,j,k; + char buf[100]; + double time1, time2; + stats[_STAT_DIR_TRAVERSE].best=(double)99999.9; + stats[_STAT_DIR_TRAVERSE].worst=(double)0.00000000; + for(i=0;i stats[_STAT_DIR_TRAVERSE].worst) + 
stats[_STAT_DIR_TRAVERSE].worst=stats[_STAT_DIR_TRAVERSE].speed; + } + stats[_STAT_DIR_TRAVERSE].starttime=time_so_far(); + junk=chdir(".."); + stats[_STAT_DIR_TRAVERSE].endtime=time_so_far(); + stats[_STAT_DIR_TRAVERSE].speed=time2+stats[_STAT_DIR_TRAVERSE].endtime-stats[_STAT_DIR_TRAVERSE].starttime; + if(stats[_STAT_DIR_TRAVERSE].speed < (double)0.0) + stats[_STAT_DIR_TRAVERSE].speed=(double) 0.0; + stats[_STAT_DIR_TRAVERSE].total_time+=stats[_STAT_DIR_TRAVERSE].speed; + stats[_STAT_DIR_TRAVERSE].counter++; + if(stats[_STAT_DIR_TRAVERSE].speed < stats[_STAT_DIR_TRAVERSE].best) + stats[_STAT_DIR_TRAVERSE].best=stats[_STAT_DIR_TRAVERSE].speed; + if(stats[_STAT_DIR_TRAVERSE].speed > stats[_STAT_DIR_TRAVERSE].worst) + stats[_STAT_DIR_TRAVERSE].worst=stats[_STAT_DIR_TRAVERSE].speed; + } + stats[_STAT_DIR_TRAVERSE].starttime=time_so_far(); + junk=chdir(".."); + stats[_STAT_DIR_TRAVERSE].endtime=time_so_far(); + stats[_STAT_DIR_TRAVERSE].speed=time1+stats[_STAT_DIR_TRAVERSE].endtime-stats[_STAT_DIR_TRAVERSE].starttime; + if(stats[_STAT_DIR_TRAVERSE].speed < (double)0.0) + stats[_STAT_DIR_TRAVERSE].speed=(double)0.0; + stats[_STAT_DIR_TRAVERSE].total_time+=stats[_STAT_DIR_TRAVERSE].speed; + stats[_STAT_DIR_TRAVERSE].counter++; + if(stats[_STAT_DIR_TRAVERSE].speed < stats[_STAT_DIR_TRAVERSE].best) + stats[_STAT_DIR_TRAVERSE].best=stats[_STAT_DIR_TRAVERSE].speed; + if(stats[_STAT_DIR_TRAVERSE].speed > stats[_STAT_DIR_TRAVERSE].worst) + stats[_STAT_DIR_TRAVERSE].worst=stats[_STAT_DIR_TRAVERSE].speed; + } +} + +void +file_create(int x) +{ + int i,j,k; + int fd; + int ret; + char buf[100]; + char value; + stats[_STAT_CREATE].best=(double)999999.9; + stats[_STAT_CREATE].worst=(double)0.0; + stats[_STAT_WRITE].best=(double)999999.9; + stats[_STAT_WRITE].worst=(double)0.0; + stats[_STAT_CLOSE].best=(double)999999.9; + stats[_STAT_CLOSE].worst=(double)0.0; + for(i=0;i stats[_STAT_CREATE].worst) + stats[_STAT_CREATE].worst=stats[_STAT_CREATE].speed; + + 
stats[_STAT_WRITE].starttime=time_so_far(); + junk=write(fd,mbuffer,sz); + stats[_STAT_WRITE].endtime=time_so_far(); + stats[_STAT_WRITE].counter++; + stats[_STAT_WRITE].speed=stats[_STAT_WRITE].endtime-stats[_STAT_WRITE].starttime; + if(stats[_STAT_WRITE].speed < (double)0.0) + stats[_STAT_WRITE].speed=(double)0.0; + stats[_STAT_WRITE].total_time+=stats[_STAT_WRITE].speed; + if(stats[_STAT_WRITE].speed < stats[_STAT_WRITE].best) + stats[_STAT_WRITE].best=stats[_STAT_WRITE].speed; + if(stats[_STAT_WRITE].speed > stats[_STAT_WRITE].worst) + stats[_STAT_WRITE].worst=stats[_STAT_WRITE].speed; + + fsync(fd); + stats[_STAT_CLOSE].starttime=time_so_far(); + close(fd); + stats[_STAT_CLOSE].endtime=time_so_far(); + stats[_STAT_CLOSE].speed=stats[_STAT_CLOSE].endtime-stats[_STAT_CLOSE].starttime; + if(stats[_STAT_CLOSE].speed < (double)0.0) + stats[_STAT_CLOSE].speed=(double)0.0; + stats[_STAT_CLOSE].total_time+=stats[_STAT_CLOSE].speed; + stats[_STAT_CLOSE].counter++; + if(stats[_STAT_CLOSE].speed < stats[_STAT_CLOSE].best) + stats[_STAT_CLOSE].best=stats[_STAT_CLOSE].speed; + if(stats[_STAT_CLOSE].speed > stats[_STAT_CLOSE].worst) + stats[_STAT_CLOSE].worst=stats[_STAT_CLOSE].speed; + } + junk=chdir(".."); + } + junk=chdir(".."); + } +} + +void +file_stat(int x) +{ + int i,j,k,y; + char buf[100]; + struct stat mystat; + stats[_STAT_STAT].best=(double)99999.9; + stats[_STAT_STAT].worst=(double)0.00000000; + for(i=0;i stats[_STAT_STAT].worst) + stats[_STAT_STAT].worst=stats[_STAT_STAT].speed; + } + junk=chdir(".."); + } + junk=chdir(".."); + } +} + +void +file_access(int x) +{ + int i,j,k,y; + char buf[100]; + stats[_STAT_ACCESS].best=(double)999999.9; + stats[_STAT_ACCESS].worst=(double)0.0; + for(i=0;i stats[_STAT_ACCESS].worst) + stats[_STAT_ACCESS].worst=stats[_STAT_ACCESS].speed; + } + junk=chdir(".."); + } + junk=chdir(".."); + } +} + +void +file_chmod(int x) +{ + int i,j,k,y; + char buf[100]; + stats[_STAT_CHMOD].best=(double)999999.9; + 
stats[_STAT_CHMOD].worst=(double)0.0; + for(i=0;i stats[_STAT_CHMOD].worst) + stats[_STAT_CHMOD].worst=stats[_STAT_CHMOD].speed; + } + junk=chdir(".."); + } + junk=chdir(".."); + } +} + +void +file_readdir(int x) +{ + int i,j,ret1; + char buf[100]; + DIR *dirbuf; + struct dirent *y; + stats[_STAT_READDIR].best=(double)999999.9; + stats[_STAT_READDIR].worst=(double)0.0; + for(i=0;i stats[_STAT_READDIR].worst) + stats[_STAT_READDIR].worst=stats[_STAT_READDIR].speed; + ret1=closedir(dirbuf); + if(ret1 < 0) + { + printf("closedir failed\n"); + exit(1); + } + junk=chdir(".."); + } + junk=chdir(".."); + } +} + +void +file_link(int x) +{ + int i,j,k,y; + char buf[100]; + char bufn[100]; + stats[_STAT_LINK].best=(double)999999.9; + stats[_STAT_LINK].worst=(double)0.0; + for(i=0;i stats[_STAT_LINK].worst) + stats[_STAT_LINK].worst=stats[_STAT_LINK].speed; + } + junk=chdir(".."); + } + junk=chdir(".."); + } +} + +void +file_unlink(int x) +{ + int i,j,k,y; + char buf[100]; + char bufn[100]; + stats[_STAT_UNLINK].best=(double)999999.9; + stats[_STAT_UNLINK].worst=(double)0.0; + for(i=0;i stats[_STAT_UNLINK].worst) + stats[_STAT_UNLINK].worst=stats[_STAT_UNLINK].speed; + } + junk=chdir(".."); + } + junk=chdir(".."); + } +} + +void +dir_delete(int x) +{ + int i,j,k; + char buf[100]; + stats[_STAT_DIR_DELETE].best=(double)99999.9; + stats[_STAT_DIR_DELETE].worst=(double)0.00000000; + for(i=0;i stats[_STAT_DIR_DELETE].worst) + stats[_STAT_DIR_DELETE].worst=stats[_STAT_DIR_DELETE].speed; + } + junk=chdir(".."); + sprintf(buf,"fileop_L1_%d_L2_%d",i,j); + stats[_STAT_DIR_DELETE].starttime=time_so_far(); + rmdir(buf); + stats[_STAT_DIR_DELETE].endtime=time_so_far(); + stats[_STAT_DIR_DELETE].speed=stats[_STAT_DIR_DELETE].endtime-stats[_STAT_DIR_DELETE].starttime; + if(stats[_STAT_DIR_DELETE].speed < (double)0.0) + stats[_STAT_DIR_DELETE].speed=(double)0.0; + stats[_STAT_DIR_DELETE].total_time+=stats[_STAT_DIR_DELETE].speed; + stats[_STAT_DIR_DELETE].counter++; + 
if(stats[_STAT_DIR_DELETE].speed < stats[_STAT_DIR_DELETE].best) + stats[_STAT_DIR_DELETE].best=stats[_STAT_DIR_DELETE].speed; + if(stats[_STAT_DIR_DELETE].speed > stats[_STAT_DIR_DELETE].worst) + stats[_STAT_DIR_DELETE].worst=stats[_STAT_DIR_DELETE].speed; + } + junk=chdir(".."); + sprintf(buf,"fileop_L1_%d",i); + stats[_STAT_DIR_DELETE].starttime=time_so_far(); + rmdir(buf); + stats[_STAT_DIR_DELETE].endtime=time_so_far(); + stats[_STAT_DIR_DELETE].speed=stats[_STAT_DIR_DELETE].endtime-stats[_STAT_DIR_DELETE].starttime; + if(stats[_STAT_DIR_DELETE].speed < (double)0.0) + stats[_STAT_DIR_DELETE].speed=(double)0.0; + stats[_STAT_DIR_DELETE].total_time+=stats[_STAT_DIR_DELETE].speed; + stats[_STAT_DIR_DELETE].counter++; + if(stats[_STAT_DIR_DELETE].speed < stats[_STAT_DIR_DELETE].best) + stats[_STAT_DIR_DELETE].best=stats[_STAT_DIR_DELETE].speed; + if(stats[_STAT_DIR_DELETE].speed > stats[_STAT_DIR_DELETE].worst) + stats[_STAT_DIR_DELETE].worst=stats[_STAT_DIR_DELETE].speed; + } +} + +void +file_delete(int x) +{ + int i,j,k; + char buf[100]; + stats[_STAT_DELETE].best=(double)999999.9; + stats[_STAT_DELETE].worst=(double)0.0; + for(i=0;i stats[_STAT_DELETE].worst) + stats[_STAT_DELETE].worst=stats[_STAT_DELETE].speed; + } + junk=chdir(".."); + sprintf(buf,"fileop_L1_%d_L2_%d",i,j); + rmdir(buf); + } + junk=chdir(".."); + sprintf(buf,"fileop_L1_%d",i); + rmdir(buf); + } +} +void +file_read(int x) +{ + int i,j,k,y,fd; + char buf[100]; + char value; + stats[_STAT_READ].best=(double)99999.9; + stats[_STAT_READ].worst=(double)0.00000000; + stats[_STAT_OPEN].best=(double)99999.9; + stats[_STAT_OPEN].worst=(double)0.00000000; + for(i=0;i stats[_STAT_OPEN].worst) + stats[_STAT_OPEN].worst=stats[_STAT_OPEN].speed; + + stats[_STAT_READ].starttime=time_so_far(); + y=read(fd,mbuffer,sz); + if(y < 0) + { + printf("Read failed\n"); + exit(1); + } + if(validate(mbuffer,sz, value) !=0) + printf("Error: Data Mis-compare\n");; + stats[_STAT_READ].endtime=time_so_far(); + close(fd); + 
stats[_STAT_READ].speed=stats[_STAT_READ].endtime-stats[_STAT_READ].starttime; + if(stats[_STAT_READ].speed < (double)0.0) + stats[_STAT_READ].speed=(double)0.0; + stats[_STAT_READ].total_time+=stats[_STAT_READ].speed; + stats[_STAT_READ].counter++; + if(stats[_STAT_READ].speed < stats[_STAT_READ].best) + stats[_STAT_READ].best=stats[_STAT_READ].speed; + if(stats[_STAT_READ].speed > stats[_STAT_READ].worst) + stats[_STAT_READ].worst=stats[_STAT_READ].speed; + } + junk=chdir(".."); + } + junk=chdir(".."); + } +} + +/************************************************************************/ +/* Time measurement routines. Thanks to Iozone :-) */ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +static double +time_so_far(void) +#else +static double +time_so_far() +#endif +{ +#ifdef Windows + LARGE_INTEGER freq,counter; + double wintime,bigcounter; + /* For Windows the time_of_day() is useless. It increments in 55 milli second */ + /* increments. By using the Win32api one can get access to the high performance */ + /* measurement interfaces. With this one can get back into the 8 to 9 */ + /* microsecond resolution. 
*/ + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&counter); + bigcounter=(double)counter.HighPart *(double)0xffffffff + + (double)counter.LowPart; + wintime = (double)(bigcounter/(double)freq.LowPart); + return((double)wintime); +#else +#if defined (OSFV4) || defined(OSFV3) || defined(OSFV5) + struct timespec gp; + + if (getclock(TIMEOFDAY, (struct timespec *) &gp) == -1) + perror("getclock"); + return (( (double) (gp.tv_sec)) + + ( ((float)(gp.tv_nsec)) * 0.000000001 )); +#else + struct timeval tp; + + if (gettimeofday(&tp, (struct timezone *) NULL) == -1) + perror("gettimeofday"); + return ((double) (tp.tv_sec)) + + (((double) tp.tv_usec) * 0.000001 ); +#endif +#endif +} + +void +splash(void) +{ + printf("\n"); + printf(" --------------------------------------\n"); + printf(" | Fileop | \n"); + printf(" | %s | \n",version); + printf(" | | \n"); + printf(" | by |\n"); + printf(" | | \n"); + printf(" | Don Capps |\n"); + printf(" --------------------------------------\n"); + printf("\n"); +} + +void +usage(void) +{ + splash(); + printf(" fileop [-f X ]|[-l # -u #] [-s Y] [-e] [-b] [-w] [-d ] [-t] [-v] [-h]\n"); + printf("\n"); + printf(" -f # Force factor. X^3 files will be created and removed.\n"); + printf(" -l # Lower limit on the value of the Force factor.\n"); + printf(" -u # Upper limit on the value of the Force factor.\n"); + printf(" -s # Optional. Sets filesize for the create/write. 
May use suffix 'K' or 'M'.\n"); + printf(" -e Excel importable format.\n"); + printf(" -b Output best case results.\n"); + printf(" -i # Increment force factor by this increment.\n"); + printf(" -w Output worst case results.\n"); + printf(" -d Specify starting directory.\n"); + printf(" -U Mount point to remount between tests.\n"); + printf(" -t Verbose output option.\n"); + printf(" -v Version information.\n"); + printf(" -h Help text.\n"); + printf("\n"); + printf(" The structure of the file tree is:\n"); + printf(" X number of Level 1 directories, with X number of\n"); + printf(" level 2 directories, with X number of files in each\n"); + printf(" of the level 2 directories.\n"); + printf("\n"); + printf(" Example: fileop 2\n"); + printf("\n"); + printf(" dir_1 dir_2\n"); + printf(" / \\ / \\ \n"); + printf(" sdir_1 sdir_2 sdir_1 sdir_2\n"); + printf(" / \\ / \\ / \\ / \\ \n"); + printf(" file_1 file_2 file_1 file_2 file_1 file_2 file_1 file_2\n"); + printf("\n"); + printf(" Each file will be created, and then Y bytes is written to the file.\n"); + printf("\n"); +} +void +clear_stats() +{ + int i; + for(i=0;i<_NUM_STATS;i++) + bzero((char *)&stats[i],sizeof(struct stat_struct)); +} +int +validate(char *buffer, int size, char value) +{ + register int i; + register char *cp; + register int size1; + register char v1; + v1=value; + cp = buffer; + size1=size; + for(i=0;i " >> /dev/stderr ; + echo "filename is the output of iozone -a" >> /dev/stderr ; + echo "test is one of write rewrite read reread randread randwrite bkwdread recrewrite strideread fwrite frewrite fread freread" >> /dev/stderr ;; + esac } + +#filename=$1 +filename=iozone_gen_out +query=$2 +if (! 
[ -e $query ] ) ; then mkdir $query; fi +if ( [ $# -eq 2 ] ) ; + then + write_gnuplot_file > $query/`basename $file_name.gnuplot` + else + echo "Usage : gengnuplot.sh " 2>&1 + echo "filename is the output of iozone -a" 2>&1 + echo "test is one of write rewrite read reread randread randwrite bkwdread recrewrite strideread fwrite frewrite fread freread" 2>&1 +fi diff --git a/src/components/appio/tests/iozone/gnu3d.dem b/src/components/appio/tests/iozone/gnu3d.dem new file mode 100644 index 0000000..bcbf48f --- /dev/null +++ b/src/components/appio/tests/iozone/gnu3d.dem @@ -0,0 +1,146 @@ +# +# $Id: 3D plot of performance +# +# Processes files that were created by Generate_Graphs +# and displays the results. Also, saves a postscript copy. +# +# Don Capps + +dirs = "write rewrite read reread randread randwrite bkwdread recrewrite strideread fwrite frewrite fread freread" +titles = "Write ReWrite Read Reread Random_read Random_write Read_Backwards Record_rewrite Stride_read Fwrite Frewrite Fread Freread" + +file(n) = sprintf("%s/iozone_gen_out.gnuplot", word(dirs,n)) +outfile(n) = sprintf("%s/%s.ps", word(dirs,n), word(dirs,n)) +title(n) = word(titles,n) + +set title "Iozone performance" +set grid lt 2 lw 1 +set surface +set parametric +set xtics +set ytics +set logscale x 2 +set logscale y 2 +set xlabel "File size in 2^n KBytes" +set ylabel "Record size in 2^n Kbytes" +set zlabel "Kbytes/sec" +set style data lines +set dgrid3d 80,80,3 + +i = 1 +set terminal x11 +set output +splot file(i) title title(i) +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 2 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 3 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 4 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set 
terminal postscript color +set output outfile(i) +replot + +i = 5 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 6 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 7 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 8 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 9 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 10 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 11 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 12 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + +i = 13 +set terminal x11 +set output +replot +pause -1 "Hit return to continue" +set terminal postscript color +set output outfile(i) +replot + diff --git a/src/components/appio/tests/iozone/gnuplot.dem b/src/components/appio/tests/iozone/gnuplot.dem new file mode 100644 index 0000000..d1abdf5 --- /dev/null +++ b/src/components/appio/tests/iozone/gnuplot.dem @@ -0,0 +1,60 @@ +# +# $Id: Plot of latency versus offset in a file +# +# Requires data file "wol.dat" from this directory, +# so change current working directory to this directory before running. 
+# + +set title "File system write latency " +set autoscale x +set xtics +set xlabel "Offset in file (KB)" +set ylabel "Latency in Microseconds" +plot 'wol.dat' using 1:2 title "Latency Plot" with lines +pause -1 "Hit return to continue" + +# +# $Id: Plot of latency versus offset in a file +# +# Requires data file "rwol.dat" from this directory, +# so change current working directory to this directory before running. +# + +set title "File system re-write latency " +set autoscale x +set xtics +set xlabel "Offset in file (KB)" +set ylabel "Latency in Microseconds" +plot 'rwol.dat' using 1:2 title "Latency Plot" with lines +pause -1 "Hit return to continue" + +# +# $Id: Plot of latency versus offset in a file +# +# Requires data file "rol.dat" from this directory, +# so change current working directory to this directory before running. +# + +set title "File system read latency " +set autoscale x +set xtics +set xlabel "Offset in file (KB)" +set ylabel "Latency in Microseconds" +plot 'rol.dat' using 1:2 title "Latency Plot" with lines +pause -1 "Hit return to continue" + +# +# $Id: Plot of latency versus offset in a file +# +# Requires data file "rrol.dat" from this directory, +# so change current working directory to this directory before running. +# + +set title "File system re-read latency " +set autoscale x +set xtics +set xlabel "Offset in file (KB)" +set ylabel "Latency in Microseconds" +plot 'rrol.dat' using 1:2 title "Latency Plot" with lines +pause -1 "Hit return to continue" + diff --git a/src/components/appio/tests/iozone/gnuplotps.dem b/src/components/appio/tests/iozone/gnuplotps.dem new file mode 100644 index 0000000..39e1c71 --- /dev/null +++ b/src/components/appio/tests/iozone/gnuplotps.dem @@ -0,0 +1,63 @@ +# +# $Id: Plot of latency versus offset in a file +# +# Requires data file "wol.dat" from this directory, +# so change current working directory to this directory before running. 
+# + +set title "File system write latency " +set terminal postscript +set output "gnu_wol.ps" +set autoscale x +set xtics +set xlabel "Offset in file (KB)" +set ylabel "Latency in Microseconds" +plot 'wol.dat' using 1:2 title "Latency Plot" with lines +# +# $Id: Plot of latency versus offset in a file +# +# Requires data file "rwol.dat" from this directory, +# so change current working directory to this directory before running. +# + +set title "File system re-write latency " +set terminal postscript +set output "gnu_rwol.ps" +set autoscale x +set xtics +set xlabel "Offset in file (KB)" +set ylabel "Latency in Microseconds" +plot 'rwol.dat' using 1:2 title "Latency Plot" with lines + +# +# $Id: Plot of latency versus offset in a file +# +# Requires data file "rol.dat" from this directory, +# so change current working directory to this directory before running. +# + +set title "File system read latency " +set autoscale x +set xtics +set xlabel "Offset in file (KB)" +set ylabel "Latency in Microseconds" +set terminal postscript +set output "gnu_rol.ps" +plot 'rol.dat' using 1:2 title "Latency Plot" with lines + +# +# $Id: Plot of latency versus offset in a file +# +# Requires data file "rrol.dat" from this directory, +# so change current working directory to this directory before running. 
+# + +set title "File system re-read latency " +set terminal postscript +set output "gnu_rrol.ps" +set autoscale x +set xtics +set xlabel "Offset in file (KB)" +set ylabel "Latency in Microseconds" +plot 'rrol.dat' using 1:2 title "Latency Plot" with lines + diff --git a/src/components/appio/tests/iozone/iozone.c b/src/components/appio/tests/iozone/iozone.c new file mode 100644 index 0000000..7708cd7 --- /dev/null +++ b/src/components/appio/tests/iozone/iozone.c @@ -0,0 +1,25297 @@ +/************************************************************************/ +/* Original Author: */ +/* William Norcott (wnorcott@us.oracle.com) */ +/* 4 Dunlap Drive */ +/* Nashua, NH 03060 */ +/* */ +/************************************************************************/ +/* Enhancements by: */ +/* Don Capps (capps@iozone.org) */ +/* 7417 Crenshaw */ +/* Plano, TX 75025 */ +/* */ +/************************************************************************/ +/* Copyright 1991, 1992, 1994, 1998, 2000, 2001 William D. Norcott */ +/************************************************************************/ +/* */ +/* Iozone is based on the original work done by William Norrcot. It has */ +/* been enhanced so that it provides a more complete filesystem */ +/* characterization. */ +/* Its purpose is to provide automated filesystem characterization. */ +/* Enhancements have been made by: */ +/* */ +/* Don Capps capps@iozone.org */ +/* */ +/* Iozone can perform single stream and multi stream I/O */ +/* also it now performs read, write, re-read, re-write, */ +/* read backwards, read/write random, re-read record, */ +/* pread, re-pread, re-pwrite, preadv, re-preadv, pwritev, */ +/* stride read, and re-pwritev,mmap, POSIX async I/O, NFS */ +/* cluster testing and much more. */ +/* */ +/* The frontend now uses getopt() and the user can control many more */ +/* of the actions. 
*/ +/* */ +/* */ +/************************************************************************/ +/* THIS SOFTWARE IS PROVIDED BY DON CAPPS AND THE IOZONE CREW "AS IS */ +/* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ +/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A */ +/* PARTICULAR PURPOSE ARE DISCLAIMED. */ +/* */ +/* IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY */ +/* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL */ +/* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ +/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER */ +/* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */ +/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE. */ +/************************************************************************/ + +/************************************************************************/ +/* For the beginner... */ +/* */ +/* 1. make linux (linux, hpux, convex, hpux_no_ansi) */ +/* 2. type ./iozone -Ra */ +/* */ +/* Hint: Type make (it will give you a list of valid targets) */ +/* */ +/************************************************************************/ + + +/* The version number */ +#define THISVERSION " Version $Revision$" + +#if defined(linux) + #define _GNU_SOURCE +#endif +/* Include for Cygnus development environment for Windows */ +#if defined (Windows) +#include +#include +#else +#if defined(linux) || defined(solaris) || defined(macosx) || defined(__AIX__) || defined(FreeBSD) || defined(_HPUX_SOURCE) +#include +#else +extern int errno; /* imported for errors */ +extern int h_errno; /* imported for errors */ +#endif +#endif + + +#include +#include +#if defined (__LP64__) || defined(OSF_64) || defined(__alpha__) || defined(__arch64__) || defined(_LP64) || defined(__s390x__) || defined(__AMD64__) +#define MODE "\tCompiled for 64 bit mode." 
+#define _64BIT_ARCH_ +#else +#define MODE "\tCompiled for 32 bit mode." +#endif + +#ifndef NO_THREADS +#include +#endif + +#if defined(HAVE_ANSIC_C) && defined(linux) +#include +#include +#endif + +#ifdef HAVE_PROTO +#include "proto.h" +#else +int atoi(); +int close(); +int unlink(); +int main(); +void record_command_line(); +#if !defined(linux) +int wait(); +#endif +int fsync(); +void srand48(); +long lrand48(); +void create_list(); +void Poll(); +void print_header(); +void Kill(); +long long l_min(); +long long l_max(); +long long mythread_create(); +int gen_new_buf(); +void touch_dedup(); +void init_by_array64(unsigned long long *, unsigned long long ); +unsigned long long genrand64_int64(void); +#endif + +#include + +char *help[] = { +" Usage: iozone [-s filesize_Kb] [-r record_size_Kb] [-f [path]filename] [-h]", +" [-i test] [-E] [-p] [-a] [-A] [-z] [-Z] [-m] [-M] [-t children]", +" [-l min_number_procs] [-u max_number_procs] [-v] [-R] [-x] [-o]", +" [-d microseconds] [-F path1 path2...] 
[-V pattern] [-j stride]", +" [-T] [-C] [-B] [-D] [-G] [-I] [-H depth] [-k depth] [-U mount_point]", +" [-S cache_size] [-O] [-L cacheline_size] [-K] [-g maxfilesize_Kb]", +" [-n minfilesize_Kb] [-N] [-Q] [-P start_cpu] [-e] [-c] [-b Excel.xls]", +" [-J milliseconds] [-X write_telemetry_filename] [-w] [-W]", +" [-Y read_telemetry_filename] [-y minrecsize_Kb] [-q maxrecsize_Kb]", +" [-+u] [-+m cluster_filename] [-+d] [-+x multiplier] [-+p # ]", +" [-+r] [-+t] [-+X] [-+Z] [-+w percent dedupable] [-+y percent_interior_dedup]", +" [-+C percent_dedup_within]", +" ", +" -a Auto mode", +" -A Auto2 mode", +" -b Filename Create Excel worksheet file", +" -B Use mmap() files", +" -c Include close in the timing calculations", +" -C Show bytes transferred by each child in throughput testing", +" -d # Microsecond delay out of barrier", +" -D Use msync(MS_ASYNC) on mmap files", +" -e Include flush (fsync,fflush) in the timing calculations", +" -E Run extension tests", +" -f filename to use", +" -F filenames for each process/thread in throughput test", +" -g # Set maximum file size (in Kbytes) for auto mode (or #m or #g)", +" -G Use msync(MS_SYNC) on mmap files", +" -h help", +" -H # Use POSIX async I/O with # async operations", +" -i # Test to run (0=write/rewrite, 1=read/re-read, 2=random-read/write", +" 3=Read-backwards, 4=Re-write-record, 5=stride-read, 6=fwrite/re-fwrite", +" 7=fread/Re-fread, 8=random_mix, 9=pwrite/Re-pwrite, 10=pread/Re-pread", +" 11=pwritev/Re-pwritev, 12=preadv/Re-preadv)", +" -I Use VxFS VX_DIRECT, O_DIRECT,or O_DIRECTIO for all file operations", +" -j # Set stride of file accesses to (# * record size)", +" -J # milliseconds of compute cycle before each I/O operation", +" -k # Use POSIX async I/O (no bcopy) with # async operations", +" -K Create jitter in the access pattern for readers", +" -l # Lower limit on number of processes to run", +" -L # Set processor cache line size to value (in bytes)", +" -m Use multiple buffers", +" -M Report uname -a 
output", +" -n # Set minimum file size (in Kbytes) for auto mode (or #m or #g)", +" -N Report results in microseconds per operation", +" -o Writes are synch (O_SYNC)", +" -O Give results in ops/sec.", +" -p Purge on", +" -P # Bind processes/threads to processors, starting with this cpu", +" -q # Set maximum record size (in Kbytes) for auto mode (or #m or #g)", +" -Q Create offset/latency files", +" -r # record size in Kb", +" or -r #k .. size in Kb", +" or -r #m .. size in Mb", +" or -r #g .. size in Gb", +" -R Generate Excel report", +" -s # file size in Kb", +" or -s #k .. size in Kb", +" or -s #m .. size in Mb", +" or -s #g .. size in Gb", +" -S # Set processor cache size to value (in Kbytes)", +" -t # Number of threads or processes to use in throughput test", +" -T Use POSIX pthreads for throughput tests", +" -u # Upper limit on number of processes to run", +" -U Mount point to remount between tests", +" -v version information", +" -V # Verify data pattern write/read", +" -w Do not unlink temporary file", +" -W Lock file when reading or writing", +" -x Turn off stone-walling", +" -X filename Write telemetry file. Contains lines with (offset reclen compute_time) in ascii", +" -y # Set minimum record size (in Kbytes) for auto mode (or #m or #g)", +" -Y filename Read telemetry file. Contains lines with (offset reclen compute_time) in ascii", +" -z Used in conjunction with -a to test all possible record sizes", +" -Z Enable mixing of mmap I/O and file I/O", +" -+E Use existing non-Iozone file for read-only testing", +" -+K Sony special. Manual control of test 8.", +" -+m Cluster_filename Enable Cluster testing", +" -+d File I/O diagnostic mode. (To troubleshoot a broken file I/O subsystem)", +" -+u Enable CPU utilization output (Experimental)", +" -+x # Multiplier to use for incrementing file and record sizes", +" -+p # Percentage of mix to be reads", +" -+r Enable O_RSYNC|O_SYNC for all testing.", +" -+t Enable network performance test. 
Requires -+m ", +" -+n No retests selected.", +" -+k Use constant aggregate data set size.", +" -+q Delay in seconds between tests.", +" -+l Enable record locking mode.", +" -+L Enable record locking mode, with shared file.", +" -+B Sequential mixed workload.", +#if defined(O_DSYNC) +" -+D Enable O_DSYNC mode.", +#endif +#ifndef NO_MADVISE +" -+A # Enable madvise. 0 = normal, 1=random, 2=sequential", +" 3=dontneed, 4=willneed", +#endif +" -+N Do not truncate existing files on sequential writes.", +" -+S # Dedup-able data is limited to sharing within each numerically", +" identified file set", +" -+V Enable shared file. No locking.", +#if defined(Windows) +" -+U Windows Unbufferd I/O API (Very Experimental)", +#endif +" -+X Enable short circuit mode for filesystem testing ONLY", +" ALL Results are NOT valid in this mode.", +" -+Z Enable old data set compatibility mode. WARNING.. Published", +" hacks may invalidate these results and generate bogus, high", +" values for results.", +" -+w ## Percent of dedup-able data in buffers.", +" -+y ## Percent of dedup-able within & across files in buffers.", +" -+C ## Percent of dedup-able within & not across files in buffers.", +" -+H Hostname Hostname of the PIT server.", +" -+P Service Service of the PIT server.", +" -+z Enable latency histogram logging.", +"" }; + +char *head1[] = { + " 'Iozone' Filesystem Benchmark Program", + " ", +THISVERSION, + MODE, + " ", + " Original Author: William Norcott (wnorcott@us.oracle.com)", + " 4 Dunlap Drive", + " Nashua, NH 03060", + " ", + " Enhancements: Don Capps (capps@iozone.org)", + " 7417 Crenshaw", + " Plano, TX 75025", + " ", + " Copyright 1991, 1992, 1994, 1998, 1999, 2002 William D. Norcott", + " ", + " License to freely use and distribute this software is hereby granted ", + " by the author, subject to the condition that this copyright notice ", + " remains intact. 
The author retains the exclusive right to publish ", + " derivative works based on this work, including, but not limited to, ", + " revised versions of this work", + " ", + " Other contributors:", + " ", + " Don Capps (Network Appliance) capps@iozone.org", + " ", + ""}; + +/****************************************************************** + + INCLUDE FILES (system-dependent) + +******************************************************************/ +#include +#include +#include +#include + +#include +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__APPLE__) && !defined(__DragonFly__) +#include +#endif +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__APPLE__) || defined(__DragonFly__) +#include +#include +#endif + +#if defined (__FreeBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__APPLE__) || defined(__DragonFly__) +#ifndef O_SYNC +#define O_SYNC O_FSYNC +#endif +#endif + +#if defined (__FreeBSD__) +#ifndef O_RSYNC +#define O_RSYNC O_FSYNC +#endif +#endif + +#if ((defined(solaris) && defined(__LP64__)) || defined(__s390x__)) +/* If we are building for 64-bit Solaris, all functions that return pointers + * must be declared before they are used; otherwise the compiler will assume + * that they return ints and the top 32 bits of the pointer will be lost, + * causing segmentation faults. The following includes take care of this. + * It should be safe to add these for all other OSs too, but we're only + * doing it for Solaris now in case another OS turns out to be a special case. 
+ */ +#include +#include +#include +#include +#include + +#endif +#if ( defined(solaris) && defined(studio11) ) +#include +#include +#endif + +#if defined(OSFV5) || defined(linux) +#include +#endif + +#if defined(linux) +#include +#include +#include +#endif + +#ifndef MAP_FAILED +#define MAP_FAILED -1 +#endif + +#ifdef generic +typedef long long off64_t; +#endif + +#if defined(__DragonFly__) +#define __off64_t_defined +typedef off_t off64_t; +#endif + + +#ifndef solaris +#ifndef off64_t +#ifndef _OFF64_T +#ifndef __AIX__ +#ifndef __off64_t_defined +#ifndef SCO_Unixware_gcc +#ifndef UWIN +#ifndef __DragonFly__ +typedef long long off64_t; +#endif +#endif +#endif +#endif +#endif +#endif +#endif +#endif + +#ifdef __AIX__ +#include +#endif + +#ifdef VXFS +#include +#endif + +#ifdef unix +#if defined (__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__) \ + || defined(_SUA_) +#include +#endif +#include +#include +#include +#ifndef NULL +#define NULL 0 +#endif + +#ifndef nolimits +#include +#endif +#endif + +#ifdef HAVE_ANSIC_C +#define VOLATILE volatile +#else +#define VOLATILE +#endif + +#include + +#ifdef SHARED_MEM +#include +#endif + +#if defined(bsd4_2) && !defined(MS_SYNC) +#define MS_SYNC 0 +#define MS_ASYNC 0 +#endif + +#if defined(bsd4_4) || defined(__DragonFly__) +#define MAP_ANONYMOUS MAP_ANON +#endif + +#if defined(SCO_Unixware_gcc) || defined(solaris) || defined(UWIN) || defined(SCO) +#define MAP_FILE (0) +#endif + +#if defined(IRIX) || defined(IRIX64) || defined(Windows) || defined(bsd4_2) || defined(bsd4_4) || defined(SCO) || defined(Solaris) || defined(SCO_Unixware_gcc) +long long page_size = 4096; +#define GOT_PAGESIZE 1 +#elif defined(NBPG) +long long page_size = NBPG; +#define GOT_PAGESIZE 1 +#elif defined(old_linux) +#include +long long page_size = PAGE_SIZE; +#define GOT_PAGESIZE 1 +#elif !defined(GOT_PAGESIZE) +long long page_size = 4096; /* Used when all else fails */ +#endif + +#ifdef HAVE_PREAD +#ifdef HAVE_PREADV +#define PVECMAX 16 + 
+#ifdef _HPUX_SOURCE +#define PER_VECTOR_OFFSET +#include +struct piovec piov[PVECMAX]; +#else +#include +struct iovec piov[PVECMAX]; +#define piov_base iov_base +#define piov_len iov_len +#endif + +#endif +#endif + +#define DEDUPSEED 0x2719362 + + +/* + * In multi thread/process throughput mode each child keeps track of + * statistics and communicates them through various flavors of + * shared memory, and via messages. + * + * The declaration below also defines child_stat, a pointer to a + * VOLATILE-qualified struct child_stats. + */ +struct child_stats { + long long flag; /* control space; the CHILD_STATE_* defines later in this file are described as the child_stat->flag values */ + long long flag1; /* pad */ + float walltime; /* child elapsed time */ + float cputime; /* child CPU time */ + float throughput; /* Throughput in either kb/sec or ops/sec */ + float actual; /* Either actual kb read or # of ops performed */ +} VOLATILE *child_stat; + +/* + * Used for cpu time statistics. + * NOTE(review): cpuutil is presumably the utilization derived from + * cputime and walltime - confirm against the code that fills it in. + */ +struct runtime { + float walltime; + float cputime; + float cpuutil; +}; + +#ifdef __convex_spp +#include +#endif + +#include +#include +#include + + +/* + * Messages the controlling process sends to children. + * Internal representation that is arch specific. + * This is used when using the network distributed mode.
+ */ +struct client_command { /* Native-typed counterpart of struct client_neutral_command: every member here has a same-named char-array member there. */ + char c_host_name[100]; + char c_pit_hostname[40]; + char c_pit_service[8]; + char c_client_name[100]; + char c_working_dir[200]; + char c_file_name[200]; + char c_path_dir[200]; + char c_execute_name[200]; + char c_write_traj_filename[200]; + char c_read_traj_filename[200]; + int c_oflag; + int c_mfflag; + int c_unbuffered; + int c_noretest; + int c_notruncate; + int c_read_sync; + int c_jflag; + int c_async_flag; + int c_k_flag; + int c_h_flag; + int c_mflag; + int c_pflag; + int c_stride_flag; + int c_verify; + int c_sverify; + int c_odsync; + int c_diag_v; + int c_dedup; + int c_dedup_interior; + int c_dedup_compress; + int c_dedup_mseed; + int c_hist_summary; + int c_op_rate; + int c_op_rate_flag; + int c_Q_flag; + int c_L_flag; + int c_OPS_flag; + int c_mmapflag; + int c_mmapasflag; + int c_mmapnsflag; + int c_mmapssflag; + int c_no_copy_flag; + int c_include_close; + int c_include_flush; + int c_disrupt_flag; + int c_compute_flag; + int c_xflag; + int c_MS_flag; + int c_mmap_mix; + int c_Kplus_flag; + int c_stop_flag; + int c_w_traj_flag; + int c_r_traj_flag; + int c_direct_flag; + int c_cpuutilflag; + int c_seq_mix; + int c_client_number; + int c_command; + int c_testnum; + int c_no_unlink; + int c_no_write; + int c_file_lock; + int c_rec_lock; + int c_Kplus_readers; + int c_multiplier; + int c_share_file; + int c_pattern; + int c_version; + int c_base_time; + int c_num_child; + int c_pct_read; + int c_advise_op; + int c_advise_flag; + int c_restf; + int c_mygen; + long long c_stride; + long long c_rest_val; + long long c_delay; + long long c_purge; + long long c_fetchon; + long long c_numrecs64; + long long c_reclen; + long long c_child_flag; + long long c_delay_start; + long long c_depth; + float c_compute_time; +}; + +/* + * All data in this is in string format for portability in a + * heterogeneous environment. + * + * Messages that the master will send to the clients + * over the socket.
This provides neutral format + * so that heterogeneous clusters will work. + * This is used when using the network distributed mode. + * WARNING !!! This data structure MUST not be bigger + * than 1448 bytes or fragmentation will kick your butt. + * + * NOTE(review): the ten name/path arrays alone add up to 1448 bytes and + * all members as declared add up to 2288 bytes, so the structure is + * already larger than the limit stated above. + */ +struct client_neutral_command { + char c_host_name[100]; + char c_pit_hostname[40]; + char c_pit_service[8]; + char c_client_name[100]; + char c_working_dir[200]; + char c_file_name[200]; + char c_path_dir[200]; + char c_execute_name[200]; + char c_write_traj_filename[200]; + char c_read_traj_filename[200]; + char c_oflag[2]; + char c_mfflag[2]; + char c_unbuffered[2]; + char c_noretest[2]; + char c_notruncate[2]; + char c_read_sync[2]; + char c_jflag[2]; + char c_async_flag[2]; + char c_k_flag[2]; + char c_h_flag[2]; + char c_mflag[2]; + char c_pflag[2]; + char c_stride_flag[2]; + char c_verify[2]; + char c_sverify[2]; + char c_odsync[2]; + char c_diag_v[2]; + char c_dedup[4]; + char c_dedup_interior[4]; + char c_dedup_compress[4]; + char c_dedup_mseed[4]; + char c_hist_summary[4]; + char c_op_rate[4]; + char c_op_rate_flag[2]; + char c_Q_flag[2]; + char c_L_flag[2]; + char c_OPS_flag[2]; + char c_mmapflag[2]; + char c_mmapasflag[2]; + char c_mmapnsflag[2]; + char c_mmapssflag[2]; + char c_no_copy_flag[2]; + char c_include_close[2]; + char c_include_flush[2]; + char c_disrupt_flag[2]; + char c_compute_flag[2]; + char c_stop_flag[2]; + char c_xflag[2]; + char c_MS_flag[2]; + char c_mmap_mix[2]; + char c_Kplus_flag[2]; + char c_w_traj_flag[2]; /* small int */ + char c_r_traj_flag[2]; /* small int */ + char c_direct_flag[2]; /* small int */ + char c_cpuutilflag[2]; /* small int */ + char c_seq_mix[2]; /* small int */ + char c_stride[10]; /* small long long */ + char c_rest_val[10]; /* small long long */ + char c_purge[10]; /* very small long long */ + char c_fetchon[10]; /* very small long long */ + char c_multiplier[10]; /* small int */ + char c_share_file[10]; /* small int */ + char c_file_lock[10]; /* small int */ +
char c_rec_lock[10]; /* small int */ + char c_Kplus_readers[10]; /* small int */ + char c_client_number[20]; /* int */ + char c_command[20]; /* int */ + char c_testnum[20]; /* int */ + char c_no_unlink[4]; /* int */ + char c_no_write[4]; /* int */ + char c_pattern[20]; /* int */ + char c_version[20]; /* int */ + char c_base_time[20]; /* int */ + char c_num_child[20]; /* int */ + char c_pct_read[6]; /* small int */ + char c_advise_op[4]; /* small int */ + char c_advise_flag[4]; /* small int */ + char c_restf[4]; /* small int */ + char c_mygen[20]; /* int */ + char c_depth[20]; /* small long long */ + char c_child_flag[40]; /* small long long */ + char c_delay[80]; /* long long */ + char c_numrecs64[80]; /* long long */ + char c_reclen[80]; /* long long */ + char c_delay_start[80]; /* long long */ + char c_compute_time[80]; /* float */ +}; + +/* + * Messages the clients will send to the master. + * Internal representation on each client and the master. + * This is used when using the network distributed mode. + * Every member has a same-named char-array member in + * struct master_neutral_command. + */ +struct master_command { + char m_host_name[100]; + char m_client_name[100]; + char m_stop_flag; + int m_client_number; + int m_client_error; + int m_child_port; + int m_child_async_port; + int m_command; + int m_testnum; + int m_version; + int m_mygen; + float m_throughput; + float m_cputime; + float m_walltime; + float m_actual; + long long m_child_flag; +}; + +/* + * Messages that the clients will send to the master + * over the socket. This provides neutral format + * so that heterogeneous clusters will work. + * This is used when using the network distributed mode.
+ */ +struct master_neutral_command { /* String-encoded form of struct master_command: one char array per member; the trailing comments give the type of the value each array encodes. */ + char m_host_name[100]; + char m_client_name[100]; + char m_client_number[20]; /* int */ + char m_client_error[20]; /* int */ + char m_stop_flag[4]; /* char +space */ + char m_child_port[20]; /* int */ + char m_child_async_port[20]; /* int */ + char m_command[20]; /* int */ + char m_testnum[20]; /* int */ + char m_version[20]; /* int */ + char m_mygen[20]; /* int */ + char m_throughput[80]; /* float */ + char m_cputime[80]; /* float */ + char m_walltime[80]; /* float */ + char m_actual[80]; /* float */ + char m_child_flag[80]; /* long long */ +}; + + +/* + * Possible values for the commands sent to the master + * (numbered 1-3; the master-to-client codes below continue at 4, + * so all R_* codes share one number space). + */ +#define R_CHILD_JOIN 1 +#define R_STAT_DATA 2 +#define R_FLAG_DATA 3 + +/* + * Possible values for the master's commands sent to a client + * + * The R_FLAG_DATA is also used by the master to tell the + * client to update its flags. + */ +#define R_JOIN_ACK 4 +#define R_STOP_FLAG 5 +#define R_TERMINATE 6 +#define R_DEATH 7 + + +/* These are the defaults for the processor. They can be + * overwritten by the command line options. + */ +#define CACHE_LINE_SIZE 32 +#define CACHE_SIZE ( 1024 * 1024 ) + + +#define MEG (1024 * 1024) + +/* + * For stride testing use a prime number to avoid stripe + * wrap hitting the same spindle. + */ +#define STRIDE 17 + + + +/************************************************************************/ +/* */ +/* DEFINED CONSTANTS */ +/* */ +/* Never add a comment to the end of a #define. Some compilers will */ +/* choke and fail the compile. */ +/************************************************************************/ + +/* + * Size of buffer for capturing the machine's name. + */ +#define IBUFSIZE 100 +/* + * How many I/Os before a non-uniform access. + */ +#define DISRUPT 100 + +/* + * Set the crossover size. This is where the small transfers + * are skipped to save time. There is an option to + * disable the skipping.
+ */ +#define LARGE_REC 65536 + +/* Default number of kilobytes in file */ +#define KILOBYTES 512 + +/* Default number of bytes in a record */ +#define RECLEN 1024 + +/* Default size of file in bytes */ +#define FILESIZE (KILOBYTES*1024) + +/* Default number of records. NOTE(review): the expansion FILESIZE/RECLEN is not parenthesized, so operators next to a use of NUMRECS can regroup the division. */ +#define NUMRECS FILESIZE/RECLEN + +#ifdef __bsdi__ +/* At 8 Meg switch to large records (the value is presumably in Kbytes - confirm) */ +#define CROSSOVER (8*1024) +/* Maximum buffer size */ +#define MAXBUFFERSIZE (8*1024*1024) +#else +/* At 16 Meg switch to large records (the value is presumably in Kbytes - confirm) */ +#define CROSSOVER (16*1024) +/* Maximum buffer size */ +#define MAXBUFFERSIZE (16*1024*1024) +#endif + +/* Maximum number of children. Threads/procs/clients */ +#define MAXSTREAMS 256 + +/* Minimum buffer size */ +#define MINBUFFERSIZE 128 +/* If things ran way too fast */ +#define TOOFAST 10 +/* Set the maximum number of types of tests */ +#define MAXTESTS 12 +/* Default fill pattern for verification. NOTE(review): PATTERN expands to a get_pattern() call followed by a semicolon, so it can only be used where a statement may end; PATTERN1 is a plain constant. */ +#define PATTERN get_pattern(); +#define PATTERN1 0xBB +/* Used for Excel internal tables */ +#define MAX_X 100 +/* Used for Excel internal tables */ +#define MAX_Y 512 + +#define USAGE "\tUsage: For usage information type iozone -h \n\n" + + +/* Maximum number of characters in filename */ +#define MAXNAMESIZE 1000 + +/* + * Define the typical output that the user will see on their + * screen.
+ */ +#ifdef NO_PRINT_LLD +#ifdef HAVE_PREAD +#include +#if defined(HAVE_PREAD) && defined(HAVE_PREADV) +#define CONTROL_STRING1 "%16ld%8ld%8ld%8ld%8ld%8ld%8ld%8ld%8ld %8ld %8ld%8ld%8ld%9ld%9ld%8ld%10ld%9ld%10ld%9ld%10ld%10ld%9ld\n" +#define CONTROL_STRING2 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s%8s%9s%7s%10s%10s%10s%9s%9s\n" +#define CONTROL_STRING3 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s\n" +#define CONTROL_STRING4 "%16s%8s%8s%8s%8s%10s\n" +#else +#define CONTROL_STRING1 "%16ld%8ld%8ld%8ld%8ld%8ld%8ld%8ld%8ld %8ld %8ld%8ld%8ld%9ld%9ld%8ld%10ld%9ld%10ld\n" +#define CONTROL_STRING2 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s%8s%9s%7s%10s\n" +#define CONTROL_STRING3 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s\n" +#define CONTROL_STRING4 "%16s%8s%8s%8s%8s%10s\n" +#endif +#else +#define CONTROL_STRING1 "%16ld%8ld%8ld%8ld%8ld%8ld%8ld%8ld%8ld %8ld %8ld%8ld%8ld%9ld%9ld\n" +#define CONTROL_STRING2 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s\n" +#define CONTROL_STRING3 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s\n" +#define CONTROL_STRING4 "%16s%8s%8s%8s%8s%10s\n" +#endif +#endif + +#ifndef NO_PRINT_LLD +#ifdef HAVE_PREAD +#include +#if defined(HAVE_PREAD) && defined(HAVE_PREADV) +#define CONTROL_STRING1 "%16lld%8ld%8ld%8ld%8ld%8ld%8ld%8ld%8ld %8ld %8ld%8ld%8ld%9ld%9ld%8ld%10ld%9ld%10ld%9ld%10ld%10ld%9ld\n" +#define CONTROL_STRING2 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s%8s%9s%7s%10s%10s%10s%9s%9s\n" +#define CONTROL_STRING3 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s\n" +#define CONTROL_STRING4 "%16s%8s%8s%8s%8s%10s\n" +#else +#define CONTROL_STRING1 "%16lld%8ld%8ld%8ld%8ld%8ld%8ld%8ld%8ld %8ld %8ld%8ld%8ld%9ld%9ld%8ld%10ld%9ld%10ld\n" +#define CONTROL_STRING2 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s%8s%9s%7s%10s\n" +#define CONTROL_STRING3 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s\n" +#define CONTROL_STRING4 "%16s%8s%8s%8s%8s%10s\n" +#endif +#else +#define CONTROL_STRING1
"%16lld%8ld%8ld%8ld%8ld%8ld%8ld%8ld %8ld %8ld%8ld%8ld%8ld%9ld%9ld\n" +#define CONTROL_STRING2 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s\n" +#define CONTROL_STRING3 "%16s%8s%8s%8s%8s%10s%8s%8s%8s %8s %8s%9s%9s%8s%9s\n" +#define CONTROL_STRING4 "%16s%8s%8s%8s%8s%10s\n" +#endif +#endif + +/* + For 'auto mode', these defines determine the number of iterations + to perform for both the file size and the record length. +*/ + +/* Start with 64 kbyte minimum file size by default */ +#define KILOBYTES_START 64 +/* Default maximum file size. This is 512 Mbytes */ +#define KILOBYTES_END (1024*512) +/* Default starting record size */ +#define RECLEN_START 4096 +/* Default maximum record size */ +#define RECLEN_END (MAXBUFFERSIZE) +/* Multiplier for each iteration on file and record size */ +#define MULTIPLIER 2 + +/* + * Assign numeric values to each of the tests. + * The numbers agree with the test numbers listed for the -i option + * in the help text, and each one is the bit position used by the + * matching *_MASK macro below. + */ +#define WRITER_TEST 0 +#define READER_TEST 1 +#define RANDOM_RW_TEST 2 +#define REVERSE_TEST 3 +#define REWRITE_REC_TEST 4 +#define STRIDE_READ_TEST 5 +#define FWRITER_TEST 6 +#define FREADER_TEST 7 +#define RANDOM_MIX_TEST 8 +#ifdef HAVE_PREAD +#define PWRITER_TEST 9 +#define PREADER_TEST 10 +#endif /* HAVE_PREAD */ +#ifdef HAVE_PREADV +#define PWRITEV_TEST 11 +#define PREADV_TEST 12 +#endif /* HAVE_PREADV */ + +#define WRITER_MASK (1 << WRITER_TEST) +#define READER_MASK (1 << READER_TEST) +#define RANDOM_RW_MASK (1 << RANDOM_RW_TEST) +#define RANDOM_MIX_MASK (1 << RANDOM_MIX_TEST) +#define REVERSE_MASK (1 << REVERSE_TEST) +#define REWRITE_REC_MASK (1 << REWRITE_REC_TEST) +#define STRIDE_READ_MASK (1 << STRIDE_READ_TEST) +#define FWRITER_MASK (1 << FWRITER_TEST) +#define FREADER_MASK (1 << FREADER_TEST) +#ifdef HAVE_PREAD +#define PWRITER_MASK (1 << PWRITER_TEST) +#define PREADER_MASK (1 << PREADER_TEST) +#endif /* HAVE_PREAD */ +#ifdef HAVE_PREADV +#define PWRITEV_MASK (1 << PWRITEV_TEST) +#define PREADV_MASK (1 << PREADV_TEST) +#endif /* HAVE_PREADV */ + +/* + * child_stat->flag values and
transitions + */ +/* Parent initializes children to HOLD */ +#define CHILD_STATE_HOLD 0 +/* Child tells master when it's READY */ +#define CHILD_STATE_READY 1 +/* Parent tells child to BEGIN */ +#define CHILD_STATE_BEGIN 2 +/* Child tells parent that it's DONE */ +#define CHILD_STATE_DONE 3 + +#define MERSENNE + +/******************************************************************/ +/* */ +/* FUNCTION DECLARATIONS */ +/* */ +/******************************************************************/ +char *initfile(); +/*int pit_gettimeofday( struct timeval *, struct timezone *, char *, char *);*/ +int pit_gettimeofday( ); +static int openSckt( const char *, const char *, unsigned int ); +static void pit( int, struct timeval *); +void mmap_end(); +void alloc_pbuf(); +void auto_test(); /* perform automatic test series */ +void show_help(); /* show development help */ +static double time_so_far(); /* time since start of program */ +#ifdef unix +static double utime_so_far(); /* user time */ +static double stime_so_far(); /* system time */ +static double clk_tck(); /* Get clocks/tick */ +static double cputime_so_far(); +#else +#define cputime_so_far() time_so_far() +#endif +static double time_so_far1(); /* time since start of program */ +void get_resolution(); +void get_rusage_resolution(); +void signal_handler(); /* clean up if user interrupts us */ +void begin(); /* The main worker in the app */ +void fetchit(); /* Prime on chip cache */ +void purgeit(); /* Purge on chip cache */ +void throughput_test(); /* Multi process throughput */ +void multi_throughput_test(); /* Multi process throughput */ +void prepage(); /* Pre-fault user buffer */ +void get_date(); +int get_pattern(); /* Set pattern based on version */ +#ifdef HAVE_ANSIC_C +float do_compute(float); /* compute cycle simulation */ +#else +float do_compute(); /* compute cycle simulation */ +#endif +void write_perf_test(); /* write/rewrite test */ +void fwrite_perf_test(); /* fwrite/refwrite test */ +void
fread_perf_test(); /* fread/refread test */ +void read_perf_test(); /* read/reread test */ +void mix_perf_test(); /* mixed workload test (test 8, "random_mix" in the -i help text) */ +void random_perf_test(); /* random read/write test */ +void reverse_perf_test(); /* reverse read test */ +void rewriterec_perf_test(); /* rewrite record test */ +void read_stride_perf_test(); /* read with stride test */ +#ifdef HAVE_PREAD +void pread_perf_test(); /* pread/re-pread test */ +void pwrite_perf_test(); /* pwrite/re-pwrite test */ +#endif /* HAVE_PREAD */ +#ifdef HAVE_PREADV +void preadv_perf_test(); /* preadv/re-preadv test */ +void pwritev_perf_test(); /* pwritev/re-pwritev test */ +#endif /* HAVE_PREADV */ +void store_dvalue(); /* Store doubles array */ +void dump_excel(); +void dump_throughput(); +int sp_start_child_send(); +int sp_start_master_listen(); +#ifdef HAVE_ANSIC_C +#if defined (HAVE_PREAD) && defined(_LARGEFILE64_SOURCE) +ssize_t pwrite64(); +ssize_t pread64(); +#endif +#if !defined(linux) +char *getenv(); +char *inet_ntoa(); +int system(); +#endif +void my_nap(); +void my_unap(); +int thread_exit(); +#ifdef ASYNC_IO +size_t async_write(); +void async_release(); +int async_read(); +int async_read_no_copy(); +size_t async_write_no_copy(); +void end_async(); +void async_init(); +#else +size_t async_write(); +size_t async_write_no_copy(); +void async_release(); +#endif +void do_float(); +int create_xls(); +void close_xls(); +void do_label(); +int mylockf(int, int, int); +int mylockr(int,int, int, off64_t, off64_t); +int rand(void); +void srand(unsigned int); +int get_client_info(void); +void exit(int); +void find_remote_shell(char *); +void find_external_mon(char *,char *); +void start_monitor(char *); +void stop_monitor(char *); +void takeoff_cache(); +void del_cache(); +void fill_area(long long *, long long *, long long); +void fill_buffer(char *,long long ,long long ,char, long long ); +void store_value(off64_t); +void store_times(double, double); +static double cpu_util(double, double); +void
dump_cputimes(void); +void purge_buffer_cache(void); +char *alloc_mem(long long,int); +void *(thread_rwrite_test)(void *); +void *(thread_write_test)(void *); +void *(thread_fwrite_test)(void *); +void *(thread_fread_test)(void *); +void *(thread_read_test)(void*); +#ifdef HAVE_PREAD +void *(thread_pread_test)(void*); +void *(thread_pwrite_test)(void*); +#endif +void *(thread_cleanup_test)(void*); +void *(thread_cleanup_quick)(void*); +void *(thread_ranread_test)(void *); +void *(thread_mix_test)(void *); +void *(thread_ranwrite_test)(void *); +void *(thread_rread_test)(void *); +void *(thread_reverse_read_test)(void *); +void *(thread_stride_read_test)(void *); +void *(thread_set_base)(void *); +void *(thread_join)(long long, void *); +void disrupt(int); +#if defined(Windows) +void disruptw(HANDLE); +#endif +long long get_traj(FILE *, long long *, float *, long); +void create_temp(off64_t, long long ); +FILE *open_w_traj(void); +FILE *open_r_traj(void); +void traj_vers(void); +void r_traj_size(void); +long long w_traj_size(void); +void init_file_sizes(); +off64_t get_next_file_size(off64_t); +void add_file_size(off64_t); +void init_file_sizes( off64_t, off64_t); +off64_t get_next_record_size(off64_t); +void add_record_size(off64_t); +void init_record_sizes( off64_t, off64_t); +void del_record_sizes( void ); +void hist_insert(double ); +void dump_hist(char *,int ); +void do_speed_check(int); +#else +void do_speed_check(); +#if !defined(linux) +char *getenv(); +char *inet_ntoa(); +int system(); +#endif +void my_nap(); +void my_unap(); +int thread_exit(); +void close_xls(); +void do_label(); +int create_xls(); +void do_float(); +#ifdef ASYNC_IO +void async_release(); +size_t async_write(); +size_t async_write_no_copy(); +int async_read(); +int async_read_no_copy(); +#endif +int mylockf(); +int mylockr(); +int rand(); +void srand(); +int get_client_info(); +void exit(); +void find_remote_shell(); +void traj_vers(); +void r_traj_size(); +long long w_traj_size(); +FILE
*open_w_traj(); +FILE *open_r_traj(); +void create_temp(); +void fill_buffer(); +char *alloc_mem(); +void *(thread_rwrite_test)(); +void *(thread_write_test)(); +void *(thread_fwrite_test)(); +void *(thread_fread_test)(); +void *(thread_read_test)(); +void *(thread_cleanup_test)(); +void *(thread_ranread_test)(); +void *(thread_mix_test)(); +void *(thread_ranwrite_test)(); +void *(thread_rread_test)(); +void *(thread_reverse_read_test)(); +void *(thread_stride_read_test)(); +void *(thread_set_base)(); +void *(thread_join)(); +void disrupt(); +long long get_traj(); +void init_file_sizes(); +off64_t get_next_file_size(); +void add_file_size(); +void init_record_sizes(); +off64_t get_next_record_size(); +void add_record_size(); +void dump_cputimes(); +static double cpu_util(); +void del_record_sizes(); +void hist_insert(); +void dump_hist(); +#endif + +#ifdef _LARGEFILE64_SOURCE +#define I_LSEEK(x,y,z) lseek64(x,(off64_t)(y),z) +#define I_OPEN(x,y,z) open64(x,(int)(y),(int)(z)) +#define I_CREAT(x,y) creat64(x,(int)(y)) +#define I_FOPEN(x,y) fopen64(x,y) +#define I_STAT(x,y) stat64(x,y) +#ifdef HAVE_PREAD +#define I_PREAD(a,b,c,d) pread64(a,b,(size_t)(c),(off64_t)(d)) +#define I_PWRITE(a,b,c,d) pwrite64(a,b,(size_t)(c),(off64_t)(d)) +#endif +#define I_MMAP(a,b,c,d,e,f) mmap64((void *)(a),(size_t)(b),(int)(c),(int)(d),(int)(e),(off64_t)(f)) +#else +#define I_LSEEK(x,y,z) lseek(x,(off_t)(y),z) +#define I_OPEN(x,y,z) open(x,(int)(y),(int)(z)) +#define I_CREAT(x,y) creat(x,(int)(y)) +#define I_FOPEN(x,y) fopen(x,y) +#define I_STAT(x,y) stat(x,y) +#ifdef HAVE_PREAD +#define I_PREAD(a,b,c,d) pread(a,b,(size_t)(c),(off_t)(d)) +#define I_PWRITE(a,b,c,d) pwrite(a,b,(size_t)(c),(off_t)(d)) +#endif +#define I_MMAP(a,b,c,d,e,f) mmap((void *)(a),(size_t)(b),(int)(c),(int)(d),(int)(e),(off_t)(f)) +#endif + + +/************************************************************************/ +/* The list of tests to be called.
*/
+/************************************************************************/
+void (*func[])() = { /* Test entry points; entries after mix_perf_test are compiled in only under HAVE_PREAD (and HAVE_PREADV). */
+ write_perf_test,
+ read_perf_test,
+ random_perf_test,
+ reverse_perf_test,
+ rewriterec_perf_test,
+ read_stride_perf_test,
+ fwrite_perf_test,
+ fread_perf_test,
+ mix_perf_test
+#ifdef HAVE_PREAD
+ ,
+ pwrite_perf_test,
+ pread_perf_test
+#ifdef HAVE_PREADV
+ ,
+ pwritev_perf_test,
+ preadv_perf_test
+#endif /* HAVE_PREADV */
+#endif /* HAVE_PREAD */
+ };
+
+/*
+char *test_output[] = {" ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " \n" };
+*/
+char *test_output[] = {" ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ "",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " ",
+ " \n" };
+long long test_soutput[] = {2,2,2,1,1,1,2,2,2,2,2,2,2,2}; /* NOTE(review): 14 entries here vs 16 strings in test_output[] - confirm the intended pairing. */
+
+
+/******************************************************************/
+/* */
+/* GLOBAL VARIABLES */
+/* */
+/*******************************************************************/
+
+/*
+ * Set the size of the shared memory segment for the children
+ * to put their results: MAXSTREAMS child_stats records plus 4096 bytes.
+ */
+#define SHMSIZE ((( sizeof(struct child_stats) * MAXSTREAMS) )+4096 )
+/*
+ * Pointer to the shared memory segment.
+ */
+VOLATILE struct child_stats *shmaddr;
+double totaltime,total_time, temp_time ,total_kilos;
+off64_t report_array[MAX_X][MAX_Y];
+double report_darray[MAX_X][MAXSTREAMS];
+double time_res,cputime_res;
+long long throughput_array[MAX_X]; /* Filesize & record size are constants */
+short current_x, current_y;
+long long orig_size;
+long long max_x, max_y;
+unsigned long long goodkilos;
+off64_t kilobytes64 = (off64_t)KILOBYTES;
+long long goodrecl;
+off64_t offset = 0; /*offset for random I/O */
+off64_t offset64 = 0; /*offset for random I/O */
+off64_t filebytes64;
+off64_t r_range[100]; /* record sizes appended by -r in main(); NOTE(review): no bounds check there */
+off64_t s_range[100]; /* file sizes (KB) appended by -s in main(); NOTE(review): no bounds check there */
+int t_range[100]; /* stream counts appended by -t in main(); NOTE(review): no bounds check there */
+int t_count = 0; /* entries used in t_range[] */
+int r_count,s_count; /* entries used in r_range[] / s_range[] */
+char *barray[MAXSTREAMS];
+char *haveshm;
+extern int optind;
+long long onetime, auto_mode, sfd, multi_buffer;
+int fd;
+int sp_msfd,sp_mrfd,sp_csfd,sp_crfd;
+int begin_proc,num_processors,ioz_processor_bind;
+long long res_prob,rec_prob;
+char silent,read_sync;
+char master_iozone, client_iozone,distributed;
+int bif_fd,s_count; /* NOTE(review): s_count repeats the declaration above (same object). */
+int bif_row,bif_column;
+int dedup_mseed = 1;
+int hist_summary;
+int op_rate;
+int op_rate_flag;
+char aflag, Eflag, hflag, Rflag, rflag, sflag;
+char diag_v,sent_stop,dedup,dedup_interior,dedup_compress;
+char *dedup_ibuf;
+char *dedup_temp;
+char bif_flag;
+int rlocking;
+int share_file;
+int ecount;
+char gflag,nflag;
+char yflag,qflag;
+#ifdef Windows
+char *build_name = "Windows";
+#else
+char *build_name = NAME;
+#endif
+char imon_start[256],imon_stop[256];
+char imon_sync;
+char trflag;
+char cpuutilflag;
+char seq_mix;
+long base_time;
+long long mint, maxt; /* lower / upper stream limits from -l / -u; -t raises maxt to the largest count given */
+long long w_traj_ops, r_traj_ops, w_traj_fsize,r_traj_fsize;
+long long r_traj_ops_completed,r_traj_bytes_completed;
+long long w_traj_ops_completed,w_traj_bytes_completed;
+int w_traj_items, r_traj_items;
+char fflag, Uflag,uflag,lflag,include_tflag;
+struct runtime runtimes [MAX_X] [MAX_Y]; /* in parallel with report_array[][] */
+long long include_test[50]; /* per-test counters incremented by -i (and -A) in main() */
+long long include_mask;
+char RWONLYflag, NOCROSSflag; /*auto mode 2 - kcollins 8-21-96*/
+char mfflag;
+long long status, x, y, childids[MAXSTREAMS+1], myid, num_child;
+int pct_read,speed_code;
+#ifndef NO_THREADS
+pthread_t p_childids[MAXSTREAMS+1];
+#endif
+off64_t next64;
+char wol_opened, rol_opened;
+FILE *wqfd,*rwqfd,*rqfd,*rrqfd;
+
+extern char *optarg;
+#ifndef __AIX__
+long long ret;
+#else
+short ret;
+#endif
+struct size_entry {
+ struct size_entry *next;
+ off64_t size;
+};
+struct size_entry *size_list=0;
+struct size_entry *rec_size_list=0;
+off64_t maximum_file_size;
+off64_t minimum_file_size;
+
+char bif_filename [MAXNAMESIZE]; /* name of biff file */
+char filename [MAXNAMESIZE]; /* name of temporary file */
+char mountname [MAXNAMESIZE]; /* name of device */
+char dummyfile [MAXSTREAMS][MAXNAMESIZE]; /* name of dummy file */
+char dummyfile1 [MAXNAMESIZE]; /* name of dummy file */
+char *filearray[MAXSTREAMS]; /* array of file names */
+char tfile[] = "iozone";
+char *buffer,*buffer1, *mbuffer,*mainbuffer;
+FILE *pi,*r_traj_fd,*w_traj_fd;
+VOLATILE char *pbuffer;
+char *default_filename="iozone.tmp"; /*default name of temporary file*/
+VOLATILE char stoptime;
+char Cflag;
+char use_thread = 0;
+long long debug1=0;
+long long debug=0;
+unsigned long cache_size=CACHE_SIZE; /* overridden by -S (argument * 1024) */
+unsigned long cache_line_size=CACHE_LINE_SIZE; /* overridden by -L */
+long long *pstatus;
+off64_t min_file_size = KILOBYTES_START;
+off64_t max_file_size = KILOBYTES_END;
+long long min_rec_size = RECLEN_START;
+long long max_rec_size = RECLEN_END;
+long long orig_min_rec_size = RECLEN_START;
+long long orig_max_rec_size = RECLEN_END;
+long long xover = CROSSOVER;
+char *throughput_tests[] = {"Initial write","Rewrite","Read","Re-read",
+ "Reverse Read","Stride read","Random read","Mixed workload","Random write","Pwrite","Pread","Fwrite","Fread"};
+char command_line[1024] = "\0";
+#ifdef unix
+double sc_clk_tck;
+#endif
+
+int argcsave;
+char **argvsave;
+char splash[80][80]; /* banner lines written by sprintf in main(); NOTE(review): neither row count nor row length is checked there */
+int splash_line; /* next free row of splash[] */
+char client_filename[256];
+char remote_shell[256];
+int client_error;
+
+char pit_hostname[40];
+char pit_service[8];
+int junk;
+
+/*
+ * Host ports used to listen, and handle errors.
+ */
+#define HOST_LIST_PORT 20000
+#define HOST_ESEND_PORT (HOST_LIST_PORT+MAXSTREAMS)
+#define HOST_ASEND_PORT (HOST_ESEND_PORT+MAXSTREAMS)
+int controlling_host_port = HOST_LIST_PORT;
+
+/*
+ * Child ports used to listen, and handle errors.
+ */
+#define CHILD_ESEND_PORT (HOST_ASEND_PORT+MAXSTREAMS)
+#define CHILD_LIST_PORT (CHILD_ESEND_PORT+MAXSTREAMS)
+
+/* Child's async message port. Used for stop flag and terminate */
+#define CHILD_ALIST_PORT (CHILD_LIST_PORT+MAXSTREAMS)
+
+/* Ports for the network speed code */
+#define SP_CHILD_LISTEN_PORT 31000
+#define SP_CHILD_ESEND_PORT (SP_CHILD_LISTEN_PORT+10)
+#define SP_MASTER_LISTEN_PORT (SP_CHILD_ESEND_PORT+10)
+#define SP_MASTER_ESEND_PORT (SP_MASTER_LISTEN_PORT+10)
+#define SP_MASTER_RESULTS_PORT (SP_MASTER_ESEND_PORT+10)
+
+
+#define THREAD_WRITE_TEST 1
+#define THREAD_REWRITE_TEST 2
+#define THREAD_READ_TEST 3
+#define THREAD_REREAD_TEST 4
+#define THREAD_STRIDE_TEST 5
+#define THREAD_RANDOM_READ_TEST 6
+#define THREAD_RANDOM_WRITE_TEST 7
+#define THREAD_REVERSE_READ_TEST 8
+#define THREAD_RANDOM_MIX_TEST 9
+#define THREAD_PWRITE_TEST 10
+#define THREAD_PREAD_TEST 11
+#define THREAD_FWRITE_TEST 12
+#define THREAD_FREAD_TEST 13
+#define THREAD_CLEANUP_TEST 14
+
+/*
+ * Child states that the master is tracking.
+ * The master uses these to determine how to shutdown
+ * the clients when some fool hits control-C.
+ */
+#define C_STATE_ZERO 1
+#define C_STATE_WAIT_WHO 2
+#define C_STATE_WAIT_BARRIER 3
+
+
+int c_port,a_port; /* port number */
+int child_port; /* Virtualized due to fork */
+int child_async_port; /* Virtualized due to fork */
+int client_listen_pid; /* Virtualized due to fork */
+int master_join_count; /* How many children have joined */
+int l_sock,l_async_sock; /* Sockets for listening */
+char master_rcv_buf[4096]; /* Master's receive buffer */
+int master_listen_pid; /* Pid of the master's async listener proc */
+char master_send_buf[4096]; /* Master's send buffer */
+char child_rcv_buf[4096]; /* Child's receive buffer */
+char child_async_rcv_buf[4096]; /* Child's async receive buffer */
+char child_send_buf[4096]; /* Child's send buffer */
+int child_send_socket; /* Child's send socket */
+int child_listen_socket; /* Child's listener socket */
+int child_listen_socket_async; /* Child's async listener socket */
+int master_send_socket; /* Single (scalar) master send socket; the per-child arrays follow */
+int master_send_sockets[MAXSTREAMS]; /* Master's send sockets. One for each child */
+int master_send_async_sockets[MAXSTREAMS]; /* Master's async send sockets. One for each child */
+int master_listen_port; /* Master's listener port number */
+int master_listen_socket; /* Master's listener socket */
+int clients_found; /* Number of clients found in the client file */
+FILE *newstdin, *newstdout, *newstderr; /* used for debug in cluster mode.*/
+char toutput[20][20]; /* Used to help format the output */
+int toutputindex; /* Index to the current output line */
+int cdebug = 0; /* Use to turn on child/client debugging in cluster mode. */
+int mdebug = 0; /* Use to turn on master debug in cluster mode */
+int aggflag; /* Used to indicate constant aggregate data set size */
+struct sockaddr_in child_sync_sock, child_async_sock; /* presumably the addresses for the child's sync / async channels - TODO confirm at the use sites */
+
+/*
+ * Change this whenever you change the message format of master or client.
+ */
+int proto_version = 25;
+
+/******************************************************************************/
+/* Tele-port zone. These variables are updated on the clients when one is */
+/* using cluster mode. (-+m) */
+/* Do not touch these unless you have become one with the universe !! */
+/******************************************************************************/
+char controlling_host_name[100]; /* filled by gethostname() in main(); overwritten by -+c / -+h via an unbounded strcpy (NOTE(review)) */
+struct child_ident {
+ char child_name[100];
+ char workdir[200];
+ char execute_path[200];
+ char file_name[200];
+ int state; /* presumably one of the C_STATE_* values - TODO confirm */
+ int child_number;
+ int child_port;
+ int child_async_port;
+ int master_socket_num;
+ int master_async_socket_num;
+}child_idents[MAXSTREAMS];
+int Kplus_readers;
+char write_traj_filename [MAXNAMESIZE]; /* name of write telemetry file */
+char read_traj_filename [MAXNAMESIZE]; /* name of read telemetry file */
+char oflag,jflag,k_flag,h_flag,mflag,pflag,unbuffered,Kplus_flag;
+char noretest;
+char notruncate; /* turn off truncation of files */
+char async_flag,stride_flag,mmapflag,mmapasflag,mmapssflag,mmapnsflag,mmap_mix;
+char verify = 1;
+int restf; /* set to 1 by -+q */
+char sverify = 1; /* main() sets 0 for -V and -+d, 2 for -X, -Y and -+E */
+char odsync = 0;
+char Q_flag,OPS_flag;
+char L_flag=0;
+char no_copy_flag,include_close,include_flush;
+char disrupt_flag,compute_flag,xflag,Z_flag, X_flag;
+int no_unlink = 0;
+int no_write = 0;
+int r_traj_flag,w_traj_flag;
+int mygen;
+char MS_flag;
+int advise_op,advise_flag;
+int direct_flag;
+int current_client_number;
+long long chid;
+int file_lock;
+unsigned int pattern; /* -V in main() replicates one byte into all four bytes */
+long long stride = STRIDE; /* -j; negative values become 0 in main() */
+long long delay,purge,fetchon;
+off64_t numrecs64 = (off64_t)NUMRECS;
+long long reclen = RECLEN;
+long long delay_start,depth;
+VOLATILE char *stop_flag; /* Used to stop all children */
+float compute_time;
+int multiplier = MULTIPLIER; /* -+x; main() replaces values <= 1 with 2 */
+long long rest_val; /* -+q argument; values <= 0 become 0 in main() */
+#if defined(Windows)
+ HANDLE hand;
+#endif
+
+/******************************************************************************/
+/* End of Tele-port zone.
*/ +/******************************************************************************/ + + +/* + * Prototypes + * Sort of... Full prototypes break non-ansi C compilers. No protos is + * a bit sloppy, so the compromise is this. + */ +void child_send(); +int start_child_listen(); +int start_child_listen_async(); +void start_child_listen_loop(); +void child_listen(); +void child_listen_async(); +void stop_child_send(); +void stop_child_listen(); +void cleanup_comm(); +void master_send(); +int start_master_send(); +int start_master_listen(); +int check_filename(); +void master_listen(); +void stop_master_send(); +void stop_master_listen(); +long long start_child_proc(); +int parse_client_line(); +void wait_dist_join(); +void tell_children_begin(); +void start_master_listen_loop(); +void wait_for_master_go(); +void tell_master_ready(); +void stop_master_listen_loop(); +void tell_master_stats(); +void become_client(); +int pick_client(); +long long start_child_proc(); +int start_master_send(); +void child_listen(); +int start_child_listen(); +void stop_master_send(); +void stop_master_listen(); +void stop_child_send(); +void stop_child_listen(); +void master_send(); +void child_send(); +void master_listen(); +int start_master_listen(); +void child_remove_files(); +void terminate_child_async(); +void distribute_stop(); +void send_stop(); +void cleanup_children(); + + +/****************************************************************/ +/* */ +/* MAIN () */ +/* */ +/****************************************************************/ + +int +main(argc,argv) +int argc; +char **argv; +{ + + long long fileindx,i,tval; + long long ind; + int ret; + FILE *pi; + char reply[IBUFSIZE]; + unsigned char inp_pat; + time_t time_run; + char *port,*m,*subarg; + int num_child1; + int cret; + int anwser,bind_cpu; + char *evalue; + + + anwser=bind_cpu=0; + /* Used to make fread/fwrite do something better than their defaults */ + setvbuf( stdout, NULL, _IONBF, (size_t) NULL ); + setvbuf( stderr, 
NULL, _IONBF, (size_t) NULL ); + + /* Save the master's name */ + gethostname(controlling_host_name,100); + + /* Let user activate mdebug or cdebug via environmental variables */ + evalue = (char *)NULL; + evalue=(char *)getenv("CDEBUG"); + if(evalue) + cdebug=atoi(evalue); + evalue = (char *)NULL; + evalue=(char *)getenv("MDEBUG"); + if(evalue) + mdebug=atoi(evalue); + + srand(time(0)); + mygen=rand(); /* Pick a random generation number */ + + /* Try to find the actual VM page size, if possible */ +#if defined (solaris) || defined (_HPUX_SOURCE) || defined (linux) || defined(IRIX) || defined (IRIX64) +#ifndef __convex_spp + page_size=getpagesize(); +#endif +#endif + /* Try to find the actual number of ticks per second */ +#ifdef unix + sc_clk_tck = clk_tck(); +#endif + for(ind=0;ind 60) + depth=60; + */ +#ifdef NO_PRINT_LLD + sprintf(splash[splash_line++],"\tPOSIX Async I/O (no bcopy). Depth %ld \n",depth); +#else + sprintf(splash[splash_line++],"\tPOSIX Async I/O (no bcopy). Depth %lld \n",depth); +#endif + no_copy_flag=1; + async_flag++; + k_flag++; + break; + case 'T': /* Switch to POSIX thread based */ +#ifndef NO_THREADS + use_thread++; +#else + printf("\tThreads not supported in this version\n"); + exit(2); +#endif + break; + case 'H': /* Use POSIX async_io */ + h_flag++; + depth = (long long)(atoi(optarg)); + if(depth <0) + depth=0; + /* + * Hmmm. many systems fail is strange ways when the maximum + * number of async I/Os per user or proc is exceeded. + */ + /* + if(depth > 60) + depth=60; + */ +#ifdef NO_PRINT_LLD + sprintf(splash[splash_line++],"\tPOSIX async I/O (with bcopy). Depth %ld\n",depth); +#else + sprintf(splash[splash_line++],"\tPOSIX async I/O (with bcopy). 
Depth %lld\n",depth); +#endif + async_flag++; + break; + case 'I': /* Use VXFS direct advisory or O_DIRECT from Linux or AIX , or O_DIRECTIO for TRU64 or Solaris directio */ +#ifdef VXFS + direct_flag++; + sprintf(splash[splash_line++],"\tVxFS advanced feature SET_CACHE, VX_DIRECT enabled\n"); + break; +#endif +#if ! defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined(__FreeBSD__) || defined(solaris) + direct_flag++; + sprintf(splash[splash_line++],"\tO_DIRECT feature enabled\n"); + break; +#endif +#if defined(TRU64) + direct_flag++; + sprintf(splash[splash_line++],"\tO_DIRECTIO feature enabled\n"); + break; +#endif +#else + break; +#endif +#if defined(Windows) + sprintf(splash[splash_line++],"\tO_DIRECTIO feature not available in Windows version.\n"); + break; +#endif + case 'B': /* Use mmap file for test file */ + sprintf(splash[splash_line++],"\tUsing mmap files\n"); + mmapflag++; + mmapnsflag++; + break; + case 'D': /* Use async msync mmap file */ + sprintf(splash[splash_line++],"\tUsing msync(MS_ASYNC) on mmap files\n"); + mmapflag++; + mmapasflag++; + mmapnsflag=0; + break; + case 'G': /* Use msync sync for mmap file */ + sprintf(splash[splash_line++],"\tUsing msync(MS_SYNC) on mmap files\n"); + mmapssflag++; + mmapnsflag=0; + break; + case 'C': /* show children xfer counts */ + Cflag++; + break; + case 'Q': /* Enable output offset/latency files */ + sprintf(splash[splash_line++],"\tOffset/latency files enabled.\n"); + Q_flag++; + break; + case 'x': /* Disable stone_wall */ + sprintf(splash[splash_line++],"\tStonewall disabled\n"); + xflag++; + break; + + case 'a': /* auto mode */ + fetchon=1; + purge=0; + multi_buffer=0; + auto_mode = 1; + aflag++; + sprintf(splash[splash_line++],"\tAuto Mode\n"); + break; + case 'c': /* Include close in timing */ + include_close++; + sprintf(splash[splash_line++],"\tInclude close in write timing\n"); + break; + case 'e': /* Include fsync in 
timing */ + include_flush++; + sprintf(splash[splash_line++],"\tInclude fsync in write timing\n"); + break; + case 'A': /* auto2 mode. Soon to go away. Please use -az */ + fetchon=1; + purge=0; + multi_buffer=0; + auto_mode = 1; + aflag++; + sprintf(splash[splash_line++],"\tAuto Mode 2. This option is obsolete. Use -az -i0 -i1 \n"); + RWONLYflag++; + NOCROSSflag++; + include_tflag++; /* automatically set WRITER_TEST and READER_TEST */ + include_test[WRITER_TEST]++; + include_test[READER_TEST]++; + break; + case 's': /* Set file size */ +#ifdef NO_PRINT_LLD + sscanf(optarg,"%ld",&kilobytes64); +#else + sscanf(optarg,"%lld",&kilobytes64); +#endif + if(optarg[strlen(optarg)-1]=='k' || + optarg[strlen(optarg)-1]=='K'){ + ; + } + if(optarg[strlen(optarg)-1]=='m' || + optarg[strlen(optarg)-1]=='M'){ + kilobytes64 = kilobytes64 * 1024; + } + if(optarg[strlen(optarg)-1]=='g' || + optarg[strlen(optarg)-1]=='G'){ + kilobytes64 = kilobytes64 *1024 * 1024; + } + if(kilobytes64 <= 0) + kilobytes64=512; + + s_range[s_count++]=kilobytes64; + max_file_size = (off64_t)s_range[s_count-1]; /* Make visable globally */ + min_file_size = (off64_t)s_range[0]; /* Make visable globally */ + +#ifdef NO_PRINT_LLD + sprintf(splash[splash_line++],"\tFile size set to %ld KB\n",kilobytes64); +#else + sprintf(splash[splash_line++],"\tFile size set to %lld KB\n",kilobytes64); +#endif + sflag++; + break; + case 'l': /* Set lower thread/proc limit */ + mint = (long long)(atoi(optarg)); + if(mint <= 0) + { + mint=1; + num_child=1; + }else + num_child=mint; + if(mint > (unsigned long long)MAXSTREAMS){ + printf("Invalid options: maximum streams for "); + printf("throughput is MAXSTREAMS\n"); + exit(4); + } + lflag++; + trflag++; + if(Uflag) + { + printf("Can not run throughput tests with unmount & remounts.\n"); + exit(5); + } + break; + case 'u': /* Set upper thread/proc limit */ + maxt = (long long)(atoi(optarg)); + if(maxt <= 0) + maxt=1; + if(maxt > MAXSTREAMS){ + printf("Invalid options: maximum 
streams for "); + printf("throughput is MAXSTREAMS\n"); + exit(6); + } + uflag++; + trflag++; + if(Uflag) + { + printf("Can not run throughput tests with unmount & remounts.\n"); + exit(7); + } + break; + case 'm': /* Use multiple buffers */ + fetchon=0; + multi_buffer=1; + mflag++; + mbuffer = (char *) alloc_mem((long long)MAXBUFFERSIZE,(int)0); + if(mbuffer == 0) { + perror("Memory allocation failed:"); + exit(8); + } + sprintf(splash[splash_line++],"\tMulti_buffer. Work area %d bytes\n", + MAXBUFFERSIZE); + break; + case 'M': /* Report machine name and OS */ + bzero(reply,sizeof(reply)); + pi=popen("uname -a", "r"); + if(pi == (FILE *)0) + { + sprintf(splash[splash_line++],"\n\tError using popen() on uname\n"); + sprintf(splash[splash_line++],"\t-M option suppressed.\n"); + } + else + { + junk=fread(reply,IBUFSIZE-1,1,pi); + pclose(pi); + m=reply; + while(*m) /* Strip new line */ + { + if(*m=='\n') + *m=0; + else + m++; + } + sprintf(splash[splash_line++],"\n\tMachine = %s\n",reply); + } + break; + + case 'P': /* Set beginning processor for binding. */ +#ifndef NO_THREADS +#if defined(_HPUX_SOURCE) || defined(linux) +#if defined(_HPUX_SOURCE) + num_processors= pthread_num_processors_np(); +#else + num_processors = sysconf(_SC_NPROCESSORS_ONLN); +#endif + begin_proc = atoi(optarg); + if(begin_proc < 0) + begin_proc=0; + if(begin_proc > num_processors) + begin_proc=0; + sprintf(splash[splash_line++],"\tBinding of processors beginning with %d \n",begin_proc); + ioz_processor_bind++; +#else + sprintf(splash[splash_line++],"\tProcessor binding not available in this version\n"); +#endif +#endif + break; + case 'p': /* purge the processor cache */ + sprintf(splash[splash_line++],"\tPurge Mode On\n"); + fetchon=0; + pflag++; + purge=1; + break; + case 'h': /* show help */ + hflag++; + show_help(); + exit(0); + break; + case 'E': /* Extended testing for pread/pwrite... 
*/ + Eflag++; + break; + case 'R': /* Generate Excel compatible Report */ + Rflag++; + sprintf(splash[splash_line++],"\tExcel chart generation enabled\n"); + break; + case 'o': /* Open OSYNC */ + sprintf(splash[splash_line++],"\tSYNC Mode. \n"); + oflag++; + break; + case 'O': /* Report in Ops/sec instead of KB/sec */ + sprintf(splash[splash_line++],"\tOPS Mode. Output is in operations per second.\n"); + OPS_flag++; + break; + case 'N': /* Report in usec/op */ + sprintf(splash[splash_line++],"\tMicroseconds/op Mode. Output is in microseconds per operation.\n"); + MS_flag++; + break; + case 'V': /* Turn on Verify every byte */ + sverify=0; + inp_pat = (char)(atoi(optarg)); + if(inp_pat == 0) + inp_pat = PATTERN; + pattern = ((inp_pat << 24) | (inp_pat << 16) | (inp_pat << 8) + | inp_pat); + verify=1; + sprintf(splash[splash_line++],"\tVerify Mode. Pattern %x\n",pattern); + sprintf(splash[splash_line++],"\tPerformance measurements are invalid in this mode.\n"); + break; + case 'S': /* Set the processor cache size */ + cache_size = (long)(atoi(optarg)*1024); + if(cache_size == 0) + cache_size = CACHE_SIZE; + break; + case 'L': /* Set processor cache line size */ + cache_line_size = (long)(atoi(optarg)); + if(cache_line_size == 0) + cache_line_size = CACHE_LINE_SIZE; + break; + case 'f': /* Specify the file name */ + if(mfflag) { + printf("invalid options: -f and -F are mutually exclusive\n"); + exit(10); + } + fflag++; + strcpy(filename,optarg); + sprintf(dummyfile[0],"%s.DUMMY",optarg); + break; + case 'b': /* Specify the biff file name */ + Rflag++; + bif_flag++; + strcpy(bif_filename,optarg); + break; + case 'F': /* Specify multiple file names for -t */ + mfflag++; + if(fflag) { + printf("invalid options: -f and -F are mutually exclusive\n"); + exit(11); + } + if(!trflag) { + printf("invalid options: must specify -t N before -F\n"); + exit(12); + } + optind--; + for(fileindx=0;fileindx argc) { +#ifdef NO_PRINT_LLD + printf("invalid options: not enough filenames for 
%ld streams\n",num_child); +#else + printf("invalid options: not enough filenames for %lld streams\n",num_child); +#endif + exit(13); + } + } + break; + case 'r': /* Specify the record size to use */ + rflag++; + reclen = ((long long)(atoi(optarg))*1024); + if(optarg[strlen(optarg)-1]=='k' || + optarg[strlen(optarg)-1]=='K'){ + reclen = (long long)(1024 * atoi(optarg)); + } + if(optarg[strlen(optarg)-1]=='m' || + optarg[strlen(optarg)-1]=='M'){ + reclen = (long long)(1024 * 1024 * atoi(optarg)); + } + if(optarg[strlen(optarg)-1]=='g' || + optarg[strlen(optarg)-1]=='G'){ + reclen = (long long)(1024 * 1024 * 1024 *(long long)atoi(optarg)); + } + if(reclen <= 0) + reclen=(long long)4096; + + r_range[r_count++]=reclen; + max_rec_size = (off64_t)r_range[r_count-1]; /* Make visable globally */ + min_rec_size = (off64_t)r_range[0]; /* Make visable globally */ +#ifdef NO_PRINT_LLD + sprintf(splash[splash_line++],"\tRecord Size %ld KB\n",reclen/1024); +#else + sprintf(splash[splash_line++],"\tRecord Size %lld KB\n",reclen/1024); +#endif + if(max_rec_size > MAXBUFFERSIZE) { +#ifdef NO_PRINT_LLD + printf("Error: maximum record size %ld KB is greater than maximum buffer size %ld KB\n ", + max_rec_size/1024, MAXBUFFERSIZE/1024); +#else + printf("Error: maximum record size %lld KB is greater than maximum buffer size %lld KB\n ", + (long long)(max_rec_size/1024LL), (long long)MAXBUFFERSIZE/1024LL); +#endif + exit(23); + } + break; + case 'J': /* Specify the compute time in millisecs */ + compute_time = (float)(atoi(optarg)); + compute_time=compute_time/1000; + if(compute_time < (float)0) + compute_time=(float)0; + else + compute_flag=1; + jflag++; + break; + case 'j': /* Specify the stride in records */ + stride = (long long)(atoi(optarg)); + if(stride < 0) + stride=0; + stride_flag=1; + break; + case 't': /* Specify the number of children to run */ + num_child1=(atoi(optarg)); + num_child = (long long)num_child1; + if(num_child > (long long)MAXSTREAMS) { + printf("invalid 
options: maximum streams for throughput is MAXSTREAMS\n"); +#ifdef NO_PRINT_LLD + printf("Numchild %ld %s\n",num_child,optarg); +#else + printf("Numchild %lld %s\n",num_child,optarg); +#endif + exit(14); + } + if(num_child <= 0) + num_child = 8; + if(num_child == 0) + num_child=1; + t_range[t_count++]=num_child; + maxt = (maxt>num_child?maxt:num_child); + trflag++; + if(Uflag) + { + printf("Can not run throughput tests with unmount & remounts.\n"); + exit(15); + } + break; + case 'd': /* Specify the delay of children to run */ + delay_start = (long long)(atoi(optarg)); + if(delay_start < 0) + delay_start=0; + break; + case 'i': /* Specify specific tests */ + tval=(long long)(atoi(optarg)); + if(tval < 0) tval=0; +#ifndef HAVE_PREAD + if(tval > RANDOM_MIX_TEST) + { + printf("\tPread tests not available on this operating system.\n"); + exit(183); + } +#endif + if(tval > sizeof(func)/sizeof(char *)) + { + tval=0; + sprintf(splash[splash_line++],"\tSelected test not available on the version.\n"); + } + include_test[tval]++; + include_tflag++; + break; + case 'v': /* Show version information */ + for(ind=0; strlen(head1[ind]); ind++) + { + printf("%s\n", head1[ind]); + } + exit(0); + break; + case 'U': /* Specify the dev name for umount/mount*/ + Uflag++; + strcpy(mountname,optarg); + if(trflag) + { + printf("Can not run throughput tests with unmount & remounts.\n"); + exit(16); + } + break; + case 'w': /* Do not unlink files */ + sprintf(splash[splash_line++],"\tSetting no_unlink\n"); + no_unlink = 1; + break; + case 'Z': /* Turn on the mmap and file I/O mixing */ + sprintf(splash[splash_line++],"\tEnable mmap & file I/O mixing.\n"); + mmap_mix = 1; + break; + case 'W': /* Read/Write with file locked */ + file_lock=1; + sprintf(splash[splash_line++],"\tLock file when reading/writing.\n"); + break; + case 'K': /* Cause disrupted read pattern */ + disrupt_flag=1; + sprintf(splash[splash_line++],"\tDisrupted read patterns selected.\n"); + break; + case 'X': /* Open write 
telemetry file */ + compute_flag=1; + sverify=2; /* touch lightly */ + w_traj_flag=1; + strcpy(write_traj_filename,optarg); + traj_vers(); + w_traj_size(); + sprintf(splash[splash_line++],"\tUsing write telemetry file \"%s\"\n", + write_traj_filename); + w_traj_fd=open_w_traj(); + if(w_traj_fd == (FILE *)0) + exit(200); + break; + case 'Y': /* Open Read telemetry file */ + compute_flag=1; + sverify=2; /* touch lightly */ + r_traj_flag=1; + strcpy(read_traj_filename,optarg); + sprintf(splash[splash_line++],"\tUsing read telemetry file \"%s\"\n", + read_traj_filename); + traj_vers(); + r_traj_size(); + r_traj_fd=open_r_traj(); + if(r_traj_fd == (FILE*) 0) + exit(200); + break; + case 'n': /* Set min file size for auto mode */ + nflag=1; + minimum_file_size = (off64_t)atoi(optarg); + if(optarg[strlen(optarg)-1]=='k' || + optarg[strlen(optarg)-1]=='K'){ + ; + } + if(optarg[strlen(optarg)-1]=='m' || + optarg[strlen(optarg)-1]=='M'){ + minimum_file_size = (long long)(1024 * atoi(optarg)); + } + if(optarg[strlen(optarg)-1]=='g' || + optarg[strlen(optarg)-1]=='G'){ + minimum_file_size = (long long)(1024 * 1024 * (long long)atoi(optarg)); + } + if(minimum_file_size < RECLEN_START/1024) + minimum_file_size=(off64_t)(RECLEN_START/1024); + if(minimum_file_size < page_size/1024) + minimum_file_size=(off64_t)(page_size/1024); +#ifdef NO_PRINT_LLD + sprintf(splash[splash_line++],"\tUsing minimum file size of %ld kilobytes.\n",minimum_file_size); +#else + sprintf(splash[splash_line++],"\tUsing minimum file size of %lld kilobytes.\n",minimum_file_size); +#endif + break; + case 'g': /* Set maximum file size for auto mode */ + gflag=1; + maximum_file_size = (off64_t)atoi(optarg); + if(optarg[strlen(optarg)-1]=='k' || + optarg[strlen(optarg)-1]=='K'){ + ; + } + if(optarg[strlen(optarg)-1]=='m' || + optarg[strlen(optarg)-1]=='M'){ + maximum_file_size = (long long)(1024 * atoi(optarg)); + } + if(optarg[strlen(optarg)-1]=='g' || + optarg[strlen(optarg)-1]=='G'){ + maximum_file_size = 
(long long)(1024 * 1024 * (long long)atoi(optarg)); + } + if(maximum_file_size < RECLEN_START/1024) + maximum_file_size=(off64_t)(RECLEN_START/1024); +#ifdef NO_PRINT_LLD + sprintf(splash[splash_line++],"\tUsing maximum file size of %ld kilobytes.\n",maximum_file_size); +#else + sprintf(splash[splash_line++],"\tUsing maximum file size of %lld kilobytes.\n",maximum_file_size); +#endif + break; + case 'z': /* Set no cross over */ + sprintf(splash[splash_line++],"\tCross over of record size disabled.\n"); + NOCROSSflag=1; + break; + case 'y': /* Set min record size for auto mode */ + yflag=1; + min_rec_size = ((long long)(atoi(optarg))*1024); + if(optarg[strlen(optarg)-1]=='k' || + optarg[strlen(optarg)-1]=='K'){ + min_rec_size = (long long)(1024 * atoi(optarg)); + } + if(optarg[strlen(optarg)-1]=='m' || + optarg[strlen(optarg)-1]=='M'){ + min_rec_size = (long long)(1024 * 1024 * atoi(optarg)); + } + if(optarg[strlen(optarg)-1]=='g' || + optarg[strlen(optarg)-1]=='G'){ + min_rec_size = (long long)(1024 * 1024 * 1024 *(long long)atoi(optarg)); + } + if(min_rec_size <= 0) + min_rec_size=(long long)RECLEN_START; +#ifdef NO_PRINT_LLD + sprintf(splash[splash_line++],"\tUsing Minimum Record Size %ld KB\n", min_rec_size/1024); +#else + sprintf(splash[splash_line++],"\tUsing Minimum Record Size %lld KB\n", min_rec_size/1024); +#endif + break; + case 'q': /* Set max record size for auto mode */ + qflag=1; + max_rec_size = ((long long)(atoi(optarg))*1024); + if(optarg[strlen(optarg)-1]=='k' || + optarg[strlen(optarg)-1]=='K'){ + max_rec_size = (long long)(1024 * atoi(optarg)); + } + if(optarg[strlen(optarg)-1]=='m' || + optarg[strlen(optarg)-1]=='M'){ + max_rec_size = (long long)(1024 * 1024 * atoi(optarg)); + } + if(optarg[strlen(optarg)-1]=='g' || + optarg[strlen(optarg)-1]=='G'){ + max_rec_size = (long long)(1024 * 1024 * 1024 *(long long)atoi(optarg)); + } + if(max_rec_size <= 0) + min_rec_size=(long long)RECLEN_END; + if(max_rec_size > MAXBUFFERSIZE) { +#ifdef NO_PRINT_LLD 
+ printf("Error: maximum record size %ld KB is greater than maximum buffer size %ld KB\n ", + max_rec_size/1024, MAXBUFFERSIZE/1024); +#else + printf("Error: maximum record size %lld KB is greater than maximum buffer size %lld KB\n ", + (long long)(max_rec_size/1024LL), (long long)MAXBUFFERSIZE/1024LL); +#endif + exit(23); + } +#ifdef NO_PRINT_LLD + sprintf(splash[splash_line++],"\tUsing Maximum Record Size %ld KB\n", max_rec_size/1024); +#else + sprintf(splash[splash_line++],"\tUsing Maximum Record Size %lld KB\n", max_rec_size/1024); +#endif + break; + + /* + * The + operator is for the new extended options mechanism + * Syntax is -+ followed by option leter, and if the optino + * takes an operand then it is implemented below. An example + * -+a arg is shown below. This is a sub option with an argument. + * -+b is shown below. This is a sub option with no argument. + */ + case '+': + /* printf("Plus option = >%s<\n",optarg);*/ + switch (*((char *)optarg)) + { + case 'a': /* Example: Has argument */ + subarg=argv[optind++]; + /* if(subarg!=(char *)0) Error checking. 
*/ + /* printf("Plus option argument = >%s<\n",subarg);*/ + break; + case 'b': /* Example: Does not have an argument */ + break; + case 'c': /* Argument is the controlling host name */ + /* I am a client for distributed Iozone */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+c takes an operand !!\n"); + exit(200); + } + strcpy(controlling_host_name,subarg); + distributed=1; + client_iozone=1; + master_iozone=0; + break; + case 'h': /* Argument is the controlling host name */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+h takes an operand !!\n"); + exit(200); + } + strcpy(controlling_host_name,subarg); + sprintf(splash[splash_line++],"\tHostname = %s\n",controlling_host_name); + break; + case 'm': /* I am the controlling process for distributed Iozone */ + /* Does not have an argument */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+m takes an operand. ( filename )\n"); + exit(201); + } + strcpy(client_filename,subarg); + ret=get_client_info(); + if(ret <= 0) + { + printf("Error reading client file\n"); + exit(178); + } + clients_found=ret; + distributed=1; + master_iozone=1; + client_iozone=0; + sprintf(splash[splash_line++],"\tNetwork distribution mode enabled.\n"); + break; + case 'N': /* turn off truncating the file before write test */ + notruncate = 1; + break; + case 'u': /* Set CPU utilization output flag */ + cpuutilflag = 1; /* only used if R(eport) flag is also set */ + get_rusage_resolution(); + sprintf(splash[splash_line++],"\tCPU utilization Resolution = %5.3f seconds.\n",cputime_res); + sprintf(splash[splash_line++],"\tCPU utilization Excel chart enabled\n"); + break; + case 's': /* Clients operate in silent mode. */ + /* Does not have an argument */ + silent=1; + break; + case 'd': /* Diagnostics mode */ + sprintf(splash[splash_line++],"\t>>> I/O Diagnostic mode enabled. 
<<<\n"); + sprintf(splash[splash_line++],"\tPerformance measurements are invalid in this mode.\n"); + diag_v=1; + sverify=0; + break; + case 'x': /* Argument is the multiplier for rec size and file size */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+c takes an operand !!\n"); + exit(200); + } + multiplier = atoi(subarg); + if(multiplier <=1) + multiplier = 2; + break; + case 'i': /* Argument is the host port */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+i takes an operand !!\n"); + exit(200); + } + controlling_host_port = atoi(subarg); + break; + case 'p': /* Argument is the percentage read */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+p takes an operand !!\n"); + exit(200); + } + pct_read = atoi(subarg); + if(pct_read < 1) + pct_read = 1; + if(pct_read >=100) + pct_read = 100; + sprintf(splash[splash_line++],"\tPercent read in mix test is %d\n",pct_read); + break; + case 't': /* Speed code activated */ + speed_code=1; + break; +#if defined(_HPUX_SOURCE) || defined(linux) || defined(solaris) + case 'r': /* Read sync too */ + read_sync=1; + sprintf(splash[splash_line++],"\tRead & Write sync mode active.\n"); + break; +#endif +#ifndef NO_MADVISE + case 'A': /* Argument is madvise selector */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+A take an operand !!\n"); + exit(200); + } + advise_flag=1; + advise_op=atoi(subarg); + sprintf(splash[splash_line++],"\tMadvise enabled: %d\n",advise_op); + break; +#endif + case 'n': /* Set no-retest */ + noretest = 1; + sprintf(splash[splash_line++],"\tNo retest option selected\n"); + break; + case 'k': /* Constant aggregate data set size */ + aggflag=1; + break; + case 'q': /* Argument is the rest time between tests in seconds */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+q takes an operand !!\n"); + exit(200); + } + rest_val = (long long)atoi(subarg); + if(rest_val <=0) + rest_val = 0; + restf=1; + 
sprintf(splash[splash_line++],"\tDelay %d seconds between tests enabled.\n",atoi(subarg)); + break; +#if defined(O_DSYNC) + case 'D': /* O_DSYNC mode */ + sprintf(splash[splash_line++],"\t>>> O_DSYNC mode enabled. <<<\n"); + odsync=1; + break; +#endif + case 'l': /* Record locking mode */ + sprintf(splash[splash_line++],"\t>>> Record locking mode enabled. <<<\n"); + rlocking=1; + break; + case 'L': /* Record locking mode shared files*/ + sprintf(splash[splash_line++],"\t>>> Record locking, shared file mode enabled. <<<\n"); + share_file=1; + rlocking=1; + break; + case 'V': /* No Record locking shared files*/ + sprintf(splash[splash_line++],"\t>>> Shared file mode enabled. <<<\n"); + share_file=1; + break; + case 'B': /* Sequential mix */ + sprintf(splash[splash_line++],"\t>>> Sequential Mixed workload. <<<\n"); + seq_mix=1; + break; + /* Use an existing user file, that does + not contain Iozone's pattern. Use file + for testing, but read only, and no + delete at the end of the test. Also, + no pattern verification, but do touch + the pages. */ + case 'E': + sprintf(splash[splash_line++],"\t>>> No Verify mode. <<<\n"); + sverify=2; + no_unlink=1; + no_write=1; + break; + case 'T': /* Time stamps on */ + L_flag=1; + break; + case 'X': /* Short circuit test mode */ + X_flag = 1; + sverify=1; + verify=1; + inp_pat = 0xBB; + pattern = ((inp_pat << 24) | + (inp_pat << 16) | (inp_pat << 8) | + inp_pat); + sprintf(splash[splash_line++],"\tShort circuit mode. 
For\n"); + sprintf(splash[splash_line++],"\t filesystem development testing ONLY !\n"); + break; + case 'Z': /* Compatibility mode for 0xA5 */ + Z_flag = 1; + sverify=1; + verify=1; + inp_pat = 0xA5; + pattern = ((inp_pat << 24) | + (inp_pat << 16) | (inp_pat << 8) | + inp_pat); + sprintf(splash[splash_line++],"\tUsing old data sets.\n"); + sprintf(splash[splash_line++],"\t Performance measurements may be invalid in this\n"); + sprintf(splash[splash_line++],"\t mode due to published hack.\n"); + break; +#if defined(Windows) + case 'U': /* Windows only Unbufferd I/O */ + unbuffered=1; + sprintf(splash[splash_line++],"\tUnbuffered Windows API usage. >>> Very Experimental <<<\n"); + break; +#endif + case 'K': /* Sony special for manual control of test 8 */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+K takes an operand !!\n"); + exit(204); + } + Kplus_readers = (int)atoi(subarg); + if(Kplus_readers <=0) + Kplus_readers = 1; + Kplus_flag=1; + sprintf(splash[splash_line++],"\tManual control of test 8. >>> Very Experimental. 
Sony special <<<\n"); + break; + case 'w': /* Argument is the percent of dedup */ + /* Sets size of dedup region across files */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+w takes an operand !!\n"); + exit(200); + } + dedup = atoi(subarg); + if(dedup <=0) + dedup = 0; + if(dedup >100) + dedup = 100; + sprintf(splash[splash_line++],"\tDedup activated %d percent.\n",dedup); + break; + case 'y': /* Argument is the percent of interior dedup */ + /* Sets size of dedup region within and across files */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+y takes an operand !!\n"); + exit(200); + } + dedup_interior = atoi(subarg); + if(dedup_interior <0) + dedup_interior = 0; + if(dedup_interior >100) + dedup_interior = 100; + sprintf(splash[splash_line++],"\tDedupe within & across %d percent.\n",dedup_interior); + break; + case 'C': /* Argument is the percent of dedupe within & !across */ + /* Sets size of dedup region within and !across files */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+C takes an operand !!\n"); + exit(200); + } + dedup_compress = atoi(subarg); + if(dedup_compress <0) + dedup_compress = 0; + if(dedup_compress >100) + dedup_compress = 100; + sprintf(splash[splash_line++],"\tDedupe within %d percent.\n",dedup_compress); + break; + case 'S': /* Argument is the seed for dedup */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+S takes an operand !!\n"); + exit(200); + } + dedup_mseed = atoi(subarg); + if(dedup_mseed ==0) + dedup_mseed = 1; + sprintf(splash[splash_line++],"\tDedup manual seed %d .\n",dedup_mseed); + break; + case 'H': /* Argument is hostname of the PIT */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+H takes operand !!\n"); + exit(200); + } + strcpy(pit_hostname,subarg); + sprintf(splash[splash_line++],"\tPIT_host %s\n",pit_hostname); + + break; + case 'P': /* Argument is port of the PIT */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + 
printf("-+P takes operand !!\n"); + exit(200); + } + strcpy(pit_service,subarg); + sprintf(splash[splash_line++],"\tPIT_port %s\n",pit_service); + break; + case 'z': /* Enable hist summary*/ + hist_summary=1; + sprintf(splash[splash_line++],"\tHistogram summary enabled.\n"); + break; + case 'O': /* Argument is the Op rate */ + subarg=argv[optind++]; + if(subarg==(char *)0) + { + printf("-+O takes an operand !!\n"); + exit(200); + } + op_rate = atoi(subarg); + if(op_rate <= 0) + op_rate = 1; + op_rate_flag = 1; + sprintf(splash[splash_line++],"\tRate control active %d Ops/sec .\n",op_rate); + break; + default: + printf("Unsupported Plus option -> %s <-\n",optarg); + exit(255); + break; + } + break; + default: + printf("Unsupported option -> %s <-\n",optarg); + exit(255); + } + } + base_time=(long)time_so_far(); + get_resolution(); /* Get clock resolution */ + if(speed_code) + { + do_speed_check(client_iozone); + exit(0); + } + if(r_count > 1) + { + aflag=1; + rflag=0; + NOCROSSflag=1; + } + if(s_count > 1) + { + aflag=1; + sflag=0; + NOCROSSflag=1; + } + /* + * If not in silent mode then display the splash screen. + */ + for(i=0;i clients_found) + { + printf("You can not specify more threads/processes than you have in the client file list\n"); + exit(202); + } + } + + if(!OPS_flag && !MS_flag) + { + if(!silent) printf("\tOutput is in Kbytes/sec\n"); + } + if (min_rec_size > max_rec_size) { +#ifdef NO_PRINT_LLD + printf("Error: minimum record size %ld KB is greater than maximum record size %ld KB\n ", + min_rec_size/1024, max_rec_size/1024); +#else + printf("Error: minimum record size %lld KB is greater than maximum record size %lld KB\n ", + min_rec_size/1024, max_rec_size/1024); +#endif + exit(23); + } + orig_min_rec_size=min_rec_size; + orig_max_rec_size=max_rec_size; + /* + * No telemetry files... 
just option selected + */ + if(compute_flag && jflag && !(r_traj_flag || w_traj_flag)) + if(!silent) printf("\tCompute time %f seconds for reads and writes.\n",compute_time); + /* + * Read telemetry file and option selected + */ + if(compute_flag && r_traj_flag && !w_traj_flag) + { + if(r_traj_items==3) + { + if(!silent) printf("\tCompute time from telemetry files for reads.\n"); + } + else + { + if(jflag) + if(!silent) printf("\tCompute time %f seconds for reads.\n",compute_time); + } + if(jflag) + if(!silent) printf("\tCompute time %f seconds for writes.\n",compute_time); + } + /* + * Write telemetry file and option selected + */ + if(compute_flag && !r_traj_flag && w_traj_flag) + { + if(w_traj_items==3) + { + if(!silent) printf("\tCompute time from telemetry files for writes.\n"); + } + else + { + if(jflag) + if(!silent) printf("\tCompute time %f seconds for writes.\n",compute_time); + } + if(jflag) + if(!silent) printf("\tCompute time %f seconds for reads.\n",compute_time); + } + if(compute_flag && r_traj_flag && w_traj_flag && jflag) + { + if(r_traj_items==3) + { + if(!silent) printf("\tCompute time from telemetry files for reads.\n"); + } + else + { + if(!silent) printf("\tCompute time %f seconds for reads.\n",compute_time); + } + if(w_traj_items==3) + { + if(!silent) printf("\tCompute time from telemetry files for writes.\n"); + } + else + { + if(!silent) printf("\tCompute time %f seconds for writes.\n",compute_time); + } + } + if(compute_flag && r_traj_flag && w_traj_flag && !jflag) + { + if(r_traj_items==3) + { + if(!silent) printf("\tCompute time from telemetry files for reads.\n"); + } + else + { + if(!silent) printf("\tNo compute time for reads.\n"); + } + + if(w_traj_items==3) + { + if(!silent) printf("\tCompute time from telemetry files for writes.\n"); + } + else + { + if(!silent) printf("\tNo compute time for writes.\n"); + } + } + + /* Enforce only write,rewrite,read,reread */ + if(w_traj_flag || r_traj_flag) + { + for(i=2;i> %llx",include_mask); 
HERE */ + } + if(no_write) /* Disable if any writer would disturbe existing file */ + { + if(include_test[0] || include_test[4] || + include_test[6] || include_test[8] || include_test[9] || + include_test[11]) + { + printf("You must disable any test that writes when using -+E\n"); + exit(20); + } + } + if(no_write) /* User must specify the existing file name */ + { + if(!(fflag | mfflag)) + { + printf("You must use -f or -F when using -+E\n"); + exit(20); + } + } + if(h_flag && k_flag) + { + printf("\n\tCan not do both -H and -k\n"); + exit(20); + } + if((dedup | dedup_interior) && diag_v) + { + printf("\n\tCan not do both -+d and -+w\n"); + exit(20); + } + + if(!aflag && !rflag) + max_rec_size=min_rec_size; + + init_record_sizes(min_rec_size,max_rec_size); + if(!silent) printf("\tTime Resolution = %1.6f seconds.\n",time_res); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tProcessor cache size set to %ld Kbytes.\n",cache_size/1024); + if(!silent) printf("\tProcessor cache line size set to %ld bytes.\n",cache_line_size); + if(!silent) printf("\tFile stride size set to %ld * record size.\n",stride); +#else + if(!silent) printf("\tProcessor cache size set to %ld Kbytes.\n",cache_size/1024); + if(!silent) printf("\tProcessor cache line size set to %ld bytes.\n",cache_line_size); + if(!silent) printf("\tFile stride size set to %lld * record size.\n",stride); +#endif + if(!rflag) + reclen=(long long)4096; + + if(uflag && !lflag) + num_child=mint = 1; + if(lflag && !uflag) + maxt = mint; + if(use_thread) + port="thread"; + else + port="process"; + if(lflag || uflag){ +#ifdef NO_PRINT_LLD + if(!silent) printf("\tMin %s = %ld \n",port,mint); + if(!silent) printf("\tMax %s = %ld \n",port,maxt); +#else + if(!silent) printf("\tMin %s = %lld \n",port,mint); + if(!silent) printf("\tMax %s = %lld \n",port,maxt); +#endif + } + if(trflag) + { + if(num_child > 1) + { + if(use_thread) + { + port="threads"; + } + else + { + port="processes"; + } + } + +#ifdef NO_PRINT_LLD + if(!silent) 
printf("\tThroughput test with %ld %s\n", num_child,port); +#else + if(!silent) printf("\tThroughput test with %lld %s\n", num_child,port); +#endif + } + numrecs64 = (long long)(kilobytes64*1024)/reclen; + if (reclen > (long long)MAXBUFFERSIZE) { +#ifdef NO_PRINT_LLD + printf("Error: Maximum record length is %ld bytes\n", + MAXBUFFERSIZE); +#else + printf("Error: Maximum record length is %lld bytes\n", + (long long)MAXBUFFERSIZE); +#endif + exit(21); + } + if (reclen < (long long)MINBUFFERSIZE) { +#ifdef NO_PRINT_LLD + printf("Error: Minimum record length is %ld bytes\n", + MINBUFFERSIZE); +#else + printf("Error: Minimum record length is %lld bytes\n", + (long long)MINBUFFERSIZE); +#endif + exit(22); + } + /* Only bzero or fill that which you will use. The buffer is very large */ + if(verify ) + { + fill_buffer((char *)buffer,l_min(reclen,(long long)cache_size),(long long)pattern,(char)sverify,(long long)0); + if(pflag) + fill_buffer((char *)pbuffer,l_min(reclen,(long long)cache_size),(long long)pattern,(char)sverify,(long long)0); + if(mflag) + fill_buffer((char *)mbuffer,l_min(reclen,(long long)cache_size),(long long)pattern,(char)sverify,(long long)0); + } + else + { + bzero(buffer,(size_t)l_min(reclen,(long long)cache_size)); + } + +#ifndef NO_THREADS +#if defined( _HPUX_SOURCE ) || defined ( linux ) + if(ioz_processor_bind) + { + bind_cpu=begin_proc; +#if defined( _HPUX_SOURCE ) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + orig_size=kilobytes64; + if(trflag){ + (void)multi_throughput_test(mint,maxt); + goto out; + } + if(trflag && (mint == maxt)){ + auto_mode=0; + throughput_test(); + goto out; + } + if (aflag) { + print_header(); + auto_test(); + goto out; + } + 
print_header(); + (void) begin(kilobytes64,reclen); +out: + if(r_traj_flag) + fclose(r_traj_fd); + if(w_traj_flag) + fclose(w_traj_fd); + if (!no_unlink) + { + if(check_filename(dummyfile[0])) + unlink(dummyfile[0]); /* delete the file */ + } + if(!silent) printf("\niozone test complete.\n"); + if(res_prob) + { + printf("Timer resolution is poor. Some small transfers may have \n"); + printf("reported inaccurate results. Sizes %ld Kbytes and below.\n", + (long)(rec_prob/(long long)1024)); + } + + if(Rflag && !trflag){ + dump_excel(); + } + return(0); +} + +#ifdef HAVE_ANSIC_C +void +record_command_line(int argc, char **argv) +#else +void +record_command_line(argc, argv) +int argc; +char **argv; +#endif +{ + int ix, len = 0; + + /* print and save the entire command line */ + if(!silent) printf("\tCommand line used:"); + for (ix=0; ix < argc; ix++) { + if(!silent) printf(" %s", argv[ix]); + if ((len + strlen(argv[ix])) < sizeof(command_line)) { + strcat (command_line, argv[ix]); + strcat (command_line, " "); + len += strlen(argv[ix]) + 1; + } + else { + printf ("Command line too long to save completely.\n"); + break; + } + } + if(!silent) printf("\n"); +} + +/*************************************************************************/ +/* BEGIN() */ +/* This is the main work horse. It is called from main and from */ +/* auto_test. 
The caller provides the size of file and the record length.*/ +/*************************************************************************/ +#ifdef HAVE_ANSIC_C +void +begin(off64_t kilos64,long long reclength) +#else +void +begin(kilos64,reclength) +off64_t kilos64; +long long reclength; +#endif +{ + long long num_tests,test_num,i,j; + long long data1[MAXTESTS], data2[MAXTESTS]; + num_tests = sizeof(func)/sizeof(char *); +#if defined(HAVE_PREAD) + if(!Eflag) + { +#if defined(HAVE_PREAD) && defined(HAVE_PREADV) + num_tests -= 4; +#else + num_tests -= 2; +#endif + if(mmapflag || async_flag) + { + num_tests -= 2; + } + } + else + { + if(mmapflag || async_flag) +#if defined(HAVE_PREAD) && defined(HAVE_PREADV) + num_tests -= 6; +#else + num_tests -= 4; +#endif + } +#else + if(mmapflag || async_flag) + { + num_tests -= 2; + } +#endif + + if(RWONLYflag) num_tests = 2; /* kcollins 8-21-96*/ + sync(); /* just in case there were some dirty */ + sync(); + kilobytes64=kilos64; + reclen=reclength; + numrecs64 = (kilobytes64*1024)/reclen; + store_value(kilobytes64); + if(r_traj_flag || w_traj_flag) + store_value((off64_t)0); + else + store_value((off64_t)(reclen/1024)); + +#ifdef NO_PRINT_LLD + if(!silent) printf("%16ld",kilobytes64); + if(r_traj_flag || w_traj_flag) + { + if(!silent) printf("%8ld",0); + } + else + { + if(!silent) printf("%8ld",reclen/1024); + } +#else + if(!silent) printf("%16lld",kilobytes64); + if(r_traj_flag || w_traj_flag) + { + if(!silent) printf("%8lld",(long long )0); + } + else + { + if(!silent) printf("%8lld",reclen/1024); + } +#endif + if(include_tflag) + { + for(i=0;i (long long)(min_file_size*1024)) { +#ifdef NO_PRINT_LLD + printf("Error: record length %ld is greater than filesize %ld KB\n ", + min_rec_size,min_file_size); +#else + printf("Error: record length %lld is greater than filesize %lld KB\n ", + min_rec_size,min_file_size); +#endif + exit(23); + } + + if(NOCROSSflag) xover = max_file_size; + + init_file_sizes(min_file_size, max_file_size); 
+ del_record_sizes(); + orig_min_rec_size=min_rec_size; + orig_max_rec_size=max_rec_size; + init_record_sizes(min_rec_size, max_rec_size); + + for(kilosi=get_next_file_size((off64_t)0); kilosi>0; kilosi=get_next_file_size(kilosi)) + { + /****************************************************************/ + /* Start with record size of min_rec_size bytes and repeat the */ + /* test, multiplying the record size by MULTIPLIER each time, */ + /* until we reach max_rec_size. At the CROSSOVER we stop doing */ + /* small buffers as it takes forever and becomes very */ + /* un-interesting. */ + /****************************************************************/ + if(!rflag && !sflag && !yflag && !qflag) + if(kilosi > xover){ + min_rec_size = LARGE_REC; + mult = orig_min_rec_size/1024; + del_record_sizes(); + init_record_sizes(min_rec_size, max_rec_size); + /************************************/ + /* Generate dummy entries in the */ + /* Excel buffer for skipped */ + /* record sizes */ + /************************************/ + for(count1=min_rec_size; + (count1 != orig_min_rec_size) && ( + mult <= (kilosi*1024)) ; + count1=(count1>>1)) + { + current_x=0; + store_value((off64_t)kilosi); + store_value((off64_t)mult); + for(xx=0;xx<20;xx++) + store_value((off64_t)0); + mult=mult*2; + current_y++; + if(current_y>max_y) + max_y=current_y; + current_x=0; + } + } + + for (recszi=get_next_record_size((off64_t)0);recszi!=0;recszi=get_next_record_size(recszi)) + { + if(recszi > (kilosi*1024)) + break; + begin(kilosi, recszi ); + current_x=0; + current_y++; + } + } +} + + +/****************************************************************/ +/* */ +/* THROUGHPUT_TEST () Multi process throughput test */ +/* */ +/* Note: States for share memory barrier are: */ +/* 0 = Child not running or has finished. */ +/* 1 = Child is ready to begin. */ +/* 2 = Child is told to begin. 
*/ +/****************************************************************/ +/* Data in shared memory format is: */ +/* */ +/* struct child_stats { */ +/* long long flag; Used to barrier */ +/* double walltime; Child's elapsed time */ +/* double cputime; Child's CPU time */ +/* double throughput; Child's throughput */ +/* double actual; Child's actual read/written */ +/* } */ +/* */ +/* There is an array of child_stat structures layed out in */ +/* shared memory. */ +/* */ +/****************************************************************/ + +#ifdef HAVE_ANSIC_C +void +throughput_test(void) +#else +void +throughput_test() +#endif +{ + char *unit; + double starttime1 = 0; + double jstarttime = 0; + double jtime = 0; + double walltime = 0; + double cputime = 0; + char *port; + char getout; + long long throughsize = KILOBYTES; + long long xx,xy,i; + long long xyz; + double ptotal; + off64_t written_so_far, read_so_far, re_written_so_far,re_read_so_far; + VOLATILE char *temp; + double min_throughput = 0; + double max_throughput = 0; + double avg_throughput = 0; + double min_xfer = 0; + + + toutputindex=0; + strcpy(&toutput[0][0],throughput_tests[0]); + ptotal=written_so_far=read_so_far=re_written_so_far=re_read_so_far=0 ; + + if(OPS_flag) + unit="ops"; + else + unit="KB"; + + if(!haveshm) + { + shmaddr=(struct child_stats *)alloc_mem((long long)SHMSIZE,(int)1); +#ifdef _64BIT_ARCH_ + if((long long)shmaddr==(long long)-1) +#else + if((long )shmaddr==(long)-1) +#endif + { + printf("\nShared memory not working\n"); + exit(24); + } + haveshm=(char*)shmaddr; + } + else + shmaddr=(struct child_stats *)haveshm; + + if(use_thread) + stop_flag = &stoptime; + else + { + temp = (char *)&shmaddr[0]; + stop_flag = (char *)&temp[(long long)SHMSIZE]-4; + } + for(xyz=0;xyzflag=CHILD_STATE_HOLD; + child_stat->actual=0; + child_stat->throughput=0; + child_stat->cputime=0; + child_stat->walltime=0; + } + *stop_flag = 0; + if(!sflag) + kilobytes64=throughsize; + if(!rflag) + reclen=(long 
long)4096; + if(aggflag) + kilobytes64=orig_size/num_child; + numrecs64 = (long long)(kilobytes64*1024)/reclen; + buffer=mainbuffer; + if(use_thread) + port="thread"; + else + port="process"; + if(w_traj_flag) + { +#ifdef NO_PRINT_LLD + if(!silent) printf("\tEach %s writes a %ld Kbyte file in telemetry controlled records\n", + port,kilobytes64); +#else + if(!silent) printf("\tEach %s writes a %lld Kbyte file in telemetry controlled records\n", + port,kilobytes64); +#endif + } + else + { +#ifdef NO_PRINT_LLD + if(!silent) printf("\tEach %s writes a %ld Kbyte file in %ld Kbyte records\n", + port,kilobytes64,reclen/1024); +#else + if(!silent) printf("\tEach %s writes a %lld Kbyte file in %lld Kbyte records\n", + port,kilobytes64,reclen/1024); +#endif + } + + if(fflag) /* Each child has a file name to write */ + for(xx=0;xxflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag=CHILD_STATE_BEGIN; + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); /* Start parents timer */ + goto waitout; + } + +waitout: + getout=0; + if((long long)getpid() == myid) { /* Parent only */ + starttime1 = time_so_far(); /* Wait for all children */ + for( i = 0; i < num_child; i++){ + child_stat = (struct child_stats *) &shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* get parents total time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + + 
total_kilos=0; + ptotal=0; + walltime = 0.0; + cputime = 0.0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; /* add up the children */ + ptotal += child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + /* Add up the cpu times of all children */ + cputime += child_stat->cputime; + + /* and find the child with the longest wall time */ + /* Get the earliest start time and latest fini time to calc. elapsed time. */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { + if (cputime < cputime_res) + cputime = 0.0; + } + + for(xyz=0;xyzflag = CHILD_STATE_HOLD; /* Start children at state 0 (HOLD) */ + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). 
*/ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %2ld initial writers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2ld initial writers \t= %10.2f %s/sec\n",num_child,((double)(ptotal)/total_time),unit); +#else + if(!silent) printf("\tChildren see throughput for %2lld initial writers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2lld initial writers \t= %10.2f %s/sec\n",num_child,((double)(ptotal)/total_time),unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU Utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Write"); + /**********************************************************/ + /*************** End of intitial writer *******************/ + 
/**********************************************************/ + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + *stop_flag=0; + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } + + /**********************************************************/ + /* Re-write throughput performance test. ******************/ + /**********************************************************/ + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + total_kilos=0; + toutputindex=1; + strcpy(&toutput[1][0],throughput_tests[1]); + if(noretest) + { + store_dvalue( (double)0); + goto next0; + } + if((!distributed) || (distributed && master_iozone)) + start_monitor("Rewrite"); + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_REWRITE_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(28); + } + if(childids[xx] == 0){ +#ifdef _64BIT_ARCH_ + thread_rwrite_test((void *)xx); +#else + thread_rwrite_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_rwrite_test,xx); +#else + childids[xx] = mythread_create( thread_rwrite_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; 
xy++){ + Kill((long long)myid,(long long)SIGTERM); + } + exit(29); + } + } + } +#endif + if((long long)myid == getpid()) + { + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + goto jump3; + } + +jump3: + getout=0; + if((long long)myid == getpid()){ /* Parent only here */ + for( i = 0; i < num_child; i++){ + child_stat=(struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents total time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + + + total_kilos=0; + ptotal=0; + + min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get the earliest start time and latest fini time to calc. elapsed time. 
*/ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { +/* + if (walltime < cputime_res) + walltime = 0.0; +*/ + if (cputime < cputime_res) + cputime = 0.0; + } + + for(xyz=0;xyzflag = CHILD_STATE_HOLD; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). */ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %2ld rewriters \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2ld rewriters \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %2lld rewriters \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2lld rewriters \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, 
cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + *stop_flag=0; + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Rewrite"); + /**********************************************************/ + /*************** End of rewrite throughput ****************/ + /**********************************************************/ + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } +next0: + if(include_tflag) + if(!(include_mask & (long long)READER_MASK)) + goto next1; + /**************************************************************/ + /*** Reader throughput tests **********************************/ + /**************************************************************/ + if((!distributed) || (distributed && master_iozone)) + start_monitor("Read"); + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[2]); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + total_kilos=0; + if(distributed) + { + use_thread=0; + if(master_iozone) + master_listen_socket=start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_READ_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(30); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_read_test((void *)xx); +#else + thread_read_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + 
perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_read_test,xx); +#else + childids[xx] = mythread_create( thread_read_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(31); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + goto jumpend4; + } +jumpend4: + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + + total_kilos=0; + ptotal=0; + min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + 
min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get the earliest start time and latest fini time to calc. elapsed time. */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). */ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %2ld readers \t\t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2ld readers \t\t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %2lld readers \t\t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2lld readers \t\t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) 
printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Read"); + /**********************************************************/ + /*************** End of readers throughput ****************/ + /**********************************************************/ + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } + + /**************************************************************/ + /*** ReReader throughput tests **********************************/ + /**************************************************************/ + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[3]); + if(noretest) + { + store_dvalue( (double)0); + goto next1; + } + if((!distributed) || (distributed && master_iozone)) + start_monitor("Reread"); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + *stop_flag=0; + total_kilos=0; + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_REREAD_TEST, numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(32); + } + 
if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_rread_test((void *)xx); +#else + thread_rread_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + chid=xx; + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_rread_test,xx); +#else + childids[xx] = mythread_create( thread_rread_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(33); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + goto jumpend2; + } + +jumpend2: + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ /* wait for children to stop */ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; 
+ res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + min_throughput=max_throughput=min_xfer=0; + total_kilos=0; + ptotal=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get the earliest start time and latest fini time to calc. elapsed time. */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { +/* + if (walltime < cputime_res) + walltime = 0.0; +*/ + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). 
*/ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %ld re-readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %ld re-readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %lld re-readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %lld re-readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Reread"); + /**********************************************************/ + /*************** End of re-readers throughput ****************/ + 
/**********************************************************/ + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } + +next1: + if(include_tflag) + if(!(include_mask & (long long)REVERSE_MASK)) + goto next2; + sync(); + sleep(2); + + /**************************************************************/ + /*** Reverse reader throughput tests **************************/ + /**************************************************************/ + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[4]); + if((!distributed) || (distributed && master_iozone)) + start_monitor("Revread"); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + *stop_flag=0; + total_kilos=0; + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_REVERSE_READ_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(34); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_reverse_read_test((void *)xx); +#else + thread_reverse_read_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + chid=xx; + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_reverse_read_test,xx); +#else + childids[xx] = mythread_create( 
thread_reverse_read_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(35); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + } + + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ /* wait for children to stop */ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + total_kilos=0; + ptotal=0; + min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + 
max_throughput=child_stat->throughput; + /* walltime += child_stat->walltime; */ + cputime += child_stat->cputime; + /* Get the earliest start time and latest fini time to calc. elapsed time. */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { +/* + if (walltime < cputime_res) + walltime = 0.0; +*/ + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). */ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %ld reverse readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %ld reverse readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %lld reverse readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %lld reverse readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * 
cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Revread"); + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } +next2: + if(include_tflag) + if(!(include_mask & (long long)STRIDE_READ_MASK)) + goto next3; + /**************************************************************/ + /*** stride reader throughput tests **************************/ + /**************************************************************/ + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[5]); + if((!distributed) || (distributed && master_iozone)) + start_monitor("Strideread"); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + sync(); + sleep(2); + *stop_flag=0; + total_kilos=0; + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_STRIDE_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(36); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_stride_read_test((void *)xx); +#else + thread_stride_read_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + chid=xx; + 
if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_stride_read_test,xx); +#else + childids[xx] = mythread_create( thread_stride_read_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(37); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + } + + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ /* wait for children to stop */ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + total_kilos=0; + ptotal=0; + min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + 
for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + /* walltime += child_stat->walltime; */ + cputime += child_stat->cputime; + /* Get the biggest walltime */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { +/* + if (walltime < cputime_res) + walltime = 0.0; +*/ + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). */ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %ld stride readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %ld stride readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %lld stride readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %lld stride readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + 
if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Strideread"); + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } + /**************************************************************/ + /*** random reader throughput tests ***************************/ + /**************************************************************/ +next3: + if(include_tflag) + if(!(include_mask & (long long)RANDOM_RW_MASK)) + goto next4; + + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[6]); + if((!distributed) || (distributed && master_iozone)) + start_monitor("Randread"); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + sync(); + sleep(2); + *stop_flag=0; + total_kilos=0; + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_RANDOM_READ_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long 
long)childids[xy],(long long)SIGTERM); + } + exit(38); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_ranread_test((void *)xx); +#else + thread_ranread_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + chid=xx; + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_ranread_test,xx); +#else + childids[xx] = mythread_create( thread_ranread_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(39); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + } + + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ /* wait for children to stop */ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + 
if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + total_kilos=0; + ptotal=0; + min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get the biggest walltime */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). 
*/ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %ld random readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %ld random readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %lld random readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %lld random readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Randread"); + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); 
+ } + /**************************************************************/ + /*** mixed workload throughput tests ***************************/ + /**************************************************************/ +next4: + if(include_tflag) + if(!(include_mask & (long long)RANDOM_MIX_MASK)) + goto next5; + + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[7]); + if((!distributed) || (distributed && master_iozone)) + start_monitor("Mixed"); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + sync(); + sleep(2); + *stop_flag=0; + total_kilos=0; + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_RANDOM_MIX_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(38); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_mix_test((void *)xx); +#else + thread_mix_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + chid=xx; + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_mix_test,xx); +#else + childids[xx] = mythread_create( thread_mix_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(39); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + 
{ + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + } + + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ /* wait for children to stop */ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + total_kilos=0; + ptotal=0; + min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get the biggest walltime */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + 
avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). */ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %ld mixed workload \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %ld mixed workload \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %lld mixed workload \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %lld mixed workload \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && 
master_iozone)) + stop_monitor("Mixed"); + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } +next5: + /**************************************************************/ + /*** random writer throughput tests **************************/ + /**************************************************************/ + if(include_tflag) + if(!(include_mask & (long long)RANDOM_RW_MASK) || no_write) + goto next6; + + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[8]); + if((!distributed) || (distributed && master_iozone)) + start_monitor("Randwrite"); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + sync(); + sleep(2); + *stop_flag=0; + total_kilos=0; + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_RANDOM_WRITE_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(38); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_ranwrite_test((void *)xx); +#else + thread_ranwrite_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + chid=xx; + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_ranwrite_test,xx); +#else + childids[xx] = mythread_create( thread_ranwrite_test,(void *)(long)xx); +#endif 
+ if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(39); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + } + + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ /* wait for children to stop */ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + total_kilos=0; + ptotal=0; + min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get 
the biggest walltime */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). */ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %ld random writers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %ld random writers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %lld random writers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %lld random writers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) 
printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Randwrite"); + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } +next6: + /**************************************************************/ + /*** Pwrite writer throughput tests **************************/ + /**************************************************************/ +#ifndef HAVE_PREAD + goto next7; +#else + if(include_tflag) + if(!(include_mask & (long long)PWRITER_MASK)) + goto next7; + + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[9]); + if((!distributed) || (distributed && master_iozone)) + start_monitor("Pwrite"); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + sync(); + sleep(2); + *stop_flag=0; + total_kilos=0; + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_PWRITE_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(38); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_pwrite_test((void *)xx); +#else + thread_pwrite_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + chid=xx; + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char 
*)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_pwrite_test,xx); +#else + childids[xx] = mythread_create( thread_pwrite_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(39); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + } + + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ /* wait for children to stop */ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + total_kilos=0; + ptotal=0; + min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + 
min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get the biggest walltime*/ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). */ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %ld pwrite writers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %ld pwrite writers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %lld pwrite writers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %lld pwrite writers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + 
walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Pwrite"); + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } +#endif + /**************************************************************/ + /*** Pread reader throughput tests **************************/ + /**************************************************************/ +next7: + +#ifndef HAVE_PREAD + goto next8; +#else + if(include_tflag) + if(!(include_mask & (long long)PREADER_MASK)) + goto next8; + + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[10]); + if((!distributed) || (distributed && master_iozone)) + start_monitor("Pread"); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + sync(); + sleep(2); + *stop_flag=0; + total_kilos=0; + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_PREAD_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(38); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_pread_test((void *)xx); +#else + thread_pread_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child 
; xx++){ /* Create the children */ + chid=xx; + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_pread_test,xx); +#else + childids[xx] = mythread_create( thread_pread_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(39); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + } + + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ /* wait for children to stop */ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + total_kilos=0; + ptotal=0; + min_throughput=max_throughput=min_xfer=0; + 
if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get the biggest walltime*/ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). */ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %ld pread readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %ld pread readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %lld pread readers \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %lld pread readers \t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU 
utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Pread"); + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } +#endif +next8: + if(include_tflag) + if(!(include_mask & (long long)FWRITER_MASK)) + goto next9; + /**************************************************************/ + /*** fwriter throughput tests *********************************/ + /**************************************************************/ + if((!distributed) || (distributed && master_iozone)) + start_monitor("Fwrite"); + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[11]); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + total_kilos=0; + if(distributed) + { + use_thread=0; + if(master_iozone) + master_listen_socket=start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_FWRITE_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(30); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_fwrite_test((void *)xx); +#else + thread_fwrite_test((void *)((long)xx)); +#endif + } + } + } +#ifndef 
NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_fwrite_test,xx); +#else + childids[xx] = mythread_create( thread_fwrite_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(31); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + goto jumpend1; + } +jumpend1: + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + + total_kilos=0; + ptotal=0; + 
min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get the earliest start time and latest fini time to calc. elapsed time. */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). */ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %2ld fwriters \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2ld fwriters \t\t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %2lld fwriters \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2lld fwriters \t\t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + 
if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Fwrite"); + /**********************************************************/ + /*************** End of fwrite throughput ****************/ + /**********************************************************/ + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } +next9: + if(include_tflag) + if(!(include_mask & (long long)FREADER_MASK)) + goto next10; + /**************************************************************/ + /*** freader throughput tests *********************************/ + /**************************************************************/ + if((!distributed) || (distributed && master_iozone)) + start_monitor("Fread"); + toutputindex++; + strcpy(&toutput[toutputindex][0],throughput_tests[12]); + walltime = 0.0; + cputime = 0.0; + jstarttime=0; + total_kilos=0; + if(distributed) + { + use_thread=0; + if(master_iozone) + master_listen_socket=start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_FREAD_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy 
= 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(30); + } + if(childids[xx]==0){ +#ifdef _64BIT_ARCH_ + thread_fread_test((void *)xx); +#else + thread_fread_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ + if(!barray[xx]) + { + barray[xx]=(char *) alloc_mem((long long)(MAXBUFFERSIZE+cache_size),(int)0); + if(barray[xx] == 0) { + perror("Memory allocation failed:"); + exit(26); + } + barray[xx] =(char *)(((long)barray[xx] + cache_size ) & + ~(cache_size-1)); + } +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_fread_test,xx); +#else + childids[xx] = mythread_create( thread_fread_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + kill((pid_t)myid,(int)SIGTERM); + } + exit(31); + } + } + } +#endif + if(myid == (long long)getpid()){ + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + starttime1 = time_so_far(); + goto jumpend3; + } +jumpend3: + getout=0; + if(myid == (long long)getpid()){ /* Parent here */ + for( i = 0; i < num_child; i++){ + child_stat = (struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest running:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + if(!jstarttime) + jstarttime = time_so_far(); + } + jtime = (time_so_far()-jstarttime)-time_res; + if(jtime < (double).000001) + { + jtime=time_res; + } + } + total_time = (time_so_far() - starttime1)-time_res; /* Parents time */ + if(total_time < (double).000001) + { + 
total_time=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } +#ifdef JTIME + total_time=total_time-jtime;/* Remove the join time */ + if(!silent) printf("\nJoin time %10.2f\n",jtime); +#endif + + total_kilos=0; + ptotal=0; + min_throughput=max_throughput=min_xfer=0; + if(!silent) printf("\n"); + for(xyz=0;xyzthroughput; + ptotal+=child_stat->actual; + if(!min_xfer) + min_xfer=child_stat->actual; + if(child_stat->actual < min_xfer) + min_xfer=child_stat->actual; + if(!min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput < min_throughput) + min_throughput=child_stat->throughput; + if(child_stat->throughput > max_throughput) + max_throughput=child_stat->throughput; + cputime += child_stat->cputime; + /* Get the earliest start time and latest fini time to calc. elapsed time. */ + if (child_stat->walltime < child_stat->cputime) + child_stat->walltime = child_stat->cputime; + if (child_stat->walltime > walltime) + walltime = child_stat->walltime; + } + avg_throughput=total_kilos/num_child; + if(cpuutilflag) + { + if (cputime < cputime_res) + cputime = 0.0; + } + if(cpuutilflag) + store_times (walltime, cputime); /* Must be Before store_dvalue(). 
*/ + store_dvalue(total_kilos); +#ifdef NO_PRINT_LLD + if(!silent) printf("\tChildren see throughput for %2ld freaders \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2ld freaders \t\t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#else + if(!silent) printf("\tChildren see throughput for %2lld freaders \t= %10.2f %s/sec\n", num_child, total_kilos,unit); + if(!silent && !distributed) printf("\tParent sees throughput for %2lld freaders \t\t= %10.2f %s/sec\n", num_child, (double)(ptotal)/total_time,unit); +#endif + if(!silent) printf("\tMin throughput per %s \t\t\t= %10.2f %s/sec \n", port,min_throughput,unit); + if(!silent) printf("\tMax throughput per %s \t\t\t= %10.2f %s/sec\n", port,max_throughput,unit); + if(!silent) printf("\tAvg throughput per %s \t\t\t= %10.2f %s/sec\n", port,avg_throughput,unit); + if(!silent) printf("\tMin xfer \t\t\t\t\t= %10.2f %s\n", min_xfer,unit); + /* CPU% can be > 100.0 for multiple CPUs */ + if(cpuutilflag) + { + if(walltime == 0.0) + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 0.0); + } + else + { + if(!silent) printf("\tCPU utilization: Wall time %8.3f CPU time %8.3f CPU utilization %6.2f %%\n\n", + walltime, cputime, 100.0 * cputime / walltime); + } + } + if(Cflag) + { + for(xyz=0;xyzactual, unit, child_stat->throughput, unit, child_stat->walltime, + child_stat->cputime, cpu_util(child_stat->cputime, child_stat->walltime)); + } + else + { + if(!silent) printf("\tChild[%ld] xfer count = %10.2f %s, Throughput = %10.2f %s/sec\n", + (long)xyz, child_stat->actual, unit, child_stat->throughput, unit); + } + } + } + if((!distributed) || (distributed && master_iozone)) + stop_monitor("Fread"); + /**********************************************************/ + /*************** End of fread throughput ******************/ + 
/**********************************************************/ + sync(); + sleep(2); + if(restf) + sleep((int)rest_val); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); + cleanup_comm(); + } +next10: + sleep(2); /* You need this. If you stop and restart the + master_listen it will fail on Linux */ + if (!no_unlink) { + /**********************************************************/ + /* Cleanup all of the temporary files */ + /* This is not really a test. It behaves like a test so */ + /* it can unlink all of the same files that the other */ + /* tests left hanging around. */ + /**********************************************************/ + /* Hooks to start the distributed Iozone client/server code */ + if(distributed) + { + use_thread=0; /* Turn of any Posix threads */ + if(master_iozone) + master_listen_socket = start_master_listen(); + else + become_client(); + } + if(!use_thread) + { + for(xx = 0; xx< num_child ; xx++){ + chid=xx; + childids[xx] = start_child_proc(THREAD_CLEANUP_TEST,numrecs64,reclen); + if(childids[xx]==-1){ + printf("\nFork failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)childids[xy],(long long)SIGTERM); + } + exit(28); + } + if(childids[xx] == 0){ +#ifdef _64BIT_ARCH_ + thread_cleanup_test((void *)xx); +#else + thread_cleanup_test((void *)((long)xx)); +#endif + } + } + } +#ifndef NO_THREADS + else + { + for(xx = 0; xx< num_child ; xx++){ /* Create the children */ +#ifdef _64BIT_ARCH_ + childids[xx] = mythread_create( thread_cleanup_test,xx); +#else + childids[xx] = mythread_create( thread_cleanup_test,(void *)(long)xx); +#endif + if(childids[xx]==-1){ + printf("\nThread create failed\n"); + for(xy = 0; xy< xx ; xy++){ + Kill((long long)myid,(long long)SIGTERM); + } + exit(29); + } + } + } +#endif + if((long long)myid == getpid()) + { + if(distributed && master_iozone) + { + start_master_listen_loop((int) num_child); + } + for(i=0;iflag==CHILD_STATE_HOLD) + Poll((long long)1); + } + for(i=0;iflag = 
CHILD_STATE_BEGIN; /* tell children to go */ + if(delay_start!=0) + Poll((long long)delay_start); + if(distributed && master_iozone) + tell_children_begin(i); + } + } + + getout=0; + if((long long)myid == getpid()){ /* Parent only here */ + for( i = 0; i < num_child; i++){ + child_stat=(struct child_stats *)&shmaddr[i]; + if(distributed && master_iozone) + { + printf("\n\tTest cleanup:"); + wait_dist_join(); + break; + } + else + { + if(use_thread) + { + thread_join(childids[i],(void *)&pstatus); + } + else + { + wait(0); + } + } + } + } + + for(xyz=0;xyzflag = CHILD_STATE_HOLD; + } + sync(); + sleep(2); + if(distributed && master_iozone) + { + stop_master_listen(master_listen_socket); +#ifdef Windows + /* windows needs time before shutting down sockets */ + sleep(1); +#endif + cleanup_comm(); + } + } + /********************************************************/ + /* End of cleanup */ + /********************************************************/ + sync(); + if(!silent) printf("\n"); + if(!silent) printf("\n"); + return; +} + + +/************************************************************************/ +/* Time measurement routines. */ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +static double +time_so_far(void) +#else +static double +time_so_far() +#endif +{ +#ifdef Windows + LARGE_INTEGER freq,counter; + double wintime,bigcounter; + struct timeval tp; + /* For Windows the time_of_day() is useless. It increments in 55 milli + * second increments. By using the Win32api one can get access to the + * high performance measurement interfaces. With this one can get back + * into the 8 to 9 microsecond resolution. 
+ */ + if(pit_hostname[0]){ + if (pit_gettimeofday(&tp, (struct timezone *) NULL, pit_hostname, + pit_service) == -1) + perror("pit_gettimeofday"); + return ((double) (tp.tv_sec)) + (((double) tp.tv_usec) * 0.000001 ); + } + else + { + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&counter); + bigcounter=(double)counter.HighPart *(double)0xffffffff + + (double)counter.LowPart; + wintime = (double)(bigcounter/(double)freq.LowPart); + return((double)wintime); + } +#else +#if defined (OSFV4) || defined(OSFV3) || defined(OSFV5) + struct timespec gp; + + if (getclock(TIMEOFDAY, (struct timespec *) &gp) == -1) + perror("getclock"); + return (( (double) (gp.tv_sec)) + + ( ((float)(gp.tv_nsec)) * 0.000000001 )); +#else + struct timeval tp; + + if(pit_hostname[0]){ + if (pit_gettimeofday(&tp, (struct timezone *) NULL, pit_hostname, pit_service) == -1) + perror("pit_gettimeofday"); + return ((double) (tp.tv_sec)) + (((double) tp.tv_usec) * 0.000001 ); + } + else + { + if (gettimeofday(&tp, (struct timezone *) NULL) == -1) + perror("gettimeofday"); + return ((double) (tp.tv_sec)) + (((double) tp.tv_usec) * 0.000001 ); + } +#endif +#endif +} + +/************************************************************************/ +/* FETCHIT () */ +/* */ +/* Routine to make the on chip data cache hot for this buffer. The */ +/* on chip cache may have been blown by other code in the application */ +/* or in the OS. Remember, on some machines, the data cache is direct */ +/* mapped and virtual indexed. 
*/ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +void fetchit(char *buffer,long long length) +#else +void fetchit(buffer,length) +char *buffer; +long long length; +#endif +{ + char *where; + volatile long long x[4]; + long long i; + where=(char *)buffer; + for(i=0;i<(length/cache_line_size);i++) + { + x[(i & 3)]=*(where); + where+=cache_line_size; + + } +} + +/************************************************************************/ +/* Verify that the buffer contains expected pattern */ +/************************************************************************/ +/* sverify == 0 means full check of pattern for every byte. */ +/* severify == 1 means partial check of pattern for each page. */ +/* sverify == 2 means no check, but partial touch for each page. */ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +long long +verify_buffer(volatile char *buffer,long long length, off64_t recnum, long long recsize,unsigned long long patt, + char sverify) +#else +long long +verify_buffer(buffer,length, recnum, recsize,patt,sverify) +char *buffer; +long long length; +off64_t recnum; +long long recsize; +unsigned long long patt; +char sverify; +#endif +{ + volatile unsigned long long *where; + volatile unsigned long long dummy; + long *de_ibuf, *de_obuf; + long long j,k; + off64_t file_position=0; + off64_t i; + char *where2; + char *pattern_ptr; + long long mpattern,xx2; + unsigned int seed; + unsigned long x; + unsigned long long value,value1; + unsigned long long a= 0x01020304; + unsigned long long b = 0x05060708; + unsigned long long c= 0x01010101; + unsigned long long d = 0x01010101; + unsigned long long pattern_buf; + int lite = 1; /* Only validate 1 long when running + de-deup validation */ + + value = (a<<32) | b; + value1 = (c<<32) | d; + + /* printf("Verify Sverify %d verify %d diag_v %d\n",sverify,verify,diag_v); */ + x=0; + xx2=chid; + if(share_file) + xx2=(long 
long)0; + mpattern=patt; + pattern_buf=patt; + where=(unsigned long long *)buffer; + if(sverify == 2) + { + for(i=0;i<(length);i+=page_size) + { + dummy = *where; + where+=(page_size/sizeof(long long)); + } + return(0); + } + if(dedup) + { + gen_new_buf((char *)dedup_ibuf,(char *)dedup_temp, (long)recnum, (int)length,(int)dedup, (int) dedup_interior, dedup_compress, 0); + de_ibuf = (long *)buffer; + de_obuf = (long *)dedup_temp; + if(lite) /* short touch to reduce intrusion */ + length = (long) sizeof(long); + for(i=0;i>%c<< Expecting >>%c<<\n", *where2,*pattern_ptr); + printf("Found pattern: Hex >>%x<< Expecting >>%x<<\n", *where2,*pattern_ptr); + return(1); + } + where++; + if(diag_v) + mpattern=mpattern+value1; + } + } + } + return(0); +} +/************************************************************************/ +/* Fill the buffer */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void +fill_buffer(char *buffer,long long length,long long pattern,char sverify,long long recnum) +#else +void +fill_buffer(buffer,length,pattern,sverify,recnum) +char *buffer; +long long length; +long long pattern; +long long recnum; +char sverify; +#endif +{ + unsigned long long *where; + long long i,j,xx2; + long long mpattern; + unsigned int seed; + unsigned long x; + unsigned long long value,value1; + unsigned long long a = 0x01020304; + unsigned long long b = 0x05060708; + unsigned long long c = 0x01010101; + unsigned long long d = 0x01010101; + + value = (a << 32) | b; + value1 = (c << 32) | d; + + xx2=chid; + if(share_file) + xx2=(long long)0; + x=0; + mpattern=pattern; + /* printf("Fill: Sverify %d verify %d diag_v %d\n",sverify,verify,diag_v);*/ + if(dedup) + { + gen_new_buf((char *)dedup_ibuf,(char *)buffer, (long)recnum, (int)length,(int)dedup, (int) dedup_interior, dedup_compress, 1); + return; + } + if(diag_v) + { + /*if(client_iozone) + base_time=0; + */ + if(no_unlink) + base_time=0; + seed= (unsigned 
int)(base_time+xx2+recnum); + srand(seed); + mpattern=(long long)rand(); + mpattern=(mpattern<<48) | (mpattern<<32) | (mpattern<<16) | mpattern; + mpattern=mpattern+value; + } + where=(unsigned long long *)buffer; + if(sverify == 1) + { + for(i=0;i<(length);i+=page_size) + { + *where = (long long)((pattern<<32) | pattern); + where+=(page_size/sizeof(long long)); + /* printf("Filling page %lld \n",i/page_size);*/ + } + } + else + { + for(i=0;i<(length/cache_line_size);i++) + { + for(j=0;j<(cache_line_size/sizeof(long long));j++) + { + if(diag_v) + { + *where = (long long)(mpattern); + mpattern=mpattern+value1; + } + else + *where = (long long)((pattern<<32) | pattern); + where++; + } + } + } +} + +/************************************************************************/ +/* PURGEIT() */ +/* */ +/* Routine to make the on chip data cache cold for this buffer. */ +/* Remember, on some machines, the data cache is direct mapped and */ +/* virtual indexed. */ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +void +purgeit(char *buffer,long long reclen) +#else +void +purgeit(buffer,reclen) +char *buffer; +long long reclen; +#endif +{ + char *where; + long rsize; + long tsize; + VOLATILE long long x[200]; + long i,cache_lines_per_rec; + long cache_lines_per_cache; + tsize = 200; + cache_lines_per_rec = (long)(reclen/cache_line_size); + cache_lines_per_cache = (long)(cache_size/cache_line_size); + rsize = (long)l_min((long long)cache_lines_per_rec,(long long)cache_lines_per_cache); +#ifdef _64BIT_ARCH_ + where=(char *)pbuffer + ((unsigned long long)buffer & (cache_size-1)); +#else + where=(char *)pbuffer + ((long)buffer & ((long)cache_size-1)); +#endif + for(i=0;i<(rsize);i++) + { + x[i%tsize]=*(where); + where+=cache_line_size; + + } +} + +#ifdef HAVE_ANSIC_C +void +prepage(char *buffer,long long reclen) +#else +void +prepage(buffer, reclen) +char *buffer; +long long reclen; +#endif +{ + char *where; + long long i; + 
where=(char *)buffer; + for(i=0;i<(reclen/cache_line_size);i++) + { + *(where)=PATTERN; + where+=cache_line_size; + } +} + +/************************************************************************/ +/* write_perf_test () */ +/* Write and re-write test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void write_perf_test(off64_t kilo64,long long reclen ,long long *data1,long long *data2) +#else +void write_perf_test(kilo64,reclen ,data1,data2) +off64_t kilo64; +long long reclen; +long long *data1; +long long *data2; +#endif +{ + double starttime1; + double writetime[2]; + double walltime[2], cputime[2]; + double qtime_start,qtime_stop; + double hist_time; + double compute_val = (double)0; +#ifdef unix + double qtime_u_start,qtime_u_stop; + double qtime_s_start,qtime_s_stop; +#endif + long long i,j; + off64_t numrecs64,traj_offset; + off64_t lock_offset=0; + long long Index = 0; + long long file_flags = 0; + long long traj_size; + unsigned long long writerate[2]; + off64_t filebytes64; + int ltest; + char *maddr; + char *wmaddr,*free_addr; + char *pbuff; + char *nbuff; + int fd,wval; +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif + + int test_foo; + +#ifdef unix + qtime_u_start=qtime_u_stop=0; + qtime_s_start=qtime_s_stop=0; +#endif + nbuff=wmaddr=free_addr=0; + traj_offset=0; + test_foo=0; + hist_time=qtime_start=qtime_stop=0; + maddr=0; + pbuff=mainbuffer; + if(w_traj_flag) + { + filebytes64 = w_traj_fsize; + numrecs64=w_traj_ops; + } + else + { + numrecs64 = (kilo64*1024)/reclen; + filebytes64 = numrecs64*reclen; + } + + if(Q_flag && (!wol_opened)) + { + wol_opened++; + wqfd=fopen("wol.dat","a"); + if(wqfd==0) + { + printf("Unable to open wol.dat\n"); + exit(40); + } + fprintf(wqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + rwqfd=fopen("rwol.dat","a"); + if(rwqfd==0) + { + printf("Unable to open rwol.dat\n"); + exit(41); + } + fprintf(rwqfd,"Offset in Kbytes 
Latency in microseconds Transfer size in bytes\n"); + } + fd = 0; + if(oflag) + file_flags = O_RDWR|O_SYNC; + else + file_flags = O_RDWR; +#if defined(O_DSYNC) + if(odsync) + file_flags |= O_DSYNC; +#endif +#if defined(_HPUX_SOURCE) || defined(linux) || defined(__FreeBSD__) || defined(__DragonFly__) + if(read_sync) + file_flags |=O_RSYNC|O_SYNC; +#endif + +#if ! defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + file_flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + file_flags |=O_DIRECTIO; +#endif +#endif + +/* Sanity check */ +/* Some filesystems do not behave correctly and fail + * when this sequence is performned. This is a very + * bad thing. It breaks many applications and lurks + * around quietly. This code should never get + * triggered, but in the case of running iozone on + * an NFS client, the filesystem type on the server + * that is being exported can cause this failure. + * If this failure happens, then the NFS client is + * going to going to have problems, but the acutal + * problem is the filesystem on the NFS server. + * It's not NFS, it's the local filesystem on the + * NFS server that is not correctly permitting + * the sequence to function. + */ +/* _SUA_ Services for Unix Applications, under Windows + does not have a truncate, so this must be skipped */ +#if !defined(_SUA_) + if((fd = I_OPEN(filename, (int)O_CREAT|O_WRONLY,0))<0) + { + printf("\nCan not open temp file: %s\n", + filename); + perror("open"); + exit(44); + } + if(!notruncate) + { + if(check_filename(filename)) + { + wval=ftruncate(fd,0); + if(wval < 0) + { + printf("\n\nSanity check failed. 
Do not deploy this filesystem in a production environment !\n"); + exit(44); + } + } + close(fd); + + if(check_filename(filename)) + unlink(filename); + } +/* Sanity check */ + +#endif + if(noretest) + ltest=1; + else + ltest=2; + + for( j=0; j (MAXBUFFERSIZE-reclen)) + Index=0; + pbuff = mbuffer + Index; + if(verify || dedup || dedup_interior) + fill_buffer(pbuff,reclen,(long long)pattern,sverify,(long long)0); + } + if(async_flag && no_copy_flag) + { + free_addr=nbuff=(char *)malloc((size_t)reclen+page_size); + nbuff=(char *)(((long)nbuff+(long)page_size) & (long)~(page_size-1)); + if(verify || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,i); + if(purge) + purgeit(nbuff,reclen); + } + if(purge) + purgeit(pbuff,reclen); + if(Q_flag || hist_summary) + { + qtime_start=time_so_far(); + } + if(mmapflag) + { + wmaddr = &maddr[i*reclen]; + fill_area((long long*)pbuff,(long long*)wmaddr,(long long)reclen); + if(!mmapnsflag) + { + if(mmapasflag) + msync(wmaddr,(size_t)reclen,MS_ASYNC); + if(mmapssflag) + msync(wmaddr,(size_t)reclen,MS_SYNC); + } + } + else + { + if(async_flag) + { + if(no_copy_flag) + async_write_no_copy(gc, (long long)fd, nbuff, reclen, (i*reclen), depth,free_addr); + else + async_write(gc, (long long)fd, pbuff, reclen, (i*reclen), depth); + } + else + { +#if defined(Windows) + if(unbuffered) + { + WriteFile(hand, pbuff, reclen,(LPDWORD)&wval, + 0); + } + else + { +#endif + wval=write(fd, pbuff, (size_t ) reclen); + if(wval != reclen) + { +#ifdef NO_PRINT_LLD + printf("\nError writing block %ld, fd= %d\n", i, + fd); +#else + printf("\nError writing block %lld, fd= %d\n", i, + fd); +#endif + if(wval == -1) + perror("write"); + signal_handler(); + } +#if defined(Windows) + } +#endif + } + } + if(hist_summary) + { + qtime_stop=time_so_far(); + hist_time =(qtime_stop-qtime_start-time_res); + hist_insert(hist_time); + } + if(Q_flag) + { + qtime_stop=time_so_far(); + if(j==0) +#ifdef NO_PRINT_LLD + fprintf(wqfd,"%10.1ld %10.0f 
%10.1ld\n",(traj_offset)/1024,((qtime_stop-qtime_start-time_res))*1000000,reclen); + else + fprintf(rwqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((qtime_stop-qtime_start-time_res))*1000000,reclen); +#else + fprintf(wqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((qtime_stop-qtime_start-time_res))*1000000,reclen); + else + fprintf(rwqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((qtime_stop-qtime_start-time_res))*1000000,reclen); +#endif + } + w_traj_ops_completed++; + w_traj_bytes_completed+=reclen; + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)0, + lock_offset, reclen); + } + } +#ifdef unix + if(Q_flag) + { + qtime_u_stop=utime_so_far(); + qtime_s_stop=stime_so_far(); + if(j==0) + fprintf(wqfd,"\nSystem time %10.3f User time %10.3f Real %10.3f (seconds)\n", + (qtime_s_stop-qtime_s_start)/sc_clk_tck, + (qtime_u_stop-qtime_u_start)/sc_clk_tck, + time_so_far()-starttime1); + else + fprintf(rwqfd,"\nSystem time %10.3f User time %10.3f Real %10.3f (seconds)\n", + (qtime_s_stop-qtime_s_start)/sc_clk_tck, + (qtime_u_stop-qtime_u_start)/sc_clk_tck, + time_so_far()-starttime1); + } +#endif + +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag){ + msync(maddr,(size_t)filebytes64,MS_SYNC); + } + else + { + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + } + } + if(file_lock) + if(mylockf((int) fd,(int)0,(int)0)) + printf("Unlock failed %d\n",errno); + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + wval=close(fd); + if(wval==-1){ + perror("close"); + signal_handler(); + } + } + writetime[j] = ((time_so_far() - starttime1)-time_res) + -compute_val; + if(writetime[j] < (double).000001) + { + writetime[j]=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(!include_close) + { + if(mmapflag) + 
msync(maddr,(size_t)filebytes64,MS_SYNC);/* Clean up before read starts */ + else + { + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + } + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + wval=close(fd); + if(wval==-1){ + perror("close"); + signal_handler(); + } + } + if(cpuutilflag) + { + cputime[j] = cputime_so_far() - cputime[j]; + if (cputime[j] < cputime_res) + cputime[j] = 0.0; + walltime[j] = time_so_far() - walltime[j]; + if (walltime[j] < cputime[j]) + walltime[j] = cputime[j]; + } + if(restf) + sleep((int)rest_val); + } + if(OPS_flag || MS_flag){ + filebytes64=w_traj_ops_completed; + /*filebytes64=filebytes64/reclen;*/ + }else + filebytes64=w_traj_bytes_completed; + + for(j=0;j>= 10; + } + data1[0]=writerate[0]; + if(noretest) + { + writerate[1]=(long long) 0; + if(cpuutilflag) + { + walltime[1]=0.0; + cputime[1]=0.0; + } + } + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)writerate[0]); + if(cpuutilflag) + store_times(walltime[1], cputime[1]); + store_value((off64_t)writerate[1]); +#ifdef NO_PRINT_LLD + if(!silent) printf("%8ld",writerate[0]); + if(!silent) printf("%8ld",writerate[1]); + if(!silent) fflush(stdout); +#else + if(!silent) printf("%8lld",writerate[0]); + if(!silent) printf("%8lld",writerate[1]); + if(!silent) fflush(stdout); +#endif +} +/************************************************************************/ +/* fwrite_perf_test () */ +/* fWrite and fre-write test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void fwrite_perf_test(off64_t kilo64,long long reclen ,long long *data1,long long *data2) +#else +void fwrite_perf_test(kilo64,reclen ,data1,data2) +off64_t kilo64; +long long reclen; +long long *data1; +long long *data2; +#endif +{ + 
double starttime1; + double writetime[2]; + double walltime[2], cputime[2]; + double compute_val = (double)0; + long long i,j; + off64_t numrecs64; + long long Index = 0; + unsigned long long writerate[2]; + off64_t filebytes64; + FILE *stream = NULL; + int fd; + int wval; + int ltest; + char *how; + char *stdio_buf; + + if(mmapflag || async_flag) + return; + numrecs64 = (kilo64*1024)/reclen; + filebytes64 = numrecs64*reclen; + stdio_buf=(char *)malloc((size_t)reclen); + if(noretest) + ltest=1; + else + ltest=2; + + for( j=0; j (MAXBUFFERSIZE-reclen)) + Index=0; + buffer = mbuffer + Index; + } + if((verify & diag_v) || dedup || dedup_interior) + fill_buffer(buffer,reclen,(long long)pattern,sverify,i); + if(purge) + purgeit(buffer,reclen); + if(fwrite(buffer, (size_t) reclen, 1, stream) != 1) + { +#ifdef NO_PRINT_LLD + printf("\nError fwriting block %ld, fd= %d\n", i, + fd); +#else + printf("\nError fwriting block %lld, fd= %d\n", i, + fd); +#endif + perror("fwrite"); + signal_handler(); + } + } + + if(include_flush) + { + fflush(stream); + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + } + if(include_close) + { + wval=fclose(stream); + if(wval==-1){ + perror("fclose"); + signal_handler(); + } + } + writetime[j] = ((time_so_far() - starttime1)-time_res) + -compute_val; + if(writetime[j] < (double).000001) + { + writetime[j]= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(!include_close) + { + wval=fflush(stream); + if(wval==-1){ + perror("fflush"); + signal_handler(); + } + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + wval=fclose(stream); + if(wval==-1){ + perror("fclose"); + signal_handler(); + } + } + + if(cpuutilflag) + { + cputime[j] = cputime_so_far() - cputime[j]; + if (cputime[j] < cputime_res) + cputime[j] = 0.0; + walltime[j] = time_so_far() - walltime[j]; + if (walltime[j] < cputime[j]) + walltime[j] = cputime[j]; + } + if(restf) + sleep((int)(int)rest_val); + } + 
free(stdio_buf); + if(OPS_flag || MS_flag){ + filebytes64=filebytes64/reclen; + } + for(j=0;j>= 10; + } + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(noretest) + { + writerate[1]=(long long)0; + if(cpuutilflag) + { + walltime[1]=0.0; + cputime[1]=0.0; + } + } + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)writerate[0]); + if(cpuutilflag) + store_times(walltime[1], cputime[1]); + store_value((off64_t)writerate[1]); + data1[0]=writerate[0]; +#ifdef NO_PRINT_LLD + if(!silent) printf("%9ld",writerate[0]); + if(!silent) printf("%9ld",writerate[1]); + if(!silent) fflush(stdout); +#else + if(!silent) printf("%9lld",writerate[0]); + if(!silent) printf("%9lld",writerate[1]); + if(!silent) fflush(stdout); +#endif +} + +/************************************************************************/ +/* fread_perf_test */ +/* fRead and fre-read test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void fread_perf_test(off64_t kilo64,long long reclen,long long *data1,long long *data2) +#else +void fread_perf_test(kilo64,reclen,data1,data2) +off64_t kilo64; +long long reclen; +long long *data1,*data2; +#endif +{ + double starttime2; + double readtime[2]; + double walltime[2], cputime[2]; + double compute_val = (double)0; + long long j; + off64_t i,numrecs64; + long long Index = 0; + unsigned long long readrate[2]; + off64_t filebytes64; + FILE *stream = 0; + char *stdio_buf; + int fd,ltest; + + if(mmapflag || async_flag) + return; + numrecs64 = (kilo64*1024)/reclen; + filebytes64 = numrecs64*reclen; + stdio_buf=(char *)malloc((size_t)reclen); + + if(noretest) + ltest=1; + else + ltest=2; + + for( j=0; j (MAXBUFFERSIZE-reclen)) + Index=0; + buffer = mbuffer + Index; + } + if(purge) + purgeit(buffer,reclen); + if(fread(buffer, (size_t) reclen,1, stream) != 1) + { +#ifdef _64BIT_ARCH_ +#ifdef NO_PRINT_LLD + printf("\nError freading block %lu %lx\n", i, + 
(unsigned long long)buffer); +#else + printf("\nError freading block %llu %llx\n", i, + (unsigned long long)buffer); +#endif +#else +#ifdef NO_PRINT_LLD + printf("\nError freading block %lu %lx\n", i, + (long)buffer); +#else + printf("\nError freading block %llu %lx\n", i, + (long)buffer); +#endif +#endif + perror("read"); + exit(54); + } + if(verify){ + if(verify_buffer(buffer,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + exit(55); + } + } + } + if(include_flush) + fflush(stream); + if(include_close) + { + fclose(stream); + } + readtime[j] = ((time_so_far() - starttime2)-time_res) + -compute_val; + if(readtime[j] < (double).000001) + { + readtime[j]= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(!include_close) + { + fflush(stream); + fclose(stream); + } + stream = NULL; + if(cpuutilflag) + { + cputime[j] = cputime_so_far() - cputime[j]; + if (cputime[j] < cputime_res) + cputime[j] = 0.0; + walltime[j] = time_so_far() - walltime[j]; + if (walltime[j] < cputime[j]) + walltime[j] = cputime[j]; + } + if(restf) + sleep((int)rest_val); + } + free(stdio_buf); + if(OPS_flag || MS_flag){ + filebytes64=filebytes64/reclen; + } + for(j=0;j>= 10; + } + data1[0]=readrate[0]; + data2[0]=1; + if(noretest) + { + readrate[1]=(long long)0; + if(cpuutilflag) + { + walltime[1]=0.0; + cputime[1]=0.0; + } + } + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)readrate[0]); + if(cpuutilflag) + store_times(walltime[1], cputime[1]); + store_value((off64_t)readrate[1]); +#ifdef NO_PRINT_LLD + if(!silent) printf("%8ld",readrate[0]); + if(!silent) printf("%9ld",readrate[1]); + if(!silent) fflush(stdout); +#else + if(!silent) printf("%8lld",readrate[0]); + if(!silent) printf("%9lld",readrate[1]); + if(!silent) fflush(stdout); +#endif +} + +/************************************************************************/ +/* read_perf_test */ +/* 
Read and re-fread test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void +read_perf_test(off64_t kilo64,long long reclen,long long *data1,long long *data2) +#else +void +read_perf_test(kilo64,reclen,data1,data2) +off64_t kilo64; +long long reclen; +long long *data1,*data2; +#endif +{ + double starttime2; + double compute_val = (double)0; + double readtime[2]; + double walltime[2], cputime[2]; +#ifdef unix + double qtime_u_start,qtime_u_stop; + double qtime_s_start,qtime_s_stop; +#endif + long long j; + long long traj_size; + off64_t i,numrecs64,traj_offset; + off64_t lock_offset=0; + long long Index = 0; + unsigned long long readrate[2]; + off64_t filebytes64; + volatile char *buffer1; + char *nbuff; + char *maddr; + char *wmaddr; + int fd,open_flags; + int test_foo,ltest; + long wval; + double qtime_start,qtime_stop; + double hist_time; +#ifdef ASYNC_IO + struct cache *gc=0; + +#else + long long *gc=0; +#endif +#ifdef unix + qtime_u_start=qtime_u_stop=0; + qtime_s_start=qtime_s_stop=0; +#endif + hist_time=qtime_start=qtime_stop=0; + maddr=0; + traj_offset=0; + test_foo=0; + numrecs64 = (kilo64*1024)/reclen; + + open_flags = O_RDONLY; +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + open_flags |=O_RSYNC|O_SYNC; +#endif +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + open_flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + open_flags |=O_DIRECTIO; +#endif +#endif + if(r_traj_flag) + { + numrecs64=r_traj_ops; + filebytes64 = r_traj_fsize; + } else + filebytes64 = numrecs64*reclen; + fd = 0; + if(Q_flag && (!rol_opened)) + { + rol_opened++; + rqfd=fopen("rol.dat","a"); + if(rqfd==0) + { + printf("Unable to open rol.dat\n"); + exit(56); + } + fprintf(rqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + rrqfd=fopen("rrol.dat","a"); + if(rrqfd==0) + { + printf("Unable to open rrol.dat\n"); + exit(57); + } + fprintf(rrqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + /* + * begin real testing + */ + if(noretest) + ltest=1; + else + ltest=2; + + for( j=0; j (MAXBUFFERSIZE-reclen)) + Index=0; + nbuff = mbuffer + Index; + } + if(purge) + purgeit(nbuff,reclen); + if(Q_flag || hist_summary) + qtime_start=time_so_far(); + if(mmapflag) + { + wmaddr=&maddr[i*reclen]; + fill_area((long long*)wmaddr,(long long*)nbuff,(long long)reclen); + } + else + { + if(async_flag) + { + if(no_copy_flag) + async_read_no_copy(gc, (long long)fd, &buffer1, (i*reclen), reclen, + 1LL,(numrecs64*reclen),depth); + else + async_read(gc, (long long)fd, nbuff, (i*reclen),reclen, + 1LL,(numrecs64*reclen),depth); + } + else + { +#if defined(Windows) + if(unbuffered) + { + ReadFile(hand, nbuff, reclen,(LPDWORD)&wval, + 0); + } + else +#endif + wval=read((int)fd, (void*)nbuff, (size_t) reclen); + if(wval != reclen) + { +#ifdef _64BIT_ARCH_ +#ifdef NO_PRINT_LLD + printf("\nError reading block %ld %lx\n", i, + (unsigned long long)nbuff); +#else + printf("\nError reading block %lld %llx\n", i, + (unsigned long long)nbuff); +#endif +#else +#ifdef NO_PRINT_LLD + printf("\nError reading block %ld %x\n", i, + (long)nbuff); +#else + 
printf("\nError reading block %lld %lx\n", i, + (long)nbuff); +#endif +#endif + perror("read"); + exit(61); + } + } + } + if(verify) { + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + exit(62); + } + } + else + { + if(verify_buffer(nbuff,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + exit(63); + } + } + } + if(async_flag && no_copy_flag) + async_release(gc); + buffer1=0; + if(hist_summary) + { + qtime_stop=time_so_far(); + hist_time =(qtime_stop-qtime_start-time_res); + hist_insert(hist_time); + } + if(Q_flag) + { + qtime_stop=time_so_far(); + if(j==0) +#ifdef NO_PRINT_LLD + fprintf(rqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,(qtime_stop-qtime_start-time_res)*1000000,reclen); + else + fprintf(rrqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,(qtime_stop-qtime_start-time_res)*1000000,reclen); +#else + fprintf(rqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,(qtime_stop-qtime_start-time_res)*1000000,reclen); + else + fprintf(rrqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,(qtime_stop-qtime_start-time_res)*1000000,reclen); +#endif + } + r_traj_ops_completed++; + r_traj_bytes_completed+=reclen; + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)1, + lock_offset, reclen); + } + if(file_lock) + if(mylockf((int) fd, (int) 0, (int)1)) + printf("Read unlock failed. 
%d\n",errno); +#ifdef unix + if(Q_flag) + { + qtime_u_stop=utime_so_far(); + qtime_s_stop=stime_so_far(); + if(j==0) + fprintf(rqfd,"\nSystem time %10.3f User time %10.3f Real %10.3f (seconds)\n", + (qtime_s_stop-qtime_s_start)/sc_clk_tck, + (qtime_u_stop-qtime_u_start)/sc_clk_tck, + time_so_far()-starttime2); + else + fprintf(rrqfd,"\nSystem time %10.3f User time %10.3f Real %10.3f (seconds)\n", + (qtime_s_stop-qtime_s_start)/sc_clk_tck, + (qtime_u_stop-qtime_u_start)/sc_clk_tck, + time_so_far()-starttime2); + } +#endif +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + close(fd); + } + readtime[j] = ((time_so_far() - starttime2)-time_res)-compute_val; + if(readtime[j] < (double).000001) + { + readtime[j]= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(!include_close) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + close(fd); + } + if(cpuutilflag) + { + cputime[j] = cputime_so_far() - cputime[j]; + if (cputime[j] < cputime_res) + cputime[j] = 0.0; + walltime[j] = time_so_far() - walltime[j]; + if (walltime[j] < cputime[j]) + walltime[j] = cputime[j]; + } + if(restf) + sleep((int)rest_val); + } + if(OPS_flag || MS_flag){ + filebytes64=r_traj_ops_completed; + /*filebytes64=filebytes64/reclen;*/ + } else + filebytes64=r_traj_bytes_completed; + + for(j=0;j>= 10; + + } + data1[0]=readrate[0]; + data2[0]=1; + if(noretest) + { + readrate[1]=(long long)0; + if(cpuutilflag) + { + walltime[1]=0.0; + cputime[1]=0.0; + } + } + /* Must save walltime & cputime 
before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)readrate[0]); + if(cpuutilflag) + store_times(walltime[1], cputime[1]); + store_value((off64_t)readrate[1]); +#ifdef NO_PRINT_LLD + if(!silent) printf("%9ld",readrate[0]); + if(!silent) printf("%9ld",readrate[1]); + if(!silent) fflush(stdout); +#else + if(!silent) printf("%9lld",readrate[0]); + if(!silent) printf("%9lld",readrate[1]); + if(!silent) fflush(stdout); +#endif +} + + +/************************************************************************/ +/* random_perf_test */ +/* Random read and write test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void random_perf_test(off64_t kilo64,long long reclen,long long *data1,long long *data2) +#else +void random_perf_test(kilo64,reclen,data1,data2) +off64_t kilo64; +long long reclen; +long long *data1, *data2; +#endif +{ + double randreadtime[2]; + double starttime2; + double walltime[2], cputime[2]; + double compute_val = (double)0; +#if defined (bsd4_2) || defined(Windows) + long long rand1,rand2,rand3; +#endif + unsigned long long big_rand; + long long j; + off64_t i,numrecs64; + long long Index=0; + int flags; + unsigned long long randreadrate[2]; + off64_t filebytes64; + off64_t lock_offset=0; + volatile char *buffer1; + char *wmaddr,*nbuff; + char *maddr,*free_addr; + int fd,wval; + long long *recnum= 0; +#if defined(VXFS) || defined(solaris) + int test_foo=0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif +#ifdef MERSENNE + unsigned long long init[4]={0x12345ULL, 0x23456ULL, 0x34567ULL, 0x45678ULL}; + unsigned long long length=4; +#endif + + maddr=free_addr=0; + numrecs64 = (kilo64*1024)/reclen; +#ifdef MERSENNE + init_by_array64(init, length); +#else +#ifdef bsd4_2 + srand(0); +#else +#ifdef Windows + srand(0); +#else + srand48(0); +#endif +#endif +#endif + recnum = (long long 
*)malloc(sizeof(*recnum)*numrecs64); + if (recnum){ + /* pre-compute random sequence based on + Fischer-Yates (Knuth) card shuffle */ + for(i = 0; i < numrecs64; i++){ + recnum[i] = i; + } + for(i = 0; i < numrecs64; i++) { + long long tmp; +#ifdef MERSENNE + big_rand=genrand64_int64(); +#else +#ifdef bsd4_2 + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); +#else +#ifdef Windows + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); +#else + big_rand = lrand48(); +#endif +#endif +#endif + big_rand = big_rand%numrecs64; + tmp = recnum[i]; + recnum[i] = recnum[big_rand]; + recnum[big_rand] = tmp; + } + } + else + { + fprintf(stderr,"Random uniqueness fallback.\n"); + } + flags = O_RDWR; +#if ! defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif + fd=0; + if(oflag) + flags |= O_SYNC; +#if defined(O_DSYNC) + if(odsync) + flags |= O_DSYNC; +#endif +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + filebytes64 = numrecs64*reclen; + for( j=0; j<2; j++ ) + { + if(j==0) + flags |=O_CREAT; + if (no_write && (j == 1)) + continue; + if(cpuutilflag) + { + walltime[j] = time_so_far(); + cputime[j] = cputime_so_far(); + } + if(Uflag) /* Unmount and re-mount the mountpoint */ + { + purge_buffer_cache(); + } + if((fd = I_OPEN(filename, ((int)flags),0640))<0){ + printf("\nCan not open temporary file for read/write\n"); + perror("open"); + exit(66); + } +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif + +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + 
printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif + if(mmapflag) + { + maddr=(char *)initfile(fd,filebytes64,0,PROT_READ|PROT_WRITE); + } + nbuff=mainbuffer; + if(fetchon) + fetchit(nbuff,reclen); +#ifdef MERSENNE + init_by_array64(init, length); +#else +#ifdef bsd4_2 + srand(0); +#else +#ifdef Windows + srand(0); +#else + srand48(0); +#endif +#endif +#endif + compute_val=(double)0; + starttime2 = time_so_far(); + if ( j==0 ){ + for(i=0; i (MAXBUFFERSIZE-reclen)) + Index=0; + nbuff = mbuffer + Index; + } + if(purge) + purgeit(nbuff,reclen); + if (recnum) { + offset64 = reclen * (long long)recnum[i]; + } + else + { + +#ifdef MERSENNE + big_rand =genrand64_int64(); + offset64 = reclen * (big_rand%numrecs64); +#else +#ifdef bsd4_2 + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); + offset64 = reclen * (big_rand%numrecs64); +#else +#ifdef Windows + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); + offset64 = reclen * (big_rand%numrecs64); +#else + offset64 = reclen * (lrand48()%numrecs64); +#endif +#endif +#endif + } + + if( !(h_flag || k_flag || mmapflag)) + { + if(I_LSEEK( fd, offset64, SEEK_SET )<0) + { + perror("lseek"); + exit(68); + }; + } + if(rlocking) + { + lock_offset=I_LSEEK(fd,0,SEEK_CUR); + mylockr((int) fd, (int) 1, (int)1, + lock_offset, reclen); + } + if(mmapflag) + { + wmaddr=&maddr[offset64]; + fill_area((long long*)wmaddr,(long long*)nbuff,(long long)reclen); + } + else + { + if(async_flag) + { + if(no_copy_flag) + async_read_no_copy(gc, (long long)fd, &buffer1, offset64,reclen, + 0LL,(numrecs64*reclen),depth); + else + async_read(gc, (long long)fd, nbuff, 
(offset64),reclen, + 0LL,(numrecs64*reclen),0LL); + } + else + { + if(read(fd, (void *)nbuff, (size_t)reclen) != reclen) + { +#ifdef NO_PRINT_LLD + printf("\nError reading block at %ld\n", + offset64); +#else + printf("\nError reading block at %lld\n", + offset64); +#endif + perror("read"); + exit(70); + } + } + } + if(verify){ + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen,(off64_t)offset64/reclen,reclen,(long long)pattern,sverify)){ + exit(71); + } + } + else + { + if(verify_buffer(nbuff,reclen,(off64_t)offset64/reclen,reclen,(long long)pattern,sverify)){ + exit(72); + } + } + } + if(async_flag && no_copy_flag) + async_release(gc); + if(rlocking) + { + lock_offset=I_LSEEK(fd,0,SEEK_CUR); + mylockr((int) fd, (int) 1, (int)1, + lock_offset, reclen); + } + } + } + else + { + if(verify || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,(long long)0); + for(i=0; i (MAXBUFFERSIZE-reclen)) + Index=0; + nbuff = mbuffer + Index; + } + if (recnum) { + offset64 = reclen * (long long)recnum[i]; + } + else + { +#ifdef bsd4_2 + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); + offset64 = reclen * (big_rand%numrecs64); +#else +#ifdef Windows + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); + offset64 = reclen * (big_rand%numrecs64); +#else + offset64 = reclen * (lrand48()%numrecs64); +#endif +#endif + } + if(async_flag && no_copy_flag) + { + free_addr=nbuff=(char *)malloc((size_t)reclen+page_size); + nbuff=(char *)(((long)nbuff+(long)page_size) & (long)~(page_size-1)); + if(verify || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,offset64/reclen); + } + if(purge) + purgeit(nbuff,reclen); + + if((verify & diag_v) || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,offset64/reclen); + + if (!(h_flag || k_flag || 
mmapflag)) + { + I_LSEEK( fd, offset64, SEEK_SET ); + } + if(rlocking) + { + lock_offset=I_LSEEK(fd,0,SEEK_CUR); + mylockr((int) fd, (int) 1, (int)0, + lock_offset, reclen); + } + if(mmapflag) + { + wmaddr=&maddr[offset64]; + fill_area((long long*)nbuff,(long long*)wmaddr,(long long)reclen); + if(!mmapnsflag) + { + if(mmapasflag) + msync(wmaddr,(size_t)reclen,MS_ASYNC); + if(mmapssflag) + msync(wmaddr,(size_t)reclen,MS_SYNC); + } + } + else + { + if(async_flag) + { + if(no_copy_flag) + async_write_no_copy(gc, (long long)fd, nbuff, reclen, offset64, + depth,free_addr); + else + async_write(gc, (long long)fd, nbuff, reclen, offset64, depth); + } + else + { + wval=write(fd, nbuff,(size_t)reclen); + if(wval != reclen) + { +#ifdef NO_PRINT_LLD + printf("\nError writing block at %ld\n", + offset64); +#else + printf("\nError writing block at %lld\n", + offset64); +#endif + if(wval==-1) + perror("write"); + signal_handler(); + } + } + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)0, + lock_offset, reclen); + } + } + } /* end of modifications *kcollins:2-5-96 */ +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC);/* Clean up before read starts running */ + else + { + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + } + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } + wval=close(fd); + if(wval==-1){ + perror("close"); + signal_handler(); + } + } + randreadtime[j] = ((time_so_far() - starttime2)-time_res)- + compute_val; + if(randreadtime[j] < (double).000001) + { + randreadtime[j]=time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)filebytes64,MS_SYNC);/* Clean up before read starts running */ + } + else + { + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + } + 
if(mmapflag) + mmap_end(maddr,(unsigned long long)filebytes64); + wval=close(fd); + if(wval==-1){ + perror("close"); + signal_handler(); + } + } + if(cpuutilflag) + { + cputime[j] = cputime_so_far() - cputime[j]; + if (cputime[j] < cputime_res) + cputime[j] = 0.0; + walltime[j] = time_so_far() - walltime[j]; + if (walltime[j] < cputime[j]) + walltime[j] = cputime[j]; + } + if(restf) + sleep((int)rest_val); + } + if(OPS_flag || MS_flag){ + filebytes64=filebytes64/reclen; + } + for(j=0;j<2;j++) + { + if(no_write && (j==1)) + { + randreadrate[1] = 0.0; + continue; + } + if(MS_flag) + { + randreadrate[j]=1000000.0*(randreadtime[j] / (double)filebytes64); + continue; + } + else + { + randreadrate[j] = + (unsigned long long) ((double) filebytes64 / randreadtime[j]); + } + if(!(OPS_flag || MS_flag)) + randreadrate[j] >>= 10; + } + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)randreadrate[0]); + if(cpuutilflag) + store_times(walltime[1], cputime[1]); + store_value((off64_t)randreadrate[1]); +#ifdef NO_PRINT_LLD + if(!silent) printf("%8ld",randreadrate[0]); + if(!silent) printf("%8ld",randreadrate[1]); + if(!silent) fflush(stdout); +#else + if(!silent) printf("%8lld",randreadrate[0]); + if(!silent) printf("%8lld",randreadrate[1]); + if(!silent) fflush(stdout); +#endif + if(recnum) + free(recnum); +} + +/************************************************************************/ +/* reverse_perf_test */ +/* Reverse read test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void reverse_perf_test(off64_t kilo64,long long reclen,long long *data1,long long *data2) +#else +void reverse_perf_test(kilo64,reclen,data1,data2) +off64_t kilo64; +long long reclen; +long long *data1,*data2; +#endif +{ + double revreadtime[2]; + double starttime2; + double walltime[2], cputime[2]; + double compute_val = (double)0; + long 
long j; + off64_t i,numrecs64; + long long Index = 0; + unsigned long long revreadrate[2]; + off64_t filebytes64; + off64_t lock_offset=0; + int fd,open_flags; + char *maddr,*wmaddr; + volatile char *buffer1; + int ltest; + char *nbuff; +#if defined(VXFS) || defined(solaris) + int test_foo=0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif + + maddr=wmaddr=0; + open_flags=O_RDONLY; +#if ! defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + open_flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + open_flags |=O_DIRECTIO; +#endif +#endif +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + open_flags |=O_RSYNC|O_SYNC; +#endif + numrecs64 = (kilo64*1024)/reclen; + filebytes64 = numrecs64*reclen; + fd = 0; + if(noretest) + ltest=1; + else + ltest=2; + for( j=0; j (MAXBUFFERSIZE-reclen)) + Index=0; + nbuff = mbuffer + Index; + } + + if(purge) + purgeit(nbuff,reclen); + if(mmapflag) + { + wmaddr = &maddr[((numrecs64-1)-i)*reclen]; + fill_area((long long*)wmaddr,(long long*)nbuff,(long long)reclen); + } + else + if(async_flag) + { + if(no_copy_flag) + async_read_no_copy(gc, (long long)fd, &buffer1, ((((numrecs64-1)-i)*reclen)), + reclen, -1LL,(numrecs64*reclen),depth); + else + async_read(gc, (long long)fd, nbuff, (((numrecs64-1)-i)*reclen), + reclen,-1LL,(numrecs64*reclen),depth); + }else + { + if(read((int)fd, (void*)nbuff, (size_t) reclen) != reclen) + { +#ifdef NO_PRINT_LLD + printf("\nError reading block %ld\n", i); +#else + printf("\nError reading block %lld\n", i); +#endif + perror("read"); + exit(79); + } + } + if(verify){ + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen,(off64_t)(numrecs64-1)-i,reclen,(long long)pattern,sverify)){ + exit(80); + } + } + else + { + if(verify_buffer(nbuff,reclen,(off64_t)(numrecs64-1)-i,reclen,(long long)pattern,sverify)){ + exit(81); + } + 
} + } + if(async_flag && no_copy_flag) + async_release(gc); + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)1, + lock_offset, reclen); + } + if (!(h_flag || k_flag || mmapflag)) + { + I_LSEEK( fd, (off64_t)-2*reclen, SEEK_CUR ); + } + } +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } + close(fd); + } + revreadtime[j] = ((time_so_far() - starttime2)-time_res) + -compute_val; + if(revreadtime[j] < (double).000001) + { + revreadtime[j]= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(!include_close) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } + close(fd); + } + if(cpuutilflag) + { + cputime[j] = cputime_so_far() - cputime[j]; + if (cputime[j] < cputime_res) + cputime[j] = 0.0; + walltime[j] = time_so_far() - walltime[j]; + if (walltime[j] < cputime[j]) + walltime[j] = cputime[j]; + } + if(restf) + sleep((int)rest_val); + } + if(OPS_flag || MS_flag){ + filebytes64=filebytes64/reclen; + } + for(j=0;j>= 10; + } + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)revreadrate[0]); +#ifdef NO_PRINT_LLD + if(!silent) printf("%8ld",revreadrate[0]); +#else + if(!silent) printf("%8lld",revreadrate[0]); +#endif + if(!silent) fflush(stdout); +} + +/************************************************************************/ +/* rewriterec_perf_test */ +/* Re-write the same record */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void rewriterec_perf_test(off64_t kilo64 ,long long reclen,long long *data1,long long *data2) +#else +void 
rewriterec_perf_test(kilo64 ,reclen,data1,data2) +off64_t kilo64; +long long reclen; +long long *data1,*data2; +#endif +{ + double writeintime; + double starttime1; + double walltime, cputime; + double compute_val = (double)0; + long long i; + off64_t numrecs64; + long long flags; + long long Index=0; + unsigned long long writeinrate; + off64_t filebytes64; + off64_t lock_offset=0; + int fd,wval; + char *maddr; + char *wmaddr,*free_addr,*nbuff; +#if defined(VXFS) || defined(solaris) + int test_foo=0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif + + walltime=cputime=0; + maddr=wmaddr=free_addr=nbuff=0; + numrecs64 = (kilo64*1024)/reclen; + filebytes64 = numrecs64*reclen; +/* flags = O_RDWR|O_CREAT|O_TRUNC;*/ + flags = O_RDWR|O_CREAT; +#if ! defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif + if(oflag) + flags |= O_SYNC; +#if defined(O_DSYNC) + if(odsync) + flags |= O_DSYNC; +#endif +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif +/* + if (!no_unlink) + { + if(check_filename(filename)) + unlink(filename); + } +*/ + if(Uflag) /* Unmount and re-mount the mountpoint */ + { + purge_buffer_cache(); + } + if((fd = I_OPEN(filename, (int)flags,0640))<0) + { + printf("\nCan not open temporary file %s for write.\n",filename); + perror("open"); + exit(84); + } +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + 
} +#endif + if(mmapflag) + { + maddr=(char *)initfile(fd,filebytes64,1,PROT_READ|PROT_WRITE); + } +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + nbuff=mainbuffer; + mbuffer=mainbuffer; + if(fetchon) + fetchit(nbuff,reclen); + /* + wval=write(fd, nbuff, (size_t) reclen); + if(wval != reclen) + { +#ifdef NO_PRINT_LLD + printf("\nError writing block %ld, fd= %d\n", 0, fd); +#else + printf("\nError writing block %lld, fd= %d\n", 0, fd); +#endif + if(wval==-1) + perror("write"); + signal_handler(); + } + */ + if(verify || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,(long long)0); + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = time_so_far(); + cputime = cputime_so_far(); + } + for(i=0; i (MAXBUFFERSIZE-reclen)) + Index=0; + nbuff = mbuffer + Index; + } + if(async_flag && no_copy_flag) + { + free_addr=nbuff=(char *)malloc((size_t)reclen+page_size); + nbuff=(char *)(((long)nbuff+(long)page_size) & (long)~(page_size-1)); + if(verify || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,(long long)0); + } + if((verify & diag_v) || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,(long long)0); + if(purge) + purgeit(nbuff,reclen); + if(mmapflag) + { + wmaddr = &maddr[i*reclen]; + fill_area((long long*)nbuff,(long long*)wmaddr,(long long)reclen); + if(!mmapnsflag) + { + if(mmapasflag) + msync(wmaddr,(size_t)reclen,MS_ASYNC); + if(mmapssflag) + msync(wmaddr,(size_t)reclen,MS_SYNC); + } + } + else + { + if(async_flag) + { + if(no_copy_flag) + async_write_no_copy(gc, (long long)fd, nbuff, reclen, (i*reclen), depth,free_addr); + else + async_write(gc, (long long)fd, nbuff, reclen, (i*reclen), depth); + } + else + { + wval=write(fd, nbuff, (size_t) reclen); + if(wval != reclen) + { +#ifdef NO_PRINT_LLD + printf("\nError writing block %ld, fd= %d\n", i, fd); +#else + 
printf("\nError writing block %lld, fd= %d\n", i, fd); +#endif + if(wval==-1) + perror("write"); + signal_handler(); + } + } + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)0, + lock_offset, reclen); + } + if (!(h_flag || k_flag || mmapflag)) + { + I_LSEEK(fd, (off64_t)0,SEEK_SET); + } + } + +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC);/* Clean up before read starts running */ + else + { + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + } + } + if(include_close) + { + if(mmapflag) + mmap_end(maddr,(unsigned long long)filebytes64); + wval=close(fd); + if(wval==-1){ + perror("close"); + signal_handler(); + } + } + writeintime = ((time_so_far() - starttime1)-time_res)- + compute_val; + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + walltime = time_so_far() - walltime; + if (walltime < cputime) + walltime = cputime; + } + if(writeintime < (double).000001) + { + writeintime= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(!include_close) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC);/* Clean up before read starts running */ + else + { + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + } + if(mmapflag) + mmap_end(maddr,(unsigned long long)filebytes64); + wval=close(fd); + if(wval==-1){ + perror("close"); + signal_handler(); + } + + } + + if(OPS_flag || MS_flag){ + filebytes64=filebytes64/reclen; + } + if(MS_flag) + { + writeinrate=1000000.0*(writeintime / (double)filebytes64); + } + else + { + writeinrate = (unsigned long long) ((double) filebytes64 / writeintime); + } + if(!(OPS_flag || MS_flag)) + writeinrate >>= 10; + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime, cputime); + store_value((off64_t)writeinrate); 
+#ifdef NO_PRINT_LLD + if(!silent) printf(" %8ld",writeinrate); +#else + if(!silent) printf(" %8lld",writeinrate); +#endif + if(!silent) fflush(stdout); + if(restf) + sleep((int)rest_val); +} + +/************************************************************************/ +/* read_stride_perf_test */ +/* Read with a constant stride test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void read_stride_perf_test(off64_t kilos64,long long reclen,long long *data1,long long *data2) +#else +void read_stride_perf_test(kilos64,reclen,data1,data2) +off64_t kilos64; +long long reclen; +long long *data1, *data2; +#endif +{ + double strideintime; + double starttime1; + double compute_val = (double)0; + double walltime, cputime; + off64_t numrecs64,current_position; + long long Index = 0; + off64_t i,savepos64 = 0; + unsigned long long strideinrate; + off64_t filebytes64; + off64_t lock_offset=0; + long long uu; + off64_t stripewrap=0; + int fd,open_flags; + volatile char *buffer1; + char *nbuff; + char *maddr; + char *wmaddr; +#if defined(VXFS) || defined(solaris) + int test_foo=0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif + + walltime=cputime=0; + nbuff=maddr=wmaddr=0; + open_flags=O_RDONLY; +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + open_flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + open_flags |=O_DIRECTIO; +#endif +#endif +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + open_flags |=O_RSYNC|O_SYNC; +#endif + next64 = (off64_t)0; + numrecs64 = (kilos64*1024)/reclen; + filebytes64 = numrecs64*reclen; + if(Uflag) /* Unmount and re-mount the mountpoint */ + { + purge_buffer_cache(); + } + if((fd = I_OPEN(filename, (int)open_flags, 0640))<0) + { + printf("\nCan not open temporary file for read\n"); + perror("open"); + exit(86); + } +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif + +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif + if(mmapflag) + { + maddr=(char *)initfile(fd,filebytes64,0,PROT_READ); + } + fsync(fd); + current_position=0; + nbuff=mainbuffer; + mbuffer=mainbuffer; + if(fetchon) + fetchit(nbuff,reclen); + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = time_so_far(); + cputime = cputime_so_far(); + } + for(i=0; i (MAXBUFFERSIZE-reclen)) + Index=0; + nbuff = mbuffer + Index; + } + if(purge) + purgeit(nbuff,reclen); + if(verify) + { + savepos64=current_position/reclen; + } + if(mmapflag) + { + wmaddr = &maddr[current_position]; + fill_area((long long*)wmaddr,(long long*)nbuff,(long long)reclen); + } + else + { + if(async_flag) + { + if(no_copy_flag) + async_read_no_copy(gc, (long long)fd, &buffer1, current_position, + reclen, stride,(numrecs64*reclen),depth); + 
else + async_read(gc, (long long)fd, nbuff, current_position, reclen, + stride,(numrecs64*reclen),depth); + } + else + { + if((uu=read((int)fd, (void*)nbuff, (size_t) reclen)) != reclen) + { +#ifdef NO_PRINT_LLD + printf("\nError reading block %ld, fd= %d Filename %s Read returned %ld\n", i, fd,filename,uu); + printf("\nSeeked to %ld Reclen = %ld\n", savepos64,reclen); +#else + printf("\nError reading block %lld, fd= %d Filename %s Read returned %lld\n", i, fd,filename,uu); + printf("\nSeeked to %lld Reclen = %lld\n", savepos64,reclen); +#endif + perror("read"); + exit(88); + } + } + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)1, + lock_offset, reclen); + } + current_position+=reclen; + if(verify){ + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen, (off64_t)savepos64 ,reclen,(long long)pattern,sverify)){ + exit(89); + } + } + else + { + if(verify_buffer(nbuff,reclen, (off64_t)savepos64 ,reclen,(long long)pattern,sverify)){ + exit(90); + } + } + } + if(async_flag && no_copy_flag) + async_release(gc); + + /* This is a bit tricky. The goal is to read with a stride through + the file. The problem is that you need to touch all of the file + blocks. So.. the first pass through you read with a constant stride. + When you hit eof then add 1 to the beginning offset of the next + time through the file. The rub here is that eventually adding + 1 will cause the initial start location plus the STRIDE to be + beyond eof. So... when this happens the initial offset for the + next pass needs to be set back to 0. 
+ */ + if(current_position + (stride * reclen) >= (numrecs64 * reclen)-reclen) + { + current_position=0; + + stripewrap++; + + if(numrecs64 <= stride) + { + current_position=0; + } + else + { + current_position = (off64_t)((stripewrap)%numrecs64)*reclen; + } + + if (!(h_flag || k_flag || mmapflag)) + { + if(I_LSEEK(fd,current_position,SEEK_SET)<0) + { + perror("lseek"); + exit(91); + } + } + } + else + { + current_position+=(stride*reclen)-reclen; + if (!(h_flag || k_flag || mmapflag)) + { + if(I_LSEEK(fd,current_position,SEEK_SET)<0) + { + perror("lseek"); + exit(93); + }; + } + } + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + walltime = time_so_far() - walltime; + if (walltime < cputime) + walltime = cputime; + } + +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } + close(fd); + } + strideintime = ((time_so_far() - starttime1)-time_res) + -compute_val; + if(strideintime < (double).000001) + { + strideintime= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(!include_close) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } + close(fd); + } + + if(OPS_flag || MS_flag){ + filebytes64=filebytes64/reclen; + } + if(MS_flag) + { + strideinrate=1000000.0*(strideintime / (double)filebytes64); + } + else + { + strideinrate = (unsigned long long) ((double) filebytes64 / strideintime); + } + if(!(OPS_flag || MS_flag)) + strideinrate >>= 10; + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime, cputime); + store_value((off64_t)strideinrate); +#ifdef NO_PRINT_LLD + if(!silent) 
printf(" %8ld",strideinrate); +#else + if(!silent) printf(" %8lld",strideinrate); +#endif + if(!silent) fflush(stdout); + if(restf) + sleep((int)rest_val); +} + +#ifdef HAVE_PREAD +/************************************************************************/ +/* pwrite_perf_test */ +/* pwrite and re-write test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void pwrite_perf_test(off64_t kilos64,long long reclen,long long *data1,long long *data2) +#else +void pwrite_perf_test(kilos64,reclen,data1,data2) +off64_t kilos64; +long long reclen; +long long *data1,*data2; +#endif +{ + double pwritetime[2]; + double starttime1; + double walltime[2], cputime[2]; + double compute_val = (double)0; + long long i,j; + long long Index = 0; + unsigned long long pwriterate[2]; + off64_t filebytes64; + long long flags_here = 0; + int fd,ltest,wval; + off64_t numrecs64,traj_offset; + off64_t lock_offset=0; + long long traj_size; +#if defined(VXFS) || defined(solaris) + int test_foo=0; +#endif + char *nbuff; + + traj_offset=0; + nbuff=mainbuffer; + if(w_traj_flag) + { + filebytes64 = w_traj_fsize; + numrecs64=w_traj_ops; + } + else + { + numrecs64 = (kilos64*1024)/reclen; + filebytes64 = numrecs64*reclen; + } + fd = 0; + if(oflag){ + flags_here = O_SYNC|O_RDWR; + } + else + { + flags_here = O_RDWR; + } +#if defined(O_DSYNC) + if(odsync) + flags_here |= O_DSYNC; +#endif + +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags_here |=O_RSYNC|O_SYNC; +#endif + +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags_here |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags_here |=O_DIRECTIO; +#endif +#endif + if(noretest) + ltest=1; + else + ltest=2; + for( j=0; j (MAXBUFFERSIZE-reclen)) + Index=0; + nbuff = mbuffer + Index; + } + if((verify && diag_v) || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,i); + if(purge) + purgeit(nbuff,reclen); + if(I_PWRITE(fd, nbuff, reclen, traj_offset) != reclen) + { +#ifdef NO_PRINT_LLD + printf("\nError pwriting block %ld, fd= %d\n", i, + fd); +#else + printf("\nError pwriting block %lld, fd= %d\n", i, + fd); +#endif + perror("pwrite"); + signal_handler(); + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)0, + lock_offset, reclen); + } + } + if(include_flush) + { + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + } + if(include_close) + { + wval=close(fd); + if(wval==-1){ + perror("close"); + signal_handler(); + } + } + pwritetime[j] = ((time_so_far() - starttime1)-time_res) + -compute_val; + if(pwritetime[j] < (double).000001) + { + pwritetime[j]= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(!include_close) + { + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + wval=close(fd); + if(wval==-1){ + perror("close"); + signal_handler(); + } + } + + if(cpuutilflag) + { + cputime[j] = cputime_so_far() - cputime[j]; + if (cputime[j] < cputime_res) + cputime[j] = 0.0; + walltime[j] = time_so_far() - walltime[j]; + if (walltime[j] < cputime[j]) + walltime[j] = cputime[j]; + } + if(restf) + sleep((int)rest_val); + } + if(OPS_flag || MS_flag){ + filebytes64=filebytes64/reclen; + } + for(j=0;j>= 10; + } + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(noretest) + { + pwriterate[1]=(long long)0; + 
if(cpuutilflag) + { + walltime[1]=0.0; + cputime[1]=0.0; + } + } + + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)pwriterate[0]); + if(cpuutilflag) + store_times(walltime[1], cputime[1]); + store_value((off64_t)pwriterate[1]); +#ifdef NO_PRINT_LLD + if(!silent) printf("%8ld",pwriterate[0]); + if(!silent) printf("%9ld",pwriterate[1]); + if(!silent) fflush(stdout); +#else + if(!silent) printf("%8lld",pwriterate[0]); + if(!silent) printf("%9lld",pwriterate[1]); + if(!silent) fflush(stdout); +#endif +} + +/************************************************************************/ +/* pread_perf_test */ +/* pread and re-pread test */ +/************************************************************************/ +#ifdef HAVE_PREAD +#ifdef HAVE_ANSIC_C +void pread_perf_test(off64_t kilos64,long long reclen,long long *data1,long long *data2) +#else +void pread_perf_test(kilos64,reclen,data1,data2) +off64_t kilos64; +long long reclen; +long long *data1, *data2; +#endif +{ + double starttime2; + double preadtime[2]; + double walltime[2], cputime[2]; + double compute_val = (double)0; + long long numrecs64,i; + long long j; + long long Index = 0; + unsigned long long preadrate[2]; + off64_t filebytes64; + off64_t lock_offset=0; + int fd,open_flags; + int ltest; + off64_t traj_offset; + long long traj_size; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif + char *nbuff; + + traj_offset=0; + nbuff=mainbuffer; + open_flags=O_RDONLY; +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + open_flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + open_flags |=O_DIRECTIO; +#endif +#endif +#if defined(_HPUX_SOURCE) || defined(linux) || defined(__FreeBSD__) || defined(__DragonFly__) + if(read_sync) + open_flags |=O_RSYNC|O_SYNC; +#endif + if(r_traj_flag) + { + filebytes64 = r_traj_fsize; + numrecs64=r_traj_ops; + } + else + { + numrecs64 = (kilos64*1024)/reclen; + filebytes64 = numrecs64*reclen; + } + + fd = 0; + if(noretest) + ltest=1; + else + ltest=2; + for( j=0; j (MAXBUFFERSIZE-reclen)) + Index=0; + nbuff = mbuffer + Index; + } + + if(purge) + purgeit(nbuff,reclen); + if(I_PREAD(((int)fd), ((void*)nbuff), ((size_t) reclen),traj_offset ) + != reclen) + { +#ifdef NO_PRINT_LLD + printf("\nError reading block %ld %lx\n", i,(unsigned long)nbuff); +#else + printf("\nError reading block %lld %lx\n", i,(unsigned long)nbuff); +#endif + perror("pread"); + exit(103); + } + if(verify){ + if(verify_buffer(nbuff,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + exit(104); + } + } + if(rlocking) + { + lock_offset=I_LSEEK(fd,0,SEEK_CUR); + mylockr((int) fd, (int) 1, (int)1, + lock_offset, reclen); + } + } + if(include_flush) + fsync(fd); + if(include_close) + close(fd); + preadtime[j] = ((time_so_far() - starttime2)-time_res) + -compute_val; + if(preadtime[j] < (double).000001) + { + preadtime[j]= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(!include_close) + { + fsync(fd); + close(fd); + } + + if(cpuutilflag) + { + cputime[j] = cputime_so_far() - cputime[j]; + if (cputime[j] < cputime_res) + cputime[j] = 0.0; + walltime[j] = time_so_far() - walltime[j]; + if (walltime[j] < cputime[j]) + walltime[j] = cputime[j]; + } + if(restf) + sleep((int)rest_val); + } + + filebytes64 = numrecs64*reclen; + if(OPS_flag || MS_flag){ + 
filebytes64=filebytes64/reclen; + } + for(j=0;j>= 10; + } + if(noretest) + { + preadrate[1]=(long long)0; + if(cpuutilflag) + { + walltime[1]=0.0; + cputime[1]=0.0; + } + } + + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)preadrate[0]); + if(cpuutilflag) + store_times(walltime[1], cputime[1]); + store_value((off64_t)preadrate[1]); +#ifdef NO_PRINT_LLD + if(!silent) printf("%8ld",preadrate[0]); + if(!silent) printf("%9ld",preadrate[1]); + if(!silent) fflush(stdout); +#else + if(!silent) printf("%8lld",preadrate[0]); + if(!silent) printf("%9lld",preadrate[1]); + if(!silent) fflush(stdout); +#endif +} +#endif + +#ifdef HAVE_PREADV +/************************************************************************/ +/* pwritev_perf_test */ +/* pwritev and re-pwritev test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void pwritev_perf_test(off64_t kilos64,long long reclen,long long *data1,long long *data2) +#else +void pwritev_perf_test(kilos64,reclen,data1,data2) +off64_t kilos64; +long long reclen; +long long *data1,*data2; +#endif +{ + int wval; + double starttime1; + double pwritevtime[2]; + double walltime[2], cputime[2]; + double compute_val = (double)0; + long long list_off[PVECMAX]; + long long numvecs,j,xx; + unsigned long long pwritevrate[2]; + off64_t filebytes64,i; + off64_t numrecs64; + int fd,ltest; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif + long long flags_here; + char *nbuff; +#ifdef MERSENNE + unsigned long long init[4]={0x12345ULL, 0x23456ULL, 0x34567ULL, 0x45678ULL}, length=4; +#endif + + numrecs64 = (kilos64*1024)/reclen; + filebytes64 = numrecs64*reclen; + nbuff = mainbuffer; + fd = 0; + if(oflag) + flags_here = O_SYNC|O_RDWR; + else + flags_here = O_RDWR; +#if defined(O_DSYNC) + if(odsync) + flags_here |= O_DSYNC; +#endif +#if defined(_HPUX_SOURCE) || 
defined(linux) + if(read_sync) + flags_here |=O_RSYNC|O_SYNC; +#endif + +#if ! defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags_here |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags_here |=O_DIRECTIO; +#endif +#endif + + if(noretest) + ltest=1; + else + ltest=2; + + for( j=0; j>= 10; + } + if(noretest) + { + pwritevrate[1]=(long long)0; + if(cpuutilflag) + { + walltime[1]=0.0; + cputime[1]=0.0; + } + } + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)pwritevrate[0]); + if(cpuutilflag) + store_times(walltime[1], cputime[1]); + store_value((off64_t)pwritevrate[1]); +#ifdef NO_PRINT_LLD + if(!silent) printf("%9ld",pwritevrate[0]); + if(!silent) printf("%10ld",pwritevrate[1]); + if(!silent) fflush(stdout); +#else + if(!silent) printf("%9lld",pwritevrate[0]); + if(!silent) printf("%10lld",pwritevrate[1]); + if(!silent) fflush(stdout); +#endif +} +#endif + + +#ifdef HAVE_PREADV +/**************************************************************************/ +/* create_list() */ +/* Creates a list of PVECMAX entries that are unique (non over lapping ). 
*/ +/* Each of these offsets are then used in a vector (preadv/pwritev) */ +/**************************************************************************/ +#ifdef HAVE_ANSIC_C +void create_list(long long *list_off, long long reclen, off64_t numrecs64) +#else +void create_list(list_off, reclen, numrecs64) +long long *list_off; +long long reclen; +off64_t numrecs64; +#endif +{ + long long found,i,j; + long long numvecs; + unsigned long long big_rand = -1; +#if defined (bsd4_2) || defined(Windows) + long long rand1,rand2,rand3; +#endif + + numvecs = PVECMAX; + if(numrecs64< numvecs) + numvecs = numrecs64; + for(j=0;j>= 10; + } + if(noretest) + { + preadvrate[1]=(long long)0; + if(cpuutilflag) + { + walltime[1]=0.0; + cputime[1]=0.0; + } + } + + /* Must save walltime & cputime before calling store_value() for each/any cell.*/ + if(cpuutilflag) + store_times(walltime[0], cputime[0]); + store_value((off64_t)preadvrate[0]); + if(cpuutilflag) + store_times(walltime[1], cputime[1]); + store_value((off64_t)preadvrate[1]); +#ifdef NO_PRINT_LLD + if(!silent) printf("%10ld",preadvrate[0]); + if(!silent) printf("%9ld",preadvrate[1]); + if(!silent) printf("\n"); + if(!silent) fflush(stdout); +#else + if(!silent) printf("%10lld",preadvrate[0]); + if(!silent) printf("%9lld",preadvrate[1]); + if(!silent) printf("\n"); + if(!silent) fflush(stdout); +#endif +} +#endif + +/************************************************************************/ +/* print_header() */ +/* Prints the header for the output from Iozone. 
*/
+/************************************************************************/
+#endif
+/*
+ * The column layout is chosen from the run mode:
+ *   Eflag      -> CONTROL_STRING2 (all tests, plus the pwrite/pread
+ *                 columns when HAVE_PREAD is defined and the
+ *                 pwritev/preadv columns when HAVE_PREADV is defined)
+ *   RWONLYflag -> CONTROL_STRING4 (write, rewrite, read, reread only)
+ *   otherwise  -> CONTROL_STRING3; the fwrite/frewrite/fread/freread
+ *                 names are left empty when mmapflag or async_flag is set.
+ * Every heading is two rows.  Nothing is printed when "silent" is set.
+ */
+#ifdef HAVE_ANSIC_C
+void print_header(void)
+#else
+void print_header()
+#endif
+{
+	if(Eflag)
+	{
+		if(!silent) printf(CONTROL_STRING2,
+			" ",
+			" ",
+			" ",
+			" ",
+			" ",
+			" ",
+			"random",			/*kcollins:2-5-96*/
+			"random",			/*kcollins:2-5-96*/
+			"bkwd",
+			"record",
+			"stride",
+			" ",
+			" ",
+			" ",
+			" "
+#ifdef HAVE_PREAD
+			," ",
+			" ",
+			" ",
+			" "
+#ifdef HAVE_PREADV
+			," ",
+			" ",
+			" ",
+			" "
+#endif
+#endif
+			);
+		if(!silent) printf(CONTROL_STRING2,
+			"KB",
+			"reclen",
+			"write",
+			"rewrite",
+			"read",
+			"reread",
+			"read",				/*kcollins:2-5-96*/
+			"write",			/*kcollins:2-5-96*/
+			"read",
+			"rewrite",
+			"read",
+			"fwrite",
+			"frewrite",
+			"fread",
+			"freread"
+#ifdef HAVE_PREAD
+			,"pwrite",
+			"repwrite",
+			"pread",
+			"repread"
+#ifdef HAVE_PREADV
+			,"pwritev",
+			"repwritev",
+			"preadv",
+			"repreadv"
+#endif
+#endif
+			);
+	}
+	else if(RWONLYflag)			/*kcollins 8-21-96*/
+	{
+		if(!silent) printf(CONTROL_STRING4,
+			" ",
+			" ",
+			" ",
+			" ",
+			" ",
+			" "
+			);
+		if(!silent) printf(CONTROL_STRING4,
+			"KB",
+			"reclen",
+			"write",
+			"rewrite",
+			"read",
+			"reread"
+			);
+	}
+	else
+	{
+		/* The first heading row does not depend on mmapflag/async_flag. */
+		if(!silent) printf(CONTROL_STRING3,
+			" ",
+			" ",
+			" ",
+			" ",
+			" ",
+			" ",
+			"random",			/*kcollins:2-5-96*/
+			"random",			/*kcollins:2-5-96*/
+			"bkwd",
+			"record",
+			"stride",
+			"",
+			"",
+			"",
+			""
+			);
+		if(!(mmapflag || async_flag))
+		{
+			if(!silent) printf(CONTROL_STRING3,
+				"KB",
+				"reclen",
+				"write",
+				"rewrite",
+				"read",
+				"reread",
+				"read",			/*kcollins:2-5-96*/
+				"write",		/*kcollins:2-5-96*/
+				"read",
+				"rewrite",
+				"read",
+				"fwrite",
+				"frewrite",
+				"fread",
+				"freread"
+				);
+		}
+		else
+		{
+			/* Last four column names are blank in mmap/async mode. */
+			if(!silent) printf(CONTROL_STRING3,
+				"KB",
+				"reclen",
+				"write",
+				"rewrite",
+				"read",
+				"reread",
+				"read",			/*kcollins:2-5-96*/
+				"write",		/*kcollins:2-5-96*/
+				"read",
+				"rewrite",
+				"read",
+				"",
+				"",
+				"",
+				""
+				);
+		}
+	}
+}
+
+/************************************************************************/
+/* store_value()							*/
+/* Stores a value in an in memory array. Used by the report function	*/
+/* to re-organize the output for Excel					*/
+/*									*/
+/* The value goes to report_array[current_x][current_y]; current_x is	*/
+/* then advanced and max_x/max_y are raised to the highest index seen.	*/
+/* Exits with 117 (x) or 118 (y) when an index reaches MAX_X/MAX_Y.	*/
+/************************************************************************/
+#ifdef HAVE_ANSIC_C
+void
+store_value(off64_t value)
+#else
+store_value(value)
+off64_t value;
+#endif
+{
+	/*
+	 * Refuse the store when an index is already out of range.  The
+	 * original only checked after writing, so a current_y that had
+	 * reached MAX_Y was written through before exit(118).
+	 * NOTE(review): assumes report_array is dimensioned
+	 * [MAX_X][MAX_Y], as the checks below imply -- confirm.
+	 */
+	if(current_x >= MAX_X)
+	{
+		printf("\nMAX_X too small\n");
+		exit(117);
+	}
+	if(current_y >= MAX_Y)
+	{
+		printf("\nMAX_Y too small\n");
+		exit(118);
+	}
+
+	report_array[current_x][current_y]=value;
+	current_x++;
+	if(current_x > max_x)
+		max_x=current_x;
+	if(current_y > max_y)
+		max_y=current_y;
+
+	/* Original post-store checks, kept so the exit points are unchanged. */
+	if(max_x >= MAX_X)
+	{
+		printf("\nMAX_X too small\n");
+		exit(117);
+	}
+	if(max_y >= MAX_Y)
+	{
+		printf("\nMAX_Y too small\n");
+		exit(118);
+	}
+}
+
+/************************************************************************/
+/* store_times()							*/
+/* Stores runtime (walltime & cputime) in a memory array.		*/
+/* Used by the report function to re-organize the output for Excel	*/
+/* For now, must be called immediately before calling store_value() for */
+/* each cell.
*/
+/************************************************************************/
+#ifdef HAVE_ANSIC_C
+void
+store_times(double walltime, double cputime)
+#else
+store_times(walltime, cputime)
+double walltime, cputime;
+#endif
+{
+	/* Fills the cell that the following store_value() call will use. */
+	runtimes[current_x][current_y].walltime = walltime;
+	runtimes[current_x][current_y].cputime = cputime;
+	runtimes[current_x][current_y].cpuutil = cpu_util(cputime, walltime);
+}
+
+/************************************************************************/
+/* dump_report()							*/
+/* Dumps the Excel report on standard output.				*/
+/*									*/
+/* "who" selects the first index of report_array to print.  Output is	*/
+/* a heading row of record sizes (divided by 1024, quoted) followed by	*/
+/* one row per distinct file size found in report_array[0][...].	*/
+/* Cells are mirrored to the bif file via do_float() when bif_flag is	*/
+/* set; stdout output is skipped when "silent" is set.			*/
+/************************************************************************/
+#ifdef HAVE_ANSIC_C
+void dump_report(long long who)
+#else
+dump_report(who)
+long long who;
+#endif
+{
+	off64_t recsz;
+	off64_t fsize;
+	long long row;
+
+	if(bif_flag)
+		bif_column++;
+	if(!silent)
+		printf(" ");
+
+	/*
+	 * Need to reconstruct the record size list
+	 * as the crossover in -a changed the list.
+	 */
+	del_record_sizes();
+	init_record_sizes(orig_min_rec_size, orig_max_rec_size);
+
+	/* Heading row: walk the list until it ends (0) or passes the max. */
+	recsz = get_next_record_size(0);
+	while(recsz <= orig_max_rec_size && recsz != 0)
+	{
+		if(bif_flag)
+			do_float(bif_fd,(double)(recsz/1024),bif_row,bif_column++);
+#ifdef NO_PRINT_LLD
+		if(!silent)
+			printf(" %c%ld%c",'"',recsz/1024,'"');
+#else
+		if(!silent)
+			printf(" %c%lld%c",'"',recsz/1024,'"');
+#endif
+		recsz = get_next_record_size(recsz);
+	}
+	if(!silent)
+		printf("\n");
+	if(bif_flag)
+	{
+		bif_column=0;
+		bif_row++;
+	}
+
+	/* First data row starts with the first recorded file size. */
+	fsize = report_array[0][0];
+	if(bif_flag)
+		do_float(bif_fd,(double)(fsize),bif_row,bif_column++);
+#ifdef NO_PRINT_LLD
+	if(!silent)
+		printf("%c%ld%c ",'"',fsize,'"');
+#else
+	if(!silent)
+		printf("%c%lld%c ",'"',fsize,'"');
+#endif
+
+	row = 0;
+	while(row <= max_y)
+	{
+		if(report_array[0][row] != fsize)
+		{
+			/* File size changed: begin a new output row. */
+			if(!silent)
+				printf("\n");
+			fsize = report_array[0][row];
+			if(bif_flag)
+			{
+				bif_row++;
+				bif_column=0;
+				do_float(bif_fd,(double)(fsize),bif_row,bif_column++);
+			}
+#ifdef NO_PRINT_LLD
+			if(!silent)
+				printf("%c%ld%c ",'"',fsize,'"');
+#else
+			if(!silent)
+				printf("%c%lld%c ",'"',fsize,'"');
+#endif
+		}
+		if(bif_flag)
+			do_float(bif_fd,(double)(report_array[who][row]),bif_row,bif_column++);
+#ifdef NO_PRINT_LLD
+		if(!silent)
+			printf(" %ld ",report_array[who][row]);
+#else
+		if(!silent)
+			printf(" %lld ",report_array[who][row]);
+#endif
+		row++;
+	}
+	if(bif_flag)
+	{
+		bif_row++;
+		bif_column=0;
+	}
+	if(!silent)
+		printf("\n");
+}
+
+/************************************************************************/
+/* A report is wanted when no include list was given (include_tflag is	*/
+/* clear) or when its bit is set in include_mask.			*/
+/************************************************************************/
+#ifdef HAVE_ANSIC_C
+static int excel_report_wanted(long long mask)
+#else
+static int excel_report_wanted(mask)
+long long mask;
+#endif
+{
+	if(!include_tflag)
+		return 1;
+	return (include_mask & mask) != 0;
+}
+
+/************************************************************************/
+/* Wrapper that dumps each of the collected data sets.			*/
+/* Opens the bif file first when bif_flag is set, prints every wanted	*/
+/* report through dump_report(), then the CPU reports when cpuutilflag	*/
+/* is set, and finally closes the bif file.				*/
+/************************************************************************/
+#ifdef HAVE_ANSIC_C
+void dump_excel(void)
+#else
+void dump_excel()
+#endif
+{
+	if(bif_flag)
+	{
+		bif_fd=create_xls(bif_filename);
+		do_label(bif_fd,command_line,bif_row++,bif_column);
+		do_label(bif_fd," ",bif_row++,bif_column);
+		do_label(bif_fd,"The top row is records sizes, the left column is file sizes",bif_row++,bif_column);
+	}
+	if(!silent)
+		printf("Excel output is below:\n");
+
+	if(excel_report_wanted((long long)WRITER_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd,"Writer Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cWriter report%c\n",'"','"');
+		dump_report(2);
+
+		if(bif_flag)
+			do_label(bif_fd,"Re-writer Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cRe-writer report%c\n",'"','"');
+		dump_report(3);
+	}
+
+	if(excel_report_wanted((long long)READER_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd,"Reader Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cReader report%c\n",'"','"');
+		dump_report(4);
+
+		if(bif_flag)
+			do_label(bif_fd,"Re-reader Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cRe-Reader report%c\n",'"','"');
+		dump_report(5);
+	}
+
+	if(excel_report_wanted((long long)RANDOM_RW_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd,"Random Read Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cRandom read report%c\n",'"','"');
+		dump_report(6);
+
+		if(bif_flag)
+			do_label(bif_fd,"Random Write Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cRandom write report%c\n",'"','"');
+		dump_report(7);
+	}
+
+	if(excel_report_wanted((long long)REVERSE_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd,"Backward Read Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cBackward read report%c\n",'"','"');
+		dump_report(8);
+	}
+
+	if(excel_report_wanted((long long)REWRITE_REC_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd,"Record Rewrite Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cRecord rewrite report%c\n",'"','"');
+		dump_report(9);
+	}
+
+	if(excel_report_wanted((long long)STRIDE_READ_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd,"Stride Read Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cStride read report%c\n",'"','"');
+		dump_report(10);
+	}
+
+	if(excel_report_wanted((long long)FWRITER_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd,"Fwrite Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cFwrite report%c\n",'"','"');
+		dump_report(11);
+
+		if(bif_flag)
+			do_label(bif_fd,"Re-fwrite Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cRe-Fwrite report%c\n",'"','"');
+		dump_report(12);
+	}
+
+	if(excel_report_wanted((long long)FREADER_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd,"Fread Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cFread report%c\n",'"','"');
+		dump_report(13);
+
+		if(bif_flag)
+			do_label(bif_fd,"Re-fread Report",bif_row++,bif_column);
+		if(!silent)
+			printf("\n%cRe-Fread report%c\n",'"','"');
+		dump_report(14);
+	}
+
+#ifdef HAVE_PREAD
+	/* The pread/pwrite family is only reported in extended (Eflag) mode. */
+	if(Eflag)
+	{
+		if(excel_report_wanted((long long)PWRITER_MASK))
+		{
+			if(bif_flag)
+				do_label(bif_fd,"Pwrite Report",bif_row++,bif_column);
+			if(!silent)
+				printf("\n%cPwrite report%c\n",'"','"');
+			dump_report(15);
+
+			if(bif_flag)
+				do_label(bif_fd,"Re-pwrite Report",bif_row++,bif_column);
+			if(!silent)
+				printf("\n%cRe-Pwrite report%c\n",'"','"');
+			dump_report(16);
+		}
+
+		if(excel_report_wanted((long long)PREADER_MASK))
+		{
+			if(bif_flag)
+				do_label(bif_fd,"Pread Report",bif_row++,bif_column);
+			if(!silent)
+				printf("\n%cPread report%c\n",'"','"');
+			dump_report(17);
+
+			if(bif_flag)
+				do_label(bif_fd,"Re-pread Report",bif_row++,bif_column);
+			if(!silent)
+				printf("\n%cRe-Pread report%c\n",'"','"');
+			dump_report(18);
+		}
+
+#ifdef HAVE_PREADV
+		if(excel_report_wanted((long long)PWRITEV_MASK))
+		{
+			if(bif_flag)
+				do_label(bif_fd,"Pwritev Report",bif_row++,bif_column);
+			if(!silent)
+				printf("\n%cPwritev report%c\n",'"','"');
+			dump_report(19);
+
+			if(bif_flag)
+				do_label(bif_fd,"Re-pwritev Report",bif_row++,bif_column);
+			if(!silent)
+				printf("\n%cRe-Pwritev report%c\n",'"','"');
+			dump_report(20);
+		}
+
+		if(excel_report_wanted((long long)PREADV_MASK))
+		{
+			if(bif_flag)
+				do_label(bif_fd,"Preadv Report",bif_row++,bif_column);
+			if(!silent)
+				printf("\n%cPreadv report%c\n",'"','"');
+			dump_report(21);
+
+			if(bif_flag)
+				do_label(bif_fd,"Re-preadv Report",bif_row++,bif_column);
+			if(!silent)
+				printf("\n%cRe-Preadv report%c\n",'"','"');
+			dump_report(22);
+		}
+#endif
+	}
+#endif
+	if(cpuutilflag)
+		dump_cputimes();
+	if(bif_flag)
+		close_xls(bif_fd);
+}
+
+/************************************************************************/
+/* dump_times()								*/
+/* Dumps the Excel CPU times report to stdout and to the bif file.
*/
+/************************************************************************/
+/*
+ * "who" selects the first index of runtimes[][] to print.  Output is a
+ * heading row of record sizes (divided by 1024, quoted) and then one
+ * row per distinct file size found in report_array[0][...]; each cell
+ * is runtimes[who][i].cpuutil printed with %6.2f.  Cells are mirrored
+ * to the bif file via do_float() when bif_flag is set, and stdout
+ * output is skipped when "silent" is set.
+ */
+#ifdef HAVE_ANSIC_C
+void dump_times(long long who)
+#else
+dump_times(who)
+long long who;
+#endif
+{
+	off64_t recsz;
+	off64_t fsize;
+	long long row;
+
+	if(bif_flag)
+		bif_column++;
+	if(!silent)
+		printf(" ");
+
+	/* Heading row: walk the list until it ends (0) or passes the max. */
+	recsz = get_next_record_size(0);
+	while(recsz <= orig_max_rec_size && recsz != 0)
+	{
+		if(bif_flag)
+			do_float(bif_fd, (double)(recsz/1024), bif_row, bif_column++);
+#ifdef NO_PRINT_LLD
+		if(!silent)
+			printf(" %c%ld%c",'"',recsz/1024,'"');
+#else
+		if(!silent)
+			printf(" %c%lld%c",'"',recsz/1024,'"');
+#endif
+		recsz = get_next_record_size(recsz);
+	}
+	if(!silent)
+		printf("\n");
+	if(bif_flag)
+	{
+		bif_column=0;
+		bif_row++;
+	}
+
+	/* First data row starts with the first recorded file size. */
+	fsize = report_array[0][0];
+	if(bif_flag)
+		do_float(bif_fd, (double)(fsize), bif_row, bif_column++);
+#ifdef NO_PRINT_LLD
+	if(!silent)
+		printf("%c%ld%c ",'"',fsize,'"');
+#else
+	if(!silent)
+		printf("%c%lld%c ",'"',fsize,'"');
+#endif
+
+	row = 0;
+	while(row <= max_y)
+	{
+		if(report_array[0][row] != fsize)
+		{
+			/* File size changed: begin a new output row. */
+			if(!silent)
+				printf("\n");
+			fsize = report_array[0][row];
+			if(bif_flag)
+			{
+				bif_row++;
+				bif_column=0;
+				do_float(bif_fd, (double)(fsize), bif_row, bif_column++);
+			}
+#ifdef NO_PRINT_LLD
+			if(!silent)
+				printf("%c%ld%c ",'"',fsize,'"');
+#else
+			if(!silent)
+				printf("%c%lld%c ",'"',fsize,'"');
+#endif
+		}
+		if(bif_flag)
+			do_float(bif_fd, (double)(runtimes[who][row].cpuutil), bif_row, bif_column++);
+		if(!silent)
+			printf(" %6.2f", runtimes[who][row].cpuutil);
+		row++;
+	}
+	if(!silent)
+		printf("\n");
+	if(bif_flag)
+	{
+		bif_row++;
+		bif_column=0;
+	}
+}
+
+/************************************************************************/
+/* Wrapper that dumps each of the collected data sets.			*/
+/* This one dumps only the collected CPU times.
*/
+/************************************************************************/
+/* A CPU report is wanted when no include list was given (include_tflag */
+/* is clear) or when its bit is set in include_mask.			*/
+/************************************************************************/
+#ifdef HAVE_ANSIC_C
+static int cpu_report_wanted(long long mask)
+#else
+static int cpu_report_wanted(mask)
+long long mask;
+#endif
+{
+	if(!include_tflag)
+		return 1;
+	return (include_mask & mask) != 0;
+}
+
+/*
+ * Prints one CPU utilization table per wanted test through dump_times().
+ * The bif row is advanced and the column reset before the first table.
+ */
+#ifdef HAVE_ANSIC_C
+void dump_cputimes(void)
+#else
+void dump_cputimes(void)
+#endif
+{
+	bif_row++;
+	bif_column = 0;
+
+	if(cpu_report_wanted((long long)WRITER_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd, "Writer CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cWriter CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(2);
+
+		if(bif_flag)
+			do_label(bif_fd, "Re-writer CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cRe-writer CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(3);
+	}
+
+	if(cpu_report_wanted((long long)READER_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd, "Reader CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cReader CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(4);
+
+		if(bif_flag)
+			do_label(bif_fd, "Re-reader CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cRe-Reader CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(5);
+	}
+
+	if(cpu_report_wanted((long long)RANDOM_RW_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd, "Random Read CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cRandom read CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(6);
+
+		if(bif_flag)
+			do_label(bif_fd, "Random Write CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cRandom write CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(7);
+	}
+
+	if(cpu_report_wanted((long long)REVERSE_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd, "Backward Read CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cBackward read CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(8);
+	}
+
+	if(cpu_report_wanted((long long)REWRITE_REC_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd, "Record Rewrite CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cRecord rewrite CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(9);
+	}
+
+	if(cpu_report_wanted((long long)STRIDE_READ_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd, "Stride Read CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cStride read CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(10);
+	}
+
+	if(cpu_report_wanted((long long)FWRITER_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd, "Fwrite CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cFwrite CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(11);
+
+		if(bif_flag)
+			do_label(bif_fd, "Re-fwrite CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cRe-Fwrite CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(12);
+	}
+
+	if(cpu_report_wanted((long long)FREADER_MASK))
+	{
+		if(bif_flag)
+			do_label(bif_fd, "Fread CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cFread CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(13);
+
+		if(bif_flag)
+			do_label(bif_fd, "Re-fread CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+		if(!silent)
+			printf("\n%cRe-Fread CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+		dump_times(14);
+	}
+
+#ifdef HAVE_PREAD
+	/* The pread/pwrite family is only reported in extended (Eflag) mode. */
+	if(Eflag)
+	{
+		if(cpu_report_wanted((long long)PWRITER_MASK))
+		{
+			if(bif_flag)
+				do_label(bif_fd, "Pwrite CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+			if(!silent)
+				printf("\n%cPwrite CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+			dump_times(15);
+
+			if(bif_flag)
+				do_label(bif_fd, "Re-pwrite CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+			if(!silent)
+				printf("\n%cRe-Pwrite CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+			dump_times(16);
+		}
+
+		if(cpu_report_wanted((long long)PREADER_MASK))
+		{
+			if(bif_flag)
+				do_label(bif_fd, "Pread CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+			if(!silent)
+				printf("\n%cPread CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+			dump_times(17);
+
+			if(bif_flag)
+				do_label(bif_fd, "Re-pread CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+			if(!silent)
+				printf("\n%cRe-Pread CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+			dump_times(18);
+		}
+
+#ifdef HAVE_PREADV
+		if(cpu_report_wanted((long long)PWRITEV_MASK))
+		{
+			if(bif_flag)
+				do_label(bif_fd, "Pwritev CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+			if(!silent)
+				printf("\n%cPwritev CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+			dump_times(19);
+
+			if(bif_flag)
+				do_label(bif_fd, "Re-pwritev CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+			if(!silent)
+				printf("\n%cRe-Pwritev CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+			dump_times(20);
+		}
+
+		if(cpu_report_wanted((long long)PREADV_MASK))
+		{
+			if(bif_flag)
+				do_label(bif_fd, "Preadv CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+			if(!silent)
+				printf("\n%cPreadv CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+			dump_times(21);
+
+			if(bif_flag)
+				do_label(bif_fd, "Re-preadv CPU utilization report (Zero values should be ignored)", bif_row++, bif_column);
+			if(!silent)
+				printf("\n%cRe-Preadv CPU utilization report (Zero values should be ignored)%c\n",'"','"');
+			dump_times(22);
+		}
+#endif
+	}
+#endif
+}
+
+/************************************************************************/
+/* Internal memory allocation mechanism. Uses shared memory or mmap 	*/
+/*									*/
+/* size        - bytes wanted; at least page_size bytes are allocated.	*/
+/* shared_flag - zero means plain malloc() is always used.		*/
+/*									*/
+/* Plain malloc() is also used for non-distributed runs that are not	*/
+/* in throughput mode (trflag clear) or that use threads (use_thread).	*/
+/* Otherwise the memory comes from a SysV segment (SHARED_MEM) or from	*/
+/* a MAP_SHARED mmap().  The malloc() result is returned unchecked; the	*/
+/* shared paths exit (119-122) on failure.				*/
+/************************************************************************/
+#ifdef HAVE_ANSIC_C
+char *
+alloc_mem(long long size, int shared_flag)
+#else
+char *
+alloc_mem(size,shared_flag)
+long long size;
+int shared_flag;
+#endif
+{
+	long long size1;
+	char *addr,*dumb;
+	int shmid;
+	int tfd;
+#if defined(solaris)
+	char mmapFileName[]="mmap_tmp_XXXXXX";
+#endif
+
+	dumb = (char *)0;
+	tfd=0;
+	size1=l_max(size,page_size);
+
+	/* All cases that do not need memory visible to other processes. */
+	if((!distributed && (!trflag || use_thread)) || !shared_flag)
+	{
+		addr=(char *)malloc((size_t)size1);
+		return(addr);
+	}
+#ifdef SHARED_MEM
+	size1=l_max(size,page_size);
+	/* Round to a page multiple (adds a full page when already aligned). */
+	size1=(size1 +page_size) & ~(page_size-1);
+	shmid=(int)shmget((key_t)(IPC_PRIVATE), (size_t)size1 , (int)(IPC_CREAT|0666));
+	if(shmid < (int)0)
+	{
+		printf("\nUnable to get shared memory segment(shmget)\n");
+#ifdef NO_PRINT_LLD
+		printf("shmid = %d, size = %ld, size1 = %lu, Error %d\n",shmid,size,(size_t)size1,errno);
+#else
+		printf("shmid = %d, size = %lld, size1 = %lu, Error %d\n",shmid,size,(unsigned long)size1,errno);
+#endif
+		exit(119);
+	}
+	/*addr = (char *)shmat(shmid, 0, SHM_W);*/
+	/* Some systems will not take the above but
+	 * will default to read/write if no flags
+	 * are provided. (AIX)
+	 * The POSIX standard states that if SHM_RDONLY
+	 * is not specified then it will be read/write.
+	 */
+	addr = (char *)shmat((int)shmid, 0, 0);
+#ifdef _64BIT_ARCH_
+	if((long long)addr == (long long)-1)
+#else
+	if((long)addr == (long)-1)
+#endif
+	{
+		printf("\nUnable to get shared memory segment\n");
+		printf("..Error %d\n",errno);
+		exit(120);
+	}
+	/* Mark the segment for removal right after attaching. */
+	shmctl(shmid, IPC_RMID, 0);
+	return(addr);
+#else
+
+	size1=l_max(size,page_size);
+	/* Round to a page multiple (adds a full page when already aligned). */
+	size1=(size1 +page_size) & ~(page_size-1);
+#if defined(bsd4_2) && !defined(macosx)
+	if((tfd = creat("mmap.tmp", 0666))<0)
+	{
+		printf("Unable to create tmp file\n");
+		exit(121);
+	}
+	/*
+	 * The length argument was &size1 (a pointer) here; every other
+	 * branch passes the byte count, so pass the byte count too.
+	 */
+	addr=(char *)mmap(0,(size_t)size1,PROT_WRITE|PROT_READ,
+		MAP_ANON|MAP_SHARED, tfd, 0);
+	unlink("mmap.tmp");
+#else
+
+
+#if defined(solaris)
+	tfd=mkstemp(mmapFileName);
+	if(tfd < 0)
+	{
+		printf("Unable to create tmp file\n");
+		exit(121);
+	}
+	/* Fill the temp file with size1 zero bytes so it can be mapped. */
+	/* NOTE(review): malloc() and write() results are not checked. */
+	dumb=(char *)malloc((size_t)size1);
+	bzero(dumb,size1);
+	write(tfd,dumb,size1);
+	free(dumb);
+	addr=(char *)mmap(0,(size_t)size1,PROT_WRITE|PROT_READ,
+		MAP_SHARED, tfd, 0);
+	unlink(mmapFileName);
+#else
+#if defined(SCO) || defined(SCO_Unixware_gcc) || defined(Windows)
+	char mmapFileName[]="mmap_tmp_XXXXXX";
+	tfd=mkstemp(mmapFileName);
+	if(tfd < 0)
+	{
+		printf("Unable to create tmp file\n");
+		exit(121);
+	}
+	/* Fill the temp file with size1 zero bytes so it can be mapped. */
+	/* NOTE(review): malloc() and write() results are not checked. */
+	dumb=(char *)malloc((size_t)size1);
+	bzero(dumb,size1);
+	write(tfd,dumb,size1);
+	free(dumb);
+	addr=(char *)mmap(0,(size_t)size1,PROT_WRITE|PROT_READ,
+		MAP_SHARED, tfd, 0);
+	unlink(mmapFileName);
+#else
+	addr=(char *)mmap(0,(size_t)size1,PROT_WRITE|PROT_READ,
+		MAP_ANONYMOUS|MAP_SHARED, -1, 0);
+#endif
+#endif
+#endif
+	if((char *)addr == (char *)-1)
+	{
+		printf("\nUnable to get memory segment\n");
+		printf("Error %d\n",errno);
+		exit(122);
+	}
+	/* size1 is a long long: it was printed with %d, which is undefined. */
+	if(debug1)
+	{
+#ifdef NO_PRINT_LLD
+		printf("Got shared memory for size %ld\n",size1);
+#else
+		printf("Got shared memory for size %lld\n",size1);
+#endif
+	}
+
+	return(addr);
+#endif
+}
+
+/************************************************************************/
+/* Implementation of poll() function.
*/ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void Poll(long long time1) +#else +void Poll(time1) +long long time1; +#endif +{ + struct timeval howlong; + howlong.tv_sec=(int)(time1/100000); + howlong.tv_usec=(int)(time1%100000); /* Get into u.s. */ + select(0, 0, 0, 0, &howlong); +} + +/************************************************************************/ +/* Implementation of max() function. */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +long long l_max(long long one,long long two) +#else +long long l_max(one,two) +long long one,two; +#endif +{ + if(one > two) + return(one); + else + return(two); +} + +/************************************************************************/ +/* Internal Kill. With stonewalling disabled, kill does nothing */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void Kill(long long pid,long long sig) +#else +void Kill(pid,sig) +long long pid,sig; +#endif +{ + if(!xflag) + { + /*printf("Killing %d\n",pid);*/ + kill((pid_t)pid,(int)sig); + } +} +/************************************************************************/ +/* Implementation of min() function. */ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +long long l_min(long long num1,long long num2) +#else +long long l_min(num1,num2) +long long num1,num2; +#endif +{ + if(num1 >= num2) + return num2; + else + return num1; +} + +/************************************************************************/ +/* Routine to call throughput tests many times. 
*/ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +void +multi_throughput_test(long long mint,long long maxt) +#else +void multi_throughput_test(mint, maxt) +long long mint, maxt; +#endif +{ + int *t_rangeptr, *t_rangecurs; + int *saveptr = (int *)0; + int tofree = 0; + long long i; + if(t_count == 0){ + t_count = (int) maxt - mint + 1; + t_rangeptr = (int *) malloc((size_t)sizeof(int)*t_count); + saveptr = t_rangeptr; + tofree = 1; + t_rangecurs = t_rangeptr; + for(i=mint; i<= maxt; i++) { + *(t_rangecurs++) = i; + } + } + else { + t_rangeptr = &t_range[0]; + } + for(i=0; i < t_count; i++){ + num_child = *(t_rangeptr++); + current_client_number=0; /* Need to start with 1 */ + throughput_test(); + current_x=0; + current_y++; + } + if(Rflag) + dump_throughput(); + if(tofree) + free(saveptr); + +} + + + +/************************************************************************/ +/* Routine to purge the buffer cache by unmounting drive. */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void +purge_buffer_cache() +#else +purge_buffer_cache() +#endif +{ + char command[1024]; + int ret,i; + strcpy(command,"umount "); + strcat(command, mountname); + /* + umount might fail if the device is still busy, so + retry unmounting several times with increasing delays + */ + for (i = 1; i < 200; ++i) { + ret = system(command); + if (ret == 0) + break; + sleep(i); /* seconds */ + } + strcpy(command,"mount "); + strcat(command, mountname); + /* + mount might fail if the device is still busy, so + retry mounting several times with increasing delays + */ + for (i = 1; i < 10; ++i) { + ret = system(command); + if (ret == 0) + break; + sleep(i); /* seconds */ + } +} + +/************************************************************************/ +/* Thread write test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * 
+thread_write_test(void *x) +#else +void * +thread_write_test( x) +#endif +{ + + struct child_stats *child_stat; + double starttime1 = 0; + double temp_time; + double walltime, cputime; + double compute_val = (double)0; + float delay = (float)0; + double thread_qtime_stop,thread_qtime_start; + double hist_time; + double desired_op_rate_time; + double actual_rate; + off64_t traj_offset; + off64_t lock_offset=0; + off64_t save_offset=0; + long long flags,traj_size; + long long w_traj_bytes_completed; + long long w_traj_ops_completed; + FILE *w_traj_fd; + int fd; + long long recs_per_buffer; + long long stopped,i; + off64_t written_so_far, read_so_far, re_written_so_far,re_read_so_far; + long long xx,xx2; + char *dummyfile [MAXSTREAMS]; /* name of dummy file */ + char *nbuff; + char *maddr; + char *wmaddr,*free_addr; + char now_string[30]; + int anwser,bind_cpu,wval; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif + off64_t filebytes64; + char tmpname[256]; + FILE *thread_wqfd; + FILE *thread_Lwqfd; + +#ifdef ASYNC_IO + struct cache *gc=0; + +#else + long long *gc=0; +#endif + + if(compute_flag) + delay=compute_time; + nbuff=maddr=wmaddr=free_addr=0; + hist_time=thread_qtime_stop=thread_qtime_start=0; + thread_wqfd=w_traj_fd=thread_Lwqfd=(FILE *)0; + traj_offset=walltime=cputime=0; + anwser=bind_cpu=0; + if(w_traj_flag) + { + filebytes64 = w_traj_fsize; + numrecs64=w_traj_ops; + } + else + { + filebytes64 = numrecs64*reclen; + } + written_so_far=read_so_far=re_written_so_far=re_read_so_far=0; + w_traj_bytes_completed=w_traj_ops_completed=0; + recs_per_buffer = cache_size/reclen ; +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + { + xx = (long long)((long)x); + } + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#if defined(_HPUX_SOURCE) || defined(linux) + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; +#if defined(_HPUX_SOURCE) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, 
(pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + if(debug1 ) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + /*****************/ + /* Children only */ + /*******************************************************************/ + /* Initial write throughput performance test. **********************/ + /*******************************************************************/ +#if defined(Windows) + if(unbuffered) + { + hand=CreateFile(dummyfile[xx], + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_WRITE|FILE_SHARE_READ, + NULL,OPEN_ALWAYS,FILE_FLAG_NO_BUFFERING| + FILE_FLAG_WRITE_THROUGH|FILE_FLAG_POSIX_SEMANTICS, + NULL); + CloseHandle(hand); + } +#endif + if(oflag) + flags=O_RDWR|O_SYNC|O_CREAT; + else + flags=O_RDWR|O_CREAT; +#if defined(O_DSYNC) + if(odsync) + flags |= O_DSYNC; +#endif +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif +#if defined(Windows) + if(unbuffered) + { + hand=CreateFile(dummyfile[xx], + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_WRITE|FILE_SHARE_READ, + NULL,OPEN_EXISTING,FILE_FLAG_NO_BUFFERING| + FILE_FLAG_WRITE_THROUGH|FILE_FLAG_POSIX_SEMANTICS, + NULL); + } + else + { +#endif + if((fd = I_OPEN(dummyfile[xx], (int)flags,0640))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("\nCan not open temp file: %s\n", + filename); + perror("open"); + exit(125); + } +#if defined(Windows) + } +#endif +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif + if(mmapflag) + { + maddr=(char *)initfile(fd,(filebytes64),1,PROT_READ|PROT_WRITE); + } + if(reclen < cache_size ) + { + recs_per_buffer = cache_size/reclen ; + nbuff=&nbuff[(xx%recs_per_buffer)*reclen]; + } + if(fetchon) /* Prefetch into processor cache */ + fetchit(nbuff,reclen); + if((verify && !no_copy_flag) || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,(long long)0); + + if(w_traj_flag) + w_traj_fd=open_w_traj(); + + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + child_stat->flag=CHILD_STATE_READY; /* Tell parent child is ready to go */ + if(distributed && client_iozone) + 
tell_master_ready(chid); + if(distributed && client_iozone) + { + if(cdebug) + { + fprintf(newstdout,"Child %d waiting for go from master\n",(int)xx); + fflush(newstdout); + } + wait_for_master_go(chid); + if(cdebug) + { + fprintf(newstdout,"Child %d received go from master\n",(int)xx); + fflush(newstdout); + } + } + else + { + while(child_stat->flag!=CHILD_STATE_BEGIN) /* Wait for signal from parent */ + Poll((long long)1); + } + + written_so_far=0; + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->actual = 0; + child_stat->throughput = 0; + stopped=0; + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)0) != 0) + printf("File lock for write failed. %d\n",errno); + if(Q_flag) + { + sprintf(tmpname,"Child_%d_wol.dat",(int)xx); + thread_wqfd=fopen(tmpname,"a"); + if(thread_wqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_wqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Write test start: ",now_string); + } + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + if(w_traj_flag) + rewind(w_traj_fd); + for(i=0; ithroughput = + (time_so_far() - starttime1)-time_res; + if(include_close) + { + if((fd = I_OPEN(dummyfile[xx], (int)flags,0))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("\nCan not open temp file: %s\n", + filename); + perror("open"); + exit(125); + } + I_LSEEK(fd,save_offset,SEEK_SET); + } + if(child_stat->throughput < (double).000001) + { + child_stat->throughput = time_res; + if(rec_prob < reclen) + 
rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + if(debug1) + { + printf("\n(%ld) Stopped by another\n", (long)xx); + } + stopped=1; + } + if(purge) + purgeit(nbuff,reclen); + if(Q_flag || hist_summary || op_rate_flag) + { + thread_qtime_start=time_so_far(); + } +again: + if(mmapflag) + { + wmaddr = &maddr[i*reclen]; + fill_area((long long*)nbuff,(long long*)wmaddr,(long long)reclen); + /*printf("CHid: %lld Writing offset %lld for length of %lld\n",chid,i*reclen,reclen);*/ + if(!mmapnsflag) + { + if(mmapasflag) + msync(wmaddr,(size_t)reclen,MS_ASYNC); + if(mmapssflag) + msync(wmaddr,(size_t)reclen,MS_SYNC); + } + } + else + { + if(async_flag) + { + if(no_copy_flag) + { + free_addr=nbuff=(char *)malloc((size_t)reclen+page_size); + nbuff=(char *)(((long)nbuff+(long)page_size) & (long)~(page_size-1)); + if(verify || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,i); + async_write_no_copy(gc, (long long)fd, nbuff, reclen, (i*reclen), depth,free_addr); + } + else + async_write(gc, (long long)fd, nbuff, reclen, (i*reclen), depth); + } + else + { +#if defined(Windows) + if(unbuffered) + { + WriteFile(hand,nbuff,reclen, (LPDWORD)&wval,0); + } + else + { +#endif + wval=write(fd, nbuff, (size_t) reclen); +#if defined(Windows) + } +#endif + if(wval != reclen) + { + if(*stop_flag && !stopped){ + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + } + temp_time = time_so_far(); + child_stat->throughput = + (temp_time - starttime1)-time_res; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + 
written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + if(debug1) + { + printf("\n(%ld) Stopped by another\n", (long)xx); + } + stopped=1; + goto again; + } + /* Note: Writer must finish even though told + to stop. Otherwise the readers will fail. + The code will capture bytes transfered + before told to stop but let the writer + complete. + */ +#ifdef NO_PRINT_LLD + printf("\nError writing block %ld, fd= %d\n", i, + fd); +#else + printf("\nError writing block %lld, fd= %d\n", i, + fd); +#endif + if(wval==-1) + perror("write"); + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(127); + } + } + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long)((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_wqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_wqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + w_traj_ops_completed++; + w_traj_bytes_completed+=reclen; + written_so_far+=reclen/1024; + if(*stop_flag) + { + written_so_far-=reclen/1024; + w_traj_bytes_completed-=reclen; + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)0, + 
lock_offset, reclen); + } + } + + + if(file_lock) + if(mylockf((int) fd, (int) 0, (int)0)) + printf("Write unlock failed. %d\n",errno); + +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + mmap_end(maddr,(unsigned long long)filebytes64); +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + close(fd); + } + if(!stopped){ + temp_time = time_so_far(); + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + } + if(cdebug) + { + fprintf(newstdout,"Child %d: throughput %f actual %f \n",(int)chid, child_stat->throughput, + child_stat->actual); + fflush(newstdout); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_WRITE_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + + if (debug1) { + printf(" child/slot: %lld, wall-cpu: %8.3f %8.3fC" " -> %6.2f%%\n", + xx, walltime, cputime, + cpu_util(cputime, walltime)); + } + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + stopped=0; + 
/*******************************************************************/ + /* End write performance test. *************************************/ + /*******************************************************************/ + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)numrecs64*reclen,MS_SYNC); /*Clean up before read starts running*/ + mmap_end(maddr,(unsigned long long)numrecs64*reclen); + }else + fsync(fd); + +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + close(fd); + } + if(Q_flag && (thread_wqfd !=0) ) + fclose(thread_wqfd); + free(dummyfile[xx]); + if(w_traj_flag) + fclose(w_traj_fd); + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Write test finished: ",now_string); + fclose(thread_Lwqfd); + } + if(hist_summary) + dump_hist("write",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} + +#ifdef HAVE_PREAD +/************************************************************************/ +/* Thread pwrite test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_pwrite_test(void *x) +#else +void * +thread_pwrite_test( x) +#endif +{ + + struct child_stats *child_stat; + double starttime1 = 0; + double temp_time; + double walltime, cputime; + double compute_val = (double)0; + float delay = (float)0; + double thread_qtime_stop,thread_qtime_start; + double hist_time; + double desired_op_rate_time; + double actual_rate; + off64_t traj_offset; + off64_t lock_offset=0; + long long flags,traj_size; + long long w_traj_bytes_completed; + long long w_traj_ops_completed; + FILE *w_traj_fd; + int fd; + long long recs_per_buffer; + long long stopped,i; + off64_t written_so_far, read_so_far, 
re_written_so_far,re_read_so_far; + long long xx,xx2; + char *dummyfile [MAXSTREAMS]; /* name of dummy file */ + char *nbuff; + char *maddr; + char *wmaddr,*free_addr; + char now_string[30]; + int anwser,bind_cpu,wval; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif + off64_t filebytes64; + char tmpname[256]; + FILE *thread_wqfd; + FILE *thread_Lwqfd; + +#ifdef ASYNC_IO + struct cache *gc=0; + +#else + long long *gc=0; +#endif + + if(compute_flag) + delay=compute_time; + nbuff=maddr=wmaddr=free_addr=0; + hist_time=thread_qtime_stop=thread_qtime_start=0; + thread_wqfd=w_traj_fd=thread_Lwqfd=(FILE *)0; + traj_offset=walltime=cputime=0; + anwser=bind_cpu=0; + if(w_traj_flag) + { + filebytes64 = w_traj_fsize; + numrecs64=w_traj_ops; + } + else + { + filebytes64 = numrecs64*reclen; + } + written_so_far=read_so_far=re_written_so_far=re_read_so_far=0; + w_traj_bytes_completed=w_traj_ops_completed=0; + recs_per_buffer = cache_size/reclen ; +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + { + xx = (long long)((long)x); + } + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#if defined( _HPUX_SOURCE ) || defined ( linux ) + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; +#if defined( _HPUX_SOURCE ) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + if(debug1 ) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + dummyfile[xx]=(char 
*)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + /*****************/ + /* Children only */ + /*******************************************************************/ + /* Initial pwrite throughput performance test. *********************/ + /*******************************************************************/ + if(!notruncate) + { + if((fd = I_CREAT(dummyfile[xx], 0640))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror(dummyfile[xx]); + exit(123); + } + close(fd); + } + if(oflag) + flags=O_RDWR|O_SYNC|O_CREAT; + else + flags=O_RDWR|O_CREAT; +#if defined(O_DSYNC) + if(odsync) + flags |= O_DSYNC; +#endif +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif + if((fd = I_OPEN(dummyfile[xx], (int)flags,0640))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("\nCan not open temp file: %s\n", + filename); + perror("open"); + exit(125); + } +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif + if(mmapflag) + { + maddr=(char *)initfile(fd,(filebytes64),1,PROT_READ|PROT_WRITE); + } + if(reclen < cache_size ) + { + recs_per_buffer = cache_size/reclen ; + nbuff=&nbuff[(xx%recs_per_buffer)*reclen]; + } + if(fetchon) /* Prefetch into processor cache */ + fetchit(nbuff,reclen); + if((verify && !no_copy_flag) || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,(long long)0); + + if(w_traj_flag) + w_traj_fd=open_w_traj(); + + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + child_stat->flag=CHILD_STATE_READY; /* Tell parent child is ready to go */ + if(distributed && client_iozone) + tell_master_ready(chid); + if(distributed && client_iozone) + { + if(cdebug) + { + fprintf(newstdout,"Child %d waiting for go from master\n",(int)xx); + fflush(newstdout); + } + wait_for_master_go(chid); + if(cdebug) + { + fprintf(newstdout,"Child %d received go from master\n",(int)xx); + fflush(newstdout); + 
} + } + else + { + while(child_stat->flag!=CHILD_STATE_BEGIN) /* Wait for signal from parent */ + Poll((long long)1); + } + + written_so_far=0; + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->actual = 0; + child_stat->throughput = 0; + stopped=0; + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)0) != 0) + printf("File lock for write failed. %d\n",errno); + if(Q_flag) + { + sprintf(tmpname,"Child_%d_pwol.dat",(int)xx); + thread_wqfd=fopen(tmpname,"a"); + if(thread_wqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_wqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Pwrite test start: ",now_string); + } + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + if(w_traj_flag) + rewind(w_traj_fd); + for(i=0; ithroughput = + (time_so_far() - starttime1)-time_res; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput = time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + if(debug1) + { + printf("\n(%ld) Stopped by another\n", (long)xx); + } + stopped=1; + } + if(purge) + purgeit(nbuff,reclen); + if(Q_flag || hist_summary || op_rate_flag) + { + thread_qtime_start=time_so_far(); + } +again: + if(mmapflag) + { + wmaddr = &maddr[traj_offset]; + fill_area((long long*)nbuff,(long 
long*)wmaddr,(long long)reclen); + /*printf("CHid: %lld Writing offset %lld for length of %lld\n",chid,i*reclen,reclen);*/ + if(!mmapnsflag) + { + if(mmapasflag) + msync(wmaddr,(size_t)reclen,MS_ASYNC); + if(mmapssflag) + msync(wmaddr,(size_t)reclen,MS_SYNC); + } + } + else + { + if(async_flag) + { + if(no_copy_flag) + { + free_addr=nbuff=(char *)malloc((size_t)reclen+page_size); + nbuff=(char *)(((long)nbuff+(long)page_size) & (long)~(page_size-1)); + if(verify || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,i); + async_write_no_copy(gc, (long long)fd, nbuff, reclen, (traj_offset), depth,free_addr); + } + else + async_write(gc, (long long)fd, nbuff, reclen, (traj_offset), depth); + } + else + { + wval=I_PWRITE(fd, nbuff, reclen, traj_offset); + if(wval != reclen) + { + if(*stop_flag && !stopped){ + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + } + temp_time = time_so_far(); + child_stat->throughput = + (temp_time - starttime1)-time_res; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + if(debug1) + { + printf("\n(%ld) Stopped by another\n", (long)xx); + } + stopped=1; + goto again; + } + /* Note: Writer must finish even though told + to stop. Otherwise the readers will fail. + The code will capture bytes transfered + before told to stop but let the writer + complete. 
+ */ +#ifdef NO_PRINT_LLD + printf("\nError pwriting block %ld, fd= %d\n", i, + fd); +#else + printf("\nError pwriting block %lld, fd= %d\n", i, + fd); +#endif + if(wval==-1) + perror("pwrite"); + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(127); + } + } + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)0, + lock_offset, reclen); + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start-time_res); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long) ((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_wqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_wqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + w_traj_ops_completed++; + w_traj_bytes_completed+=reclen; + written_so_far+=reclen/1024; + if(*stop_flag) + { + written_so_far-=reclen/1024; + w_traj_bytes_completed-=reclen; + } + } + + + if(file_lock) + if(mylockf((int) fd, (int) 0, (int)0)) + printf("Write unlock failed. 
%d\n",errno); + +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + mmap_end(maddr,(unsigned long long)filebytes64); + close(fd); + } + if(!stopped){ + temp_time = time_so_far(); + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + } + if(cdebug) + { + fprintf(newstdout,"Child %d: throughput %f actual %f \n",(int)chid, child_stat->throughput, + child_stat->actual); + fflush(newstdout); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_PWRITE_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + + if (debug1) { + printf(" child/slot: %lld, wall-cpu: %8.3f %8.3fC" " -> %6.2f%%\n", + xx, walltime, cputime, + cpu_util(cputime, walltime)); + } + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + stopped=0; + /*******************************************************************/ + /* End pwrite performance test. 
*************************************/ + /*******************************************************************/ + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)numrecs64*reclen,MS_SYNC); /*Clean up before read starts running*/ + mmap_end(maddr,(unsigned long long)numrecs64*reclen); + }else + fsync(fd); + + close(fd); + } + if(Q_flag && (thread_wqfd !=0) ) + fclose(thread_wqfd); + free(dummyfile[xx]); + if(w_traj_flag) + fclose(w_traj_fd); + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Pwrite test finished: ",now_string); + fclose(thread_Lwqfd); + } + if(hist_summary) + dump_hist("Pwrite",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} +#endif + +/************************************************************************/ +/* Thread re-write test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_rwrite_test(void *x) +#else +void * +thread_rwrite_test(x) +#endif +{ + /************************/ + /* Children only here */ + /************************/ + struct child_stats *child_stat; + long long xx,xx2; + double compute_val = (double)0; + double walltime, cputime; + float delay = (float)0; + double thread_qtime_stop,thread_qtime_start; + double hist_time; + double desired_op_rate_time; + double actual_rate; + off64_t traj_offset; + off64_t lock_offset=0; + long long w_traj_bytes_completed; + long long w_traj_ops_completed; + int fd; + FILE *w_traj_fd; + long long flags = 0; + double starttime1 = 0; + double temp_time; + long long recs_per_buffer,traj_size; + long long i; + off64_t written_so_far, read_so_far, re_written_so_far,re_read_so_far=0; + char *dummyfile [MAXSTREAMS]; /* name of dummy file */ + char 
*nbuff; + char *maddr,*free_addr; + char *wmaddr; + char now_string[30]; + int anwser,bind_cpu,wval; + FILE *thread_rwqfd,*thread_Lwqfd; + char tmpname[256]; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; + +#else + long long *gc=0; +#endif + + if(compute_flag) + delay=compute_time; + wmaddr=nbuff=maddr=free_addr=0; + thread_rwqfd=w_traj_fd=thread_Lwqfd=(FILE *)0; + hist_time=traj_offset=thread_qtime_stop=thread_qtime_start=0; + walltime=cputime=0; + anwser=bind_cpu=0; + w_traj_bytes_completed=w_traj_ops_completed=0; + written_so_far=read_so_far=re_written_so_far=re_read_so_far=0; + recs_per_buffer = cache_size/reclen ; + if(w_traj_flag) + { + filebytes64 = w_traj_fsize; + numrecs64=w_traj_ops; + } + else + { + filebytes64 = numrecs64*reclen; + } +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + xx=(long long)((long)x); + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#if defined( _HPUX_SOURCE ) || defined ( linux ) + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; +#if defined( _HPUX_SOURCE ) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + if(debug1) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + 
if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + flags = O_RDWR; + if(oflag) + flags|= O_SYNC; +#if defined(O_DSYNC) + if(odsync) + flags|= O_DSYNC; +#endif +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + +#if ! defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif + +#if defined(Windows) + if(unbuffered) + { + hand=CreateFile(dummyfile[xx], + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_WRITE|FILE_SHARE_READ, + NULL,OPEN_ALWAYS,FILE_FLAG_NO_BUFFERING| + FILE_FLAG_WRITE_THROUGH|FILE_FLAG_POSIX_SEMANTICS, + NULL); + } + else + { +#endif + if((fd = I_OPEN(dummyfile[xx], (int)flags,0))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); +#ifdef NO_PRINT_LLD + printf("\nChild %ld\n",xx); +#else + printf("\nChild %lld\n",xx); +#endif + child_stat->flag = CHILD_STATE_HOLD; + perror(dummyfile[xx]); + exit(128); + } +#if defined(Windows) + } +#endif +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif + if(mmapflag) + { + maddr=(char 
*)initfile(fd,(numrecs64*reclen),1,PROT_READ|PROT_WRITE); + } + if(fetchon) + fetchit(nbuff,reclen); + if(w_traj_flag) + w_traj_fd=open_w_traj(); + if(Q_flag) + { + sprintf(tmpname,"Child_%d_rwol.dat",(int)xx); + thread_rwqfd=fopen(tmpname,"a"); + if(thread_rwqfd==0) + { + printf("Unable to open %s\n",tmpname); + client_error=errno; + if(distributed && client_iozone) + send_stop(); + exit(40); + } + fprintf(thread_rwqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Rewrite test start: ",now_string); + } + child_stat->flag = CHILD_STATE_READY; + if(distributed && client_iozone) + tell_master_ready(chid); + if(distributed && client_iozone) + { + if(cdebug) + { + fprintf(newstdout,"Child %d waiting for go from master\n",(int)xx); + fflush(newstdout); + } + wait_for_master_go(chid); + if(cdebug) + { + fprintf(newstdout,"Child %d received go from master\n",(int)xx); + fflush(newstdout); + } + } + else + { + while(child_stat->flag!=CHILD_STATE_BEGIN) /* Wait for signal from parent */ + Poll((long long)1); + } + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)0) != 0) + printf("File lock for write failed. 
%d\n",errno); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + if(w_traj_flag) + rewind(w_traj_fd); + if((verify && !no_copy_flag) || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,(long long)0); + for(i=0; iflag = CHILD_STATE_HOLD; + signal_handler(); + } + } + } + re_written_so_far+=reclen/1024; + w_traj_ops_completed++; + w_traj_bytes_completed+=reclen; + if(*stop_flag) + { + re_written_so_far-=reclen/1024; + w_traj_bytes_completed-=reclen; + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start-time_res); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long) ((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_rwqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_rwqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)0, + lock_offset, reclen); + } + } + if(file_lock) + if(mylockf((int) fd, (int) 0, (int)0)) + printf("Write unlock failed. 
%d\n",errno); +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + { + msync(maddr,(size_t)(filebytes64),MS_SYNC); + }else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + close(fd); + } + temp_time=time_so_far(); + child_stat=(struct child_stats *)&shmaddr[xx]; + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*re_written_so_far=(re_written_so_far*1024)/reclen;*/ + re_written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)re_written_so_far/child_stat->throughput; + child_stat->actual = (double)re_written_so_far; + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + if(cdebug) + { + fprintf(newstdout,"Child %d: throughput %f actual %f \n",(int)chid, child_stat->throughput, + child_stat->actual); + fflush(newstdout); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_REWRITE_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)(filebytes64),MS_SYNC); + mmap_end(maddr,(unsigned long long)filebytes64); + } + else + fsync(fd); +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + close(fd); + } + free(dummyfile[xx]); + + if(Q_flag 
&& (thread_rwqfd !=0) ) + fclose(thread_rwqfd); + + if(w_traj_flag) + fclose(w_traj_fd); + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild Stopping %ld\n",xx); +#else + printf("\nChild Stopping %lld\n",xx); +#endif + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Rewrite test finished: ",now_string); + fclose(thread_Lwqfd); + } + if(hist_summary) + dump_hist("Rewrite",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} + +/************************************************************************/ +/* Thread read test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_read_test(void *x) +#else +void * +thread_read_test(x) +#endif +{ + long long xx,xx2; + struct child_stats *child_stat; + double walltime, cputime; + long long r_traj_bytes_completed; + long long r_traj_ops_completed; + int fd; + FILE *r_traj_fd,*thread_rqfd; + FILE *thread_Lwqfd; + long long flags = 0; + off64_t traj_offset; + off64_t lock_offset=0; + double starttime1 = 0; + float delay = 0; + double temp_time; + double thread_qtime_start,thread_qtime_stop; + double hist_time; + double desired_op_rate_time; + double actual_rate; + double compute_val = (double)0; + off64_t written_so_far, read_so_far, re_written_so_far,re_read_so_far; + long long recs_per_buffer,traj_size; + off64_t i; + char *dummyfile[MAXSTREAMS]; /* name of dummy file */ + char *nbuff=0; + char *maddr=0; + char *wmaddr=0; + char tmpname[256]; + volatile char *buffer1; + char now_string[30]; + int anwser,bind_cpu; + long wval; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif + + if(compute_flag) + delay=compute_time; + thread_rqfd=thread_Lwqfd=r_traj_fd=(FILE *)0; + hist_time=traj_offset=thread_qtime_stop=thread_qtime_start=0; + 
walltime=cputime=0; + anwser=bind_cpu=0; + r_traj_bytes_completed=r_traj_ops_completed=0; + written_so_far=read_so_far=re_written_so_far=re_read_so_far=0; + recs_per_buffer = cache_size/reclen ; + if(r_traj_flag) + { + filebytes64 = r_traj_fsize; + numrecs64=r_traj_ops; + } + else + { + filebytes64 = numrecs64*reclen; + } + +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + xx = (long long)((long)x); + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#if defined( _HPUX_SOURCE ) || defined ( linux ) + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; +#if defined(_HPUX_SOURCE) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + if(oflag) + flags=O_RDONLY|O_SYNC; + else + flags=O_RDONLY; +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif +#if defined(Windows) + if(unbuffered) + { + hand=CreateFile(dummyfile[xx], + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_WRITE|FILE_SHARE_READ, + NULL,OPEN_EXISTING,FILE_FLAG_NO_BUFFERING| + FILE_FLAG_WRITE_THROUGH|FILE_FLAG_POSIX_SEMANTICS, + NULL); + SetFilePointer(hand,(LONG)0,0,FILE_BEGIN); + } + else + { +#endif + if((fd = I_OPEN(dummyfile[xx], (int)flags,0))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror(dummyfile[xx]); + exit(130); + } +#if defined(Windows) + } +#endif +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif + if(mmapflag) + { + maddr=(char *)initfile(fd,(numrecs64*reclen),0,PROT_READ); + } + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + if(debug1) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + /*****************/ + /* Children only */ + /*****************/ + if(Q_flag) + { + sprintf(tmpname,"Child_%d_rol.dat",(int)xx); + thread_rqfd=fopen(tmpname,"a"); + if(thread_rqfd==0) + { 
+ client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_rqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Read test start: ",now_string); + } + + if(r_traj_flag) + r_traj_fd=open_r_traj(); + if(fetchon) + fetchit(nbuff,reclen); + child_stat=(struct child_stats *)&shmaddr[xx]; + child_stat->flag = CHILD_STATE_READY; + if(distributed && client_iozone) + { + tell_master_ready(chid); + wait_for_master_go(chid); + } + else + { + /* Wait for signal from parent */ + while(child_stat->flag!=CHILD_STATE_BEGIN) + Poll((long long)1); + } + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)1) != 0) + printf("File lock for read failed. 
%d\n",errno); + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + + if(r_traj_flag) + rewind(r_traj_fd); + for(i=0; iflag = CHILD_STATE_HOLD; + exit(132); + } + } + } + if(verify){ + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(133); + } + } + else + { + if(verify_buffer(nbuff,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(134); + } + } + } + if(async_flag && no_copy_flag) + async_release(gc); + read_so_far+=reclen/1024; + r_traj_bytes_completed+=reclen; + r_traj_ops_completed++; + if(*stop_flag) + { + read_so_far-=reclen/1024; + r_traj_bytes_completed-=reclen; + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start-time_res); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long) ((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_rqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_rqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + + if(rlocking) + { + mylockr((int) fd, (int) 0, 
(int)1, + lock_offset, reclen); + } + } + if(file_lock) + if(mylockf((int) fd, (int) 0, (int)1)) + printf("Read unlock failed. %d\n",errno); +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + { + msync(maddr,(size_t)(filebytes64),MS_SYNC); + }else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + close(fd); + } + temp_time = time_so_far(); + child_stat=(struct child_stats *)&shmaddr[xx]; + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*read_so_far=(read_so_far*1024)/reclen;*/ + read_so_far=r_traj_ops_completed; + } + child_stat->throughput = read_so_far/child_stat->throughput; + child_stat->actual = read_so_far; + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + if(cdebug) + { + fprintf(newstdout,"Child %d: throughput %f actual %f \n",(int)chid,child_stat->throughput, + child_stat->actual); + fflush(newstdout); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_READ_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + /*fsync(fd);*/ + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)(filebytes64),MS_SYNC); + mmap_end(maddr,(unsigned long long)filebytes64); + }else + fsync(fd); +#if defined(Windows) + if(unbuffered) 
+ CloseHandle(hand); + else +#endif + close(fd); + } + if(Q_flag && (thread_rqfd !=0) ) + fclose(thread_rqfd); + free(dummyfile[xx]); + if(r_traj_flag) + fclose(r_traj_fd); + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Read test finished: ",now_string); + fclose(thread_Lwqfd); + } + if(hist_summary) + dump_hist("Read",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} + +#ifdef HAVE_PREAD +/************************************************************************/ +/* Thread pread test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_pread_test(void *x) +#else +void * +thread_pread_test(x) +#endif +{ + long long xx,xx2; + struct child_stats *child_stat; + double walltime, cputime; + long long r_traj_bytes_completed; + long long r_traj_ops_completed; + int fd; + FILE *r_traj_fd,*thread_rqfd; + FILE *thread_Lwqfd; + long long flags = 0; + off64_t traj_offset; + off64_t lock_offset=0; + double starttime1 = 0; + float delay = 0; + double temp_time; + double thread_qtime_start,thread_qtime_stop; + double hist_time; + double desired_op_rate_time; + double actual_rate; + double compute_val = (double)0; + off64_t written_so_far, read_so_far, re_written_so_far,re_read_so_far; + long long recs_per_buffer,traj_size; + off64_t i; + char *dummyfile[MAXSTREAMS]; /* name of dummy file */ + char *nbuff=0; + char *maddr=0; + char *wmaddr=0; + char tmpname[256]; + char now_string[30]; + volatile char *buffer1; + int anwser,bind_cpu; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif + + if(compute_flag) + delay=compute_time; + 
thread_rqfd=thread_Lwqfd=r_traj_fd=(FILE *)0; + hist_time=traj_offset=thread_qtime_stop=thread_qtime_start=0; + walltime=cputime=0; + anwser=bind_cpu=0; + r_traj_bytes_completed=r_traj_ops_completed=0; + written_so_far=read_so_far=re_written_so_far=re_read_so_far=0; + recs_per_buffer = cache_size/reclen ; + if(r_traj_flag) + { + filebytes64 = r_traj_fsize; + numrecs64=r_traj_ops; + } + else + { + filebytes64 = numrecs64*reclen; + } + +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + xx = (long long)((long)x); + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#ifdef _HPUX_SOURCE + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + if(oflag) + flags=O_RDONLY|O_SYNC; + else + flags=O_RDONLY; +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif + if((fd = I_OPEN(dummyfile[xx], (int)flags,0))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror(dummyfile[xx]); + exit(130); + } +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif + if(mmapflag) + { + maddr=(char *)initfile(fd,(numrecs64*reclen),0,PROT_READ); + } + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + if(debug1) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + /*****************/ + /* Children only */ + /*****************/ + if(Q_flag) + { + sprintf(tmpname,"Child_%d_prol.dat",(int)xx); + thread_rqfd=fopen(tmpname,"a"); + if(thread_rqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_rqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + 
client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Pread test start: ",now_string); + } + + if(r_traj_flag) + r_traj_fd=open_r_traj(); + if(fetchon) + fetchit(nbuff,reclen); + child_stat=(struct child_stats *)&shmaddr[xx]; + child_stat->flag = CHILD_STATE_READY; + if(distributed && client_iozone) + { + tell_master_ready(chid); + wait_for_master_go(chid); + } + else + { + /* Wait for signal from parent */ + while(child_stat->flag!=CHILD_STATE_BEGIN) + Poll((long long)1); + } + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)1) != 0) + printf("File lock for read failed. %d\n",errno); + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + + if(r_traj_flag) + rewind(r_traj_fd); + for(i=0; iflag = CHILD_STATE_HOLD; + exit(132); + } + } + } + if(verify){ + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(133); + } + } + else + { + if(verify_buffer(nbuff,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(134); + } + } + } + if(async_flag && no_copy_flag) + async_release(gc); + read_so_far+=reclen/1024; + r_traj_bytes_completed+=reclen; + r_traj_ops_completed++; + if(*stop_flag) + { + read_so_far-=reclen/1024; + r_traj_bytes_completed-=reclen; + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start-time_res); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = 
(double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long) ((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_rqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_rqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)1, + lock_offset, reclen); + } + } + if(file_lock) + if(mylockf((int) fd, (int) 0, (int)1)) + printf("Read unlock failed. %d\n",errno); +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + { + msync(maddr,(size_t)(filebytes64),MS_SYNC); + }else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } + close(fd); + } + temp_time = time_so_far(); + child_stat=(struct child_stats *)&shmaddr[xx]; + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*read_so_far=(read_so_far*1024)/reclen;*/ + read_so_far=r_traj_ops_completed; + } + child_stat->throughput = read_so_far/child_stat->throughput; + child_stat->actual = read_so_far; + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + if(cdebug) + { + fprintf(newstdout,"Child %d: throughput %f actual %f \n",(int)chid,child_stat->throughput, + child_stat->actual); + fflush(newstdout); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if 
(cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_READ_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + /*fsync(fd);*/ + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)(filebytes64),MS_SYNC); + mmap_end(maddr,(unsigned long long)filebytes64); + }else + fsync(fd); + close(fd); + } + if(Q_flag && (thread_rqfd !=0) ) + fclose(thread_rqfd); + free(dummyfile[xx]); + if(r_traj_flag) + fclose(r_traj_fd); + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Pread test finished: ", + now_string); + fclose(thread_Lwqfd); + } + if(hist_summary) + dump_hist("Pread",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} +#endif + +/************************************************************************/ +/* Thread re-read test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_rread_test(void *x) +#else +void * +thread_rread_test(x) +#endif +{ + long long xx,xx2; + char *nbuff; + struct child_stats *child_stat; + int fd; + FILE *r_traj_fd,*thread_rrqfd; + FILE *thread_Lwqfd; + long long r_traj_bytes_completed; + double walltime, cputime; + long long r_traj_ops_completed; + off64_t traj_offset; + off64_t lock_offset=0; + long long flags = 0; + double starttime1 = 0; + float delay = 0; + double temp_time; + double thread_qtime_start,thread_qtime_stop; + double hist_time; + double desired_op_rate_time; + 
double actual_rate; + double compute_val = (double)0; + long long recs_per_buffer,traj_size; + off64_t i; + off64_t written_so_far, read_so_far, re_written_so_far, + re_read_so_far; + char *dummyfile[MAXSTREAMS]; /* name of dummy file */ + char *maddr=0; + char *wmaddr=0; + char now_string[30]; + volatile char *buffer1; + int anwser,bind_cpu; + long wval; + char tmpname[256]; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif + /*****************/ + /* Children only */ + /*****************/ + if(compute_flag) + delay=compute_time; + hist_time=thread_qtime_stop=thread_qtime_start=0; + thread_rrqfd=r_traj_fd=thread_Lwqfd=(FILE *)0; + traj_offset=walltime=cputime=0; + anwser=bind_cpu=0; + r_traj_bytes_completed=r_traj_ops_completed=0; + written_so_far=read_so_far=re_written_so_far=re_read_so_far=0; + recs_per_buffer = cache_size/reclen ; +#ifdef NO_THREADS + xx=chid; +#else + if(r_traj_flag) + { + filebytes64 = r_traj_fsize; + numrecs64=r_traj_ops; + } + else + { + filebytes64 = numrecs64*reclen; + } + if(use_thread) + xx = (long long)((long)x); + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#if defined( _HPUX_SOURCE ) || defined ( linux ) + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; +#if defined(_HPUX_SOURCE) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + if(debug1) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + 
printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + if(oflag) + flags=O_RDONLY|O_SYNC; + else + flags=O_RDONLY; +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + +#if ! defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif + +#if defined(Windows) + if(unbuffered) + { + hand=CreateFile(dummyfile[xx], + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_WRITE|FILE_SHARE_READ, + NULL,OPEN_EXISTING,FILE_FLAG_NO_BUFFERING| + FILE_FLAG_WRITE_THROUGH|FILE_FLAG_POSIX_SEMANTICS, + NULL); + SetFilePointer(hand,(LONG)0,0,FILE_BEGIN); + } + else + { +#endif + if((fd = I_OPEN(dummyfile[xx], ((int)flags),0))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror(dummyfile[xx]); + exit(135); + } +#if defined(Windows) + } +#endif +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif + if(mmapflag) + { + maddr=(char 
*)initfile(fd,(filebytes64),0,PROT_READ); + } + if(r_traj_flag) + r_traj_fd=open_r_traj(); + if(fetchon) + fetchit(nbuff,reclen); + if(Q_flag) + { + sprintf(tmpname,"Child_%d_rrol.dat",(int)xx); + thread_rrqfd=fopen(tmpname,"a"); + if(thread_rrqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_rrqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Reread test start: ",now_string); + } + + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + child_stat->flag = CHILD_STATE_READY; + + if(distributed && client_iozone) + { + tell_master_ready(chid); + wait_for_master_go(chid); + } + else + + /* Wait for signal from parent */ + while(child_stat->flag!=CHILD_STATE_BEGIN) + Poll((long long)1); + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)1) != 0) + printf("File lock for read failed. 
%d\n",errno); + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + + if(r_traj_flag) + rewind(r_traj_fd); + for(i=0; iflag = CHILD_STATE_HOLD; + exit(137); + } + } + } + if(verify){ + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(138); + } + } + else + { + if(verify_buffer(nbuff,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(139); + } + } + } + if(async_flag && no_copy_flag) + async_release(gc); + re_read_so_far+=reclen/1024; + r_traj_bytes_completed+=reclen; + r_traj_ops_completed++; + if(*stop_flag) + { + re_read_so_far-=reclen/1024; + r_traj_bytes_completed-=reclen; + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start-time_res); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long) ((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_rrqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_rrqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + + if(rlocking) + { + mylockr((int) fd, (int) 
0, (int)1, + lock_offset, reclen); + } + } + if(file_lock) + if(mylockf((int) fd, (int) 0, (int)1)) + printf("Read unlock failed. %d\n",errno); + /*fsync(fd);*/ +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + { + msync(maddr,(size_t)(filebytes64),MS_SYNC); + }else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)filebytes64); + } +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + close(fd); + } + temp_time = time_so_far(); + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*re_read_so_far=(re_read_so_far*1024)/reclen;*/ + re_read_so_far=r_traj_ops_completed; + } + child_stat->throughput = re_read_so_far/child_stat->throughput; + child_stat->actual = re_read_so_far; + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + { + tell_master_stats(THREAD_REREAD_TEST,chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + } + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)(filebytes64),MS_SYNC); + mmap_end(maddr,(unsigned long long)filebytes64); + }else + fsync(fd); +#if defined(Windows) + if(unbuffered) + CloseHandle(hand); + else +#endif + close(fd); + } + if(Q_flag && (thread_rrqfd !=0) ) + fclose(thread_rrqfd); + free(dummyfile[xx]); + if(r_traj_flag) + fclose(r_traj_fd); + 
if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Reread test finished: ",now_string); + fclose(thread_Lwqfd); + } + if(hist_summary) + dump_hist("Reread",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} + +/************************************************************************/ +/* Thread_reverse_perf_test */ +/* Reverse read test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_reverse_read_test(void *x) +#else +void * +thread_reverse_read_test(x) +#endif +{ + long long xx,xx2; + char *nbuff; + struct child_stats *child_stat; + int fd; + long long flags = 0; + double walltime, cputime; + double thread_qtime_stop,thread_qtime_start; + double hist_time; + double desired_op_rate_time; + double actual_rate; + double starttime2 = 0; + float delay = 0; + double temp_time; + double compute_val = (double)0; + long long recs_per_buffer; + off64_t i,t_offset; + off64_t lock_offset=0; + off64_t current_position=0; + off64_t written_so_far, reverse_read, re_read_so_far,read_so_far; + char *dummyfile[MAXSTREAMS]; /* name of dummy file */ + char *maddr=0; + char *wmaddr=0; + char now_string[30]; + volatile char *buffer1; + int anwser,bind_cpu; + off64_t traj_offset; + char tmpname[256]; + FILE *thread_revqfd=0; + FILE *thread_Lwqfd=0; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif + /*****************/ + /* Children only */ + /*****************/ + if(compute_flag) + delay=compute_time; + hist_time=thread_qtime_stop=thread_qtime_start=0; + traj_offset=walltime=cputime=0; + anwser=bind_cpu=0; + 
written_so_far=read_so_far=reverse_read=re_read_so_far=0; + recs_per_buffer = cache_size/reclen ; +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + xx = (long long)((long)x); + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#if defined( _HPUX_SOURCE ) || defined ( linux ) + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; +#if defined(_HPUX_SOURCE) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + if(debug1) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + if(oflag) + flags=O_RDONLY|O_SYNC; + else + flags=O_RDONLY; +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif +#if defined(Windows) + if(unbuffered) + { + hand=CreateFile(dummyfile[xx], + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_WRITE|FILE_SHARE_READ, + NULL,OPEN_EXISTING,FILE_FLAG_NO_BUFFERING| + FILE_FLAG_WRITE_THROUGH|FILE_FLAG_POSIX_SEMANTICS, + NULL); + } +#endif + + if((fd = I_OPEN(dummyfile[xx], ((int)flags),0))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror(dummyfile[xx]); + exit(140); + } +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif + if(mmapflag) + { + maddr=(char *)initfile(fd,(numrecs64*reclen),0,PROT_READ); + } + if(fetchon) + fetchit(nbuff,reclen); + if(Q_flag) + { + sprintf(tmpname,"Child_%d_revol.dat",(int)xx); + thread_revqfd=fopen(tmpname,"a"); + if(thread_revqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_revqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + 
fprintf(thread_Lwqfd,"%-25s %s","Reverse read start: ",now_string); + } + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + child_stat->flag = CHILD_STATE_READY; + if(distributed && client_iozone) + { + tell_master_ready(chid); + wait_for_master_go(chid); + } + else + { + while(child_stat->flag!=CHILD_STATE_BEGIN) /* Wait for signal from parent */ + Poll((long long)1); + } + starttime2 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime2; + cputime = cputime_so_far(); + } + + t_offset = (off64_t)reclen; + if (!(h_flag || k_flag || mmapflag)) + { + if(check_filename(dummyfile[xx])) + { + if((I_LSEEK( fd, -t_offset, SEEK_END ))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror("lseek"); + exit(142); + } + } + else + { + if(I_LSEEK( fd, (numrecs64*reclen)-t_offset, SEEK_SET )<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror("lseek"); + exit(77); + } + } + } + current_position=(reclen*numrecs64)-reclen; + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)1)!=0) + printf("File lock for read failed. 
%d\n",errno); + for(i=0; iflag = CHILD_STATE_HOLD; + exit(144); + } + } + } + if(verify){ + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen,(off64_t)(current_position/reclen),reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(145); + } + } + else + { + if(verify_buffer(nbuff,reclen,(off64_t)(current_position/reclen),reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(146); + } + } + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)1, + lock_offset, reclen); + } + current_position+=reclen; + if(async_flag && no_copy_flag) + async_release(gc); + t_offset = (off64_t)reclen*2; + if (!(h_flag || k_flag || mmapflag)) + { + I_LSEEK( fd, -t_offset, SEEK_CUR ); + } + current_position-=(2 *reclen); + reverse_read +=reclen/1024; + if(*stop_flag) + { + reverse_read -=reclen/1024; + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start-time_res); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long) ((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_revqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_revqfd,"%10.1lld %10.0f 
%10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + } + if(file_lock) + if(mylockf((int) fd,(int)0, (int)1)) + printf("Read unlock failed %d\n",errno); +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + { + msync(maddr,(size_t)(numrecs64*reclen),MS_SYNC); + }else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)numrecs64*reclen); + } + close(fd); + } + temp_time = time_so_far(); + child_stat->throughput = ((temp_time - starttime2)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(OPS_flag){ + reverse_read=(reverse_read*1024)/reclen; + } + child_stat->throughput = reverse_read/child_stat->throughput; + child_stat->actual = reverse_read; + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_REVERSE_READ_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)(numrecs64*reclen),MS_SYNC); + mmap_end(maddr,(unsigned long long)numrecs64*reclen); + }else + fsync(fd); + close(fd); + } + free(dummyfile[xx]); + if(Q_flag && (thread_revqfd !=0) ) + fclose(thread_revqfd); + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + + if(L_flag) + { + get_date(now_string); + 
fprintf(thread_Lwqfd,"%-25s %s","Reverse read finished: ", + now_string); + fclose(thread_Lwqfd); + } + if(hist_summary) + dump_hist("Read Backwards",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} +/************************************************************************/ +/* Thread_stride_read_test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_stride_read_test(void *x) +#else +void * +thread_stride_read_test(x) +#endif +{ + long long xx,xx2; + char *nbuff=0; + struct child_stats *child_stat; + double walltime, cputime; + int fd; + long long flags = 0; + double thread_qtime_stop,thread_qtime_start; + double hist_time; + double desired_op_rate_time; + double actual_rate; + double starttime2 = 0; + float delay = 0; + double compute_val = (double)0; + double temp_time; + long long recs_per_buffer; + off64_t i; + off64_t lock_offset=0; + off64_t savepos64=0; + off64_t written_so_far, stride_read,re_read_so_far,read_so_far; + off64_t stripewrap = 0; + off64_t current_position = 0; + char *dummyfile[MAXSTREAMS]; /* name of dummy file */ + char *maddr=0; + char *wmaddr=0; + volatile char *buffer1; + int anwser,bind_cpu; + off64_t traj_offset; + char tmpname[256]; + char now_string[30]; + FILE *thread_strqfd=0; + FILE *thread_Lwqfd=0; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif + /*****************/ + /* Children only */ + /*****************/ + if(compute_flag) + delay=compute_time; + hist_time=thread_qtime_stop=thread_qtime_start=0; + traj_offset=walltime=cputime=0; + anwser=bind_cpu=0; + written_so_far=read_so_far=stride_read=re_read_so_far=0; + recs_per_buffer = cache_size/reclen ; +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + xx = (long long)((long)x); + else + { + xx=chid; + 
} +#endif +#ifndef NO_THREADS +#if defined( _HPUX_SOURCE ) || defined ( linux ) + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; +#if defined(_HPUX_SOURCE) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + if(debug1) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + if(oflag) + flags=O_RDONLY|O_SYNC; + else + flags=O_RDONLY; +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif + +#if defined(Windows) + if(unbuffered) + { + hand=CreateFile(dummyfile[xx], + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_WRITE|FILE_SHARE_READ, + NULL,OPEN_EXISTING,FILE_FLAG_NO_BUFFERING| + FILE_FLAG_WRITE_THROUGH|FILE_FLAG_POSIX_SEMANTICS, + NULL); + } +#endif + if((fd = I_OPEN(dummyfile[xx], ((int)flags),0))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror(dummyfile[xx]); + exit(147); + } +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif + + if(mmapflag) + { + maddr=(char *)initfile(fd,(numrecs64*reclen),0,PROT_READ); + } + if(fetchon) + fetchit(nbuff,reclen); + if(Q_flag) + { + sprintf(tmpname,"Child_%d_strol.dat",(int)xx); + thread_strqfd=fopen(tmpname,"a"); + if(thread_strqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_strqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + 
fprintf(thread_Lwqfd,"%-25s %s","Stride test start: ", + now_string); + } + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + child_stat->flag = CHILD_STATE_READY; + if(distributed && client_iozone) + { + tell_master_ready(chid); + wait_for_master_go(chid); + } + else + + /* wait for parent to say go */ + while(child_stat->flag!=CHILD_STATE_BEGIN) + Poll((long long)1); + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)1)!=0) + printf("File lock for write failed. %d\n",errno); + starttime2 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime2; + cputime = cputime_so_far(); + } + for(i=0; iflag = CHILD_STATE_HOLD; + exit(149); + } + } + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)1, + lock_offset, reclen); + } + current_position+=reclen; + if(verify){ + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen,(off64_t)savepos64,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(150); + } + } + else + { + if(verify_buffer(nbuff,reclen,(off64_t)savepos64,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(151); + } + } + } + if(async_flag && no_copy_flag) + async_release(gc); + if(current_position + (stride * reclen) >= (numrecs64 * reclen)-reclen) + { + current_position=0; + + stripewrap++; + + if(numrecs64 <= stride) + { + current_position=0; + } + else + { + current_position = (off64_t)((stripewrap)%numrecs64)*reclen; + } + if (!(h_flag || k_flag || mmapflag)) + { + if(I_LSEEK(fd,current_position,SEEK_SET)<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror("lseek"); + exit(152); + } + } + } + else + { + current_position+=(stride*reclen)-reclen; + if (!(h_flag || k_flag || mmapflag)) + { + 
if(I_LSEEK(fd,current_position,SEEK_SET)<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror("lseek"); + exit(154); + }; + } + } + stride_read +=reclen/1024; + if(*stop_flag) + { + stride_read -=reclen/1024; + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start-time_res); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long) ((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_strqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_strqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + } + if(file_lock) + if(mylockf((int) fd,(int)0,(int)1)) + printf("Read unlock failed %d\n",errno); +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + { + msync(maddr,(size_t)(numrecs64*reclen),MS_SYNC); + }else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)numrecs64*reclen); + } + close(fd); + } + temp_time = time_so_far(); + child_stat->throughput = ((temp_time - starttime2)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(OPS_flag){ + stride_read=(stride_read*1024)/reclen; + } + child_stat->throughput = 
stride_read/child_stat->throughput; + child_stat->actual = stride_read; + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + { + tell_master_stats(THREAD_STRIDE_TEST,chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + } + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)(numrecs64*reclen),MS_SYNC); + mmap_end(maddr,(unsigned long long)numrecs64*reclen); + }else + fsync(fd); + close(fd); + } + if(Q_flag && (thread_strqfd !=0) ) + fclose(thread_strqfd); + free(dummyfile[xx]); + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Stride test finished: ", + now_string); + fclose(thread_Lwqfd); + } + if(hist_summary) + dump_hist("Stride Read",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} + +/************************************************************************/ +/* Thread random test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_mix_test(void *x) +#else +void * +thread_mix_test(x) +#endif +{ + int selector; + int num_readers; + long xx; + +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + { + xx = (long)x; + } + else + { + xx=(long)chid; + } +#endif + if(pct_read!=0) + { + num_readers = (pct_read * num_child)/100; + if(xx < num_readers) + selector=0; + else + 
selector=1; + } + else + { + if(Kplus_flag) + { + if(xx+1 <= Kplus_readers) + selector=0; + else + selector=1; + } + else + { + /* Simple round robin */ + selector= ((int)xx) % 2; + } + } + if(selector==0) + { + if(seq_mix) + thread_read_test(x); + else + thread_ranread_test(x); + } + else + { + if(seq_mix) + thread_write_test(x); + else + thread_ranwrite_test(x); + } + return(0); +} +/************************************************************************/ +/* Thread random read test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_ranread_test(void *x) +#else +void * +thread_ranread_test(x) +#endif +{ + long long xx,xx2; + struct child_stats *child_stat; + double walltime, cputime; + int fd; + long long flags = 0; + double thread_qtime_stop,thread_qtime_start; + double hist_time; + double desired_op_rate_time; + double actual_rate; + double starttime1 = 0; + float delay = 0; + double temp_time; + double compute_val = (double)0; + off64_t written_so_far, ranread_so_far, re_written_so_far,re_read_so_far; + long long recs_per_buffer; + off64_t current_offset=0; + off64_t i; + char *dummyfile[MAXSTREAMS]; /* name of dummy file */ + char *nbuff=0; + char *maddr=0; + char *wmaddr=0; + volatile char *buffer1; + int anwser,bind_cpu; + off64_t traj_offset; + off64_t lock_offset=0; + char tmpname[256]; + char now_string[30]; + FILE *thread_randrfd=0; + FILE *thread_Lwqfd=0; + long long *recnum=0; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif + long long save_pos; +#if defined (bsd4_2) || defined(Windows) + long long rand1,rand2,rand3; +#endif + unsigned long long big_rand; +#ifdef ASYNC_IO + struct cache *gc=0; +#else + long long *gc=0; +#endif +#ifdef MERSENNE + unsigned long long init[4]={0x12345ULL, 0x23456ULL, 0x34567ULL, 0x45678ULL}, length=4; +#endif + +#ifdef MERSENNE + init_by_array64(init, length); +#else +#ifdef bsd4_2 + srand(0); +#else +#ifdef Windows + srand(0); +#else + 
srand48(0); +#endif +#endif +#endif + recnum = (long long *)malloc(sizeof(*recnum)*numrecs64); + if (recnum){ + /* pre-compute random sequence based on + Fischer-Yates (Knuth) card shuffle */ + for(i = 0; i < numrecs64; i++){ + recnum[i] = i; + } + for(i = 0; i < numrecs64; i++) { + long long tmp = recnum[i]; +#ifdef MERSENNE + big_rand = genrand64_int64(); +#else +#ifdef bsd4_2 + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); +#else +#ifdef Windows + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); +#else + big_rand = lrand48(); +#endif +#endif +#endif + big_rand = big_rand%numrecs64; + tmp = recnum[i]; + recnum[i] = recnum[big_rand]; + recnum[big_rand] = tmp; + } + } + else + { + fprintf(stderr,"Random uniqueness fallback.\n"); + } + if(compute_flag) + delay=compute_time; + hist_time=thread_qtime_stop=thread_qtime_start=0; + traj_offset=walltime=cputime=0; + anwser=bind_cpu=0; + written_so_far=ranread_so_far=re_written_so_far=re_read_so_far=0; + recs_per_buffer = cache_size/reclen ; +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + xx = (long long)((long)x); + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#if defined( _HPUX_SOURCE ) || defined ( linux ) + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; +#if defined(_HPUX_SOURCE) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + 
sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + if(oflag) + { + flags=O_RDONLY|O_SYNC; + } + else + flags=O_RDONLY; +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + +#if ! defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif + +#if defined(Windows) + if(unbuffered) + { + hand=CreateFile(dummyfile[xx], + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_WRITE|FILE_SHARE_READ, + NULL,OPEN_EXISTING,FILE_FLAG_NO_BUFFERING| + FILE_FLAG_WRITE_THROUGH|FILE_FLAG_POSIX_SEMANTICS, + NULL); + } +#endif + if((fd = I_OPEN(dummyfile[xx], ((int)flags),0))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + perror(dummyfile[xx]); + exit(156); + } +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif + +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif + + if(mmapflag) + { + maddr=(char *)initfile(fd,(numrecs64*reclen),0,PROT_READ); + } + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + if(debug1) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child 
%lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + /*****************/ + /* Children only */ + /*****************/ + if(fetchon) + fetchit(nbuff,reclen); + if(Q_flag) + { + sprintf(tmpname,"Child_%d_randrol.dat",(int)xx); + thread_randrfd=fopen(tmpname,"a"); + if(thread_randrfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_randrfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Random read start: ", + now_string); + } + child_stat=(struct child_stats *)&shmaddr[xx]; + child_stat->flag = CHILD_STATE_READY; + if(distributed && client_iozone) + { + tell_master_ready(chid); + wait_for_master_go(chid); + } + else + { + while(child_stat->flag!=CHILD_STATE_BEGIN) /* Wait for signal from parent */ + Poll((long long)1); + } + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + +#ifdef MERSENNE + init_by_array64(init, length); +#else +#ifdef bsd4_2 + srand(0); +#else +#ifdef Windows + srand(0); +#else + srand48(0); +#endif +#endif +#endif + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)1)!=0) + printf("File lock for read failed. 
%d\n",errno); + for(i=0; iflag = CHILD_STATE_HOLD; + exit(160); + } + } + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)1, + lock_offset, reclen); + } + save_pos=current_offset/reclen; + current_offset+=reclen; + if(verify){ + if(async_flag && no_copy_flag) + { + if(verify_buffer(buffer1,reclen,(off64_t)save_pos,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(161); + } + } + else + { + if(verify_buffer(nbuff,reclen,(off64_t)save_pos,reclen,(long long)pattern,sverify)){ + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(162); + } + } + } + if(async_flag && no_copy_flag) + async_release(gc); + ranread_so_far+=reclen/1024; + if(*stop_flag) + { + ranread_so_far-=reclen/1024; + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start-time_res); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long) ((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_randrfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_randrfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + } + if(file_lock) + if(mylockf((int) fd,(int)0,(int)1)) + printf("Read unlock failed %d\n",errno); +#ifdef ASYNC_IO + 
if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(include_flush) + { + if(mmapflag) + { + msync(maddr,(size_t)(numrecs64*reclen),MS_SYNC); + }else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + { + mmap_end(maddr,(unsigned long long)numrecs64*reclen); + } + close(fd); + } + temp_time = time_so_far(); + child_stat=(struct child_stats *)&shmaddr[xx]; + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + if(OPS_flag){ + ranread_so_far=(ranread_so_far*1024)/reclen; + } + child_stat->throughput = ranread_so_far/child_stat->throughput; + child_stat->actual = ranread_so_far; + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + if(cdebug) + { + fprintf(newstdout,"Child %d: throughput %f actual %f \n",(int)chid,child_stat->throughput, + child_stat->actual); + fflush(newstdout); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_RANDOM_READ_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)(numrecs64*reclen),MS_SYNC); + mmap_end(maddr,(unsigned long long)numrecs64*reclen); + }else + fsync(fd); + close(fd); + } + if(Q_flag && (thread_randrfd !=0) ) + fclose(thread_randrfd); + free(dummyfile[xx]); + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + + if(L_flag) + { + get_date(now_string); + 
fprintf(thread_Lwqfd,"%-25s %s","Random read finished: ",now_string); + fclose(thread_Lwqfd); + } + if(recnum) + free(recnum); + if(hist_summary) + dump_hist("Random Read",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} + +/************************************************************************/ +/* Thread random write test */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void * +thread_ranwrite_test(void *x) +#else +void * +thread_ranwrite_test( x) +#endif +{ + + struct child_stats *child_stat; + double starttime1 = 0; + double temp_time; + double walltime, cputime; + double compute_val = (double)0; + float delay = (double)0; + double thread_qtime_stop,thread_qtime_start; + double hist_time; + double desired_op_rate_time; + double actual_rate; + off64_t traj_offset; + off64_t current_offset=0; + long long flags; + long long w_traj_bytes_completed; + long long w_traj_ops_completed; + int fd; + long long recs_per_buffer; + long long stopped,i; + off64_t written_so_far, read_so_far, re_written_so_far,re_read_so_far; + long long xx,xx2; + char *dummyfile [MAXSTREAMS]; /* name of dummy file */ + char *nbuff=0; + char *maddr=0; + char *wmaddr=0; + char *free_addr=0; + int anwser,bind_cpu,wval; + off64_t filebytes64; + off64_t lock_offset=0; + char tmpname[256]; + char now_string[30]; + FILE *thread_randwqfd=0; + FILE *thread_Lwqfd=0; + long long *recnum = 0; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif +#if defined (bsd4_2) || defined(Windows) + long long rand1,rand2,rand3; +#endif + unsigned long long big_rand; + +#ifdef ASYNC_IO + struct cache *gc=0; + +#else + long long *gc=0; +#endif +#ifdef MERSENNE + unsigned long long init[4]={0x12345ULL, 0x23456ULL, 0x34567ULL, 0x45678ULL}, length=4; +#endif + + if(compute_flag) + delay=compute_time; + 
hist_time=thread_qtime_stop=thread_qtime_start=0; + traj_offset=walltime=cputime=0; + anwser=bind_cpu=0; + filebytes64 = numrecs64*reclen; + written_so_far=read_so_far=re_written_so_far=re_read_so_far=0; + w_traj_bytes_completed=w_traj_ops_completed=0; + recs_per_buffer = cache_size/reclen ; +#ifdef MERSENNE + init_by_array64(init, length); +#else +#ifdef bsd4_2 + srand(0); +#else +#ifdef Windows + srand(0); +#else + srand48(0); +#endif +#endif +#endif + recnum = (long long *) malloc(sizeof(*recnum)*numrecs64); + if (recnum){ + /* pre-compute random sequence based on + Fischer-Yates (Knuth) card shuffle */ + for(i = 0; i < numrecs64; i++){ + recnum[i] = i; + } + for(i = 0; i < numrecs64; i++) { + long long tmp = recnum[i]; +#ifdef MERSENNE + big_rand = genrand64_int64(); +#else +#ifdef bsd4_2 + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); +#else +#ifdef Windows + rand1=(long long)rand(); + rand2=(long long)rand(); + rand3=(long long)rand(); + big_rand=(rand1<<32)|(rand2<<16)|(rand3); +#else + big_rand = lrand48(); +#endif +#endif +#endif + big_rand = big_rand%numrecs64; + tmp = recnum[i]; + recnum[i] = recnum[big_rand]; + recnum[big_rand] = tmp; + } + } + else + { + fprintf(stderr,"Random uniqueness fallback.\n"); + } +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + { + xx = (long long)((long)x); + } + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#if defined( _HPUX_SOURCE ) || defined ( linux ) + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; +#if defined(_HPUX_SOURCE) + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); +#else + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(bind_cpu, &cpuset); + + pthread_setaffinity_np(pthread_self(), sizeof(cpuset),&cpuset); +#endif + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + 
nbuff=buffer; + if(debug1 ) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + /*****************/ + /* Children only */ + /*******************************************************************/ + /* Random write throughput performance test. **********************/ + /*******************************************************************/ + if(oflag) + flags=O_RDWR|O_SYNC|O_CREAT; + else + flags=O_RDWR|O_CREAT; +#if defined(O_DSYNC) + if(odsync) + flags |= O_DSYNC; +#endif +#if defined(_HPUX_SOURCE) || defined(linux) + if(read_sync) + flags |=O_RSYNC|O_SYNC; +#endif + +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + if(direct_flag) + flags |=O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + flags |=O_DIRECTIO; +#endif +#endif +#if defined(Windows) + if(unbuffered) + { + hand=CreateFile(dummyfile[xx], + GENERIC_READ|GENERIC_WRITE, + FILE_SHARE_WRITE|FILE_SHARE_READ, + NULL,OPEN_EXISTING,FILE_FLAG_NO_BUFFERING| + FILE_FLAG_WRITE_THROUGH|FILE_FLAG_POSIX_SEMANTICS, + NULL); + } +#endif + if((fd = I_OPEN(dummyfile[xx], ((int)flags),0640))<0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("\nCan not open temp file: %s\n", + filename); + perror("open"); + exit(125); + } +#ifdef VXFS + if(direct_flag) + { + ioctl(fd,VX_SETCACHE,VX_DIRECT); + ioctl(fd,VX_GETCACHE,&test_foo); + if(test_foo == 0) + { + if(!client_iozone) + printf("\nVxFS advanced setcache feature not available.\n"); + exit(3); + } + } +#endif +#if defined(solaris) + if(direct_flag) + { + test_foo = directio(fd, DIRECTIO_ON); + if(test_foo != 0) + { + if(!client_iozone) + printf("\ndirectio not available.\n"); + exit(3); + } + } +#endif +#ifdef ASYNC_IO + if(async_flag) + async_init(&gc,fd,direct_flag); +#endif + if(mmapflag) + { + maddr=(char *)initfile(fd,(filebytes64),1,PROT_READ|PROT_WRITE); + } + if(reclen < cache_size ) + { + recs_per_buffer = cache_size/reclen ; + nbuff=&nbuff[(xx%recs_per_buffer)*reclen]; + } + if(fetchon) /* Prefetch into processor cache */ + fetchit(nbuff,reclen); + + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + child_stat->flag=CHILD_STATE_READY; /* Tell parent child is ready to go */ + if(distributed && client_iozone) + { + tell_master_ready(chid); + wait_for_master_go(chid); + } + else + { + while(child_stat->flag!=CHILD_STATE_BEGIN) /* Wait for signal from parent */ + Poll((long long)1); + } + written_so_far=0; + child_stat = (struct 
child_stats *)&shmaddr[xx]; + child_stat->actual = 0; + child_stat->throughput = 0; + stopped=0; + if(file_lock) + if(mylockf((int) fd, (int) 1, (int)0) != 0) + printf("File lock for write failed. %d\n",errno); + if(Q_flag) + { + sprintf(tmpname,"Child_%d_randwol.dat",(int)xx); + thread_randwqfd=fopen(tmpname,"a"); + if(thread_randwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_randwqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Random write start: ", + now_string); + } + if((verify && !no_copy_flag) || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,(long long)0); + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + for(i=0; ithroughput = + (time_so_far() - starttime1)-time_res; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput = time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + if(debug1) + { + printf("\n(%ld) Stopped by another\n", (long)xx); + } + stopped=1; + } + if(purge) + purgeit(nbuff,reclen); + if(Q_flag || hist_summary) + { + thread_qtime_start=time_so_far(); + } +again: + if(mmapflag) + { + wmaddr = &maddr[current_offset]; + fill_area((long long*)nbuff,(long long*)wmaddr,(long long)reclen); + if(!mmapnsflag) + { + if(mmapasflag) + 
msync(wmaddr,(size_t)reclen,MS_ASYNC); + if(mmapssflag) + msync(wmaddr,(size_t)reclen,MS_SYNC); + } + } + else + { + if(async_flag) + { + if(no_copy_flag) + { + free_addr=nbuff=(char *)malloc((size_t)reclen+page_size); + nbuff=(char *)(((long)nbuff+(long)page_size) & (long)~(page_size-1)); + if(verify || dedup || dedup_interior) + fill_buffer(nbuff,reclen,(long long)pattern,sverify,(long long)(current_offset/reclen)); + async_write_no_copy(gc, (long long)fd, nbuff, reclen, (current_offset), depth,free_addr); + } + else + async_write(gc, (long long)fd, nbuff, reclen, current_offset, depth); + } + else + { + wval = write(fd, nbuff, (size_t) reclen); + if(wval != reclen) + { + if(*stop_flag && !stopped){ + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + } + temp_time = time_so_far(); + child_stat->throughput = + (temp_time - starttime1)-time_res; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + if(debug1) + { + printf("\n(%ld) Stopped by another\n", (long)xx); + } + stopped=1; + goto again; + } + /* Note: Writer must finish even though told + to stop. Otherwise the readers will fail. + The code will capture bytes transfered + before told to stop but let the writer + complete. 
+ */ +#ifdef NO_PRINT_LLD + printf("\nError writing block %ld, fd= %d\n", i, + fd); +#else + printf("\nError writing block %lld, fd= %d\n", i, + fd); +#endif + if(wval==-1) + perror("write"); + if (!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + child_stat->flag = CHILD_STATE_HOLD; + exit(127); + } + } + } + if(rlocking) + { + mylockr((int) fd, (int) 0, (int)0, + lock_offset, reclen); + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start-time_res); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); +/* +printf("Desired rate %g Actual rate %g Nap %g microseconds\n",desired_op_rate_time, + actual_rate, (desired_op_rate_time-actual_rate)); +*/ + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long) ((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_randwqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_randwqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + w_traj_ops_completed++; + w_traj_bytes_completed+=reclen; + written_so_far+=reclen/1024; + if(*stop_flag) + { + written_so_far-=reclen/1024; + w_traj_bytes_completed-=reclen; + } + } + + + if(file_lock) + if(mylockf((int) fd, (int) 0, (int)0)) + printf("Write unlock failed. 
%d\n",errno); + +#ifdef ASYNC_IO + if(async_flag) + { + end_async(gc); + gc=0; + } +#endif + if(!xflag) + { + *stop_flag=1; + if(distributed && client_iozone) + send_stop(); + } + + if(include_flush) + { + if(mmapflag) + msync(maddr,(size_t)filebytes64,MS_SYNC); + else + fsync(fd); + } + if(include_close) + { + if(mmapflag) + mmap_end(maddr,(unsigned long long)filebytes64); + close(fd); + } + if(!stopped){ + temp_time = time_so_far(); + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + } + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + if(cdebug) + { + fprintf(newstdout,"Child %d: throughput %f actual %f \n",(int)chid,child_stat->throughput, + child_stat->actual); + fflush(newstdout); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_RANDOM_WRITE_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + stopped=0; + /*******************************************************************/ + /* End random write performance test. 
******************************/ + /*******************************************************************/ + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + if(!include_close) + { + if(mmapflag) + { + msync(maddr,(size_t)numrecs64*reclen,MS_SYNC); /*Clean up before read starts running*/ + mmap_end(maddr,(unsigned long long)numrecs64*reclen); + }else + fsync(fd); + + close(fd); + } + if(Q_flag && (thread_randwqfd !=0) ) + fclose(thread_randwqfd); + free(dummyfile[xx]); + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Random write finished: ", + now_string); + fclose(thread_Lwqfd); + } + if(recnum) + free(recnum); + if(hist_summary) + dump_hist("Random Write",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} + +/************************************************************************/ +/* Thread cleanup test */ +/* This is not a measurement. It is a mechanism to cleanup all of the */ +/* temporary files that were being used. 
This becomes very important */ +/* when testing multiple clients over a network :-) */ +/************************************************************************/ /* thread_cleanup_test(x): builds this child's temporary file name, unlinks it unless no_unlink is set or check_filename() returns 0, then runs the READY / BEGIN / HOLD handshake through shmaddr[xx]. Returns 0 for a distributed client; otherwise the child leaves through thread_exit() or exit(0). */ +#ifdef HAVE_ANSIC_C +void * +thread_cleanup_test(void *x) +#else +void * +thread_cleanup_test(x) +#endif +{ + long long xx; /* index of this child: chid, or x when use_thread is set */ + struct child_stats *child_stat; + char *dummyfile[MAXSTREAMS]; /* name of dummy file */ + + +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + xx = (long long)((long)x); + else + { + xx=chid; + } +#endif + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); /* NOTE(review): the malloc result is used by sprintf below without a NULL check */ + if(mfflag) /* with mfflag the name in filearray[xx] is used as is, otherwise ".DUMMY.<index>" is appended */ + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx],xx); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx],xx); +#endif + } + if(!no_unlink) + { + if(check_filename(dummyfile[xx])) + unlink(dummyfile[xx]); + } + + child_stat = (struct child_stats *)&shmaddr[xx]; + /*****************/ + /* Children only */ + /*****************/ + child_stat=(struct child_stats *)&shmaddr[xx]; + child_stat->flag = CHILD_STATE_READY; + if(distributed && client_iozone) + { + tell_master_ready(chid); + wait_for_master_go(chid); + } + else + { + while(child_stat->flag!=CHILD_STATE_BEGIN) /* Wait for signal from parent */ + Poll((long long)1); + } + + *stop_flag=1; /* set unconditionally here; the file was already unlinked above, before the handshake */ + if(distributed && client_iozone) + send_stop(); + if(distributed && client_iozone) + tell_master_stats(THREAD_CLEANUP_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + free(dummyfile[xx]); + + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif +return(0); +} + + +/************************************************************************/ 
+/* mythread_create() Internal routine that calls pthread_create() */ +/************************************************************************/ +#ifndef NO_THREADS +#ifdef HAVE_ANSIC_C +long long +mythread_create( void *(*func)(void *),void *x) +#else +long long +mythread_create( func,x) +void *(*func)(void *); +void *x; +#endif +{ + pthread_t ts; + pthread_attr_t attr; + int xx; + int *yy; +#ifdef _64BIT_ARCH_ + long long meme; + meme = (long long)x; +#else + long meme; + meme = (long)x; +#endif + yy=(int *)x; + + +#ifdef OSFV3 + + xx=(int )pthread_create(&ts, pthread_attr_default, + func, (void *)yy); + +#else + pthread_attr_init(&attr); + xx=(int )pthread_create((pthread_t *)&ts, (pthread_attr_t *) &attr, + func, (void *)yy); +#endif + bcopy(&ts,&p_childids[meme],sizeof(pthread_t)); + if(xx < (int)0) + printf("Thread create failed. Returned %d Errno = %d\n",xx,errno); + if(debug1 ) + { + printf("\nthread created has an id of %lx\n",ts); + printf("meme %ld\n",meme); + } + return((long long)meme); +} +#else +#ifdef HAVE_ANSIC_C +long long +mythread_create( void *(*func)(void *),void *x) +#else +long long +mythread_create( func,x) +void *(*func)(void *); +void *x; +#endif +{ + printf("This version does not support threads\n"); + return(-1); +} +#endif + +/************************************************************************/ +/* thread_exit() Internal routine that calls pthread_exit() */ +/************************************************************************/ +#ifndef NO_THREADS +#ifdef HAVE_ANSIC_C +int +thread_exit(void) +#else +int +thread_exit() +#endif +{ + pthread_exit((void *)NULL); +return(0); +} +#else +#ifdef HAVE_ANSIC_C +int +thread_exit(void) +#else +int +thread_exit() +#endif +{ + printf("This version does not support threads\n"); + return(-1); +} +#endif + +/************************************************************************/ +/* mythread_self() Internal function that calls pthread_self() */ 
+/************************************************************************/ /* mythread_self(): returns pthread_self() when threads are compiled in; the NO_THREADS build prints a notice and returns -1. */ +#ifndef NO_THREADS +#ifdef HAVE_ANSIC_C +pthread_t +mythread_self(void) +#else +pthread_t +mythread_self() +#endif +{ + pthread_t xx; + xx = pthread_self(); + return(xx); +} +#else +#ifdef HAVE_ANSIC_C +int +mythread_self(void) +#else +int +mythread_self() +#endif +{ + printf("This version does not support threads\n"); + return(-1); +} +#endif + +/************************************************************************/ +/* Internal thread_join routine... calls pthread_join */ +/************************************************************************/ /* thread_join(tid, status): copies the pthread_t saved in p_childids[tid] and joins that thread. status is not used and the function returns 0; the NO_THREADS build prints a notice and returns (void *)-1. NOTE(review): pthread_join reports failure through a positive return value and does not set errno, so the xx<0 test below presumably never fires and the errno it prints would be unrelated. */ +#ifndef NO_THREADS +#ifdef HAVE_ANSIC_C +void * +thread_join( long long tid, void *status) +#else +void * +thread_join( tid, status) +long long tid; +void *status; +#endif +{ + int xx; + pthread_t eek; + pthread_attr_t foo; /* only used as storage for the exit value that pthread_join writes; never read */ + + bcopy(&p_childids[tid],&eek,sizeof(pthread_t)); + xx=pthread_join(eek,(void **)&foo); + if(xx<0) + printf("Thread join returned error %d\n",errno); + return(0); +} +#else +#ifdef HAVE_ANSIC_C +void * +thread_join( long long tid, void *status) +#else +void * +thread_join( tid, status) +long long tid; +void *status; +#endif +{ + printf("This version does not support threads\n"); + return((void *)-1); +} +#endif + + +/************************************************************************/ +/* Dump the CPU utilization data. */ +/************************************************************************/ /* dump_throughput_cpu(): prints one row per throughput_tests[i] for i below max_x, holding runtimes[i][j].cpuutil for j from 0 through max_y. When bif_flag is set the same labels and values are also sent to do_label() and do_float() using bif_row and bif_column. */ +#ifdef HAVE_ANSIC_C +void +dump_throughput_cpu(void) +#else +void +dump_throughput_cpu() +#endif +{ + long long x,y,i,j; + char *port; + char *label; /* assigned below but not read anywhere in this function */ + char print_str[300]; + x=max_x; + y=max_y; + + port = use_thread ? 
"threads" : "processes"; + printf("\n\"CPU utilization report Y-axis is type of test X-axis is number of %s\"\n",port); /* NOTE(review): not guarded by !silent, unlike the other printf calls in this function */ + if (bif_flag) + { + sprintf(print_str, "CPU utilization report Y-axis is type of test X-axis is number of %s", port); + do_label(bif_fd, print_str, bif_row++, bif_column); + } + label = OPS_flag ? "ops/sec" : + MS_flag ? "microseconds/op" : "Kbytes/sec"; +#ifdef NO_PRINT_LLD + if(!silent) printf("\"Record size = %ld Kbytes \"\n", reclen/1024); +#else + if(!silent) printf("\"Record size = %lld Kbytes \"\n", reclen/1024); +#endif + if(!silent) printf("\"Output is in CPU%%\"\n\n"); + if (bif_flag) + { +#ifdef NO_PRINT_LLD + sprintf(print_str, "Record size = %ld Kbytes", reclen/1024); +#else + sprintf(print_str, "Record size = %lld Kbytes", reclen/1024); +#endif + do_label(bif_fd, print_str, bif_row++, bif_column); + sprintf(print_str, "Output is in CPU%%"); + do_label(bif_fd, print_str, bif_row++, bif_column); + } + for (i = 0; i < x; i++) /* one output row per test */ + { + if(!silent) printf("\"%15s \"", throughput_tests[i]); + if (bif_flag) + { + sprintf(print_str, "%15s ", throughput_tests[i]); + do_label(bif_fd, print_str, bif_row, bif_column++); + bif_column++; /* second increment leaves one column unused after the label */ + } + for (j = 0; j <= y; j++) /* bound is inclusive: columns 0 through y */ + { + if (bif_flag) + do_float(bif_fd, runtimes[i][j].cpuutil, bif_row, bif_column++); + if(!silent) printf(" %10.2f ", runtimes[i][j].cpuutil); + } + if(!silent) printf("\n\n"); + if (bif_flag) + { + bif_column=0; + bif_row++; + } + } +} + + +/************************************************************************/ +/* Dump the throughput graphs */ +/************************************************************************/ /* dump_throughput(): prints the throughput table, one row per toutput[i] for i from 0 through toutputindex, holding report_darray[i][j] for j from 0 through max_y. When bif_flag is set it opens the spreadsheet with create_xls(), mirrors every label and value into it, and closes it at the end. Calls dump_throughput_cpu() when cpuutilflag is set. */ +#ifdef HAVE_ANSIC_C +void +dump_throughput(void) +#else +void +dump_throughput() +#endif +{ + long long x,y,i,j; + char *port; + char *label; + char print_str[300]; + x=max_x; /* x is not read again in this function; rows are bounded by toutputindex */ + y=max_y; + + if(use_thread) + port="threads"; + else + port="processes"; + if(!silent) printf("\n\"Throughput report Y-axis is type of test X-axis is number of %s\"\n",port); + 
if(bif_flag) + { + bif_fd=create_xls(bif_filename); + do_label(bif_fd,command_line,bif_row++,bif_column); + sprintf(print_str,"Throughput report Y-axis is type of test X-axis is number of %s",port); + do_label(bif_fd,print_str,bif_row++,bif_column); + } + if(OPS_flag) /* unit label: OPS_flag is tested first, then MS_flag, else Kbytes/sec */ + label="ops/sec"; + else + if(MS_flag) + label="microseconds/op"; + else + label="Kbytes/sec"; +#ifdef NO_PRINT_LLD + if(!silent) printf("\"Record size = %ld Kbytes \"\n",reclen/1024); +#else + if(!silent) printf("\"Record size = %lld Kbytes \"\n",reclen/1024); +#endif + if(!silent) printf("\"Output is in %s\"\n\n",label); + if(bif_flag) + { +#ifdef NO_PRINT_LLD + sprintf(print_str,"Record size = %ld Kbytes",reclen/1024); +#else + sprintf(print_str,"Record size = %lld Kbytes",reclen/1024); +#endif + do_label(bif_fd,print_str,bif_row++,bif_column); + sprintf(print_str,"Output is in %s",label); + do_label(bif_fd,print_str,bif_row++,bif_column); + } + for(i=0;i<=toutputindex;i++) /* bound is inclusive: rows 0 through toutputindex */ + { + if(!silent) printf("\"%15s \"",toutput[i]); + if(bif_flag) + { + sprintf(print_str,"%15s ",toutput[i]); + do_label(bif_fd,print_str,bif_row,bif_column++); + bif_column++; + } + for(j=0;j<=y;j++) + { + if(bif_flag) + { + do_float(bif_fd,(double)report_darray[i][j],bif_row,bif_column++); + } + if(!silent) printf(" %10.2f ",report_darray[i][j]); + } + if(!silent) printf("\n\n"); + if(bif_flag) + { + bif_column=0; + bif_row++; + } + } + if (cpuutilflag) /* the CPU report is written before the spreadsheet is closed below */ + dump_throughput_cpu(); + if(bif_flag) + close_xls(bif_fd); +} + +/************************************************************************/ +/* store_dvalue() */ +/* Stores a value in an in memory array. 
Used by the report function */ +/* to re-organize the output for Excel */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void +store_dvalue(double value) +#else +void +store_dvalue(value) +double value; +#endif +{ + report_darray[current_x][current_y]=value; + current_x++; + if(current_x > max_x) + max_x=current_x; + if(current_y > max_y) + max_y=current_y; + if(max_x >= MAX_X) + { + printf("\nMAX_X too small\n"); + exit(163); + } + if(max_y >= MAXSTREAMS) + { + printf("\nMAXSTREAMS too small\n"); + exit(164); + } +} + +/************************************************************************/ +/* Initialize a file that will be used by mmap. */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +char * +initfile(int fd, off64_t filebytes,int flag,int prot) +#else +char * +initfile(fd, filebytes,flag, prot) +int fd; +off64_t filebytes; +int flag, prot; +#endif +{ + char *pa; + int mflags=0; + long long x; + char *tmp,*stmp; + int file_flags; + long long recs; + long long i; + int dflag = 0; + + if(flag) + { + +#ifdef _HPUX_SOURCE + /* + * Save time, just have the operating system prealloc + * the file + */ + prealloc(fd,filebytes); +#else + /* + * Allocate a temporary buffer to meet any alignment + * contraints of any method. + */ + tmp=(char *)malloc((size_t)reclen * 2); + stmp=tmp; + /* + * Align to a reclen boundary. + */ + tmp = (char *)((((long)tmp + (long)reclen))& ~(((long)reclen-1))); + /* + * Special case.. Open O_DIRECT, and going to be mmap() + * Under Linux, one can not create a sparse file using + * a file that is opened with O_DIRECT + */ + file_flags=fcntl(fd,F_GETFL); + +#if ! 
defined(DONT_HAVE_O_DIRECT) +#if defined(linux) || defined(__AIX__) || defined(IRIX) || defined(IRIX64) || defined(Windows) || defined (__FreeBSD__) + dflag = O_DIRECT; +#endif +#if defined(TRU64) + if(direct_flag) + dflag = O_DIRECTIO; +#endif +#endif + if((file_flags & dflag) !=0) + { + recs=filebytes/reclen; + for (i =0; i microsecs) + break; + } + +/* + select(0,0,0,0,&nap_time); +*/ + + +} + +/************************************************************************/ +/* Function that establishes the resolution */ +/* of the gettimeofday() function. */ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +void +get_resolution(void) +#else +void +get_resolution() +#endif +{ + double starttime, finishtime, besttime = 0; + long j,delay; + int k; + + finishtime=time_so_far1(); /* Warm up the instruction cache */ + starttime=time_so_far1(); /* Warm up the instruction cache */ + delay=j=0; /* Warm up the data cache */ + for(k=0;k<10;k++) + { + while(1) + { + starttime=time_so_far1(); + for(j=0;j< delay;j++) + ; + finishtime=time_so_far1(); + if(starttime==finishtime) + delay++; + else + { + if(k==0) + besttime=(finishtime-starttime); + if((finishtime-starttime) < besttime) + besttime=(finishtime-starttime); + break; + } + } + } + time_res=besttime/1000000.0; +} + +/************************************************************************/ +/* Function that establishes the resolution */ +/* of the getrusage() function. 
*/ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +void +get_rusage_resolution(void) +#else +void +get_rusage_resolution() +#endif +{ + double starttime, finishtime; + long j; + + finishtime=cputime_so_far(); /* Warm up the instruction cache */ + starttime=cputime_so_far(); /* Warm up the instruction cache */ + delay=j=0; /* Warm up the data cache */ + while(1) + { + starttime=cputime_so_far(); + for(j=0;j< delay;j++) + ; + finishtime=cputime_so_far(); + if(starttime==finishtime) + + delay++; + else + break; + } + cputime_res = (finishtime-starttime); /* in seconds */ +} +/************************************************************************/ +/* Time measurement routines. */ +/* Return time in microseconds */ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +static double +time_so_far1(void) +#else +static double +time_so_far1() +#endif +{ + /* For Windows the time_of_day() is useless. It increments in + 55 milli second increments. By using the Win32api one can + get access to the high performance measurement interfaces. 
+ With this one can get back into the 8 to 9 microsecond resolution + */ +#ifdef Windows + LARGE_INTEGER freq,counter; + double wintime; + double bigcounter; + struct timeval tp; + + if(pit_hostname[0]){ + pit_gettimeofday(&tp, (struct timezone *) NULL, pit_hostname, + pit_service); + return ((double) (tp.tv_sec)*1000000.0)+(((double)tp.tv_usec)); + } + else + { + + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&counter); + bigcounter=(double)counter.HighPart *(double)0xffffffff + + (double)counter.LowPart; + wintime = (double)(bigcounter/(double)freq.LowPart); + return((double)wintime*1000000.0); + } +#else +#if defined (OSFV4) || defined(OSFV3) || defined(OSFV5) + struct timespec gp; + + if (getclock(TIMEOFDAY, (struct timespec *) &gp) == -1) + perror("getclock"); + return (( (double) (gp.tv_sec)*1000000.0) + + ( ((float)(gp.tv_nsec)) * 0.001 )); +#else + struct timeval tp; + + if(pit_hostname[0]){ + if (pit_gettimeofday(&tp, (struct timezone *) NULL, pit_hostname, + pit_service) == -1) + perror("pit_gettimeofday"); + return ((double) (tp.tv_sec)*1000000.0) + (((double) tp.tv_usec) ); + } + else + { + if (gettimeofday(&tp, (struct timezone *) NULL) == -1) + perror("gettimeofday"); + return ((double) (tp.tv_sec)*1000000.0) + (((double) tp.tv_usec) ); + } +#endif +#endif +} + +/************************************************************************/ +/* Return the clocks per tick for the times() call. */ +/************************************************************************/ +#ifdef unix +#ifdef HAVE_ANSIC_C +static double +clk_tck(void) /* Get the clocks per tick for times */ +#else +static double +clk_tck() /* Get the clocks per tick for times */ +#endif +{ + return((double)sysconf(_SC_CLK_TCK)); +} + +/************************************************************************/ +/* Return the user time in tics as a double. 
*/ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +static double +utime_so_far(void) /* Return user time in ticks as double */ +#else +static double +utime_so_far() +#endif +{ + struct tms tp; + + times(&tp); + return ((double) (tp.tms_utime)); +} + +/************************************************************************/ +/* Return the system time in tics as a double. */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +static double +stime_so_far(void) /* Return system time in ticks as double */ +#else +static double +stime_so_far() +#endif +{ + struct tms tp; + + times(&tp); + return ((double) (tp.tms_stime)); +} + +/************************************************************************/ +/* Return the CPU (user + system) time in seconds as a double. */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +static double +cputime_so_far(void) /* Return CPU time in seconds as double */ +#else +static double +cputime_so_far() +#endif +{ +#if 0 + struct tms tp; + + times(&tp); + return ((double) (tp.tms_utime + tp.tms_stime) / sc_clk_tck); +#else + struct rusage ru; + + if (getrusage (RUSAGE_SELF, &ru)) + perror ("getrusage"); + return ((double)(ru.ru_utime.tv_sec + ru.ru_stime.tv_sec) + + .000001 *(ru.ru_utime.tv_usec + ru.ru_stime.tv_usec)); +#endif +} +#endif + +/************************************************************************/ +/* Return the CPU utilization ((user + system) / walltime) as a percentage. 
/************************************************************************/
/* Return the CPU utilization ((user + system) / walltime) as a	*/
/* percentage.								*/
/*									*/
/* cputime  - CPU time consumed; values <= 0 are treated as 0.		*/
/* walltime - elapsed time in the same unit; when it is <= 0 the	*/
/*            result is 0.						*/
/* The result is not capped, so it may exceed 100.			*/
/************************************************************************/
#ifdef HAVE_ANSIC_C
static double
cpu_util(double cputime, double walltime)
#else
static double
cpu_util(cputime, walltime)
double cputime, walltime;
#endif
{
	/* No elapsed time: report zero rather than divide by zero. */
	if (walltime <= (double)0.0)
		return((double)0.0);
	if (cputime <= (double)0.0)
		cputime = 0.0;
	return(((double)100.0 * cputime) / walltime);
}
*/ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +int +mylockf(int fd, int op, int rdwr) +#else +int +mylockf(fd, op, rdwr) +int fd, op, rdwr; +#endif +{ + struct flock myflock; + int ret; + if(op==0) /* Generic unlock the whole file */ + { + myflock.l_type=F_UNLCK; + myflock.l_whence=SEEK_SET; + myflock.l_start=0; + myflock.l_len=0; /* The whole file */ + myflock.l_pid=getpid(); + ret=fcntl(fd,F_SETLKW, &myflock); + } + else + /* Generic lock the whole file */ + { + if(rdwr==0) + myflock.l_type=F_WRLCK; /* Apply write lock */ + else + myflock.l_type=F_RDLCK; /* Apply read lock */ + myflock.l_whence=SEEK_SET; + myflock.l_start=0; + myflock.l_len=0; /* The whole file */ + myflock.l_pid=getpid(); + ret=fcntl(fd,F_SETLKW, &myflock); + } + return(ret); +} + +#ifdef HAVE_ANSIC_C +int +mylockr(int fd, int op, int rdwr, off64_t offset, off64_t size) +#else +int +mylockr(fd, op, rdwr, offset, size) +int fd, op, rdwr; +off64_t offset; +off64_t size; +#endif +{ + struct flock myflock; + int ret; + if(op==0) /* Generic unlock the whole file */ + { + /*printf("Child: %lld Unlock offset %lld size %lld\n",chid,offset,size);*/ + myflock.l_type=F_UNLCK; + myflock.l_whence=SEEK_SET; + myflock.l_start=offset; + myflock.l_len=size; /* The whole file */ + myflock.l_pid=getpid(); + ret=fcntl(fd,F_SETLKW, &myflock); + } + else + /* Generic lock the range */ + { + if(rdwr==0) + { + myflock.l_type=F_WRLCK; /* Apply write lock */ + /* printf("Write ");*/ + } + else + { + myflock.l_type=F_RDLCK; /* Apply read lock */ + /* printf("Read ");*/ + } + /*printf("Child: %lld Lock offset %lld size %lld\n",chid, offset,size);*/ + myflock.l_whence=SEEK_SET; + myflock.l_start=offset; + myflock.l_len=size; /* The whole file */ + myflock.l_pid=getpid(); + ret=fcntl(fd,F_SETLKW, &myflock); + } + return(ret); +} +/************************************************************************/ +/* This function is used to simulate compute time that does */ +/* 
not involve the I/O subsystem. */ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +float +do_compute(float comp_delay) +#else +float +do_compute(comp_delay) +float comp_delay; +#endif +{ + double starttime,tmptime; + if(comp_delay == (float)0.0) + return(0.0); + starttime=time_so_far(); + while(1) + { + tmptime=time_so_far()-starttime; + if(tmptime >= (double)comp_delay) + return(tmptime); + } + return(0.0); +} + +/************************************************************************/ +/* This function is intended to cause an interruption */ +/* in the read pattern. It will make a reader have */ +/* jitter in its access behavior. */ +/* When using direct I/O one must use a pagesize transfer. */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void +disrupt(int fd) +#else +void +disrupt(fd) +int fd; +#endif +{ + char *nbuff,*free_addr; + off64_t current; + + free_addr=nbuff=(char *)malloc((size_t)page_size+page_size); + nbuff=(char *)(((long)nbuff+(long)page_size) & (long)~(page_size-1)); + + /* Save current position */ + current = I_LSEEK(fd,0,SEEK_CUR); + + /* Move to beginning of file */ + I_LSEEK(fd,0,SEEK_SET); + + /* Read a little of the file */ + if(direct_flag) + junk=read(fd,nbuff,page_size); + else + junk=read(fd,nbuff,1); + + /* Skip into the file */ + I_LSEEK(fd,page_size,SEEK_SET); + + /* Read a little of the file */ + if(direct_flag) + junk=read(fd,nbuff,page_size); + else + junk=read(fd,nbuff,1); + + /* Restore current position in file, before disruption */ + I_LSEEK(fd,current,SEEK_SET); + free(free_addr); + +} + +#if defined(Windows) +/************************************************************************/ +/* This function is intended to cause an interruption */ +/* in the read pattern. It will make a reader have */ +/* jitter in its access behavior. */ +/* When using direct I/O one must use a pagesize transfer. 
*/ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void +disruptw(HANDLE hand) +#else +void +disruptw(HANDLE) +int hand; +#endif +{ + char *nbuff,*free_addr; + off64_t current; + long retval; + + free_addr=nbuff=(char *)malloc((size_t)page_size+page_size); + nbuff=(char *)(((long)nbuff+(long)page_size) & (long)~(page_size-1)); + + /* Save current position */ + current=SetFilePointer(hand,(LONG)0,0,FILE_CURRENT); + + /* Move to beginning of file */ + SetFilePointer(hand,(LONG)0,0,FILE_BEGIN); + + /* Read a little of the file */ + ReadFile(hand, nbuff, reclen,(LPDWORD)&retval,0); + + /* Skip into the file */ + SetFilePointer(hand,(LONG)page_size,0,FILE_BEGIN); + + /* Read a little of the file */ + ReadFile(hand, nbuff, reclen,(LPDWORD)&retval,0); + + /* Restore current position in file, before disruption */ + SetFilePointer(hand,(LONG)current,0,FILE_BEGIN); + free(free_addr); + +} +#endif + +/************************************************************************/ +/* Read a telemetry file and return the the offset */ +/* for the next operaton. Also, set the size */ +/* in the variable given in the param list. */ +/* which == 0 ... reader calling */ +/* which == 1 ... writer calling */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +long long +get_traj(FILE *traj_fd, long long *traj_size, float *delay, long which) +#else +long long +get_traj(traj_fd, traj_size, delay, which) +FILE *traj_fd; +long long *traj_size; +float *delay; +long which; +#endif +{ + long long traj_offset = 0; + long long tmp2 = 0; + int tmp = 0; + int tokens; + int ret=0; + char *ret1,*where; + char buf[200]; + char sbuf[200]; + int got_line; + + got_line=0; + + while(got_line==0) + { + tokens=0; + ret1=fgets(buf,200,traj_fd); + if(ret1==(char *)0) + { + printf("\n\n\tEarly end of telemetry file. 
Results not accurate.\n"); + signal_handler(); + } + where=(char *)&buf[0]; + strcpy(sbuf,buf); + if((*where=='#') || (*where=='\n')) + continue; + tokens++; + strtok(where," "); + while( (char *)(strtok( (char *)0," ")) != (char *)0) + { + tokens++; + } + got_line=1; + } + if(tokens == 3) + { +#ifdef NO_PRINT_LLD + ret=sscanf(sbuf,"%ld %ld %d\n",&traj_offset,&tmp2,&tmp); +#else + ret=sscanf(sbuf,"%lld %lld %d\n",&traj_offset,&tmp2,&tmp); +#endif + /*printf("\nReading %s trajectory with %d items\n",which?"write":"read",tokens);*/ + *traj_size=tmp2; + *delay= ((float)tmp/1000); + } + if(tokens == 2) + { +#ifdef NO_PRINT_LLD + ret=sscanf(sbuf,"%ld %ld\n",&traj_offset,traj_size); +#else + ret=sscanf(sbuf,"%lld %lld\n",&traj_offset,traj_size); +#endif + *delay=compute_time; + /*printf("\nReading %s trajectory with %d items\n",which?"write":"read",tokens);*/ + } + if((tokens != 2) && (tokens !=3)) + { + printf("\n\tInvalid entry in telemetry file. > %s <\n",sbuf); + exit(178); + } + if(ret==EOF) + { + printf("\n\n\tEarly end of telemetry file. Results not accurate.\n"); + signal_handler(); + } +#ifdef DEBUG +#ifdef NO_PRINT_LLD + if(!silent) printf("\nOffset %lld Size %ld Compute delay %f\n",traj_offset, *traj_size,*delay); +#else + if(!silent) printf("\nOffset %lld Size %lld Compute delay %f\n",traj_offset, *traj_size,*delay); +#endif +#endif + return(traj_offset); +} + +/************************************************************************/ +/* Open the read telemetry file and return file pointer. 
*/ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +FILE * +open_r_traj(void) +#else +FILE * +open_r_traj() +#endif +{ + FILE *fd; + fd=fopen(read_traj_filename,"r"); + if(fd == (FILE *)0) + { + printf("Unable to open read telemetry file \"%s\"\n", + read_traj_filename); + exit(174); + } + return(fd); +} + +/************************************************************************/ +/* Open the write telemetry file and return file pointer. */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +FILE * +open_w_traj(void) +#else +FILE * +open_w_traj() +#endif +{ + FILE *fd; + fd=fopen(write_traj_filename,"r"); + if(fd == (FILE *)0) + { + printf("Unable to open write telemetry file \"%s\"\n", + write_traj_filename); + exit(175); + } + return(fd); +} + +/************************************************************************/ +/* r_traj_size(void) */ +/* This function scans the read telemetry file */ +/* and establishes the number of entries */ +/* and the maximum file offset. */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +void +r_traj_size(void) +#else +void +r_traj_size() +#endif +{ + FILE *fd; + int ret; + long long traj_offset = 0; + long long traj_size = 0; + long long max_offset = 0; + int tokens; + int dummy; + int lines; + char buf[200]; + char sbuf[200]; + char *ret1,*where; + + lines=0; + fd=fopen(read_traj_filename,"r"); + if(fd == (FILE *)0) + { + printf("Unable to open read telemetry file \"%s\"\n", + read_traj_filename); + exit(174); + } + while(1) + { + tokens=0; + ret1=fgets(buf,200,fd); + if(ret1==(char *)0) + break; + where=(char *)&buf[0]; + strcpy(sbuf,buf); + lines++; + if((*where=='#') || (*where=='\n')) + continue; + tokens++; + strtok(where," "); + while( (char *)(strtok( (char *)0," ")) != (char *)0) + { + tokens++; + } + if(tokens==1) + { + printf("\n\tInvalid read telemetry file entry. 
Line %d", + lines); + signal_handler(); + } +#ifdef DEBUG + printf("Tokens = %d\n",tokens); +#endif + if(tokens==3) + { +#ifdef NO_PRINT_LLD + ret=sscanf(sbuf,"%ld %ld %d\n",&traj_offset,&traj_size,&dummy); +#else + ret=sscanf(sbuf,"%lld %lld %d\n",&traj_offset,&traj_size,&dummy); +#endif + } + if(tokens==2) + { +#ifdef NO_PRINT_LLD + ret=sscanf(sbuf,"%ld %ld\n",&traj_offset,&traj_size); +#else + ret=sscanf(sbuf,"%lld %lld\n",&traj_offset,&traj_size); +#endif + } + if((tokens != 2) && (tokens !=3)) + { + printf("\n\tInvalid read telemetry file. Line %d\n",lines); + exit(178); + } + if(traj_offset + traj_size > max_offset) + max_offset=traj_offset + traj_size; + + r_traj_ops++; + } + r_traj_fsize=max_offset; +#ifdef DEBUG + printf("File size of read %lld Item count %lld\n",r_traj_fsize,r_traj_ops); +#endif + fclose(fd); +} + +/************************************************************************/ +/* w_traj_size(void) */ +/* This function scans the write telemetry file */ +/* and establishes the number of entries */ +/* and the maximum file offset. */ +/************************************************************************/ +#ifdef HAVE_ANSIC_C +long long +w_traj_size(void) +#else +long long +w_traj_size() +#endif +{ + FILE *fd; + int ret; + long long traj_offset = 0; + long long traj_size = 0; + long long max_offset = 0; + int dummy; + int tokens,lines; + char *ret1; + char buf[200]; + char sbuf[200]; + char *where; + + lines=0; + + fd=fopen(write_traj_filename,"r"); + if(fd == (FILE *)0) + { + printf("Unable to open write telemetry file \"%s\"\n", + write_traj_filename); + exit(174); + } + while(1) + { + tokens=0; + ret1=fgets(buf,200,fd); + if(ret1==(char *)0) + break; + lines++; + where=(char *)&buf[0]; + strcpy(sbuf,buf); + if((*where=='#') || (*where=='\n')) + continue; + tokens++; + strtok(where," "); + while( (char *)(strtok( (char *)0," ")) != (char *)0) + { + tokens++; + } + if(tokens==1) + { + printf("\n\tInvalid write telemetry file entry. 
Line %d\n", + lines); + signal_handler(); + } + if(tokens==3) + { +#ifdef NO_PRINT_LLD + ret=sscanf(sbuf,"%ld %ld %d\n",&traj_offset,&traj_size,&dummy); +#else + ret=sscanf(sbuf,"%lld %lld %d",&traj_offset,&traj_size,&dummy); +#endif + } + if(tokens==2) + { +#ifdef NO_PRINT_LLD + ret=sscanf(sbuf,"%ld %ld\n",&traj_offset,&traj_size); +#else + ret=sscanf(sbuf,"%lld %lld\n",&traj_offset,&traj_size); +#endif + } + if(tokens > 3) + { + printf("\n\tInvalid write telemetry file entry. Line %d\n", + lines); + exit(174); + } + if(traj_offset + traj_size > max_offset) + max_offset=traj_offset + traj_size; + + w_traj_ops++; + } + w_traj_fsize=max_offset; +#ifdef DEBUG + printf("File size of write %lld Item count %lld\n",w_traj_fsize,w_traj_ops); +#endif + fclose(fd); + return(max_offset); +} + +/************************************************************************/ +/* Find which version of the telemetry file format is in use. */ +/************************************************************************/ + +#ifdef HAVE_ANSIC_C +void +traj_vers(void) +#else +void +traj_vers() +#endif +{ + FILE *fd; + char *where; + char buf[200]; + int things; + char *ret1; + + if(r_traj_flag) + { + things=0; + fd=fopen(read_traj_filename,"r"); + if(fd == (FILE *)0) + { + printf("Unable to open read telemetry file \"%s\"\n", read_traj_filename); + exit(174); + } +loop1: + ret1=fgets(buf,200,fd); + if(ret1==(char *)0) + { + fclose(fd); + return; + } + where=(char *)&buf[0]; + if((*where=='#') || (*where=='\n')) + goto loop1; + things++; + strtok(where," "); + while( (char *)(strtok( (char *)0," ")) != (char *)0) + { + things++; + } + r_traj_items=things; +#ifdef DEBUG + printf("Found %d items in the read telemetry file\n",things); +#endif + } + if(w_traj_flag) + { + things=0; + fd=fopen(write_traj_filename,"r"); + if(fd == (FILE *)0) + { + printf("Unable to open write telemetry file \"%s\"\n", write_traj_filename); + exit(174); + } +loop2: + ret1=fgets(buf,200,fd); + if(ret1==(char *)0) + { 
+ fclose(fd); + return; + } + where=(char *)&buf[0]; + if((*where=='#') || (*where=='\n')) + goto loop2; + things++; + strtok(where," "); + while( (char *)(strtok( (char *)0," ")) != (char *)0) + { + things++; + } + fclose(fd); + w_traj_items=things; +#ifdef DEBUG + printf("Found %d items in the write telemetry file\n",things); +#endif + } +} + +/********************************************************************/ +/* */ +/* Today this initializes the default set of file sizes for Iozone. */ +/* in the future it may take input from the command line or */ +/* from a file. */ +/* */ +/********************************************************************/ +#ifdef HAVE_ANSIC_C +void +init_file_sizes( off64_t min_f_size, off64_t max_f_size) +#else +void +init_file_sizes(min_f_size, max_f_size) +off64_t min_f_size; +off64_t max_f_size; +#endif +{ + off64_t kilosi; + int x; + if(s_count > 1) + { + for(x=0; x < s_count; x++) + { + kilosi=s_range[x]; + add_file_size((off64_t)kilosi); + } + } + else + { + for(kilosi=min_f_size;kilosi<=max_f_size;kilosi*=multiplier) + { + add_file_size((off64_t)kilosi); + } + } +} + +/********************************************************************/ +/* Used to constuct the list of file sizes to test. 
*/ +/********************************************************************/ +#ifdef HAVE_ANSIC_C +void +add_file_size(off64_t size) +#else +void +add_file_size(size) +off64_t size; +#endif +{ + struct size_entry *size_listp; + struct size_entry *nsize_list; + + size_listp=size_list; + + if(size_list) + { + if(size_listp->next) + while(size_listp->next!=0) + size_listp=size_listp->next; + } + nsize_list=(struct size_entry *)malloc(sizeof(struct size_entry)); + if(nsize_list==0) + { + printf("Malloc failed in add_file_size\n"); + exit(180); + } + nsize_list->next=0; + nsize_list->size=size; + if(size_list == 0) + size_list=nsize_list; + else + size_listp->next=nsize_list; + size_listp=size_list; +} + +/********************************************************************/ +/* Return the next file size to test. */ +/********************************************************************/ +#ifdef HAVE_ANSIC_C +off64_t +get_next_file_size(off64_t size) +#else +off64_t +get_next_file_size(size) +off64_t size; +#endif +{ + struct size_entry *size_listp; + + size_listp=size_list; + + for( ; size_listp ; size_listp=size_listp->next ) + { + if(size_listp->size > size) + return(size_listp->size); + } + return((off64_t)0); +} + + +/**********************************************************************/ +/* */ +/* Today this initializes the default set of record sizes for Iozone. */ +/* in the future it may take input from the command line or */ +/* from a file. 
*/ +/* */ +/**********************************************************************/ +#ifdef HAVE_ANSIC_C +void +init_record_sizes( off64_t min_r_size, off64_t max_r_size) +#else +void +init_record_sizes(min_r_size, max_r_size) +off64_t min_r_size; +off64_t max_r_size; +#endif +{ + int x; + off64_t size; + if(r_count > 1) + { + for(x=0; x < r_count; x++) + { + size=r_range[x]; + add_record_size((off64_t)size); + } + } + else + { + for(size=min_r_size;size<=max_r_size;size*=multiplier) + { + add_record_size((off64_t)size); + } + } +} + +#ifdef HAVE_ANSIC_C +void +del_record_sizes(void) +#else +void +del_record_sizes() +#endif +{ + struct size_entry *size_listp; + struct size_entry *save_item; + + size_listp=rec_size_list; + if(rec_size_list) + { + while(size_listp!=0) + { + save_item=size_listp->next; + free(size_listp); + size_listp=save_item; + } + } + rec_size_list=0; +} + +/********************************************************************/ +/* Used to constuct the list of record sizes to test. */ +/********************************************************************/ +#ifdef HAVE_ANSIC_C +void +add_record_size(off64_t size) +#else +void +add_record_size(size) +off64_t size; +#endif +{ + struct size_entry *size_listp; + struct size_entry *nsize_list; + + size_listp=rec_size_list; + + if(rec_size_list) + { + if(size_listp->next) + while(size_listp->next!=0) + size_listp=size_listp->next; + } + nsize_list=(struct size_entry *)malloc(sizeof(struct size_entry)); + if(nsize_list==0) + { + printf("Malloc failed in add_file_size\n"); + exit(180); + } + nsize_list->next=0; + nsize_list->size=size; + if(rec_size_list == 0) + rec_size_list=nsize_list; + else + size_listp->next=nsize_list; + size_listp=rec_size_list; +} + +/********************************************************************/ +/* Return the next record size to test. 
*/ +/********************************************************************/ +#ifdef HAVE_ANSIC_C +off64_t +get_next_record_size(off64_t size) +#else +off64_t +get_next_record_size(size) +off64_t size; +#endif +{ + struct size_entry *size_listp; + + size_listp=rec_size_list; + + for( ; size_listp ; size_listp=size_listp->next ) + { + if(size_listp->size > size) + return(size_listp->size); + } + return((off64_t)0); +} + + +/* + * Socket based communication mechanism. + * It's intended use is to be the communication mechanism + * that will be used to get Iozone to run across + * multiple clients. 1/11/2002 Don Capps + * The communication model permits a master to send and receive + * messages to and from clients, and for clients to be able to + * send and receive messages to and from the master. + */ +/* + * Interfaces are: + Master: + int start_master_listen(void) + Called to create masters listening port. + + void master_listen(int sock, int size_of_message) + Call when master wants to block and read + a message. + + int start_master_send(char *child_host_name, int port) + Call to start a send channel to a client. + + void master_send(int child_socket_val, char *host_name, + char *send_buffer, int send_size) + Call to send message to a client. + + void stop_master_listen(int master_socket_val) + Call to release the masters listening port. + + void stop_master_send(int child_socket_val) + Call to release the masters send port to a client. + + Clients: + int start_child_listen(int size_of_message) + Called to create clients listening port. + + void child_listen(int sock, int size_of_message) + Call when client wants to block and read + a message from the master. + + void child_send(int child_socket_val, char *controlling_host_name, + char *send_buffer, int send_size) + Call to send message to the master. + + void stop_child_listen(int child_socket_val) + Call to release the clients listening port. 
+ + void stop_child_send(int child_socket_val) + Call to release the clients send port to the master. + + + Messages are sent in command blocks. The structure is + client_command for messages from the master to the + client, and master_command for messages sent from + a client to the master. +*/ + + +/* + * Allocate the master listening port that + * all children will use to send messages to the master. + */ +#ifdef HAVE_ANSIC_C +int +start_master_listen(void) +#else +int +start_master_listen() +#endif +{ + int s; + int rc; + int tmp_port; + int sockerr; + struct sockaddr_in addr; + int recv_buf_size=65536*4; + int optval=1; + struct linger dummy={1,0}; + + s = socket(AF_INET, SOCK_STREAM, 0); + if (s < 0) + { + perror("socket failed:"); + exit(19); + } + sockerr = setsockopt (s, SOL_SOCKET, SO_RCVBUF, (char *) + &recv_buf_size, sizeof(int)); + if ( sockerr == -1 ) { + perror("Error in setsockopt 1\n"); + } + sockerr = setsockopt (s, SOL_SOCKET, SO_REUSEADDR, (char *) + &optval, sizeof(int)); + if ( sockerr == -1 ) { + perror("Error in setsockopt 2\n"); + } + sockerr = setsockopt (s, SOL_SOCKET, SO_LINGER, (char *) + &dummy, sizeof(struct linger)); + if ( sockerr == -1 ) { + perror("Error in setsockopt 2\n"); + } + tmp_port=HOST_LIST_PORT; + bzero(&addr, sizeof(struct sockaddr_in)); + addr.sin_port = htons(tmp_port); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = INADDR_ANY; + rc = -1; + while (rc < 0) + { + rc = bind(s, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)); + if(rc < 0) + { + tmp_port++; + addr.sin_port=htons(tmp_port); + continue; + } + master_listen_port = ntohs(addr.sin_port); + } + if(rc < 0) + { + perror("bind failed\n"); + exit(20); + } + + if(mdebug) + printf("Master listening on socket %d Port %d\n",s,tmp_port); + return(s); +} + +/* + * Master listens for messages and blocks until + * something arrives. 
+ */
+/* Peer address that accept() fills in for master_listen(). */
+struct sockaddr_in listener_sync_sock;
+
+/*
+ * master_listen: accept one connection on "sock" and read one message
+ * of "size_of_message" bytes from it into master_rcv_buf, then close
+ * the accepted connection.
+ *
+ * A failing accept() is reported and retried after one second. A
+ * message that cannot be read completely is reported with
+ * "Master read failed" and the function still returns.
+ */
+#ifdef HAVE_ANSIC_C
+void
+master_listen(int sock, int size_of_message)
+#else
+void
+master_listen(sock, size_of_message)
+int sock, size_of_message;
+#endif
+{
+	int tsize;
+	int s;
+	struct sockaddr_in *addr;
+	unsigned int me;
+	int ns,ret;
+	int got;
+	char *dest;
+
+	dest=(char *)&master_rcv_buf[0];
+	tsize = size_of_message;
+	addr=&listener_sync_sock;
+	s = sock;
+
+	if(mdebug)
+		printf("Master in listening mode on socket %d\n",s);
+again:
+	ret=listen(s,MAXSTREAMS);
+	if(ret != 0)
+	{
+		perror("Master: listen returned error\n");
+	}
+	if(mdebug)
+		printf("Master in accepting connection\n");
+	/* accept() updates the length argument, so reset it on every try. */
+	me=sizeof(struct sockaddr_in);
+	ns=accept(s,(void *)addr,&me);
+	if(ns < 0)
+	{
+		printf("Master socket %d\n",s);
+		perror("Master: ***** accept returned error *****\n");
+		sleep(1);
+		goto again;
+	}
+	if(mdebug)
+		printf("Master in reading from connection\n");
+
+	/*
+	 * A stream socket may deliver the message in several pieces, so
+	 * keep reading until the whole message has arrived, the peer
+	 * closes the connection, or read() fails.
+	 */
+	got = 0;
+	ret = 0;
+	while(got < tsize)
+	{
+		ret=read(ns,dest+got,tsize-got);
+		if(ret <= 0)
+			break;
+		got+=ret;
+	}
+	if(got < tsize)
+	{
+		printf("Master read failed. Ret %d Errno %d\n",
+			(ret < 0) ? ret : got,errno);
+	}
+
+	close(ns);
+}
+
+/*
+ * Child sends message to master.
+ */
+
+/*
+ * child_send: open a fresh connection to the master at
+ * controlling_host_name:controlling_host_port, send one
+ * master_neutral_command built from *send_buffer and close the
+ * connection again.
+ *
+ * The local end is bound to the first free port at or above
+ * CHILD_ESEND_PORT. connect() is retried on the same socket; once the
+ * retry budget is used up the socket is closed and the whole sequence
+ * restarts at "over" with a new socket, so the function only returns
+ * after the message has been written.
+ *
+ * send_size is only reported in the debug trace.
+ * Exit statuses: 22 unknown host, 23 socket(), 24 bind(), 26 write().
+ */
+#ifdef HAVE_ANSIC_C
+void
+child_send(char *controlling_host_name, struct master_command *send_buffer, int send_size)
+#else
+void
+child_send(controlling_host_name, send_buffer, send_size)
+char *controlling_host_name;
+struct master_command *send_buffer;
+int send_size;
+#endif
+{
+
+	int rc,child_socket_val;
+	struct hostent *he;
+	int tmp_port;
+	struct in_addr *ip;
+	struct sockaddr_in cs_addr,cs_raddr;
+	struct master_neutral_command outbuf;
+	struct timespec req,rem;
+
+	/* 10 ms pause used between connection attempts. */
+	req.tv_sec = 0;
+	req.tv_nsec = 10000000;
+	rem.tv_sec = 0;
+	rem.tv_nsec = 10000000;
+
+	if(cdebug)
+	{
+		fprintf(newstdout,"Start_child_send: %s Size %d\n",controlling_host_name,send_size);
+		fflush(newstdout);
+	}
+	he = gethostbyname(controlling_host_name);
+	if (he == NULL)
+	{
+		exit(22);
+	}
+	ip = (struct in_addr *)he->h_addr_list[0];
+
+over:
+	cs_raddr.sin_family = AF_INET;
+	cs_raddr.sin_port = htons(controlling_host_port);
+	cs_raddr.sin_addr.s_addr = ip->s_addr;
+	child_socket_val = socket(AF_INET, SOCK_STREAM, 0);
+	if (child_socket_val < 0)
+	{
+		perror("Child: socket failed:");
+		exit(23);
+	}
+	bzero(&cs_addr, sizeof(struct sockaddr_in));
+	tmp_port= CHILD_ESEND_PORT;
+	cs_addr.sin_port = htons(tmp_port);
+	cs_addr.sin_family = AF_INET;
+	cs_addr.sin_addr.s_addr = INADDR_ANY;
+	/*
+	 * Probe upward for a free local port. Only "address in use" is
+	 * retried; any other bind error is permanent and now reaches the
+	 * error exit below instead of looping forever.
+	 */
+	rc = bind(child_socket_val, (struct sockaddr *)&cs_addr,
+		sizeof(struct sockaddr_in));
+	while (rc < 0 && errno == EADDRINUSE && tmp_port < 65535)
+	{
+		tmp_port++;
+		cs_addr.sin_port=htons(tmp_port);
+		rc = bind(child_socket_val, (struct sockaddr *)&cs_addr,
+			sizeof(struct sockaddr_in));
+	}
+	if (rc < 0)
+	{
+		perror("Child: bind failed\n");
+		exit(24);
+	}
+	if(cdebug)
+	{
+		/* Report the port that is really used for the connect below. */
+		fprintf(newstdout,"Child sender bound to port %d Master port %d \n",tmp_port,controlling_host_port);
+		fflush(newstdout);
+	}
+again:
+	nanosleep(&req,&rem);
+	rc = connect(child_socket_val, (struct sockaddr *)&cs_raddr,
+		sizeof(struct sockaddr_in));
+	if (rc < 0)
+	{
+		/* ecount is not declared in this function. */
+		if((ecount++ < 200) && (errno != EISCONN))
+		{
+			nanosleep(&req,&rem);
+			/*sleep(1);*/
+			goto again;
+		}
+		if(cdebug)
+		{
+			fprintf(newstdout,"Child: connect failed. Errno %d \n",errno);
+			fflush(newstdout);
+		}
+		close(child_socket_val);
+		nanosleep(&req,&rem);
+		/*sleep(1);*/
+		ecount=0;
+		goto over;
+	}
+	ecount=0;
+	if(cdebug)
+	{
+		fprintf(newstdout,"Child connected\n");
+		fflush(newstdout);
+	}
+
+	/* NOW send */
+
+	bzero(&outbuf, sizeof(struct master_neutral_command));
+	if(cdebug>=1)
+	{
+		fprintf(newstdout,"Child %d sending message to %s \n",(int)chid, controlling_host_name);
+		fflush(newstdout);
+	}
+	/*
+	 * Convert internal commands to string format to neutral format for portability.
+	 * Every field of the neutral block is a character buffer inside
+	 * outbuf, so each conversion is bounded by the size of its own
+	 * field; an over-long value is truncated instead of running into
+	 * the neighbouring field.
+	 */
+#define CS_PUT(field, fmt) \
+	snprintf(outbuf.field, sizeof(outbuf.field), fmt, send_buffer->field)
+	CS_PUT(m_host_name, "%s");
+	CS_PUT(m_client_name, "%s");
+	CS_PUT(m_client_number, "%d");
+	CS_PUT(m_client_error, "%d");
+	CS_PUT(m_child_port, "%d");
+	CS_PUT(m_child_async_port, "%d");
+	CS_PUT(m_command, "%d");
+	CS_PUT(m_testnum, "%d");
+	CS_PUT(m_version, "%d");
+	CS_PUT(m_mygen, "%d");
+	CS_PUT(m_throughput, "%f");
+	CS_PUT(m_cputime, "%f");
+	CS_PUT(m_walltime, "%f");
+	CS_PUT(m_stop_flag, "%d");
+	CS_PUT(m_actual, "%f");
+#ifdef NO_PRINT_LLD
+	CS_PUT(m_child_flag, "%ld");
+#else
+	CS_PUT(m_child_flag, "%lld");
+#endif
+#undef CS_PUT
+	rc=write(child_socket_val,&outbuf,sizeof(struct master_neutral_command));
+	if (rc < 0) {
+		perror("write failed\n");
+		exit(26);
+	}
+	close(child_socket_val);
+}
+
+
+/*
+ * Master sending message to a child
+ * There should be a unique child_socket_val for each
+ * child.
+ */
+/*
+ * master_send: convert *send_buffer into a client_neutral_command, in
+ * which every value is carried as text, and write that block to
+ * child_socket_val.
+ *
+ * host_name is only used for the debug trace and send_size is not
+ * used. Exits with status 26 when write() fails.
+ */
+#ifdef HAVE_ANSIC_C
+void
+master_send(int child_socket_val, char *host_name, struct client_command *send_buffer, int send_size)
+#else
+void
+master_send(child_socket_val, host_name, send_buffer, send_size)
+int child_socket_val;
+char *host_name;
+struct client_command *send_buffer;
+int send_size;
+#endif
+{
+	int rc;
+	struct client_neutral_command outbuf;
+
+	bzero(&outbuf,sizeof(struct client_neutral_command));
+	if(mdebug)
+	{
+		printf("Master_neutral_command size = %lu\n",(unsigned long)sizeof(struct master_neutral_command));
+		printf("Client_neutral_command size = %lu\n",(unsigned long)sizeof(struct client_neutral_command));
+	}
+	/*
+	 * Convert internal commands to string format for neutral format/portability.
+	 * Every field of the neutral block is a character buffer inside
+	 * outbuf, so each conversion is bounded by the size of its own
+	 * field; an over-long value is truncated instead of running into
+	 * the neighbouring field.
+	 */
+#define MS_PUT(field, fmt) \
+	snprintf(outbuf.field, sizeof(outbuf.field), fmt, send_buffer->field)
+#ifdef NO_PRINT_LLD
+#define MS_LL_FMT "%ld"
+#else
+#define MS_LL_FMT "%lld"
+#endif
+	/* Names and paths. */
+	MS_PUT(c_host_name, "%s");
+	MS_PUT(c_pit_hostname, "%s");
+	MS_PUT(c_pit_service, "%s");
+	MS_PUT(c_client_name, "%s");
+	MS_PUT(c_working_dir, "%s");
+	MS_PUT(c_file_name, "%s");
+	MS_PUT(c_path_dir, "%s");
+	MS_PUT(c_execute_name, "%s");
+	MS_PUT(c_write_traj_filename, "%s");
+	MS_PUT(c_read_traj_filename, "%s");
+	/* Values sent with the "%d" conversion. */
+	MS_PUT(c_oflag, "%d");
+	MS_PUT(c_mfflag, "%d");
+	MS_PUT(c_unbuffered, "%d");
+	MS_PUT(c_noretest, "%d");
+	MS_PUT(c_notruncate, "%d");
+	MS_PUT(c_read_sync, "%d");
+	MS_PUT(c_jflag, "%d");
+	MS_PUT(c_async_flag, "%d");
+	MS_PUT(c_mmapflag, "%d");
+	MS_PUT(c_k_flag, "%d");
+	MS_PUT(c_h_flag, "%d");
+	MS_PUT(c_mflag, "%d");
+	MS_PUT(c_pflag, "%d");
+	MS_PUT(c_stride_flag, "%d");
+	MS_PUT(c_verify, "%d");
+	MS_PUT(c_sverify, "%d");
+	MS_PUT(c_odsync, "%d");
+	MS_PUT(c_diag_v, "%d");
+	MS_PUT(c_dedup, "%d");
+	MS_PUT(c_dedup_interior, "%d");
+	MS_PUT(c_dedup_compress, "%d");
+	MS_PUT(c_dedup_mseed, "%d");
+	MS_PUT(c_hist_summary, "%d");
+	MS_PUT(c_op_rate, "%d");
+	MS_PUT(c_op_rate_flag, "%d");
+	MS_PUT(c_Q_flag, "%d");
+	MS_PUT(c_L_flag, "%d");
+	MS_PUT(c_include_flush, "%d");
+	MS_PUT(c_OPS_flag, "%d");
+	MS_PUT(c_mmapnsflag, "%d");
+	MS_PUT(c_mmapssflag, "%d");
+	MS_PUT(c_mmapasflag, "%d");
+	MS_PUT(c_no_copy_flag, "%d");
+	MS_PUT(c_include_close, "%d");
+	MS_PUT(c_disrupt_flag, "%d");
+	MS_PUT(c_compute_flag, "%d");
+	MS_PUT(c_xflag, "%d");
+	MS_PUT(c_MS_flag, "%d");
+	MS_PUT(c_mmap_mix, "%d");
+	MS_PUT(c_Kplus_flag, "%d");
+	MS_PUT(c_w_traj_flag, "%d");
+	MS_PUT(c_r_traj_flag, "%d");
+	MS_PUT(c_direct_flag, "%d");
+	MS_PUT(c_cpuutilflag, "%d");
+	MS_PUT(c_seq_mix, "%d");
+	MS_PUT(c_client_number, "%d");
+	MS_PUT(c_command, "%d");
+	MS_PUT(c_testnum, "%d");
+	MS_PUT(c_no_unlink, "%d");
+	MS_PUT(c_no_write, "%d");
+	MS_PUT(c_file_lock, "%d");
+	MS_PUT(c_rec_lock, "%d");
+	MS_PUT(c_Kplus_readers, "%d");
+	MS_PUT(c_multiplier, "%d");
+	MS_PUT(c_share_file, "%d");
+	MS_PUT(c_pattern, "%d");
+	MS_PUT(c_version, "%d");
+	MS_PUT(c_base_time, "%d");
+	MS_PUT(c_num_child, "%d");
+	MS_PUT(c_pct_read, "%d");
+	MS_PUT(c_advise_op, "%d");
+	MS_PUT(c_advise_flag, "%d");
+	MS_PUT(c_restf, "%d");
+	MS_PUT(c_mygen, "%d");
+	/* Values sent with "%ld" under NO_PRINT_LLD and "%lld" otherwise. */
+	MS_PUT(c_delay, MS_LL_FMT);
+	MS_PUT(c_stride, MS_LL_FMT);
+	MS_PUT(c_rest_val, MS_LL_FMT);
+	MS_PUT(c_purge, MS_LL_FMT);
+	MS_PUT(c_fetchon, MS_LL_FMT);
+	MS_PUT(c_numrecs64, MS_LL_FMT);
+	MS_PUT(c_reclen, MS_LL_FMT);
+	MS_PUT(c_child_flag, MS_LL_FMT);
+	MS_PUT(c_delay_start, MS_LL_FMT);
+	MS_PUT(c_depth, MS_LL_FMT);
+	MS_PUT(c_stop_flag, "%d");
+	MS_PUT(c_compute_time, "%f");
+#undef MS_LL_FMT
+#undef MS_PUT
+
+	if(mdebug >= 1)
+		printf("Master sending message to %s \n",host_name);
+	/*rc = send(child_socket_val, (char *)&outbuf, sizeof(struct client_neutral_command), 0);*/
+	rc = write(child_socket_val, (char *)&outbuf, sizeof(struct client_neutral_command));
+	if (rc < 0)
+	{
+		perror("write failed\n");
+		exit(26);
+	}
+}
+
+/*
+ * Close the childs listening port for messages from the master.
+ */
+#ifdef HAVE_ANSIC_C
+void
+stop_child_listen(int child_socket_val)
+#else
+void
+stop_child_listen(child_socket_val)
+int child_socket_val;
+#endif
+{
+	close(child_socket_val);
+}
+
+/*
+ * Close the childs channel for sending messages to the master.
+ */
+#ifdef HAVE_ANSIC_C
+void
+O_stop_child_send(int child_socket_val)
+#else
+void
+O_stop_child_send(child_socket_val)
+int child_socket_val;
+#endif
+{
+	close(child_socket_val);
+}
+
+/*
+ * Close the masters listening channel for all clients messages.
+ */
+#ifdef HAVE_ANSIC_C
+void
+stop_master_listen(int master_socket_val)
+#else
+void
+stop_master_listen(master_socket_val)
+int master_socket_val;
+#endif
+{
+	if(mdebug)
+		printf("Stop master listen\n");
+/*
+	shutdown(master_socket_val,SHUT_RDWR);
+*/
+	/*
+	 * The descriptor is passed by value, so clearing the parameter
+	 * after the close had no effect on the caller; that dead store
+	 * has been dropped.
+	 */
+	close(master_socket_val);
+}
+
+/*
+ * Close the master's send channel to a particular child.
+ */
+#ifdef HAVE_ANSIC_C
+void
+stop_master_send(int child_socket_val)
+#else
+void
+stop_master_send(child_socket_val)
+int child_socket_val;
+#endif
+{
+	close(child_socket_val);
+}
+
+/*
+ * Start the childs listening service for messages from the master.
+ *
+ * Creates a TCP socket, requests SO_RCVBUF, SO_REUSEADDR and SO_LINGER
+ * (a failure to set an option is reported but is not fatal) and binds
+ * child_sync_sock to the first free port at or above
+ * CHILD_LIST_PORT+chid. The bound port is recorded in child_port.
+ * size_of_message is not used here.
+ *
+ * Returns the bound socket. Exits with status 19 when the socket
+ * cannot be created and with status 20 when no port can be bound.
+ */
+#ifdef HAVE_ANSIC_C
+int
+start_child_listen(int size_of_message)
+#else
+int
+start_child_listen(size_of_message)
+int size_of_message;
+#endif
+{
+	int s;
+	int rc;
+	int tmp_port;
+	int sockerr;
+	int recv_buf_size=65536;
+	int optval=1;
+	struct linger dummy={1,0};
+
+	(void)size_of_message;
+	s = socket(AF_INET, SOCK_STREAM, 0);
+	if (s < 0)
+	{
+		perror("socket failed:");
+		exit(19);
+	}
+	sockerr = setsockopt (s, SOL_SOCKET, SO_RCVBUF, (char *)
+		&recv_buf_size, sizeof(int));
+	if ( sockerr == -1 ) {
+		perror("Error in setsockopt 3\n");
+	}
+	sockerr = setsockopt (s, SOL_SOCKET, SO_REUSEADDR, (char *)
+		&optval, sizeof(int));
+	if ( sockerr == -1 ) {
+		perror("Error in setsockopt 4\n");
+	}
+	sockerr = setsockopt (s, SOL_SOCKET, SO_LINGER, (char *)
+		&dummy, sizeof(struct linger));
+	if ( sockerr == -1 ) {
+		perror("Error in setsockopt 4\n");
+	}
+	bzero(&child_sync_sock, sizeof(struct sockaddr_in));
+	tmp_port=CHILD_LIST_PORT+chid;
+	child_sync_sock.sin_port = htons(tmp_port);
+	child_sync_sock.sin_family = AF_INET;
+	child_sync_sock.sin_addr.s_addr = INADDR_ANY;
+	/*
+	 * Probe upward for a free port. Only "address in use" is retried;
+	 * any other bind error is permanent and now reaches the error
+	 * exit below instead of looping forever.
+	 */
+	rc = bind(s, (struct sockaddr *)&child_sync_sock,
+		sizeof(struct sockaddr_in));
+	while (rc < 0 && errno == EADDRINUSE && tmp_port < 65535)
+	{
+		tmp_port++;
+		child_sync_sock.sin_port=htons(tmp_port);
+		rc = bind(s, (struct sockaddr *)&child_sync_sock,
+			sizeof(struct sockaddr_in));
+	}
+	if(rc < 0)
+	{
+		/*
+		 * become_client() only opens newstdout when cdebug is
+		 * set, so the trace is limited to that case.
+		 */
+		if(cdebug)
+		{
+			fprintf(newstdout,"Child bind failed. Errno %d\n",errno);
+			fflush(newstdout);
+		}
+		exit(20);
+	}
+	child_port = ntohs(child_sync_sock.sin_port);
+	if(cdebug ==1)
+	{
+		fprintf(newstdout,"Child %d: Listen: Bound at port %d\n",(int)chid, tmp_port);
+		fflush(newstdout);
+	}
+	return(s);
+}
+
+/*
+ * child_attach: put socket "s" into the listening state and wait for
+ * one incoming connection.
+ *
+ * A non-zero "flag" stores the peer address in child_async_sock,
+ * zero stores it in child_sync_sock. The value returned is whatever
+ * accept() returned; it is not checked here.
+ */
+#ifdef HAVE_ANSIC_C
+int
+child_attach(int s, int flag)
+#else
+int
+child_attach(s, flag)
+int s,flag;
+#endif
+{
+	unsigned int me;
+	int ns;
+	struct sockaddr_in *addr;
+
+	addr = flag ? &child_async_sock : &child_sync_sock;
+	if(cdebug)
+	{
+		if(flag)
+			fprintf(newstdout,"Child %d attach async\n",(int)chid);
+		else
+			fprintf(newstdout,"Child %d attach sync\n",(int)chid);
+		fflush(newstdout);
+	}
+	me=sizeof(struct sockaddr_in);
+	if(cdebug)
+	{
+		printf("Child %d enters listen\n",(int)chid);
+		fflush(stdout);
+	}
+	listen(s,10);
+	if(cdebug)
+	{
+		fprintf(newstdout,"Child %d enters accept\n",(int)chid);
+		fflush(newstdout);
+	}
+	ns=accept(s,(void *)addr,&me);
+	if(cdebug)
+	{
+		fprintf(newstdout,"Child %d attached for receive. Sock %d %d\n",
+			(int)chid, ns,errno);
+		fflush(newstdout);
+	}
+	return(ns);
+}
+
+
+/*
+ * The clients use this to block waiting for a message from
+ * the master.
+ *
+ * child_rcv_buf is cleared and then filled with exactly
+ * size_of_message bytes read from "sock". Exits with status 21 when
+ * read() fails and with status 0 when the master closes the
+ * connection before the message is complete.
+ */
+#ifdef HAVE_ANSIC_C
+void
+child_listen(int sock, int size_of_message)
+#else
+void
+child_listen(sock, size_of_message)
+int sock, size_of_message;
+#endif
+{
+	int tsize;
+	int rcvd;
+	int s;
+	int rc;
+	char *cnc;
+
+	cnc = (char *)&child_rcv_buf[0];
+	bzero(cnc, sizeof(child_rcv_buf));
+	s = sock;
+	tsize=size_of_message; /* Number of bytes to receive */
+	rcvd = 0;
+	while(rcvd < tsize)
+	{
+		if(cdebug ==1)
+		{
+			fprintf(newstdout,"Child %d In recieve \n",(int)chid);
+			fflush(newstdout);
+		}
+		/*
+		 * Ask only for the bytes that are still missing. Asking
+		 * for the full message size again after a partial read
+		 * would let read() store data past the end of the
+		 * message area.
+		 */
+		rc=read(s,cnc,tsize-rcvd);
+		if(rc < 0)
+		{
+			/* newstdout is only opened when cdebug is set. */
+			if(cdebug)
+			{
+				fprintf(newstdout,"Read failed. Errno %d \n",errno);
+				fflush(newstdout);
+			}
+			exit(21);
+		}
+		/*
+		 * A return of 0 means the master closed the connection.
+		 * No more data can arrive, so leave instead of spinning
+		 * in this loop forever; child_listen_async() handles the
+		 * same condition the same way.
+		 */
+		if(rc==0)
+			exit(0);
+		if(cdebug >= 1)
+		{
+			fprintf(newstdout,"Child %d: Got %d bytes\n",(int)chid, rc);
+			fflush(newstdout);
+		}
+		rcvd+=rc;
+		cnc+=rc;
+	}
+	if(cdebug >= 1)
+	{
+		fprintf(newstdout,"Child %d: return from listen\n",(int)chid);
+		fflush(newstdout);
+	}
+}
+/*
+ * Start the childs async listening service for messages from the master.
+ *
+ * Creates a TCP socket, requests SO_RCVBUF and SO_REUSEADDR (a failure
+ * to set an option is reported but is not fatal) and binds
+ * child_async_sock to the first free port at or above
+ * CHILD_ALIST_PORT. The bound port is recorded in child_async_port.
+ * size_of_message is not used here.
+ *
+ * Returns the bound socket. Exits with status 19 when the socket
+ * cannot be created and with status 20 when no port can be bound.
+ */
+#ifdef HAVE_ANSIC_C
+int
+start_child_listen_async(int size_of_message)
+#else
+int
+start_child_listen_async(size_of_message)
+int size_of_message;
+#endif
+{
+	int s;
+	int rc;
+	int tmp_port;
+	int sockerr;
+	int recv_buf_size=65536;
+	int optval=1;
+
+	(void)size_of_message;
+	s = socket(AF_INET, SOCK_STREAM, 0);
+	if (s < 0)
+	{
+		perror("socket failed:");
+		exit(19);
+	}
+	sockerr = setsockopt (s, SOL_SOCKET, SO_RCVBUF, (char *)
+		&recv_buf_size, sizeof(int));
+	if ( sockerr == -1 ) {
+		perror("Error in setsockopt 5\n");
+	}
+	sockerr = setsockopt (s, SOL_SOCKET, SO_REUSEADDR, (char *)
+		&optval, sizeof(int));
+	if ( sockerr == -1 ) {
+		perror("Error in setsockopt 6\n");
+	}
+	bzero(&child_async_sock, sizeof(struct sockaddr_in));
+	tmp_port=CHILD_ALIST_PORT;
+	child_async_sock.sin_port = htons(tmp_port);
+	child_async_sock.sin_family = AF_INET;
+	child_async_sock.sin_addr.s_addr = INADDR_ANY;
+	/*
+	 * Probe upward for a free port. Only "address in use" is retried;
+	 * any other bind error is permanent and now reaches the error
+	 * exit below instead of looping forever.
+	 */
+	rc = bind(s, (struct sockaddr *)&child_async_sock,
+		sizeof(struct sockaddr_in));
+	while (rc < 0 && errno == EADDRINUSE && tmp_port < 65535)
+	{
+		tmp_port++;
+		child_async_sock.sin_port=htons(tmp_port);
+		rc = bind(s, (struct sockaddr *)&child_async_sock,
+			sizeof(struct sockaddr_in));
+	}
+	if(rc < 0)
+	{
+		/* newstdout is only opened when cdebug is set. */
+		if(cdebug)
+		{
+			fprintf(newstdout,"bind failed. Errno %d \n",errno);
+			fflush(newstdout);
+		}
+		exit(20);
+	}
+	child_async_port = ntohs(child_async_sock.sin_port);
+	if(cdebug ==1)
+	{
+		fprintf(newstdout,"Child %d: Async Listen: Bound at port %d\n",
+			(int)chid,tmp_port);
+		fflush(newstdout);
+	}
+	return(s);
+}
+/*
+ * The clients use this to block waiting for an async message from
+ * the master.
+ *
+ * Exactly size_of_message bytes are read from "sock" into
+ * child_async_rcv_buf. Exits with status 21 when read() fails and
+ * with status 0 when the master closes the connection.
+ */
+#ifdef HAVE_ANSIC_C
+void
+child_listen_async(int sock, int size_of_message)
+#else
+void
+child_listen_async(sock, size_of_message)
+int sock, size_of_message;
+#endif
+{
+	int tsize;
+	int rcvd;
+	int s;
+	int rc;
+	char *cnc;
+
+	cnc = &child_async_rcv_buf[0];
+	s = sock;
+	tsize=size_of_message; /* Number of bytes to receive */
+	rcvd = 0;
+	while(rcvd < tsize)
+	{
+		if(cdebug ==1)
+		{
+			fprintf(newstdout,"Child %d In async recieve \n",(int)chid);
+			fflush(newstdout);
+		}
+		/*
+		 * Ask only for the bytes that are still missing. Asking
+		 * for the full message size again after a partial read
+		 * would let read() store data past the end of the
+		 * message area.
+		 */
+		rc=read(s,cnc,tsize-rcvd);
+		if(rc < 0)
+		{
+			/* newstdout is only opened when cdebug is set. */
+			if(cdebug)
+			{
+				fprintf(newstdout,"Read failed. Errno %d \n",errno);
+				fflush(newstdout);
+			}
+			exit(21);
+		}
+		/* Special case. If master gets final results, it can
+		   exit, and close the connection to the async child
+		   too quickly. When this happens the child gets a
+		   read() that returns 0. It just needs to exit here.
+		*/
+		if(rc==0)
+			exit(0);
+		if(cdebug >= 1)
+		{
+			fprintf(newstdout,"Child %d: Got %d bytes (async) \n",(int)chid,rc);
+			fflush(newstdout);
+		}
+		rcvd+=rc;
+		cnc+=rc;
+	}
+	if(cdebug >= 1)
+	{
+		fprintf(newstdout,"Child %d: return from async listen\n",(int)chid);
+		fflush(newstdout);
+	}
+}
+
+/*
+ * Start the channel for the master to send a message to
+ * a particular child on a particular port that the child
+ * has created for the parent to use to communicate.
+ *
+ * The child host name is resolved, its address is stored in
+ * *my_s_addr for later use by start_master_send_async(), the local end
+ * is bound to the first free port at or above HOST_ESEND_PORT and the
+ * connection is attempted up to 301 times, 10 ms apart.
+ *
+ * Returns the connected socket. Exit statuses: 22 unknown host,
+ * 23 socket(), 24 bind(), 25 connect().
+ */
+#ifdef HAVE_ANSIC_C
+int
+start_master_send(char *child_host_name, int child_port, struct in_addr *my_s_addr)
+#else
+int
+start_master_send(child_host_name, child_port, my_s_addr)
+char *child_host_name;
+int child_port;
+struct in_addr *my_s_addr;
+#endif
+{
+	int rc,master_socket_val;
+	struct sockaddr_in addr,raddr;
+	struct hostent *he;
+	int port,tmp_port;
+	int ecount = 0;
+	struct in_addr *ip;
+	struct timespec req,rem;
+
+	/* 10 ms pause used between connection attempts. */
+	req.tv_sec = 0;
+	req.tv_nsec = 10000000;
+	rem.tv_sec = 0;
+	rem.tv_nsec = 10000000;
+
+	he = gethostbyname(child_host_name);
+	if (he == NULL)
+	{
+		printf("Master: Bad hostname >%s<\n",child_host_name);
+		fflush(stdout);
+		exit(22);
+	}
+	if(mdebug ==1)
+	{
+		printf("Master: start master send: %s\n", he->h_name);
+		fflush(stdout);
+	}
+	ip = (struct in_addr *)he->h_addr_list[0];
+#ifndef UWIN
+	if(mdebug ==1)
+	{
+		printf("Master: child name: %s\n", (char *)inet_ntoa(*ip));
+		printf("Master: child Port: %d\n", child_port);
+		fflush(stdout);
+	}
+#endif
+
+	port=child_port;
+	my_s_addr->s_addr = ip->s_addr;
+	/*port=CHILD_LIST_PORT;*/
+
+	raddr.sin_family = AF_INET;
+	raddr.sin_port = htons(port);
+	raddr.sin_addr.s_addr = ip->s_addr;
+	master_socket_val = socket(AF_INET, SOCK_STREAM, 0);
+	if (master_socket_val < 0)
+	{
+		perror("Master: socket failed:");
+		exit(23);
+	}
+	bzero(&addr, sizeof(struct sockaddr_in));
+	tmp_port=HOST_ESEND_PORT;
+	addr.sin_port = htons(tmp_port);
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = INADDR_ANY;
+	/*
+	 * Probe upward for a free local port. Only "address in use" is
+	 * retried; any other bind error is permanent and now reaches the
+	 * error exit below instead of looping forever.
+	 */
+	rc = bind(master_socket_val, (struct sockaddr *)&addr,
+		sizeof(struct sockaddr_in));
+	while (rc < 0 && errno == EADDRINUSE && tmp_port < 65535)
+	{
+		tmp_port++;
+		addr.sin_port=htons(tmp_port);
+		rc = bind(master_socket_val, (struct sockaddr *)&addr,
+			sizeof(struct sockaddr_in));
+	}
+	if (rc < 0)
+	{
+		perror("Master: bind failed for sync channel to child.\n");
+		exit(24);
+	}
+	if(mdebug ==1)
+	{
+		printf("Master: Bound port\n");
+		fflush(stdout);
+	}
+	nanosleep(&req,&rem);
+again:
+	rc = connect(master_socket_val, (struct sockaddr *)&raddr,
+		sizeof(struct sockaddr_in));
+	if (rc < 0)
+	{
+		if(ecount++ < 300)
+		{
+			nanosleep(&req,&rem);
+			/*sleep(1);*/
+			goto again;
+		}
+		perror("Master: connect failed\n");
+		printf("Error %d\n",errno);
+		exit(25);
+	}
+	if(mdebug ==1)
+	{
+		printf("Master Connected\n");
+		fflush(stdout);
+	}
+	return (master_socket_val);
+}
+/*
+ * Start the async channel for the master to send a message to
+ * a particular child on a particular port that the child
+ * has created for the parent to use to communicate.
+ *
+ * The child is addressed by my_s_addr (the address that
+ * start_master_send() stored); child_host_name is not used. When the
+ * connection cannot be made after 301 attempts the socket is closed
+ * and the whole sequence restarts with a new socket, so the function
+ * only returns once it is connected.
+ *
+ * Returns the connected socket. Exit statuses: 23 socket(), 24 bind().
+ */
+#ifdef HAVE_ANSIC_C
+int
+start_master_send_async(char *child_host_name, int child_port, struct in_addr my_s_addr)
+#else
+int
+start_master_send_async(child_host_name, child_port, my_s_addr)
+char *child_host_name;
+int child_port;
+struct in_addr my_s_addr;
+#endif
+{
+	int rc,master_socket_val;
+	struct sockaddr_in addr,raddr;
+	int port,tmp_port;
+	int ecount = 0;
+	struct timespec req,rem;
+
+	/* 10 ms pause used between connection attempts. */
+	req.tv_sec = 0;
+	req.tv_nsec = 10000000;
+	rem.tv_sec = 0;
+	rem.tv_nsec = 10000000;
+
+
+	port=child_port;
+	nanosleep(&req,&rem);
+
+over:
+	raddr.sin_family = AF_INET;
+	raddr.sin_port = htons(port);
+	raddr.sin_addr.s_addr = my_s_addr.s_addr;
+	master_socket_val = socket(AF_INET, SOCK_STREAM, 0);
+	if (master_socket_val < 0)
+	{
+		perror("Master: async socket failed:");
+		exit(23);
+	}
+	bzero(&addr, sizeof(struct sockaddr_in));
+	tmp_port=HOST_ASEND_PORT;
+	addr.sin_port = htons(tmp_port);
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = INADDR_ANY;
+	/*
+	 * Probe upward for a free local port. Only "address in use" is
+	 * retried; any other bind error is permanent and now reaches the
+	 * error exit below instead of looping forever.
+	 */
+	rc = bind(master_socket_val, (struct sockaddr *)&addr,
+		sizeof(struct sockaddr_in));
+	while (rc < 0 && errno == EADDRINUSE && tmp_port < 65535)
+	{
+		tmp_port++;
+		addr.sin_port=htons(tmp_port);
+		rc = bind(master_socket_val, (struct sockaddr *)&addr,
+			sizeof(struct sockaddr_in));
+	}
+	if (rc < 0)
+	{
+		perror("Master: bind async failed\n");
+		exit(24);
+	}
+	if(mdebug ==1)
+	{
+		printf("Master: Bound async port\n");
+		fflush(stdout);
+	}
+again:
+
+	rc = connect(master_socket_val, (struct sockaddr *)&raddr,
+		sizeof(struct sockaddr_in));
+	if (rc < 0)
+	{
+		if(ecount++ < 300)
+		{
+			/* Really need this sleep for Windows */
+#if defined (Windows)
+			sleep(1);
+#else
+			nanosleep(&req,&rem);
+#endif
+			goto again;
+		}
+		perror("Master: async connect failed\n");
+		close(master_socket_val);
+#if defined (Windows)
+		sleep(1);
+#else
+		nanosleep(&req,&rem);
+#endif
+		/*sleep(1);*/
+		ecount=0;
+		goto over;
+	}
+	if(mdebug ==1)
+	{
+		printf("Master async Connected\n");
+		fflush(stdout);
+	}
+	return (master_socket_val);
+}
+
+/*
+ * If not "distributed" then call fork. The "distributed"
+ * will start iozone on a remote node.
+ *
+ * Returns the value of pick_client() when running as the distributed
+ * master and the value of fork() otherwise.
+ */
+#ifdef HAVE_ANSIC_C
+long long
+start_child_proc(int testnum,long long numrecs64, long long reclen)
+#else
+long long
+start_child_proc(testnum, numrecs64, reclen)
+int testnum;
+long long numrecs64, reclen;
+#endif
+{
+	long long x;
+	if(distributed && master_iozone)
+	{
+		x=(long long)pick_client(testnum,numrecs64, reclen);
+	}
+	else
+	{
+		x=(long long)fork();
+	}
+	if(mdebug)
+		printf("Starting proc %d\n",(int)x);
+	return(x);
+}
+
+/*
+ * This function picks a client from the list of clients and
+ * starts it running on the remote machine. It also waits for
+ * the remote process to join and then sends the client
+ * the state information it needs to begin to run the
+ * test. The client will initialize its state space,
+ * begin the test and block as the barrier waiting
+ * for the master to say go.
+ *
+ * Returns the (1 based) number of the client that was started.
+ * Exits with status 27 when the remote start command does not fit
+ * into the command buffer.
+ */
+#ifdef HAVE_ANSIC_C
+int
+pick_client(int testnum,long long numrecs64, long long reclen)
+#else
+int
+pick_client(testnum, numrecs64, reclen)
+int testnum;
+long long numrecs64, reclen;
+#endif
+{
+	int x;
+	int c_command,child_index;
+	int cmd_len;
+	struct client_command cc;
+	struct master_command mc;
+	struct master_neutral_command *mnc;
+	char command[512];
+	struct in_addr my_s_addr;
+	/*
+	 * Holds " -+i <port>". The former 10 byte buffer was one byte
+	 * too small for a five digit port number plus the terminator.
+	 */
+	char my_port_num[32];
+
+
+	bzero(&cc,sizeof(struct client_command));
+	/* The sscanf() calls below are not checked, so start from zeros. */
+	bzero(&mc,sizeof(struct master_command));
+	bzero(command,sizeof(command));
+
+	current_client_number++; /* Need to start with 1 */
+	x=current_client_number;
+
+	child_idents[x-1].state = C_STATE_ZERO;
+	/* Step 1. Now start client going on remote node. */
+
+	find_remote_shell(remote_shell);
+	my_port_num[0] = '\0';
+	if (master_listen_port != HOST_LIST_PORT)
+	{
+		snprintf(my_port_num,sizeof(my_port_num)," -+i %d",master_listen_port);
+	}
+	/*
+	 * Build the whole command in one bounded step; the former chain
+	 * of sprintf()/strcat() calls could run past the end of the
+	 * buffer for long host names or paths.
+	 * NOTE(review): the client name and execute path are handed to
+	 * the shell unquoted; confirm they only come from a trusted
+	 * client list.
+	 */
+	cmd_len = snprintf(command,sizeof(command),
+		"%s %s -n '%s -+s -t 1 -r 4 -s 4 -+c %s%s '",
+		remote_shell,
+		child_idents[x-1].child_name,
+		child_idents[x-1].execute_path,
+		controlling_host_name,
+		my_port_num);
+	if(cmd_len < 0 || cmd_len >= (int)sizeof(command))
+	{
+		printf("Master: remote start command for client %s is too long\n",
+			child_idents[x-1].child_name);
+		fflush(stdout);
+		exit(27);
+	}
+	junk=system(command);
+/*
+	system("remsh rsnperf '/home/capps/niozone/iozone -+s -t 1 -r 4 -s 8 -+c rsnperf'");
+
+*/
+	if(mdebug)
+		printf("%s",command);
+	/* Format example: */
+	/* */
+	/* system("remsh rsnperf '/home/capps/niozone/iozone */
+	/* -+s -t 1 -r 4 -s 8 -+c rsnperf'"); */
+	/* */
+
+	/* Step 2. Wait for join from new client. */
+
+	child_idents[x-1].state = C_STATE_WAIT_WHO;
+
+	if(mdebug>=1)
+		printf("\nMaster listening for child to send join message.\n");
+	master_listen(master_listen_socket,sizeof(struct master_neutral_command));
+	mnc = (struct master_neutral_command *)&master_rcv_buf[0];
+
+	/*
+	 * Convert from string format back to internal representation
+	 */
+	sscanf(mnc->m_child_port,"%d",&mc.m_child_port);
+	sscanf(mnc->m_child_async_port,"%d",&mc.m_child_async_port);
+	sscanf(mnc->m_command,"%d",&mc.m_command);
+	sscanf(mnc->m_version,"%d",&mc.m_version);
+	if(mc.m_version != proto_version)
+	{
+		printf("Client > %s < is not running the same version of Iozone !! C%d M%d\n", child_idents[x-1].child_name, mc.m_version, proto_version);
+	}
+
+	c_port = mc.m_child_port;
+	a_port = mc.m_child_async_port;
+	c_command = mc.m_command;
+	if(mdebug>=1)
+	{
+		printf("Master back from listen child Joined.\n");
+		printf("Master: Command %d\n",c_command);
+	}
+	/* Step 3. Then start_master_send() for this client. */
+
+	if(mdebug>=1)
+		printf("Starting master send channel\n");
+	master_send_sockets[x-1]= start_master_send(child_idents[x-1].child_name,c_port,
+		&my_s_addr);
+	if(mdebug>=1)
+		printf("Starting master send async channel\n");
+	master_send_async_sockets[x-1]= start_master_send_async(child_idents[x-1].child_name,a_port,
+		my_s_addr);
+
+	child_idents[x-1].master_socket_num = master_send_sockets[x-1];
+	child_idents[x-1].master_async_socket_num = master_send_async_sockets[x-1];
+	child_idents[x-1].child_number = x-1;
+	child_idents[x-1].child_port = c_port;
+	child_idents[x-1].child_async_port = a_port;
+
+	/* */
+	/* Step 4. Send message to client telling him his name, number, */
+	/* rsize, fsize, and test to run. */
+	strcpy(cc.c_host_name ,controlling_host_name);
+	strcpy(cc.c_pit_hostname ,pit_hostname);
+	strcpy(cc.c_pit_service ,pit_service);
+	strcpy(cc.c_client_name ,child_idents[x-1].child_name);
+	strcpy(cc.c_working_dir ,child_idents[x-1].workdir);
+	strcpy(cc.c_file_name ,child_idents[x-1].file_name);
+	strcpy(cc.c_write_traj_filename ,write_traj_filename);
+	strcpy(cc.c_read_traj_filename ,read_traj_filename);
+	cc.c_command = R_JOIN_ACK;
+	cc.c_client_number = x-1;
+	cc.c_testnum = testnum;
+	cc.c_numrecs64 = numrecs64;
+	cc.c_reclen = reclen;
+	cc.c_oflag = oflag;
+	cc.c_mfflag = mfflag;
+	cc.c_unbuffered = unbuffered;
+	cc.c_noretest = noretest;
+	cc.c_notruncate = notruncate;
+	cc.c_read_sync = read_sync;
+	cc.c_jflag = jflag;
+	cc.c_direct_flag = direct_flag;
+	cc.c_cpuutilflag = cpuutilflag;
+	cc.c_seq_mix = seq_mix;
+	cc.c_async_flag = async_flag;
+	cc.c_k_flag = k_flag;
+	cc.c_h_flag = h_flag;
+	cc.c_mflag = mflag;
+	cc.c_pflag = pflag;
+	cc.c_stride_flag = stride_flag;
+	cc.c_fetchon = fetchon;
+	cc.c_verify = verify;
+	cc.c_sverify = sverify;
+	cc.c_odsync = odsync;
+	cc.c_diag_v = diag_v;
+	cc.c_dedup = dedup;
+	cc.c_dedup_interior = dedup_interior;
+	cc.c_dedup_compress = dedup_compress;
+	cc.c_dedup_mseed = dedup_mseed;
+	cc.c_hist_summary = hist_summary;
+	cc.c_op_rate = op_rate;
+	cc.c_op_rate_flag = op_rate_flag;
+	cc.c_file_lock = file_lock;
+	cc.c_rec_lock = rlocking;
+	cc.c_Kplus_readers = Kplus_readers;
+	cc.c_multiplier = multiplier;
+	cc.c_share_file = share_file;
+	cc.c_pattern = pattern;
+	cc.c_version = proto_version;
+	cc.c_base_time = base_time;
+	cc.c_num_child = (int)num_child;
+	cc.c_pct_read = pct_read;
+	cc.c_advise_op = advise_op;
+	cc.c_advise_flag = advise_flag;
+	cc.c_restf = restf;
+	cc.c_mygen = mygen;
+	cc.c_Q_flag = Q_flag;
+	cc.c_L_flag = L_flag;
+	cc.c_xflag = xflag;
+	cc.c_w_traj_flag = w_traj_flag;
+	cc.c_r_traj_flag = r_traj_flag;
+	cc.c_include_flush = include_flush;
+	cc.c_OPS_flag = OPS_flag;
+	cc.c_purge = purge;
+	cc.c_mmapflag = mmapflag;
+	cc.c_mmapasflag = mmapasflag;
+	cc.c_mmapnsflag = mmapnsflag;
+	cc.c_mmapssflag = mmapssflag;
+	cc.c_no_copy_flag = no_copy_flag;
+	cc.c_no_unlink = no_unlink;
+	cc.c_no_write = no_write;
+	cc.c_include_close = include_close;
+	cc.c_disrupt_flag = disrupt_flag;
+	cc.c_compute_flag = compute_flag;
+	cc.c_delay = delay;
+	cc.c_stride = stride;
+	cc.c_rest_val = rest_val;
+	cc.c_delay_start = delay_start;
+	cc.c_compute_time = compute_time;
+	cc.c_depth = depth;
+	cc.c_MS_flag = MS_flag;
+	cc.c_mmap_mix = mmap_mix;
+	cc.c_Kplus_flag = Kplus_flag;
+
+
+	if(mdebug)
+		printf("Master sending client who he is\n");
+	master_send(master_send_sockets[x-1],cc.c_client_name, &cc,sizeof(struct client_command));
+
+	child_idents[x-1].state = C_STATE_WAIT_BARRIER;
+
+	/* */
+	/* Step 5. Wait until you receive message that the child is at */
+	/* the barrier. */
+	if(mdebug>=1)
+		printf("Master listening for child to send at barrier message.\n");
+	master_listen(master_listen_socket,sizeof(struct master_neutral_command));
+	mnc = (struct master_neutral_command *)&master_rcv_buf[0];
+	/*
+	 * Convert from string back to arch specific
+	 */
+	sscanf(mnc->m_client_number,"%d",&mc.m_client_number);
+#ifdef NO_PRINT_LLD
+	sscanf(mnc->m_child_flag,"%ld",&mc.m_child_flag);
+#else
+	sscanf(mnc->m_child_flag,"%lld",&mc.m_child_flag);
+#endif
+
+	/* NOTE(review): the client number comes from the network and is */
+	/* used as an index without a range check - confirm the bound. */
+	child_index = mc.m_client_number;
+	child_stat = (struct child_stats *)&shmaddr[child_index];
+	child_stat->flag = (long long)(mc.m_child_flag);
+	if(mdebug>=1)
+		printf("Master sees child %d at barrier message.\n",child_index);
+
+	return(x); /* Tell code above that it is the parent returning */
+}
+
+/****************************************************************************************/
+/* This is the code that the client will use when it */
+/* gets started via remote shell. It is activated by the -+c controller_name option. */
+/* */
+/* The steps to this process are: */
+/* 1. Start client receive channel */
+/* 2. Start client send channel */
+/* 3. Send message to controller saying I'm joining. */
+/* 4. Go into a loop and get all instructions from */
+/* 5. Get state information from the master */
+/* 6. Change to the working directory */
+/* 7. Run the test */
+/* 8. Release the listen and send sockets to the master */
+/* */
+/****************************************************************************************/
+#ifdef HAVE_ANSIC_C
+void
+become_client(void)
+#else
+void
+become_client()
+#endif
+{
+	int x,testnum;
+	struct master_command mc;
+	struct client_command cc;
+	struct client_neutral_command *cnc;
+	char client_name[100];
+	char *workdir;
+
+	bzero(&mc,sizeof(struct master_command));
+	x=fork(); /* Become a daemon so that remote shell will return.
*/ + if(x != 0) + exit(0); + /* + * I am the child + */ + (void)gethostname(client_name,100); + + fflush(stdout); + fflush(stderr); + if(cdebug) + { + newstdin=freopen("/tmp/don_in","r+",stdin); + newstdout=freopen("/tmp/don_out","a+",stdout); + newstderr=freopen("/tmp/don_err","a+",stderr); + } + else + { + fclose(stdin); + fclose(stdout); + fclose(stderr); + } + if(cdebug>=1) + { + fprintf(newstdout,"My name = %s, Controller's name = %s\n",client_name, controlling_host_name); + fflush(newstdout); + } + + /* 1. Start client receive channel */ + + l_sock = start_child_listen(sizeof(struct client_neutral_command)); + l_async_sock = start_child_listen_async(sizeof(struct client_neutral_command)); + + /* 2. Send message to controller saying I'm joining. */ + + strcpy(mc.m_host_name,controlling_host_name); + strcpy(mc.m_client_name,client_name); + mc.m_child_port = child_port; + mc.m_child_async_port = child_async_port; + mc.m_command = R_CHILD_JOIN; + mc.m_version = proto_version; + + if(cdebug) + { + fprintf(newstdout,"Child %s sends JOIN to master %s Host Port %d\n", + client_name,controlling_host_name,controlling_host_port); + fflush(newstdout); + } + child_send(controlling_host_name,(struct master_command *)&mc, sizeof(struct master_command)); + + l_sock=child_attach(l_sock,0); + l_async_sock=child_attach(l_async_sock,1); + + /* 4. Go into a loop and get all instructions from */ + /* the controlling process. 
*/ + + if(cdebug>=1) + { + fprintf(newstdout,"Child %s waiting for who am I\n",client_name); + fflush(newstdout); + } + child_listen(l_sock,sizeof(struct client_neutral_command)); + cnc = (struct client_neutral_command *)&child_rcv_buf; + bzero(&cc, sizeof(struct client_command)); + + /* Convert from string format to arch format */ + sscanf(cnc->c_command,"%d",&cc.c_command); + sscanf(cnc->c_client_name,"%s",cc.c_client_name); + sscanf(cnc->c_client_number,"%d",&cc.c_client_number); + sscanf(cnc->c_host_name,"%s",cc.c_host_name); + sscanf(cnc->c_pit_hostname,"%s",cc.c_pit_hostname); + + if(cc.c_command == R_TERMINATE || cc.c_command==R_DEATH) + { + if(cdebug) + { + fprintf(newstdout,"Child %d received terminate on sync channel !!\n",(int)chid); + fflush(newstdout); + } + exit(1); + } + + if(cdebug) + { + fprintf(newstdout,"Child sees: \n Client name %s \n Client_num # %d \n Host_name %s\n" + ,cc.c_client_name,cc.c_client_number,cc.c_host_name); + fflush(newstdout); + } + + /* + * Now import all of the values of the flags that the child on this + * machine needs to be able to run the test requested. + */ + + /* 5. 
Get state information from the master */ + +#ifdef NO_PRINT_LLD + sscanf(cnc->c_numrecs64,"%ld",&cc.c_numrecs64); + sscanf(cnc->c_reclen,"%ld",&cc.c_reclen); + sscanf(cnc->c_fetchon,"%ld",&cc.c_fetchon); + sscanf(cnc->c_purge,"%ld",&cc.c_purge); + sscanf(cnc->c_delay,"%ld",&cc.c_delay); + sscanf(cnc->c_stride,"%ld",&cc.c_stride); + sscanf(cnc->c_rest_val,"%ld",&cc.c_rest_val); + sscanf(cnc->c_delay_start,"%ld",&cc.c_delay_start); + sscanf(cnc->c_depth,"%ld",&cc.c_depth); +#else + sscanf(cnc->c_numrecs64,"%lld",&cc.c_numrecs64); + sscanf(cnc->c_reclen,"%lld",&cc.c_reclen); + sscanf(cnc->c_fetchon,"%lld",&cc.c_fetchon); + sscanf(cnc->c_purge,"%lld",&cc.c_purge); + sscanf(cnc->c_delay,"%lld",&cc.c_delay); + sscanf(cnc->c_stride,"%lld",&cc.c_stride); + sscanf(cnc->c_rest_val,"%lld",&cc.c_rest_val); + sscanf(cnc->c_delay_start,"%lld",&cc.c_delay_start); + sscanf(cnc->c_depth,"%lld",&cc.c_depth); +#endif + sscanf(cnc->c_pit_hostname,"%s",cc.c_pit_hostname); + sscanf(cnc->c_pit_service,"%s",cc.c_pit_service); + sscanf(cnc->c_testnum,"%d",&cc.c_testnum); + sscanf(cnc->c_client_number,"%d",&cc.c_client_number); + sscanf(cnc->c_working_dir,"%s",cc.c_working_dir); + sscanf(cnc->c_file_name,"%s",cc.c_file_name); + sscanf(cnc->c_write_traj_filename,"%s",cc.c_write_traj_filename); + sscanf(cnc->c_read_traj_filename,"%s",cc.c_read_traj_filename); + sscanf(cnc->c_noretest,"%d",&cc.c_noretest); + sscanf(cnc->c_notruncate,"%d",&cc.c_notruncate); + sscanf(cnc->c_read_sync,"%d",&cc.c_read_sync); + sscanf(cnc->c_jflag,"%d",&cc.c_jflag); + sscanf(cnc->c_direct_flag,"%d",&cc.c_direct_flag); + sscanf(cnc->c_cpuutilflag,"%d",&cc.c_cpuutilflag); + sscanf(cnc->c_seq_mix,"%d",&cc.c_seq_mix); + sscanf(cnc->c_async_flag,"%d",&cc.c_async_flag); + sscanf(cnc->c_k_flag,"%d",&cc.c_k_flag); + sscanf(cnc->c_h_flag,"%d",&cc.c_h_flag); + sscanf(cnc->c_mflag,"%d",&cc.c_mflag); + sscanf(cnc->c_pflag,"%d",&cc.c_pflag); + sscanf(cnc->c_stride_flag,"%d",&cc.c_stride_flag); + 
sscanf(cnc->c_verify,"%d",&cc.c_verify); + sscanf(cnc->c_sverify,"%d",&cc.c_sverify); + sscanf(cnc->c_odsync,"%d",&cc.c_odsync); + sscanf(cnc->c_diag_v,"%d",&cc.c_diag_v); + sscanf(cnc->c_dedup,"%d",&cc.c_dedup); + sscanf(cnc->c_dedup_interior,"%d",&cc.c_dedup_interior); + sscanf(cnc->c_dedup_compress,"%d",&cc.c_dedup_compress); + sscanf(cnc->c_dedup_mseed,"%d",&cc.c_dedup_mseed); + sscanf(cnc->c_hist_summary,"%d",&cc.c_hist_summary); + sscanf(cnc->c_op_rate,"%d",&cc.c_op_rate); + sscanf(cnc->c_op_rate_flag,"%d",&cc.c_op_rate_flag); + sscanf(cnc->c_file_lock,"%d",&cc.c_file_lock); + sscanf(cnc->c_rec_lock,"%d",&cc.c_rec_lock); + sscanf(cnc->c_Kplus_readers,"%d",&cc.c_Kplus_readers); + sscanf(cnc->c_multiplier,"%d",&cc.c_multiplier); + sscanf(cnc->c_share_file,"%d",&cc.c_share_file); + sscanf(cnc->c_pattern,"%d",&cc.c_pattern); + sscanf(cnc->c_version,"%d",&cc.c_version); + sscanf(cnc->c_base_time,"%d",&cc.c_base_time); + sscanf(cnc->c_num_child,"%d",&cc.c_num_child); + sscanf(cnc->c_pct_read,"%d",&cc.c_pct_read); + sscanf(cnc->c_advise_op,"%d",&cc.c_advise_op); + sscanf(cnc->c_advise_flag,"%d",&cc.c_advise_flag); + sscanf(cnc->c_restf,"%d",&cc.c_restf); + sscanf(cnc->c_mygen,"%d",&cc.c_mygen); + sscanf(cnc->c_oflag,"%d",&cc.c_oflag); + sscanf(cnc->c_mfflag,"%d",&cc.c_mfflag); + sscanf(cnc->c_unbuffered,"%d",&cc.c_unbuffered); + sscanf(cnc->c_Q_flag,"%d",&cc.c_Q_flag); + sscanf(cnc->c_L_flag,"%d",&cc.c_L_flag); + sscanf(cnc->c_xflag,"%d",&cc.c_xflag); + sscanf(cnc->c_include_flush,"%d",&cc.c_include_flush); + sscanf(cnc->c_OPS_flag,"%d",&cc.c_OPS_flag); + sscanf(cnc->c_mmapflag,"%d",&cc.c_mmapflag); + sscanf(cnc->c_mmapasflag,"%d",&cc.c_mmapasflag); + sscanf(cnc->c_mmapnsflag,"%d",&cc.c_mmapnsflag); + sscanf(cnc->c_mmapssflag,"%d",&cc.c_mmapssflag); + sscanf(cnc->c_no_copy_flag,"%d",&cc.c_no_copy_flag); + sscanf(cnc->c_w_traj_flag,"%d",&cc.c_w_traj_flag); + sscanf(cnc->c_r_traj_flag,"%d",&cc.c_r_traj_flag); + sscanf(cnc->c_no_unlink,"%d",&cc.c_no_unlink); + 
sscanf(cnc->c_no_write,"%d",&cc.c_no_write); + sscanf(cnc->c_include_close,"%d",&cc.c_include_close); + sscanf(cnc->c_disrupt_flag,"%d",&cc.c_disrupt_flag); + sscanf(cnc->c_compute_flag,"%d",&cc.c_compute_flag); + sscanf(cnc->c_MS_flag,"%d",&cc.c_MS_flag); + sscanf(cnc->c_mmap_mix,"%d",&cc.c_mmap_mix); + sscanf(cnc->c_Kplus_flag,"%d",&cc.c_Kplus_flag); + sscanf(cnc->c_compute_time,"%f",&cc.c_compute_time); + + strcpy(write_traj_filename,cc.c_write_traj_filename); + strcpy(read_traj_filename,cc.c_read_traj_filename); + numrecs64 = cc.c_numrecs64; + strcpy(pit_hostname,cc.c_pit_hostname); + strcpy(pit_service,cc.c_pit_service); + reclen = cc.c_reclen; + testnum = cc.c_testnum; + chid = cc.c_client_number; + workdir=cc.c_working_dir; + oflag = cc.c_oflag; + /* Child's absolute filename to use is provided */ + mfflag = cc.c_mfflag; + if(mfflag) + strcpy(filearray[chid],cc.c_file_name); + if(cdebug) + { + fprintf(newstdout,"File name given %s\n",cc.c_file_name); + fflush(newstdout); + } + unbuffered = cc.c_unbuffered; + noretest = cc.c_noretest; + notruncate = cc.c_notruncate; + read_sync = cc.c_read_sync; + jflag = cc.c_jflag; + direct_flag = cc.c_direct_flag; + cpuutilflag = cc.c_cpuutilflag; + seq_mix = cc.c_seq_mix; + async_flag = cc.c_async_flag; + k_flag = cc.c_k_flag; + h_flag = cc.c_h_flag; + mflag = cc.c_mflag; + pflag = cc.c_pflag; + stride_flag = cc.c_stride_flag; + fetchon = cc.c_fetchon; + verify = cc.c_verify; + diag_v = cc.c_diag_v; + dedup = cc.c_dedup; + dedup_interior = cc.c_dedup_interior; + dedup_compress = cc.c_dedup_compress; + dedup_mseed = cc.c_dedup_mseed; + hist_summary = cc.c_hist_summary; + op_rate = cc.c_op_rate; + op_rate_flag = cc.c_op_rate_flag; + if(diag_v) + sverify = 0; + else + sverify = cc.c_sverify; + file_lock = cc.c_file_lock; + rlocking = cc.c_rec_lock; + Kplus_readers = cc.c_Kplus_readers; + multiplier = cc.c_multiplier; + share_file = cc.c_share_file; + pattern = cc.c_pattern; + /* proto_version = cc.c_version; Don't copy it 
back. */ + base_time=cc.c_base_time; + num_child=(long long)cc.c_num_child; + pct_read=cc.c_pct_read; + advise_op=cc.c_advise_op; + advise_flag=cc.c_advise_flag; + restf=cc.c_restf; + mygen=cc.c_mygen; + Q_flag = cc.c_Q_flag; + L_flag = cc.c_L_flag; + xflag = cc.c_xflag; + w_traj_flag = cc.c_w_traj_flag; + r_traj_flag = cc.c_r_traj_flag; + include_flush = cc.c_include_flush; + OPS_flag = cc.c_OPS_flag; + purge = cc.c_purge; + mmapflag = cc.c_mmapflag; + mmapasflag = cc.c_mmapasflag; + mmapnsflag = cc.c_mmapnsflag; + mmapssflag = cc.c_mmapssflag; + no_copy_flag = cc.c_no_copy_flag; + no_unlink = cc.c_no_unlink; + no_write = cc.c_no_write; + include_close = cc.c_include_close; + disrupt_flag = cc.c_disrupt_flag; + compute_flag = cc.c_compute_flag; + MS_flag = cc.c_MS_flag; + mmap_mix = cc.c_mmap_mix; + Kplus_flag = cc.c_Kplus_flag; + delay = cc.c_delay; + stride = cc.c_stride; + rest_val = cc.c_rest_val; + depth = cc.c_depth; + delay_start = cc.c_delay_start; + compute_time = cc.c_compute_time; + if(cdebug) + { + fprintf(newstdout,"Child %d change directory to %s\n",(int)chid,workdir); + fflush(newstdout); + } + if(purge) + alloc_pbuf(); + + /* 6. Change to the working directory */ + + if(chdir(workdir)<0) + client_error=errno; + start_child_listen_loop(); /* The async channel listener */ + + /* Need to start this after getting into the correct directory */ + if(w_traj_flag) + w_traj_size(); + if(r_traj_flag) + r_traj_size(); + + get_resolution(); /* Get my clock resolution */ + if(hist_summary) + { + printf("Child got HISTORY flag\n"); + } + + /* 7. 
Run the test */ + switch(testnum) { + + case THREAD_WRITE_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running thread_write_test\n",(int)chid); + fflush(newstdout); + } + thread_write_test((long)0); + break; +#ifdef HAVE_PREAD + case THREAD_PWRITE_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running thread_pwrite_test\n",(int)chid); + fflush(newstdout); + } + thread_pwrite_test((long)0); + break; +#endif + case THREAD_REWRITE_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running thread_rewrite_test\n",(int)chid); + fflush(newstdout); + } + thread_rwrite_test((long)0); + break; + case THREAD_READ_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running thread_read_test\n",(int)chid); + fflush(newstdout); + } + thread_read_test((long)0); + break; +#ifdef HAVE_PREAD + case THREAD_PREAD_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running thread_read_test\n",(int)chid); + fflush(newstdout); + } + thread_pread_test((long)0); + break; +#endif + case THREAD_REREAD_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running thread_reread_test\n",(int)chid); + fflush(newstdout); + } + thread_rread_test((long)0); + break; + case THREAD_STRIDE_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running thread_stride_read_test\n",(int)chid); + fflush(newstdout); + } + thread_stride_read_test((long)0); + break; + case THREAD_RANDOM_READ_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running random read test\n",(int)chid); + fflush(newstdout); + } + thread_ranread_test((long)0); + break; + case THREAD_RANDOM_WRITE_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running random write test\n",(int)chid); + fflush(newstdout); + } + thread_ranwrite_test((long)0); + break; + case THREAD_REVERSE_READ_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running reverse read test\n",(int)chid); + fflush(newstdout); + } + thread_reverse_read_test((long)0); + break; + case THREAD_RANDOM_MIX_TEST : + 
if(cdebug>=1) + { + fprintf(newstdout,"Child %d running mixed workload test\n",(int)chid); + fflush(newstdout); + } + thread_mix_test((long)0); + break; + case THREAD_FWRITE_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running thread_fwrite_test\n",(int)chid); + fflush(newstdout); + } + thread_fwrite_test((long)0); + break; + case THREAD_FREAD_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running thread_fread_test\n",(int)chid); + fflush(newstdout); + } + thread_fread_test((long)0); + break; + case THREAD_CLEANUP_TEST : + if(cdebug>=1) + { + fprintf(newstdout,"Child %d running cleanup\n",(int)chid); + fflush(newstdout); + } + thread_cleanup_test((long)0); + break; + }; + if(cdebug>=1) + { + fprintf(newstdout,"Child %d finished running test.\n",(int)chid); + fflush(newstdout); + } + + /* 8. Release the listen and send sockets to the master */ + stop_child_listen(l_sock); + + exit(0); +} + +/* + * Clients tell the master their statistics, set the stopped flag, and set shared memory + * child_flag to tell the master they are finished. Also each client report all statistics. 
+ */ +#ifdef HAVE_ANSIC_C +void +tell_master_stats(testnum , chid, throughput, actual, + cpu_time, wall_time, stop_flag, child_flag) +int testnum; +long long chid; +double throughput, actual, wall_time; +float cpu_time; +char stop_flag; +long long child_flag; +/* +void +tell_master_stats(int testnum , long long chid, double tthroughput, + double actual, float cpu_time, float wall_time, + char stop_flag, long long child_flag) +*/ +#else +void +tell_master_stats(testnum , chid, throughput, actual, cpu_time, + wall_time, stop_flag, child_flag) +int testnum; +long long chid; +double throughput, actual, wall_time; +char stop_flag; +float cpu_time; +long long child_flag; +#endif +{ + struct master_command mc; + bzero(&mc,sizeof(struct master_command)); + mc.m_client_number = (int) chid; + mc.m_client_error = (int) client_error; + mc.m_throughput= throughput; + mc.m_testnum = testnum; + mc.m_actual = actual; + mc.m_cputime = cpu_time; + mc.m_walltime = wall_time; + mc.m_stop_flag = stop_flag; + mc.m_child_flag = child_flag; + mc.m_command = R_STAT_DATA; + mc.m_mygen = mygen; + mc.m_version = proto_version; + if(cdebug>=1) + { + fprintf(newstdout,"Child %d: Tell master stats and terminate\n",(int)chid); + fflush(newstdout); + } + child_send(controlling_host_name,(struct master_command *)&mc, sizeof(struct master_command)); +} + +/* + * Stop the master listener loop service. + * Currently this is not used. The master_join_count + * variable is used to terminate the loop service. + */ +#ifdef HAVE_ANSIC_C +void +stop_master_listen_loop(void) +#else +void +stop_master_listen_loop() +#endif +{ + if(mdebug>=1) + printf("Stopping Master listen loop"); + kill(master_listen_pid,SIGKILL); +} + + +/* + * Clients tell the master that I am at the barrier and ready + * for the message to start work. 
+ */ +#ifdef HAVE_ANSIC_C +void +tell_master_ready(long long chid) +#else +void +tell_master_ready(chid) +long long chid; +#endif +{ + struct master_command mc; + bzero(&mc,sizeof(struct master_command)); + if(cdebug>=1) + { + fprintf(newstdout,"Child %d: Tell master to go\n",(int)chid); + fflush(newstdout); + } + mc.m_command = R_FLAG_DATA; + mc.m_mygen = mygen; + mc.m_version = proto_version; + mc.m_child_flag = CHILD_STATE_READY; + mc.m_client_number = (int)chid; + mc.m_client_error = client_error; + child_send(controlling_host_name,(struct master_command *)&mc, sizeof(struct master_command)); +} + +/* + * Clients wait at a barrier for the master to tell them + * to begin work. This is the function where they wait. + */ +#ifdef HAVE_ANSIC_C +void +wait_for_master_go(long long chid) +#else +void +wait_for_master_go(chid) +long long chid; +#endif +{ + struct client_neutral_command *cnc; + struct client_command cc; + bzero(&cc,sizeof(struct client_command)); + child_listen(l_sock,sizeof(struct client_neutral_command)); + cnc = (struct client_neutral_command *)child_rcv_buf; + sscanf(cnc->c_command,"%d",&cc.c_command); + if(cc.c_command == R_TERMINATE || cc.c_command==R_DEATH) + { + if(cdebug) + { + fprintf(newstdout,"Child %d received terminate on sync channel at barrier !!\n",(int)chid); + fflush(newstdout); + } + exit(1); + } + if(cdebug>=1) + { + fprintf(newstdout,"Child %d return from wait_for_master_go\n",(int)chid); + fflush(newstdout); + } +} + +/* + * Create a master listener for receiving data from the + * many children. As the children finish they will send + * their statistics and terminate. When the master_join_count + * goes to zero then it is time to stop this service. + * When this service exits then the parent will know + * that all of the children are done. 
+ */ +#ifdef HAVE_ANSIC_C +void +start_master_listen_loop(int num) +#else +void +start_master_listen_loop(num) +int num; +#endif +{ + int i; + struct child_stats *child_stat; + struct master_neutral_command *mnc; + struct master_command mc; + int temp; + struct timespec req,rem; + + req.tv_sec = 0; + req.tv_nsec = 10000000; + rem.tv_sec = 0; + rem.tv_nsec = 10000000; + + + master_join_count=num; + master_listen_pid=fork(); + if(master_listen_pid!=0) + return; + if(mdebug>=1) + printf("Starting Master listen loop m %d c %d count %d\n",master_iozone, + client_iozone,num); + + while(master_join_count) + { + master_listen(master_listen_socket,sizeof(struct master_neutral_command)); + mnc=(struct master_neutral_command *)&master_rcv_buf[0]; + + /* + * Convert from string format to arch format + */ + sscanf(mnc->m_command,"%d",&mc.m_command); + sscanf(mnc->m_client_number,"%d",&mc.m_client_number); + sscanf(mnc->m_client_error,"%d",&mc.m_client_error); + sscanf(mnc->m_mygen,"%d",&mc.m_mygen); + sscanf(mnc->m_version,"%d",&mc.m_version); + if(mc.m_version != proto_version) + { + printf("Client # %d is not running the same version of Iozone !\n", + mc.m_client_number); + } + if(mc.m_client_error != 0) + { + printf("\nClient # %d reporting an error %s !\n", + mc.m_client_number,strerror(mc.m_client_error)); + } +#ifdef NO_PRINT_LLD + sscanf(mnc->m_child_flag,"%ld",&mc.m_child_flag); +#else + sscanf(mnc->m_child_flag,"%lld",&mc.m_child_flag); +#endif + sscanf(mnc->m_actual,"%f",&mc.m_actual); + sscanf(mnc->m_throughput,"%f",&mc.m_throughput); + sscanf(mnc->m_cputime,"%f",&mc.m_cputime); + sscanf(mnc->m_walltime,"%f",&mc.m_walltime); + sscanf(mnc->m_stop_flag,"%d",&temp); + mc.m_stop_flag = temp; + + switch(mc.m_command) { + case R_STAT_DATA: + if(mc.m_mygen != mygen) + { + /* + * >>> You are NOT one of my children !!! <<< + * Probably children left behind from another run !!! + * Ignore their messages, and go on without them. 
+ */ + printf("*** Unknown Iozone children responding !!! ***\n"); + continue; + } + i = mc.m_client_number; + if(mdebug) + printf("loop: R_STAT_DATA for client %d\n",i); + child_stat = (struct child_stats *)&shmaddr[i]; + child_stat->flag = mc.m_child_flag; + child_stat->actual = mc.m_actual; + child_stat->throughput = mc.m_throughput; + child_stat->cputime = mc.m_cputime; + child_stat->walltime = mc.m_walltime; + *stop_flag = mc.m_stop_flag; + master_join_count--; + break; + case R_FLAG_DATA: + if(mc.m_mygen != mygen) + { + /* You are NOT one of my children !!! */ + printf("*** Unknown Iozone children responding !!! ***\n"); + continue; + } + if(mdebug) + printf("loop: R_FLAG_DATA: Client %d flag %d \n", + (int)mc.m_client_number, + (int)mc.m_child_flag); + i = mc.m_client_number; + child_stat = (struct child_stats *)&shmaddr[i]; + child_stat->flag = (long long)(mc.m_child_flag); + break; + case R_STOP_FLAG: + if(mc.m_mygen != mygen) + { + /* You are NOT one of my children !!! */ + printf("*** Unknown Iozone children responding !!! ***\n"); + continue; + } + if(mdebug) + printf("Master loop: R_STOP_FLAG: Client %d STOP_FLAG \n", + (int)mc.m_client_number); + *stop_flag=1; + distribute_stop(); + break; + } + + } + /* Let the clients report results before exiting. + Also, exiting too quickly can close the async + socket to the child, and cause it to become ill. + On Solaris, it gets stuck in a 0=read() loop. */ + +#if defined(Windows) + sleep(1); +#else + nanosleep(&req,&rem); +#endif + + exit(0); +} +/* + * Create a client listener for receiving async data from the + * the master. 
+ */ +#ifdef HAVE_ANSIC_C +void +start_child_listen_loop(void) +#else +void +start_child_listen_loop() +#endif +{ + int i; + struct child_stats *child_stat; + struct client_command cc; + struct client_neutral_command *cnc; + + client_listen_pid=fork(); + if(client_listen_pid!=0) + return; + if(cdebug>=1) + { + fprintf(newstdout,"Child %d starting client listen loop\n",(int)chid); + fflush(newstdout); + } + while(1) + { + bzero(&cc,sizeof(struct client_command)); + child_listen_async(l_async_sock,sizeof(struct client_neutral_command)); + cnc=(struct client_neutral_command *)&child_async_rcv_buf; + /* + * Convert from string format to arch format + */ + sscanf(cnc->c_command,"%d",&cc.c_command); + sscanf(cnc->c_client_number,"%d",&cc.c_client_number); + sscanf(cnc->c_stop_flag,"%d",&cc.c_stop_flag); + + switch(cc.c_command) { + case R_STOP_FLAG: + i = cc.c_client_number; + if(cdebug) + { + fprintf(newstdout,"child loop: R_STOP_FLAG for client %d\n",i); + fflush(newstdout); + } + child_stat = (struct child_stats *)&shmaddr[i]; + *stop_flag = cc.c_stop_flag; /* In shared memory with other copy */ + sent_stop=1; + break; + case R_TERMINATE: + if(cdebug) + { + fprintf(newstdout,"Child loop: R_TERMINATE: Client %d \n", + (int)cc.c_client_number); + fflush(newstdout); + } + sleep(2); + /* Aync listener goes away */ + stop_child_listen(l_async_sock); + exit(0); + case R_DEATH: + if(cdebug) + { + fprintf(newstdout,"Child loop: R_DEATH: Client %d \n", + (int)cc.c_client_number); + fflush(newstdout); + } + i = cc.c_client_number; + child_remove_files(i); + sleep(2); + /* Aync listener goes away */ + stop_child_listen(l_async_sock); + exit(0); + } + + } +} + +/* + * The controlling process "master" tells the children to begin. 
+ */ + +#ifdef HAVE_ANSIC_C +void +tell_children_begin(long long childnum) +#else +void +tell_children_begin(childnum) +long long childnum; +#endif +{ + struct client_command cc; + int x; + bzero(&cc,sizeof(struct client_command)); + x = (int) childnum; + if(mdebug>=1) + printf("Master: Tell child %d to begin\n",x); + cc.c_command = R_FLAG_DATA; + cc.c_child_flag = CHILD_STATE_BEGIN; + cc.c_client_number = (int)childnum; + master_send(master_send_sockets[x],child_idents[x].child_name, &cc,sizeof(struct client_command)); +} + +/* + * The master waits here for all of the the children to terminate. + * When the children are done the the master_join_count will be at zero + * and the master_listen_loop will exit. This function waits for this to happen. + */ +#ifdef HAVE_ANSIC_C +void +wait_dist_join(void) +#else +void +wait_dist_join() +#endif +{ + wait(0); + if(mdebug) + printf("Master: All children have finished. Sending terminate\n"); + terminate_child_async(); /* All children are done, so terminate their async channel */ + current_client_number=0; /* start again */ +} + + +/* + * This function reads a file that contains client information. + * The information is: + * client name (DNS usable name) + * client working directory (where to run the test) + * client directory that contains the Iozone executable. + * + * If the first character in a line is a # then it is a comment. + * The maximum number of clients is MAXSTREAMS. 
+ */ +#ifdef HAVE_ANSIC_C +int +get_client_info(void) +#else +int +get_client_info() +#endif +{ + FILE *fd; + char *ret1; + int count; + char buffer[200]; + count=0; + fd=fopen(client_filename,"r"); + if(fd == (FILE *)NULL) + { + printf("Unable to open client file \"%s\"\n", + client_filename); + exit(176); + } + while(1) + { + if (count > MAXSTREAMS) { + printf("Too many lines in client file - max of %d supported\n", + MAXSTREAMS); + exit(7); + } + ret1=fgets(buffer,200,fd); + if(ret1== (char *)NULL) + break; + count+=parse_client_line(buffer,count); + } + fclose(fd); + return(count); +} + + +/* + * This function parses a line from the client file. It is + * looking for: + * Client name (DNS usable) + * Client working directory (where to run the test ) + * Client path to Iozone executable. + * + * Lines that start with # are comments. + */ + +#ifdef HAVE_ANSIC_C +int +parse_client_line(char *buffer,int line_num) +#else +int +parse_client_line(buffer, line_num) +char *buffer; +int line_num; +#endif +{ + int num; + /* Format is clientname, workdir, execute_path */ + /* If column #1 contains a # symbol then skip this line */ + + if(buffer[0]=='#') + return(0); + num=sscanf(buffer,"%s %s %s %s\n", + child_idents[line_num].child_name, + child_idents[line_num].workdir, + child_idents[line_num].execute_path, + child_idents[line_num].file_name); + if((num > 0) && (num !=3) && (num !=4)) + { + printf("Bad Client Identity at entry %d\n",line_num); + printf("Client: -> %s Workdir: -> %s Execute_path: -> %s \n", + child_idents[line_num].child_name, + child_idents[line_num].workdir, + child_idents[line_num].execute_path); + exit(203); + } + if(num == 4) + mfflag++; + + return(1); +} + +/* + * This is a mechanism that the child uses to remove all + * of its temporary files. Only used at terminate time. 
+ */ +#ifdef HAVE_ANSIC_C +void +child_remove_files(int i) +#else +void +child_remove_files(i) +int i; +#endif +{ + + char *dummyfile[MAXSTREAMS]; /* name of dummy file */ + dummyfile[i]=(char *)malloc((size_t)MAXNAMESIZE); + if(mfflag) + { + sprintf(dummyfile[i],"%s",filearray[i]); + } + else + { + sprintf(dummyfile[i],"%s.DUMMY.%d",filearray[i],i); + } + if(cdebug) + { + fprintf(newstdout,"Child %d remove: %s \n",(int)chid, dummyfile[i]); + fflush(newstdout); + } + if(check_filename(dummyfile[i])) + unlink(dummyfile[i]); +} + + +/* + * The master tells the child async listener that it is time + * to terminate its services. + */ +#ifdef HAVE_ANSIC_C +void +terminate_child_async(void) +#else +void +terminate_child_async() +#endif +{ + int i; + struct client_command cc; + bzero(&cc,sizeof(struct client_command)); + cc.c_command = R_TERMINATE; + for(i=0;i>>>> Client Network Speed check <<<<< *\n"); + printf("***************************************************\n\n"); + printf("Master: %s\n",sp_master_host); + printf("Transfer size %d bytes \n",sp_msize); + printf("Count %d\n",sp_count); + printf("Total size %d kbytes \n\n", + (sp_msize*sp_count)/1024); + sp_once=1; + } + sp_dest=sp_remote_host; + sleep(1); + sp_do_master_t(); + free(sp_buf); + } +} + +/* + * Get results back from the client. + */ +#ifdef HAVE_ANSIC_C +void +sp_get_result(int port,int flag) +#else +void +sp_get_result(port,flag) +int port,flag; +#endif +{ + int tcfd; + float throughput; + int count; + char mybuf[1024]; + int sp_offset,xx; + + tcfd=sp_start_master_listen(port, 1024); + sp_offset=0; + while(sp_offset < 1024) + { + xx=read(tcfd,&mybuf[sp_offset],1024); + sp_offset+=xx; + } + sscanf(mybuf,"%d %f",&count,&throughput); + if(!flag) + printf("%-20s received %10d Kbytes @ %10.2f Kbytes/sec \n", + sp_remote_host,count,throughput); + else + printf("%-20s sent %10d Kbytes @ %10.2f Kbytes/sec \n", + sp_remote_host,count,throughput); + close(tcfd); +} + +/* + * Send results to the master. 
+ */ +#ifdef HAVE_ANSIC_C +void +sp_send_result(int port, int count, float throughput) +#else +void +sp_send_result(port, count, throughput) +int port,count; +float throughput; +#endif +{ + int msfd; + char mybuf[1024]; + sprintf(mybuf,"%d %f",count, throughput); + msfd=sp_start_child_send(sp_dest, port, &sp_my_cs_addr); + junk=write(msfd,mybuf,1024); + if(cdebug) + { + fprintf(newstdout,"Sending result\n"); + fflush(newstdout); + } + close(msfd); +} + +/* + * Start the channel for the master to send a message to + * a child on a port that the child + * has created for the parent to use to communicate. + */ +#ifdef HAVE_ANSIC_C +int +sp_start_master_send(char *sp_child_host_name, int sp_child_listen_port, struct in_addr *sp_my_ms_addr) +#else +int +sp_start_master_send(sp_child_host_name, sp_child_listen_port, sp_my_ms_addr) +char *sp_child_host_name; +int sp_child_listen_port; +struct in_addr *sp_my_ms_addr; +#endif +{ + int rc,master_socket_val; + struct sockaddr_in addr,raddr; + struct hostent *he; + int port,tmp_port; + int ecount=0; + struct in_addr *ip; + struct timespec req,rem; + + req.tv_sec = 0; + req.tv_nsec = 10000000; + rem.tv_sec = 0; + rem.tv_nsec = 10000000; + + he = gethostbyname(sp_child_host_name); + if (he == NULL) + { + printf("Master: Bad hostname >%s<\n",sp_child_host_name); + fflush(stdout); + exit(22); + } + if(mdebug ==1) + { + printf("Master: start master send: %s\n", he->h_name); + fflush(stdout); + } + ip = (struct in_addr *)he->h_addr_list[0]; +#ifndef UWIN + if(mdebug ==1) + { + printf("Master: child name: %s\n", (char *)inet_ntoa(*ip)); + printf("Master: child Port: %d\n", sp_child_listen_port); + fflush(stdout); + } +#endif + + port=sp_child_listen_port; + sp_my_ms_addr->s_addr = ip->s_addr; + /*port=CHILD_LIST_PORT;*/ + + raddr.sin_family = AF_INET; + raddr.sin_port = htons(port); + raddr.sin_addr.s_addr = ip->s_addr; + master_socket_val = socket(AF_INET, SOCK_STREAM, 0); + if (master_socket_val < 0) + { + perror("Master: socket 
failed:"); + exit(23); + } + bzero(&addr, sizeof(struct sockaddr_in)); + tmp_port=sp_master_esend_port; + addr.sin_port = htons(tmp_port); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = INADDR_ANY; + rc = -1; + while (rc < 0) + { + rc = bind(master_socket_val, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)); + if(rc < 0) + { + tmp_port++; + addr.sin_port=htons(tmp_port); + continue; + } + } + if(mdebug ==1) + { + printf("Master: Bound port\n"); + fflush(stdout); + } + if (rc < 0) + { + perror("Master: bind failed for sync channel to child.\n"); + exit(24); + } +#if defined(Windows) + sleep(1); +#else + nanosleep(&req,&rem); +#endif + +again: + rc = connect(master_socket_val, (struct sockaddr *)&raddr, + sizeof(struct sockaddr_in)); + if (rc < 0) + { + if(ecount++ < 300) + { +#if defined(Windows) + sleep(1); +#else + nanosleep(&req,&rem); +#endif + /*sleep(1);*/ + goto again; + } + perror("Master: connect failed\n"); + printf("Error %d\n",errno); + exit(25); + } + if(mdebug ==1) + { + printf("Master Connected\n"); + fflush(stdout); + } + return (master_socket_val); +} + +/* + * Start the childs listening service for messages from the master. 
+ */ +#ifdef HAVE_ANSIC_C +int +sp_start_child_listen(int listen_port, int size_of_message) +#else +int +sp_start_child_listen(listen_port, size_of_message) +int listen_port; +int size_of_message; +#endif +{ + int tsize; + int s,ns; + unsigned int me; + int rc; + int xx; + int tmp_port; + struct sockaddr_in *addr; + int sockerr; + int recv_buf_size=65536; + int optval=1; + xx = 0; + me=sizeof(struct sockaddr_in); + tsize=size_of_message; /* Number of messages to receive */ + s = socket(AF_INET, SOCK_STREAM, 0); + if (s < 0) + { + perror("socket failed:"); + exit(19); + } + sockerr = setsockopt (s, SOL_SOCKET, SO_RCVBUF, (char *) + &recv_buf_size, sizeof(int)); + if ( sockerr == -1 ) { + perror("Error in setsockopt 7\n"); + } + sockerr = setsockopt (s, SOL_SOCKET, SO_REUSEADDR, (char *) + &optval, sizeof(int)); + if ( sockerr == -1 ) { + perror("Error in setsockopt 8\n"); + } + bzero(&sp_child_sync_sock, sizeof(struct sockaddr_in)); + tmp_port=sp_child_listen_port; + sp_child_sync_sock.sin_port = htons(tmp_port); + sp_child_sync_sock.sin_family = AF_INET; + sp_child_sync_sock.sin_addr.s_addr = INADDR_ANY; + rc = -1; + while (rc < 0) + { + rc = bind(s, (struct sockaddr *)&sp_child_sync_sock, + sizeof(struct sockaddr_in)); + if(rc < 0) + { + tmp_port++; + sp_child_sync_sock.sin_port=htons(tmp_port); + continue; + } + } + sp_child_listen_port = ntohs(sp_child_sync_sock.sin_port); + if(cdebug ==1) + { + fprintf(newstdout,"Child: Listen: Bound at port %d\n", tmp_port); + fflush(newstdout); + } + if(rc < 0) + { + if(cdebug ==1) + { + fprintf(newstdout,"bind failed. Errno %d\n", errno); + fflush(newstdout); + } + exit(20); + } + + addr=&sp_child_async_sock; + listen(s,10); + if(cdebug) + { + fprintf(newstdout,"Child enters accept\n"); + fflush(newstdout); + } + ns=accept(s,(void *)addr,&me); + if(cdebug) + { + fprintf(newstdout,"Child attached for receive. 
Sock %d %d\n", ns,errno); + fflush(newstdout); + } + close(s); + return(ns); +} + + +/* + * The client runs this code + */ +#ifdef HAVE_ANSIC_C +void +sp_do_child_t(void) +#else +void +sp_do_child_t() +#endif +{ + int i,y; + int offset; + int sp_tcount=0; + /* child */ + /* + * Child reads from master + */ + sp_crfd=sp_start_child_listen(sp_child_listen_port, sp_msize); + sp_start_time=time_so_far(); + for(i=0;i%s<\n",sp_master_host_name); + fflush(stdout); + exit(22); + } + if(cdebug ==1) + { + fprintf(newstdout,"Child: start child send: %s\n", he->h_name); + fprintf(newstdout,"To: %s at port %d\n",sp_master_host_name, + sp_master_listen_port); + fflush(newstdout); + } + ip = (struct in_addr *)he->h_addr_list[0]; + + port=sp_master_listen_port; + sp_my_cs_addr->s_addr = ip->s_addr; + + raddr.sin_family = AF_INET; + raddr.sin_port = htons(port); + raddr.sin_addr.s_addr = ip->s_addr; + sp_child_socket_val = socket(AF_INET, SOCK_STREAM, 0); + if (sp_child_socket_val < 0) + { + perror("child: socket failed:"); + exit(23); + } + bzero(&addr, sizeof(struct sockaddr_in)); + tmp_port=sp_child_esend_port; + addr.sin_port = htons(tmp_port); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = INADDR_ANY; + rc = -1; + while (rc < 0) + { + rc = bind(sp_child_socket_val, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)); + if(rc < 0) + { + tmp_port++; + addr.sin_port=htons(tmp_port); + continue; + } + } + if(cdebug ==1) + { + fprintf(newstdout,"Child: Bound port %d\n",tmp_port); + fflush(newstdout); + } + if (rc < 0) + { + perror("Child: bind failed for sync channel to child.\n"); + exit(24); + } +#if defined(Windows) + sleep(1); +#else + nanosleep(&req,&rem); +#endif +again: + rc = connect(sp_child_socket_val, (struct sockaddr *)&raddr, + sizeof(struct sockaddr_in)); + if (rc < 0) + { + if(ecount++<300) + { +#if defined(Windows) + sleep(1); +#else + nanosleep(&req,&rem); +#endif + goto again; + } + + fprintf(newstdout,"child: connect failed. 
Errno %d \n",errno); + fflush(newstdout); + exit(25); + } + if(cdebug ==1) + { + fprintf(newstdout,"child Connected\n"); + fflush(newstdout); + } + return (sp_child_socket_val); +} + +#ifdef HAVE_ANSIC_C +void +do_speed_check(int client_flag) +#else +void +do_speed_check(client_flag) +int client_flag; +#endif +{ + int i; + if(client_flag) + { + speed_main(" "," ",reclen,kilobytes64,client_flag); + } + else + { + printf("Checking %d clients\n",clients_found); + for(i=0;i xor ops + * Seed composed from: blocknumber + (do not include childnum as you want duplicates) + * size ... size of buffers. (in bytes) + * percent. Percent of buffer to modify. + * percent_interior. Percent of buffer that is dedupable within + * and across files + * percent_compress. Percent of buffer that is dedupable within + * but not across files + * + * Returns 0 (zero) for success, and -1 (minus one) for failure. + */ +int +gen_new_buf(char *ibuf, char *obuf, long seed, int size, int percent, + int percent_interior, int percent_compress, int all) +{ + register long *ip, *op; /* Register for speed */ + register long iseed; /* Register for speed */ + register long isize; /* Register for speed */ + register long cseed; /* seed for dedupable for within & ! 
across */ + register int x,w; /* Register for speed */ + register int value; /* Register for speed */ + register int interior_size; /* size of interior dedup region */ + register int compress_size; /* size of compression dedup region */ + if(ibuf == NULL) /* no input buf */ + return(-1); + if(obuf == NULL) /* no output buf */ + return(-1); + if((percent > 100) || (percent < 0)) /* percent check */ + return(-1); + if(size == 0) /* size check */ + return(-1); + srand(seed+1+(((int)numrecs64)*dedup_mseed)); /* set random seed */ + iseed = rand(); /* generate random value */ + isize = (size * percent)/100; /* percent that is dedupable */ + interior_size = ((isize * percent_interior)/100);/* /sizeof(long) */ + compress_size =((interior_size * percent_compress)/100); + ip = (long *)ibuf; /* pointer to input buf */ + op = (long *)obuf; /* pointer to output buf */ + if(all == 0) /* Special case for verify only */ + isize = sizeof(long); + /* interior_size = dedup_within + dedup_across */ + for(w=0;w 0) + { + srand(1+seed+((chid+1)*(int)numrecs64)*dedup_mseed); + value=rand(); +/* printf("Non-dedup value %x seed %x\n",value,seed);*/ + for( ; x> 62)) + mti); +} + +/* initialize by an array with array-length */ +/* init_key is the array for initializing keys */ +/* key_length is its length */ +void init_by_array64(unsigned long long init_key[], + unsigned long long key_length) +{ + unsigned long long i, j, k; + init_genrand64(19650218ULL); + i=1; j=0; + k = (NN>key_length ? 
NN : key_length); + for (; k; k--) { + mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 62)) * 3935559000370003845ULL)) + + init_key[j] + j; /* non linear */ + i++; j++; + if (i>=NN) { mt[0] = mt[NN-1]; i=1; } + if (j>=key_length) j=0; + } + for (k=NN-1; k; k--) { + mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 62)) * 2862933555777941757ULL)) + - i; /* non linear */ + i++; + if (i>=NN) { mt[0] = mt[NN-1]; i=1; } + } + + mt[0] = 1ULL << 63; /* MSB is 1; assuring non-zero initial array */ +} + +/* generates a random number on [0, 2^64-1]-interval */ +unsigned long long genrand64_int64(void) +{ + int i; + unsigned long long x; + static unsigned long long mag01[2]={0ULL, MATRIX_A}; + + if (mti >= NN) { /* generate NN words at one time */ + + /* if init_genrand64() has not been called, */ + /* a default initial seed is used */ + if (mti == NN+1) + init_genrand64(5489ULL); + + for (i=0;i>1) ^ mag01[(int)(x&1ULL)]; + } + for (;i>1) ^ mag01[(int)(x&1ULL)]; + } + x = (mt[NN-1]&UM)|(mt[0]&LM); + mt[NN-1] = mt[MM-1] ^ (x>>1) ^ mag01[(int)(x&1ULL)]; + + mti = 0; + } + + x = mt[mti++]; + + x ^= (x >> 29) & 0x5555555555555555ULL; + x ^= (x << 17) & 0x71D67FFFEDA60000ULL; + x ^= (x << 37) & 0xFFF7EEE000000000ULL; + x ^= (x >> 43); + + return x; +} + +/* generates a random number on [0, 2^63-1]-interval */ +long long genrand64_int63(void) +{ + return (long long)(genrand64_int64() >> 1); +} + +/* generates a random number on [0,1]-real-interval */ +double genrand64_real1(void) +{ + return (genrand64_int64() >> 11) * (1.0/9007199254740991.0); +} + +/* generates a random number on [0,1)-real-interval */ +double genrand64_real2(void) +{ + return (genrand64_int64() >> 11) * (1.0/9007199254740992.0); +} + +/* generates a random number on (0,1)-real-interval */ +double genrand64_real3(void) +{ + return ((genrand64_int64() >> 12) + 0.5) * (1.0/4503599627370496.0); +} + +#ifdef MT_TEST + +int main(void) +{ + int i; + unsigned long long init[4]={0x12345ULL, 0x23456ULL, 0x34567ULL, 0x45678ULL}, length=4; 
+ init_by_array64(init, length); + printf("1000 outputs of genrand64_int64()\n"); + for (i=0; i<1000; i++) { + printf("%20llu ", genrand64_int64()); + if (i%5==4) printf("\n"); + } + printf("\n1000 outputs of genrand64_real2()\n"); + for (i=0; i<1000; i++) { + printf("%10.8f ", genrand64_real2()); + if (i%5==4) printf("\n"); + } + return 0; +} +#endif + +/*----------------------------------------------------------------------*/ +/* */ +/* The PIT Programmable Interdimensional Timer */ +/* */ +/* This is used to measure time, when you know something odd is going */ +/* to be happening with your wrist watch. For example, you have entered */ +/* a temporal distortion field where time its-self is not moving */ +/* as it does in your normal universe. ( thing either intense */ +/* gravitational fields bending space-time, or virtual machines playing */ +/* with time ) */ +/* So.. you need to measure time, but with respect to a normal */ +/* space-time. So.. we deal with this by calling for time from another */ +/* machine, but do so with a very similar interface to that of */ +/* gettimeofday(). */ +/* To activate this, one only needs to set an environmental variable. */ +/* Example: setenv IOZ_PIT hostname_of_PIT_server */ +/* The environmental variable tells this client where to go to get */ +/* correct timeofday time stamps, with the usual gettimeofday() */ +/* resolution. (microsecond resolution) */ +/*----------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/* The PIT client: Adapted from source found on the web for someone's */ +/* daytime client code. (Used in many examples for network programming */ +/* Reads PIT info over a socket from a PIT server. */ +/* The PIT server sends its raw microsecond version of gettimeofday */ +/* The PIT client converts this back into timeval structure format. */ +/* Written by: Don Capps. 
[ capps@iozone.org ] */ +/*----------------------------------------------------------------------*/ + +/*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< */ +/* >>>> DON'T forget, you must put a definition for PIT <<<<<<<<<< */ +/* >>>> in /etc/services <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< */ +/*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< */ +#define DFLT_SERVICE "PIT" /* Default service name. */ +#define INVALID_DESC -1 /* Invalid file (socket) descriptor. */ +#define MAXBFRSIZE 256 /* Max bfr sz to read remote TOD. */ + +/* +** Type definitions (for convenience). +*/ +#if defined(Windows) +int false = 0; +int true = 1; +#else +typedef enum { false = 0, true } boolean; +#endif +typedef struct sockaddr_in sockaddr_in_t; +typedef struct sockaddr_in6 sockaddr_in6_t; + +/* + * Routine to mimic gettimeofday() using a remote PIT server + */ +#if defined(_SUA_) +struct timezone { + int tz_minuteswest; + int tz_dsttime; +}; +#endif + +int +pit_gettimeofday( struct timeval *tp, struct timezone *foo, + char *pit_hostname, char *pit_service) +{ + int sckt; /* socket descriptor */ + unsigned scopeId = 0; + + /* See if the interdimensional rift is active */ + + if(pit_hostname[0] == 0) + { + return gettimeofday(tp,foo); + } + + if ( ( sckt = openSckt( pit_hostname, + pit_service, + scopeId ) ) == INVALID_DESC ) + { + fprintf( stderr, + "Sorry... a connectionless socket could " + "not be set up.\n"); + return -1; + } + /* + ** Get the remote PIT. + */ + pit( sckt ,tp ); + close(sckt); + return 0; +} + +/* + * Opens a socket for the PIT to use to get the time + * from a remote time server ( A PIT server ) + */ +static int openSckt( const char *host, + const char *service, + unsigned int scopeId ) +{ + struct addrinfo *ai; + int aiErr; + struct addrinfo *aiHead; + struct addrinfo hints; + sockaddr_in6_t *pSadrIn6; + int sckt; + /* + * Initialize the 'hints' structure for getaddrinfo(3). 
+ */ + memset( &hints, 0, sizeof( hints ) ); + hints.ai_family = PF_UNSPEC; /* IPv4 or IPv6 records */ + hints.ai_socktype = SOCK_STREAM; /* Connection oriented communication.*/ + hints.ai_protocol = IPPROTO_TCP; /* TCP transport layer protocol only. */ + /* + ** Look up the host/service information. + */ + if ( ( aiErr = getaddrinfo( host, + service, + &hints, + &aiHead ) ) != 0 ) + { + fprintf( stderr, "(line %d): ERROR - %s.\n", __LINE__, + gai_strerror( aiErr ) ); + return INVALID_DESC; + } + /* + ** Go through the list and try to open a connection. Continue until either + ** a connection is established or the entire list is exhausted. + */ + for ( ai = aiHead, sckt = INVALID_DESC; + ( ai != NULL ) && ( sckt == INVALID_DESC ); + ai = ai->ai_next ) + { + /* + ** IPv6 kluge. Make sure the scope ID is set. + */ + if ( ai->ai_family == PF_INET6 ) + { + pSadrIn6 = (sockaddr_in6_t*) ai->ai_addr; + if ( pSadrIn6->sin6_scope_id == 0 ) + { + pSadrIn6->sin6_scope_id = scopeId; + } /* End IF the scope ID wasn't set. */ + } /* End IPv6 kluge. */ + /* + ** Create a socket. + */ + sckt = socket( ai->ai_family, ai->ai_socktype, ai->ai_protocol ); + if(sckt == -1) + { + sckt = INVALID_DESC; + continue; /* Try the next address record in the list. */ + } + /* + ** Set the target destination for the remote host on this socket. That + ** is, this socket only communicates with the specified host. + */ + if (connect( sckt, ai->ai_addr, ai->ai_addrlen ) ) + { + (void) close( sckt ); /* Could use system call again here, + but why? */ + sckt = INVALID_DESC; + continue; /* Try the next address record in the list. */ + } + } /* End FOR each address record returned by getaddrinfo(3). */ + /* + ** Clean up & return. + */ + freeaddrinfo( aiHead ); + return sckt; +} /* End openSckt() */ + +/* + * Read the PIT, and convert this back into timeval + * info, and store it in the timeval structure that was + * passed in. 
+ */ +static void pit( int sckt, struct timeval *tp) +{ + char bfr[ MAXBFRSIZE+1 ]; + int inBytes; + long long value; + /* + ** Send a datagram to the server to wake it up. The content isn't + ** important, but something must be sent to let it know we want the TOD. + */ + junk=write( sckt, "Are you there?", 14 ); + /* + ** Read the PIT from the remote host. + */ + inBytes = read( sckt, bfr, MAXBFRSIZE ); + bfr[ inBytes ] = '\0'; /* Null-terminate the received string. */ + /* + * Convert result to timeval structure format + */ + sscanf(bfr,"%llu\n",&value); + tp->tv_sec = (long)(value / 1000000); + tp->tv_usec = (long)(value % 1000000); +} + +/* sync does not exist in SUA */ +#if defined(_SUA_) +sync() +{ +} +#endif + + +#define BUCKETS 40 +long long buckets[BUCKETS]; +long long bucket_val[BUCKETS] = + { 20,40,60,80,100, + 200,400,600,800,1000, + 2000,4000,6000,8000,10000, + 12000,14000,16000,18000,20000, + 40000,60000,80000,100000, + 200000,400000,600000,800000,1000000, + 2000000,4000000,6000000,8000000,10000000, + 20000000,30000000,60000000,90000000,120000000,120000001}; +/* + * Buckets: (Based on a Netapp internal consensus) + * 0 1 2 3 4 + * <=20us <=40us <=60us <=80us <=100us + * + * 5 6 7 8 9 + * <=200us <=400us <=600us <=88us <=1ms + * + * 10 11 12 13 14 + * <=2ms <=4ms <=6ms <=8ms <=10ms + * + * 15 16 17 18 19 + * <=12ms <=14ms <=16ms <=18ms <=20ms + * + * 20 21 22 23 24 + * <=20ms <=40ms <=60ms <=80ms <=100ms + * + * 25 26 27 28 29 + * <=200ms <=400ms <=600ms <=800ms <=1s + * + * 30 31 32 33 34 + * <=2s <=4s <=6s <=8s <=10s + * + * 35 36 37 38 39 + * <=20s <=30s <=60 <=90s >90 + */ + +/* + fp=fopen("/tmp/iozone_latency_summary.txt","a"); + dump_hist(fp); +*/ + +void +hist_insert(double my_value) +{ + int k; + long long value; + + /* Convert to micro-seconds */ + value = (long long)(my_value * 1000000); + for(k=0;k bucket_val[k]) + { + buckets[k]++; + break; + } + } + } +} + +void +dump_hist(char *what,int id) +{ + FILE *fp; + + char name[256]; + + 
sprintf(name,"%s_child_%d.txt","Iozone_histogram",id); + + fp = fopen(name,"a"); + +#ifndef NO_PRINT_LLD + fprintf(fp,"Child: %d Op: %s\n",id,what); + fprintf(fp,"Band 1: "); + fprintf(fp," 20us:%-7.1lld ",buckets[0]); + fprintf(fp," 40us:%-7.1lld ",buckets[1]); + fprintf(fp," 60us:%-7.1lld ",buckets[2]); + fprintf(fp," 80us:%-7.1lld ",buckets[3]); + fprintf(fp,"100us:%-7.1lld \n",buckets[4]); + + fprintf(fp,"Band 2: "); + fprintf(fp,"200us:%-7.1lld ",buckets[5]); + fprintf(fp,"400us:%-7.1lld ",buckets[6]); + fprintf(fp,"600us:%-7.1lld ",buckets[7]); + fprintf(fp,"800us:%-7.1lld ",buckets[8]); + fprintf(fp," 1ms:%-7.1lld \n",buckets[9]); + + fprintf(fp,"Band 3: "); + fprintf(fp," 2ms:%-7.1lld ",buckets[10]); + fprintf(fp," 4ms:%-7.1lld ",buckets[11]); + fprintf(fp," 6ms:%-7.1lld ",buckets[12]); + fprintf(fp," 8ms:%-7.1lld ",buckets[13]); + fprintf(fp," 10ms:%-7.1lld \n",buckets[14]); + + fprintf(fp,"Band 4: "); + fprintf(fp," 12ms:%-7.1lld ",buckets[15]); + fprintf(fp," 14ms:%-7.1lld ",buckets[16]); + fprintf(fp," 16ms:%-7.1lld ",buckets[17]); + fprintf(fp," 18ms:%-7.1lld ",buckets[18]); + fprintf(fp," 20ms:%-7.1lld \n",buckets[19]); + + fprintf(fp,"Band 5: "); + fprintf(fp," 40ms:%-7.1lld ",buckets[20]); + fprintf(fp," 60ms:%-7.1lld ",buckets[21]); + fprintf(fp," 80ms:%-7.1lld ",buckets[22]); + fprintf(fp,"100ms:%-7.1lld \n",buckets[23]); + + fprintf(fp,"Band 6: "); + fprintf(fp,"200ms:%-7.1lld ",buckets[24]); + fprintf(fp,"400ms:%-7.1lld ",buckets[25]); + fprintf(fp,"600ms:%-7.1lld ",buckets[26]); + fprintf(fp,"800ms:%-7.1lld ",buckets[27]); + fprintf(fp," 1s:%-7.1lld \n",buckets[28]); + + fprintf(fp,"Band 7: "); + fprintf(fp," 2s:%-7.1lld ",buckets[29]); + fprintf(fp," 4s:%-7.1lld ",buckets[30]); + fprintf(fp," 6s:%-7.1lld ",buckets[31]); + fprintf(fp," 8s:%-7.1lld ",buckets[32]); + fprintf(fp," 10s:%-7.1lld \n",buckets[33]); + + fprintf(fp,"Band 8: "); + fprintf(fp," 20s:%-7.1lld ",buckets[34]); + fprintf(fp," 40s:%-7.1lld ",buckets[35]); + fprintf(fp," 
60s:%-7.1lld ",buckets[36]); + fprintf(fp," 80s:%-7.1lld ",buckets[37]); + fprintf(fp," 120s:%-7.1lld \n",buckets[38]); + + fprintf(fp,"Band 9: "); + fprintf(fp,"120+s:%-7.1lld \n\n",buckets[39]); +#else + fprintf(fp,"Child: %d Op: %s\n",id,what); + fprintf(fp,"Band 1: "); + fprintf(fp," 20us:%-7.1ld ",buckets[0]); + fprintf(fp," 40us:%-7.1ld ",buckets[1]); + fprintf(fp," 60us:%-7.1ld ",buckets[2]); + fprintf(fp," 80us:%-7.1ld ",buckets[3]); + fprintf(fp,"100us:%-7.1ld \n",buckets[4]); + + fprintf(fp,"Band 2: "); + fprintf(fp,"200us:%-7.1ld ",buckets[5]); + fprintf(fp,"400us:%-7.1ld ",buckets[6]); + fprintf(fp,"600us:%-7.1ld ",buckets[7]); + fprintf(fp,"800us:%-7.1ld ",buckets[8]); + fprintf(fp," 1ms:%-7.1ld \n",buckets[9]); + + fprintf(fp,"Band 3: "); + fprintf(fp," 2ms:%-7.1ld ",buckets[10]); + fprintf(fp," 4ms:%-7.1ld ",buckets[11]); + fprintf(fp," 6ms:%-7.1ld ",buckets[12]); + fprintf(fp," 8ms:%-7.1ld ",buckets[13]); + fprintf(fp," 10ms:%-7.1ld \n",buckets[14]); + + fprintf(fp,"Band 4: "); + fprintf(fp," 12ms:%-7.1ld ",buckets[15]); + fprintf(fp," 14ms:%-7.1ld ",buckets[16]); + fprintf(fp," 16ms:%-7.1ld ",buckets[17]); + fprintf(fp," 18ms:%-7.1ld ",buckets[18]); + fprintf(fp," 20ms:%-7.1ld \n",buckets[19]); + + fprintf(fp,"Band 5: "); + fprintf(fp," 40ms:%-7.1ld ",buckets[20]); + fprintf(fp," 60ms:%-7.1ld ",buckets[21]); + fprintf(fp," 80ms:%-7.1ld ",buckets[22]); + fprintf(fp,"100ms:%-7.1ld \n",buckets[23]); + + fprintf(fp,"Band 6: "); + fprintf(fp,"200ms:%-7.1ld ",buckets[24]); + fprintf(fp,"400ms:%-7.1ld ",buckets[25]); + fprintf(fp,"600ms:%-7.1ld ",buckets[26]); + fprintf(fp,"800ms:%-7.1ld ",buckets[27]); + fprintf(fp," 1s:%-7.1ld \n",buckets[28]); + + fprintf(fp,"Band 7: "); + fprintf(fp," 2s:%-7.1ld ",buckets[29]); + fprintf(fp," 4s:%-7.1ld ",buckets[30]); + fprintf(fp," 6s:%-7.1ld ",buckets[31]); + fprintf(fp," 8s:%-7.1ld ",buckets[32]); + fprintf(fp," 10s:%-7.1ld \n",buckets[33]); + + fprintf(fp,"Band 8: "); + fprintf(fp," 20s:%-7.1ld ",buckets[34]); + 
fprintf(fp," 40s:%-7.1ld ",buckets[35]); + fprintf(fp," 60s:%-7.1ld ",buckets[36]); + fprintf(fp," 80s:%-7.1ld ",buckets[37]); + fprintf(fp," 120s:%-7.1ld \n",buckets[38]); + + fprintf(fp,"Band 9: "); + fprintf(fp,"120+s:%-7.1ld \n\n",buckets[39]); +#endif + fclose(fp); +} + +#ifdef HAVE_ANSIC_C +void * thread_fwrite_test(void *x) +#else +void * thread_fwrite_test( x) +#endif +{ + + struct child_stats *child_stat; + double starttime1 = 0; + double temp_time; + double hist_time; + double desired_op_rate_time; + double actual_rate; + double walltime, cputime; + double compute_val = (double)0; + float delay = (float)0; + double thread_qtime_stop,thread_qtime_start; + off64_t traj_offset; + long long w_traj_bytes_completed; + long long w_traj_ops_completed; + FILE *w_traj_fd; + int fd; + long long recs_per_buffer; + long long stopped,i; + off64_t written_so_far, read_so_far, re_written_so_far,re_read_so_far; + long long xx,xx2; + char *dummyfile [MAXSTREAMS]; /* name of dummy file */ + char *nbuff; + char *maddr; + char *wmaddr,*free_addr; + char now_string[30]; + int anwser,bind_cpu,wval; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif + off64_t filebytes64; + char tmpname[256]; + FILE *thread_wqfd; + FILE *thread_Lwqfd; + char *filename; + + char *stdio_buf; + char *how; + long long Index = 0; + FILE *stream = NULL; + static int First_Run = 1; + + if(compute_flag) + delay=compute_time; + nbuff=maddr=wmaddr=free_addr=0; + thread_qtime_stop=thread_qtime_start=0; + thread_wqfd=w_traj_fd=thread_Lwqfd=(FILE *)0; + traj_offset=walltime=cputime=0; + anwser=bind_cpu=0; + + if(w_traj_flag) + { + filebytes64 = w_traj_fsize; + numrecs64=w_traj_ops; + } + else + { + filebytes64 = numrecs64*reclen; + } + written_so_far=read_so_far=re_written_so_far=re_read_so_far=0; + w_traj_bytes_completed=w_traj_ops_completed=0; + recs_per_buffer = cache_size/reclen ; +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + { + xx = (long long)((long)x); + } + else + { + 
xx=chid; + } +#endif +#ifndef NO_THREADS +#ifdef _HPUX_SOURCE + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + if(debug1 ) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + filename=dummyfile[xx]; + + if(mmapflag || async_flag) + return(0); + + stdio_buf=(char *)malloc((size_t)reclen); + + if(Uflag) /* Unmount and re-mount the mountpoint */ + { + purge_buffer_cache(); + } + + if(First_Run==1) + { + First_Run=0; + if(check_filename(filename)) + how="r+"; /* file exists, don't create and zero a new one. */ + else + how="w+"; /* file doesn't exist. create it. */ + } + else + how="r+"; /* re-tests should error out if file does not exist. 
*/ + +#ifdef IRIX64 + if((stream=(FILE *)fopen(filename,how)) == 0) + { + printf("\nCan not fdopen temp file: %s %lld\n", + filename,errno); + perror("fdopen"); + exit(48); + } +#else + if((stream=(FILE *)I_FOPEN(filename,how)) == 0) + { +#ifdef NO_PRINT_LLD + printf("\nCan not fdopen temp file: %s %d\n", + filename,errno); +#else + printf("\nCan not fdopen temp file: %s %d\n", + filename,errno); +#endif + perror("fdopen"); + exit(49); + } +#endif + fd=fileno(stream); + fsync(fd); + if(direct_flag) + setvbuf(stream,stdio_buf,_IONBF,reclen); + else + setvbuf(stream,stdio_buf,_IOFBF,reclen); + + buffer=mainbuffer; + if(fetchon) + fetchit(buffer,reclen); + if(verify || dedup || dedup_interior) + fill_buffer(buffer,reclen,(long long)pattern,sverify,(long long)0); + + compute_val=(double)0; + + /*******************************************************************/ + + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + child_stat->flag=CHILD_STATE_READY; /* Tell parent child is ready to go */ + + if(distributed && client_iozone) + tell_master_ready(chid); + if(distributed && client_iozone) + { + if(cdebug) + { + printf("Child %d waiting for go from master\n",(int)xx); + fflush(stdout); + } + wait_for_master_go(chid); + if(cdebug) + { + printf("Child %d received go from master\n",(int)xx); + fflush(stdout); + } + } + else + { + while(child_stat->flag!=CHILD_STATE_BEGIN) /* Wait for signal from parent */ + Poll((long long)1); + } + + written_so_far=0; + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->actual = 0; + child_stat->throughput = 0; + stopped=0; + if(Q_flag) + { + sprintf(tmpname,"Child_%d_fwol.dat",(int)xx); + thread_wqfd=fopen(tmpname,"a"); + if(thread_wqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + fprintf(thread_wqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + 
if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","fwrite test start: ",now_string); + } + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + /*******************************************************************/ + for(i=0; i (MAXBUFFERSIZE-reclen)) + Index=0; + buffer = mbuffer + Index; + } + if((verify & diag_v) || dedup || dedup_interior) + fill_buffer(buffer,reclen,(long long)pattern,sverify,i); + if(purge) + purgeit(buffer,reclen); + if(Q_flag || hist_summary || op_rate_flag) + { + thread_qtime_start=time_so_far(); + } + if(fwrite(buffer, (size_t) reclen, 1, stream) != 1) + { +#ifdef NO_PRINT_LLD + printf("\nError fwriting block %ld, fd= %d\n", i, + fd); +#else + printf("\nError fwriting block %lld, fd= %d\n", i, + fd); +#endif + perror("fwrite"); + signal_handler(); + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long)((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + fprintf(thread_wqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_wqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + w_traj_ops_completed++; + w_traj_bytes_completed+=reclen; + 
written_so_far+=reclen/1024; + } + if(include_flush) + { + fflush(stream); + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + } + if(include_close) + { + wval=fclose(stream); + if(wval==-1){ + perror("fclose"); + signal_handler(); + } + } + /*******************************************************************/ + if(!stopped){ + temp_time = time_so_far(); + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + } + if(cdebug) + { + printf("Child %d: throughput %f actual %f \n",(int)chid, child_stat->throughput, + child_stat->actual); + fflush(stdout); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_FWRITE_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + + if (debug1) { + printf(" child/slot: %lld, wall-cpu: %8.3f %8.3fC" " -> %6.2f%%\n", + xx, walltime, cputime, + cpu_util(cputime, walltime)); + } + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + stopped=0; + + /*******************************************************************/ + /* End fwrite performance test. 
*************************************/ + /*******************************************************************/ + + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + if(Q_flag && (thread_wqfd !=0) ) + fclose(thread_wqfd); + free(dummyfile[xx]); + if(w_traj_flag) + fclose(w_traj_fd); + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Fwrite test finished: ",now_string); + fclose(thread_Lwqfd); + } + /*******************************************************************/ + if(!include_close) + { + wval=fflush(stream); + if(wval==-1){ + perror("fflush"); + signal_handler(); + } + wval=fsync(fd); + if(wval==-1){ + perror("fsync"); + signal_handler(); + } + wval=fclose(stream); + if(wval==-1){ + perror("fclose"); + signal_handler(); + } + } + + if(restf) + sleep((int)(int)rest_val); + + free(stdio_buf); + if(OPS_flag || MS_flag){ + filebytes64=filebytes64/reclen; + } + /*******************************************************************/ + if(hist_summary) + dump_hist("Fwrite",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif + +return(0); +} + + +#ifdef HAVE_ANSIC_C +void * thread_fread_test(void *x) +#else +void * thread_fread_test( x) +#endif +{ + struct child_stats *child_stat; + double starttime1 = 0; + double temp_time; + double hist_time; + double desired_op_rate_time; + double actual_rate; + double walltime, cputime; + double compute_val = (double)0; + float delay = (float)0; + double thread_qtime_stop,thread_qtime_start; + off64_t traj_offset; + long long w_traj_bytes_completed; + long long w_traj_ops_completed; + FILE *w_traj_fd; + int fd; + long long recs_per_buffer; + long long stopped,i; + off64_t written_so_far, read_so_far, re_written_so_far,re_read_so_far; + long long xx,xx2; + char *dummyfile [MAXSTREAMS]; /* name of dummy file */ + char *nbuff; + 
char *maddr; + char *wmaddr,*free_addr; + char now_string[30]; + int anwser,bind_cpu; +#if defined(VXFS) || defined(solaris) + int test_foo = 0; +#endif + off64_t filebytes64; + char tmpname[256]; + FILE *thread_wqfd; + FILE *thread_Lwqfd; + + if(compute_flag) + delay=compute_time; + nbuff=maddr=wmaddr=free_addr=0; + thread_qtime_stop=thread_qtime_start=0; + thread_wqfd=w_traj_fd=thread_Lwqfd=(FILE *)0; + traj_offset=walltime=cputime=0; + anwser=bind_cpu=0; + char *stdio_buf; + long long Index = 0; + FILE *stream = NULL; + char *filename; + + if(w_traj_flag) + { + filebytes64 = w_traj_fsize; + numrecs64=w_traj_ops; + } + else + { + filebytes64 = numrecs64*reclen; + } + written_so_far=read_so_far=re_written_so_far=re_read_so_far=0; + w_traj_bytes_completed=w_traj_ops_completed=0; + recs_per_buffer = cache_size/reclen ; +#ifdef NO_THREADS + xx=chid; +#else + if(use_thread) + { + xx = (long long)((long)x); + } + else + { + xx=chid; + } +#endif +#ifndef NO_THREADS +#ifdef _HPUX_SOURCE + if(ioz_processor_bind) + { + bind_cpu=(begin_proc+(int)xx)%num_processors; + pthread_processor_bind_np(PTHREAD_BIND_FORCED_NP, + (pthread_spu_t *)&anwser, (pthread_spu_t)bind_cpu, pthread_self()); + my_nap(40); /* Switch to new cpu */ + } +#endif +#endif + if(use_thread) + nbuff=barray[xx]; + else + nbuff=buffer; + if(debug1 ) + { + if(use_thread) +#ifdef NO_PRINT_LLD + printf("\nStarting child %ld\n",xx); +#else + printf("\nStarting child %lld\n",xx); +#endif + else +#ifdef NO_PRINT_LLD + printf("\nStarting process %d slot %ld\n",getpid(),xx); +#else + printf("\nStarting process %d slot %lld\n",getpid(),xx); +#endif + + } + dummyfile[xx]=(char *)malloc((size_t)MAXNAMESIZE); + xx2=xx; + if(share_file) + xx2=(long long)0; + if(mfflag) + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#else + sprintf(dummyfile[xx],"%s",filearray[xx2]); +#endif + } + else + { +#ifdef NO_PRINT_LLD + sprintf(dummyfile[xx],"%s.DUMMY.%ld",filearray[xx2],xx2); +#else + 
sprintf(dummyfile[xx],"%s.DUMMY.%lld",filearray[xx2],xx2); +#endif + } + + filename=dummyfile[xx]; + + if(mmapflag || async_flag) + return(0); + + stdio_buf=(char *)malloc((size_t)reclen); + + if(Uflag) /* Unmount and re-mount the mountpoint */ + { + purge_buffer_cache(); + } +#ifdef IRIX64 + if((stream=(FILE *)fopen(filename,"r")) == 0) + { + printf("\nCan not fdopen temp file: %s\n", + filename); + perror("fdopen"); + exit(51); + } +#else + if((stream=(FILE *)I_FOPEN(filename,"r")) == 0) + { + printf("\nCan not fdopen temp file: %s\n", + filename); + perror("fdopen"); + exit(52); + } +#endif + fd=I_OPEN(filename,O_RDONLY,0); + fsync(fd); + close(fd); + if(direct_flag) + setvbuf(stream,stdio_buf,_IONBF,reclen); + else + setvbuf(stream,stdio_buf,_IOFBF,reclen); + + buffer=mainbuffer; + if(fetchon) + fetchit(buffer,reclen); + compute_val=(double)0; + + /*******************************************************************/ + /*******************************************************************/ + + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->throughput = 0; + child_stat->actual = 0; + child_stat->flag=CHILD_STATE_READY; /* Tell parent child is ready to go */ + + if(distributed && client_iozone) + tell_master_ready(chid); + if(distributed && client_iozone) + { + if(cdebug) + { + printf("Child %d waiting for go from master\n",(int)xx); + fflush(stdout); + } + wait_for_master_go(chid); + if(cdebug) + { + printf("Child %d received go from master\n",(int)xx); + fflush(stdout); + } + } + else + { + while(child_stat->flag!=CHILD_STATE_BEGIN) /* Wait for signal from parent */ + Poll((long long)1); + } + + written_so_far=0; + child_stat = (struct child_stats *)&shmaddr[xx]; + child_stat->actual = 0; + child_stat->throughput = 0; + stopped=0; + if(Q_flag) + { + sprintf(tmpname,"Child_%d_frol.dat",(int)xx); + thread_wqfd=fopen(tmpname,"a"); + if(thread_wqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open 
%s\n",tmpname); + exit(40); + } + fprintf(thread_wqfd,"Offset in Kbytes Latency in microseconds Transfer size in bytes\n"); + } + if(L_flag) + { + sprintf(tmpname,"Child_%d.log",(int)xx); + thread_Lwqfd=fopen(tmpname,"a"); + if(thread_Lwqfd==0) + { + client_error=errno; + if(distributed && client_iozone) + send_stop(); + printf("Unable to open %s\n",tmpname); + exit(40); + } + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","fread test start: ",now_string); + } + starttime1 = time_so_far(); + if(cpuutilflag) + { + walltime = starttime1; + cputime = cputime_so_far(); + } + + + + /*******************************************************************/ + + for(i=0; i (MAXBUFFERSIZE-reclen)) + Index=0; + buffer = mbuffer + Index; + } + if(purge) + purgeit(buffer,reclen); + if(Q_flag || hist_summary || op_rate_flag) + { + thread_qtime_start=time_so_far(); + } + if(fread(buffer, (size_t) reclen,1, stream) != 1) + { +#ifdef _64BIT_ARCH_ +#ifdef NO_PRINT_LLD + printf("\nError freading block %ld %x\n", i, + (unsigned long)buffer); +#else + printf("\nError freading block %lld %llx\n", i, + (unsigned long long)buffer); +#endif +#else +#ifdef NO_PRINT_LLD + printf("\nError freading block %ld %lx\n", i, + (long)buffer); +#else + printf("\nError freading block %lld %lx\n", i, + (long)buffer); +#endif +#endif + perror("read"); + exit(54); + } + if(verify){ + if(verify_buffer(buffer,reclen,(off64_t)i,reclen,(long long)pattern,sverify)){ + exit(55); + } + } + if(hist_summary) + { + thread_qtime_stop=time_so_far(); + hist_time =(thread_qtime_stop-thread_qtime_start); + hist_insert(hist_time); + } + if(op_rate_flag) + { + thread_qtime_stop=time_so_far(); + desired_op_rate_time = ((double)1.0/(double)op_rate); + actual_rate = (double)(thread_qtime_stop-thread_qtime_start); + if( actual_rate < desired_op_rate_time) + my_unap((unsigned long long)((desired_op_rate_time-actual_rate)*1000000.0 )); + } + if(Q_flag) + { + thread_qtime_stop=time_so_far(); +#ifdef NO_PRINT_LLD + 
fprintf(thread_wqfd,"%10.1ld %10.0f %10.1ld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#else + fprintf(thread_wqfd,"%10.1lld %10.0f %10.1lld\n",(traj_offset)/1024,((thread_qtime_stop-thread_qtime_start-time_res))*1000000,reclen); +#endif + } + w_traj_ops_completed++; + w_traj_bytes_completed+=reclen; + written_so_far+=reclen/1024; + + } + if(include_flush) + fflush(stream); + if(include_close) + { + fclose(stream); + } + + /*******************************************************************/ + + if(!stopped){ + temp_time = time_so_far(); + child_stat->throughput = ((temp_time - starttime1)-time_res) + -compute_val; + if(child_stat->throughput < (double).000001) + { + child_stat->throughput= time_res; + if(rec_prob < reclen) + rec_prob = reclen; + res_prob=1; + } + + if(OPS_flag){ + /*written_so_far=(written_so_far*1024)/reclen;*/ + written_so_far=w_traj_ops_completed; + } + child_stat->throughput = + (double)written_so_far/child_stat->throughput; + child_stat->actual = (double)written_so_far; + } + if(cdebug) + { + printf("Child %d: throughput %f actual %f \n",(int)chid, child_stat->throughput, + child_stat->actual); + fflush(stdout); + } + if(cpuutilflag) + { + cputime = cputime_so_far() - cputime; + if (cputime < cputime_res) + cputime = 0.0; + child_stat->cputime = cputime; + walltime = time_so_far() - walltime; + child_stat->walltime = walltime; + } + if(distributed && client_iozone) + tell_master_stats(THREAD_FREAD_TEST, chid, child_stat->throughput, + child_stat->actual, + child_stat->cputime, child_stat->walltime, + (char)*stop_flag, + (long long)CHILD_STATE_HOLD); + + if (debug1) { + printf(" child/slot: %lld, wall-cpu: %8.3f %8.3fC" " -> %6.2f%%\n", + xx, walltime, cputime, + cpu_util(cputime, walltime)); + } + child_stat->flag = CHILD_STATE_HOLD; /* Tell parent I'm done */ + stopped=0; + + /*******************************************************************/ + /* End fead performance test. 
*************************************/ + /*******************************************************************/ + + if(debug1) +#ifdef NO_PRINT_LLD + printf("\nChild finished %ld\n",xx); +#else + printf("\nChild finished %lld\n",xx); +#endif + if(Q_flag && (thread_wqfd !=0) ) + fclose(thread_wqfd); + free(dummyfile[xx]); + if(w_traj_flag) + fclose(w_traj_fd); + + if(L_flag) + { + get_date(now_string); + fprintf(thread_Lwqfd,"%-25s %s","Fread test finished: ",now_string); + fclose(thread_Lwqfd); + } + + /*******************************************************************/ + + if(!include_close) + { + fflush(stream); + fclose(stream); + } + stream = NULL; + + if(restf) + sleep((int)(int)rest_val); + + free(stdio_buf); + if(OPS_flag || MS_flag){ + filebytes64=filebytes64/reclen; + } + + /*******************************************************************/ + if(hist_summary) + dump_hist("Fread",(int)xx); + if(distributed && client_iozone) + return(0); +#ifdef NO_THREADS + exit(0); +#else + if(use_thread) + thread_exit(); + else + exit(0); +#endif + +return(0); +} diff --git a/src/components/appio/tests/iozone/iozone_visualizer.pl b/src/components/appio/tests/iozone/iozone_visualizer.pl new file mode 100755 index 0000000..a7c6b13 --- /dev/null +++ b/src/components/appio/tests/iozone/iozone_visualizer.pl @@ -0,0 +1,262 @@ +#!/usr/bin/perl + +use warnings; +use strict; + +# arguments: one of more report files +# +# Christian Mautner , 2005-10-31 +# Marc Schoechlin , 2007-12-02 +# +# This script is just a hack :-) +# +# This script is based loosely on the Generate_Graph set +# of scripts that come with iozone, but is a complete re-write +# +# The main reason to write this was the need to compare the behaviour of +# two or more different setups, for tuning filesystems or +# comparing different pieces of hardware. +# +# This script is in the public domain, too short and too trivial +# to deserve a copyright. 
+# +# Simply run iozone like, for example, ./iozone -a -g 4G > config1.out (if your machine has 4GB) +# +# and then run perl report.pl config1.out +# or get another report from another box into config2.out and run +# perl report.pl config1.out config2.out +# the look in the report_* directory for .png +# +# If you don't like png or the graphic size, search for "set terminal" in this file and put whatever gnuplot +# terminal you want. Note I've also noticed that gnuplot switched the set terminal png syntax +# a while back, you might need "set terminal png small size 900,700" +# +use Getopt::Long; + +my $column; +my %columns; +my $datafile; +my @datafiles; +my $outdir; +my $report; +my $nooffset=0; +my @Reports; +my @split; +my $size3d; my $size2d; + +# evaluate options +GetOptions( + '3d=s' => \$size3d, + '2d=s' => \$size2d, + 'nooffset' => \$nooffset +); + +$size3d = "900,700" unless defined $size3d; +$size2d = "800,500" unless defined $size2d; + + +my $xoffset = "offset -7"; +my $yoffset = "offset -3"; + +if ($nooffset == 1){ + $xoffset = ""; $yoffset = ""; +} + +print "\niozone_visualizer.pl : this script is distributed as public domain\n"; +print "Christian Mautner , 2005-10-31\n"; +print "Marc Schoechlin , 2007-12-02\n"; + + +@Reports=@ARGV; + +die "usage: $0 --3d=x,y -2d=x,y [...]\n" if not @Reports or grep (m|^-|, @Reports); + +die "report files must be in current directory" if grep (m|/|, @Reports); + +print "Configured xtics-offset '$xoffset', configured ytics-offfset '$yoffset' (disable with --nooffset)\n"; +print "Size 3d graphs : ".$size3d." (modify with '--3d=x,y')\n"; +print "Size 2d graphs : ".$size2d." 
(modify with '--2d=x,y')\n"; + +#KB reclen write rewrite read reread read write read rewrite read fwrite frewrite fread freread +%columns=( + 'KB' =>1, + 'reclen' =>2, + 'write' =>3, + 'rewrite' =>4, + 'read' =>5, + 'reread' =>6, + 'randread' =>7, + 'randwrite' =>8, + 'bkwdread' =>9, + 'recrewrite'=>10, + 'strideread'=>11, + 'fwrite' =>12, + 'frewrite' =>13, + 'fread' =>14, + 'freread' =>15, + ); + +# +# create output directory. the name is the concatenation +# of all report file names (minus the file extension, plus +# prefix report_) +# +$outdir="report_".join("_",map{/([^\.]+)(\..*)?/ && $1}(@Reports)); + +print STDERR "Output directory: $outdir "; + +if ( -d $outdir ) +{ + print STDERR "(removing old directory) "; + system "rm -rf $outdir"; +} + +mkdir $outdir or die "cannot make directory $outdir"; + +print STDERR "done.\nPreparing data files..."; + +foreach $report (@Reports) +{ + open(I, $report) or die "cannot open $report for reading"; + $report=~/^([^\.]+)/; + $datafile="$1.dat"; + push @datafiles, $datafile; + open(O, ">$outdir/$datafile") or die "cannot open $outdir/$datafile for writing"; + open(O2, ">$outdir/2d-$datafile") or die "cannot open $outdir/$datafile for writing"; + + my @sorted = sort { $columns{$a} <=> $columns{$b} } keys %columns; + print O "# ".join(" ",@sorted)."\n"; + print O2 "# ".join(" ",@sorted)."\n"; + + while() + { + next unless ( /^[\s\d]+$/ ); + @split = split(); + next unless ( @split == 15 ); + print O; + print O2 if $split[1] == 16384 or $split[0] == $split[1]; + } + close(I); + close(O); + close(O2); +} + +print STDERR "done.\nGenerating graphs:"; + + +open(HTML, ">$outdir/index.html") or die "cannot open $outdir/index.html for writing"; + +print HTML qq{ + + + +IOZone Statistics + + + + +

IOZone Statistics

+ + +\n"; +# Genereate 3d plots +foreach $column (keys %columns) +{ + print STDERR " $column"; + + open(G, ">$outdir/$column.do") or die "cannot open $outdir/$column.do for writing"; + + + + print G qq{ +set title "Iozone performance: $column" +set grid lt 2 lw 1 +set surface +set parametric +set xtics $xoffset +set ytics $yoffset +set logscale x 2 +set logscale y 2 +set autoscale z +#set xrange [2.**5:2.**24] +set xlabel "File size in KBytes" -2 +set ylabel "Record size in Kbytes" 2 +set zlabel "Kbytes/sec" 4,8 +set style data lines +set dgrid3d 80,80,3 +#set terminal png small picsize 900 700 +set terminal png small size $size3d nocrop +set output "$column.png" +}; + + print HTML qq{ + + + + }; + + print G "splot ". join(", ", map{qq{"$_" using 1:2:$columns{$column} title "$_"}}(@datafiles)); + + print G "\n"; + + close G; + + open(G, ">$outdir/2d-$column.do") or die "cannot open $outdir/$column.do for writing"; + print G qq{ +set title "Iozone performance: $column" +#set terminal png small picsize 450 350 +set terminal png medium size $size2d nocrop +set logscale x +set xlabel "File size in KBytes" +set ylabel "Kbytes/sec" +set output "2d-$column.png" +}; + + print HTML qq{ + + + + }; + + + + print G "plot ". join(", ", map{qq{"2d-$_" using 1:$columns{$column} title "$_" with lines}}(@datafiles)); + + print G "\n"; + + close G; + + if ( system("cd $outdir && gnuplot $column.do && gnuplot 2d-$column.do") ) + { + print STDERR "(failed) "; + } + else + { + print STDERR "(ok) "; + } +} + +print HTML qq{ +
+}; + +# Generate Menu +print HTML "## Overview\n
    \n"; +foreach $column (keys %columns){ + print HTML '
  • '.uc($column).' : '. + '3d\n". + '2d
  • \n"; +} +print HTML "
+

3d-$column

[top]
+ 3d-$column
+
+

2d-$column

[top]
+ 2d-$column
+
+ + +}; +print STDERR "done.\n"; diff --git a/src/components/appio/tests/iozone/libasync.c b/src/components/appio/tests/iozone/libasync.c new file mode 100644 index 0000000..50ae128 --- /dev/null +++ b/src/components/appio/tests/iozone/libasync.c @@ -0,0 +1,1604 @@ + + +/* + * Library for Posix async read operations with hints. + * Author: Don Capps + * Company: Iozone + * Date: 4/24/1998 + * + * Two models are supported. First model is a replacement for read() where the async + * operations are performed and the requested data is bcopy()-ed back into the users + * buffer. The second model is a new version of read() where the caller does not + * supply the address of the buffer but instead is returned an address to the + * location of the data. The second model eliminates a bcopy from the path. + * + * To use model #1: + * 1. Call async_init(&pointer_on_stack,fd,direct_flag); + * The fd is the file descriptor for the async operations. + * The direct_flag sets VX_DIRECT + * + * 2. Call async_read(gc, fd, ubuffer, offset, size, stride, max, depth) + * Where: + * gc ............ is the pointer on the stack + * fd ............ is the file descriptor + * ubuffer ....... is the address of the user buffer. + * offset ........ is the offset in the file to begin reading + * size .......... is the size of the transfer. + * stride ........ is the distance, in size units, to space the async reads. + * max ........... is the max size of the file to be read. + * depth ......... is the number of async operations to perform. + * + * 3. Call end_async(gc) when finished. + * Where: + * gc ............ is the pointer on the stack. + * + * To use model #2: + * 1. Call async_init(&pointer_on_stack,fd,direct_flag); + * The fd is the file descriptor for the async operations. + * The direct_flag sets VX_DIRECT + * 2. Call async_read(gc, fd, &ubuffer, offset, size, stride, max, depth) + * Where: + * gc ............ is the pointer on the stack + * fd ............ 
is the file descriptor + * ubuffer ....... is the address of a pointer that will be filled in + * by the async library. + * offset ........ is the offset in the file to begin reading + * size .......... is the size of the transfer. + * stride ........ is the distance, in size units, to space the async reads. + * max ........... is the max size of the file to be read. + * depth ......... is the number of async operations to perform. + * + * 3. Call async_release(gc) when finished with the data that was returned. + * This allows the async library to reuse the memory that was filled in + * and returned to the user. + * + * 4. Call end_async(gc) when finished. + * Where: + * gc ............ is the pointer on the stack. + * + * To use model #1: (WRITES) + * 1. Call async_init(&pointer_on_stack,fd,direct_flag); + * The fd is the file descriptor for the async operations. + * + * 2. Call async_write(gc, fd, ubuffer, size, offset, depth) + * Where: + * gc ............ is the pointer on the stack + * fd ............ is the file descriptor + * ubuffer ....... is the address of the user buffer. + * size .......... is the size of the transfer. + * offset ........ is the offset in the file to begin writing + * depth ......... is the number of async operations to perform. + * + * 3. Call end_async(gc) when finished. + * Where: + * gc ............ is the pointer on the stack. + * + * Notes: + * The intended use is to replace calls to read() with calls to + * async_read() and allow the user to make suggestions on + * what kind of async read-ahead would be nice to have. + * The first transfer requested is guaranteed to be complete + * before returning to the caller. The async operations will + * be started and will also be guaranteed to have completed + * if the next call specifies its first request to be one + * that was previously performed with an async operation.
+ * + * The async_read_no_copy() function allows the async operations + * to return the data to the user and not have to perform + * a bcopy of the data back into the user specified buffer + * location. This model is faster but assumes that the user + * application has been modified to work with this model. + * + * The async_write() is intended to enhance the performance of + * initial writes to a file. This is the slowest case in the write + * path as it must perform meta-data allocations and wait. + */ + +#include +#include +#if defined(solaris) || defined(linux) || defined(SCO_Unixware_gcc) +#else +#include +#endif +#include +#include +#ifndef bsd4_4 +#include +#endif +#ifdef VXFS +#include +#endif + +#if defined(OSFV5) || defined(linux) +#include +#endif + +#if defined(linux) +#include +#include +#include +#endif + +#if (defined(solaris) && defined(__LP64__)) || defined(__s390x__) || defined(FreeBSD) +/* If we are building for 64-bit Solaris, all functions that return pointers + * must be declared before they are used; otherwise the compiler will assume + * that they return ints and the top 32 bits of the pointer will be lost, + * causing segmentation faults. The following includes take care of this. + * It should be safe to add these for all other OSs too, but we're only + * doing it for Solaris now in case another OS turns out to be a special case. + */ +#include +#include +#include /* For the BSD string functions */ +#endif + +void mbcopy(char *source, char *dest, size_t len); + + +#if !defined(solaris) && !defined(off64_t) && !defined(_OFF64_T) && !defined(__off64_t_defined) && !defined(SCO_Unixware_gcc) +typedef long long off64_t; +#endif +#if defined(OSFV5) +#include +#endif + + +extern long long page_size; +extern int one; +/* + * Internal cache entrys. Each entry on the global + * cache, pointed to by async_init(gc) will be of + * this structure type. 
 */
char version[] = "Libasync Version $Revision$";

/*
 * One entry per async request.  alloc_cache() zero-fills the entry,
 * fills in whichever control block the build uses (myaiocb or
 * myaiocb64) and appends the entry to the list headed by struct cache.
 */
struct cache_ent {
	struct aiocb myaiocb;			/* For use in small file mode */
#ifdef _LARGEFILE64_SOURCE
#if defined(__CrayX1__)
	aiocb64_t myaiocb64;			/* For use in large file mode */
#else
	struct aiocb64 myaiocb64;		/* For use in large file mode */
#endif
#endif
	long long fd;				/* File descriptor */
	long long size;				/* Size of the transfer */
	struct cache_ent *forward;		/* link to next element on cache list */
	struct cache_ent *back;			/* link to previous element on the cache list */
	long long direct;			/* flag to indicate if the buffer should be */
						/* de-allocated by library */
	char *real_address;			/* Real address to free: the unaligned */
						/* malloc() result; aio_buf is the */
						/* page-aligned address inside it */

	volatile void *oldbuf;			/* Used for firewall to prevent in flight */
						/* accidents */
	int oldfd;				/* Used for firewall to prevent in flight */
						/* accidents */
	size_t oldsize;				/* Used for firewall to prevent in flight */
						/* accidents */
};

/*
 * Head of the cache list.
 * One of these is allocated by async_init() and handed back to the
 * caller as the "gc" handle that every other entry point takes.
 */
struct cache {
	struct cache_ent *head;			/* Head of cache list */
	struct cache_ent *tail;			/* tail of cache list */
	struct cache_ent *inuse_head;		/* head of in-use list */
	long long count;			/* How many elements on the cache list */
	struct cache_ent *w_head;		/* Head of write list */
	struct cache_ent *w_tail;		/* tail of write list */
	long long w_count;			/* How many elements on the write list */
	};

/* Upper bound on outstanding async operations; set by async_init(). */
long long max_depth;
/* NOTE(review): errno is normally supplied by <errno.h>; confirm this */
/* explicit declaration is still wanted on current platforms. */
extern int errno;

/*
 * Old-style (unprototyped) declarations of the library's functions, so
 * argument types are not checked at the call sites.
 */
struct cache_ent *alloc_cache();
struct cache_ent *incache();
void async_init();
void end_async();
int async_suspend();
int async_read();
void takeoff_cache();
void del_cache();
void async_release();
void putoninuse();
void takeoffinuse();
struct cache_ent *allocate_write_buffer();
size_t async_write();
void async_wait_for_write();
void async_put_on_write_queue();
void async_write_finish();

/* On Solaris _LP64 will be defined by if we're compiling
 * as a 64-bit binary.
Make sure that __LP64__ gets defined in this case, + * too -- it should be defined on the compiler command line, but let's + * not rely on this. + */ +#if defined(_LP64) +#if !defined(__LP64__) +#define __LP64__ +#endif +#endif + + +/***********************************************/ +/* Initialization routine to setup the library */ +/***********************************************/ +void +async_init(gc,fd,flag) +struct cache **gc; +int fd; +int flag; +{ +#ifdef VXFS + if(flag) + ioctl(fd,VX_SETCACHE,VX_DIRECT); +#endif + if(*gc) + { + printf("Warning calling async_init two times ?\n"); + return; + } + *gc=(struct cache *)malloc((size_t)sizeof(struct cache)); + if(*gc == 0) + { + printf("Malloc failed\n"); + exit(174); + } + bzero(*gc,sizeof(struct cache)); +#if defined(__AIX__) || defined(SCO_Unixware_gcc) + max_depth=500; +#else + max_depth=sysconf(_SC_AIO_MAX); +#endif +} + +/***********************************************/ +/* Tear down routine to shutdown the library */ +/***********************************************/ +void +end_async(gc) +struct cache *gc; +{ + del_cache(gc); + async_write_finish(gc); + free((void *)gc); +} + +/***********************************************/ +/* Wait for a request to finish */ +/***********************************************/ +int +async_suspend(struct cache_ent *ce) +{ +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + const struct aiocb * const cblist[1] = {&ce->myaiocb}; +#else + const struct aiocb64 * const cblist[1] = {&ce->myaiocb64}; +#endif +#else + const struct aiocb * const cblist[1] = {&ce->myaiocb}; +#endif + +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + return aio_suspend(cblist, 1, NULL); +#else + return aio_suspend64(cblist, 1, NULL); +#endif +#else + return aio_suspend(cblist, 1, NULL); +#endif +} + +/************************************************************************* + * This routine is a generic async reader assist funtion. 
It takes + * the same calling parameters as read() but also extends the + * interface to include: + * stride ..... For the async reads, what is the distance, in size units, + * to space the reads. Note: Stride of 0 indicates that + * you do not want any read-ahead. + * max ..... What is the maximum file offset for this operation. + * depth ..... How much read-ahead do you want. + * + * The calls to this will guarentee to complete the read() operation + * before returning to the caller. The completion may occur in two + * ways. First the operation may be completed by calling aio_read() + * and then waiting for it to complete. Second the operation may be + * completed by copying the data from a cache of previously completed + * async operations. + * In the event the read to be satisfied is not in the cache then a + * series of async operations will be scheduled and then the first + * async read will be completed. In the event that the read() can be + * satisfied from the cache then the data is copied back to the + * user buffer and a series of async reads will be initiated. If a + * read is issued and the cache contains data and the read can not + * be satisfied from the cache, then the cache is discarded, and + * a new cache is constructed. + * Note: All operations are aio_read(). The series will be issued + * as asyncs in the order requested. After all are in flight + * then the code will wait for the manditory first read. 
+ *************************************************************************/ + +int +async_read(gc, fd, ubuffer, offset, size, stride, max, depth) +struct cache *gc; +long long fd; +char *ubuffer; +off64_t offset; +long long size; +long long stride; +off64_t max; +long long depth; +{ + off64_t a_offset,r_offset; + long long a_size; + struct cache_ent *ce,*first_ce=0; + long long i; + ssize_t retval=0; + ssize_t ret; + long long start = 0; + long long del_read=0; + + a_offset=offset; + a_size = size; + /* + * Check to see if it can be completed from the cache + */ + if((ce=(struct cache_ent *)incache(gc,fd,offset,size))) + { +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) + { + async_suspend(ce); + } +#else + while((ret=aio_error64(&ce->myaiocb64))== EINPROGRESS) + { + async_suspend(ce); + } +#endif +#else + while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) + { + async_suspend(ce); + } +#endif + if(ret) + { + printf("aio_error 1: ret %d %d\n",ret,errno); + } +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + retval=aio_return(&ce->myaiocb); +#else +#if defined(__CrayX1__) + retval=aio_return64((aiocb64_t *)&ce->myaiocb64); +#else + retval=aio_return64((struct aiocb64 *)&ce->myaiocb64); +#endif + +#endif +#else + retval=aio_return(&ce->myaiocb); +#endif + if(retval > 0) + { +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + mbcopy((char *)ce->myaiocb.aio_buf,(char *)ubuffer,(size_t)retval); +#else + mbcopy((char *)ce->myaiocb64.aio_buf,(char *)ubuffer,(size_t)retval); +#endif +#else + mbcopy((char *)ce->myaiocb.aio_buf,(char *)ubuffer,(size_t)retval); +#endif + } +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + if(retval < ce->myaiocb.aio_nbytes) +#else + if(retval < ce->myaiocb64.aio_nbytes) +#endif +#else + if(retval < ce->myaiocb.aio_nbytes) +#endif + { + printf("aio_return error1: ret %d %d\n",retval,errno); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + printf("aio_return error1: fd %d offset %ld buffer %lx size %d Opcode 
%d\n", + ce->myaiocb.aio_fildes, + ce->myaiocb.aio_offset, + (long)(ce->myaiocb.aio_buf), + ce->myaiocb.aio_nbytes, + ce->myaiocb.aio_lio_opcode +#else + printf("aio_return error1: fd %d offset %lld buffer %lx size %d Opcode %d\n", + ce->myaiocb64.aio_fildes, + ce->myaiocb64.aio_offset, + (long)(ce->myaiocb64.aio_buf), + ce->myaiocb64.aio_nbytes, + ce->myaiocb64.aio_lio_opcode +#endif +#else + printf("aio_return error1: fd %d offset %d buffer %lx size %d Opcode %d\n", + ce->myaiocb.aio_fildes, + ce->myaiocb.aio_offset, + (long)(ce->myaiocb.aio_buf), + ce->myaiocb.aio_nbytes, + ce->myaiocb.aio_lio_opcode +#endif + ); + } + ce->direct=0; + takeoff_cache(gc,ce); + }else + { + /* + * Clear the cache and issue the first request async() + */ + del_cache(gc); + del_read++; + first_ce=alloc_cache(gc,fd,offset,size,(long long)LIO_READ); +again: +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ret=aio_read(&first_ce->myaiocb); +#else + ret=aio_read64(&first_ce->myaiocb64); +#endif +#else + ret=aio_read(&first_ce->myaiocb); +#endif + if(ret!=0) + { + if(errno==EAGAIN) + goto again; + else + printf("error returned from aio_read(). 
Ret %d errno %d\n",ret,errno); + } + } + if(stride==0) /* User does not want read-ahead */ + goto out; + if(a_offset<0) /* Before beginning of file */ + goto out; + if(a_offset+size>max) /* After end of file */ + goto out; + if(depth >=(max_depth-1)) + depth=max_depth-1; + if(depth==0) + goto out; + if(gc->count > 1) + start=depth-1; + for(i=start;i max) + continue; + if((ce=incache(gc,fd,r_offset,a_size))) + continue; + ce=alloc_cache(gc,fd,r_offset,a_size,(long long)LIO_READ); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ret=aio_read(&ce->myaiocb); +#else + ret=aio_read64(&ce->myaiocb64); +#endif +#else + ret=aio_read(&ce->myaiocb); +#endif + if(ret!=0) + { + takeoff_cache(gc,ce); + break; + } + } +out: + if(del_read) /* Wait for the first read to complete */ + { +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + while((ret=aio_error(&first_ce->myaiocb))== EINPROGRESS) + { + async_suspend(first_ce); + } +#else + while((ret=aio_error64(&first_ce->myaiocb64))== EINPROGRESS) + { + async_suspend(first_ce); + } +#endif +#else + while((ret=aio_error(&first_ce->myaiocb))== EINPROGRESS) + { + async_suspend(first_ce); + } +#endif + if(ret) + printf("aio_error 2: ret %d %d\n",ret,errno); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + retval=aio_return(&first_ce->myaiocb); +#else + retval=aio_return64(&first_ce->myaiocb64); +#endif +#else + retval=aio_return(&first_ce->myaiocb); +#endif +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + if(retval < first_ce->myaiocb.aio_nbytes) +#else + if(retval < first_ce->myaiocb64.aio_nbytes) +#endif +#else + if(retval < first_ce->myaiocb.aio_nbytes) +#endif + { + printf("aio_return error2: ret %d %d\n",retval,errno); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + printf("aio_return error2: fd %d offset %lld buffer %lx size %d Opcode %d\n", + first_ce->myaiocb.aio_fildes, + first_ce->myaiocb.aio_offset, + (long)(first_ce->myaiocb.aio_buf), + first_ce->myaiocb.aio_nbytes, + first_ce->myaiocb.aio_lio_opcode +#else + printf("aio_return error2: 
fd %d offset %lld buffer %lx size %d Opcode %d\n", + first_ce->myaiocb64.aio_fildes, + first_ce->myaiocb64.aio_offset, + (long)(first_ce->myaiocb64.aio_buf), + first_ce->myaiocb64.aio_nbytes, + first_ce->myaiocb64.aio_lio_opcode +#endif +#else + printf("aio_return error2: fd %d offset %d buffer %lx size %d Opcode %d\n", + first_ce->myaiocb.aio_fildes, + first_ce->myaiocb.aio_offset, + (long)(first_ce->myaiocb.aio_buf), + first_ce->myaiocb.aio_nbytes, + first_ce->myaiocb.aio_lio_opcode +#endif + ); + } + if(retval > 0) + { +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + mbcopy((char *)first_ce->myaiocb.aio_buf,(char *)ubuffer,(size_t)retval); +#else + mbcopy((char *)first_ce->myaiocb64.aio_buf,(char *)ubuffer,(size_t)retval); +#endif +#else + mbcopy((char *)first_ce->myaiocb.aio_buf,(char *)ubuffer,(size_t)retval); +#endif + } + first_ce->direct=0; + takeoff_cache(gc,first_ce); + } + return((int)retval); +} + +/************************************************************************ + * This routine allocates a cache_entry. It contains the + * aiocb block as well as linkage for use in the cache mechanism. + * The space allocated here will be released after the cache entry + * has been consumed. The routine takeoff_cache() will be called + * after the data has been copied to user buffer or when the + * cache is purged. The routine takeoff_cache() will also release + * all memory associated with this cache entry. 
+ ************************************************************************/ + +struct cache_ent * +alloc_cache(gc,fd,offset,size,op) +struct cache *gc; +long long fd,size,op; +off64_t offset; +{ + struct cache_ent *ce; + long temp; + ce=(struct cache_ent *)malloc((size_t)sizeof(struct cache_ent)); + if(ce == (struct cache_ent *)0) + { + printf("Malloc failed\n"); + exit(175); + } + bzero(ce,sizeof(struct cache_ent)); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ce->myaiocb.aio_fildes=(int)fd; + ce->myaiocb.aio_offset=(off64_t)offset; + ce->real_address = (char *)malloc((size_t)(size+page_size)); + temp=(long)ce->real_address; + temp = (temp+page_size) & ~(page_size-1); + ce->myaiocb.aio_buf=(volatile void *)temp; + if(ce->myaiocb.aio_buf == 0) +#else + ce->myaiocb64.aio_fildes=(int)fd; + ce->myaiocb64.aio_offset=(off64_t)offset; + ce->real_address = (char *)malloc((size_t)(size+page_size)); + temp=(long)ce->real_address; + temp = (temp+page_size) & ~(page_size-1); + ce->myaiocb64.aio_buf=(volatile void *)temp; + if(ce->myaiocb64.aio_buf == 0) +#endif +#else + ce->myaiocb.aio_fildes=(int)fd; + ce->myaiocb.aio_offset=(off_t)offset; + ce->real_address = (char *)malloc((size_t)(size+page_size)); + temp=(long)ce->real_address; + temp = (temp+page_size) & ~(page_size-1); + ce->myaiocb.aio_buf=(volatile void *)temp; + if(ce->myaiocb.aio_buf == 0) +#endif + { + printf("Malloc failed\n"); + exit(176); + } + /*bzero(ce->myaiocb.aio_buf,(size_t)size);*/ +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ce->myaiocb.aio_reqprio=0; + ce->myaiocb.aio_nbytes=(size_t)size; + ce->myaiocb.aio_sigevent.sigev_notify=SIGEV_NONE; + ce->myaiocb.aio_lio_opcode=(int)op; +#else + ce->myaiocb64.aio_reqprio=0; + ce->myaiocb64.aio_nbytes=(size_t)size; + ce->myaiocb64.aio_sigevent.sigev_notify=SIGEV_NONE; + ce->myaiocb64.aio_lio_opcode=(int)op; +#endif +#else + ce->myaiocb.aio_reqprio=0; + ce->myaiocb.aio_nbytes=(size_t)size; + ce->myaiocb.aio_sigevent.sigev_notify=SIGEV_NONE; + 
ce->myaiocb.aio_lio_opcode=(int)op; +#endif + ce->fd=(int)fd; + ce->forward=0; + ce->back=gc->tail; + if(gc->tail) + gc->tail->forward = ce; + gc->tail= ce; + if(!gc->head) + gc->head=ce; + gc->count++; + return(ce); +} + +/************************************************************************ + * This routine checks to see if the requested data is in the + * cache. +*************************************************************************/ +struct cache_ent * +incache(gc,fd,offset,size) +struct cache *gc; +long long fd,size; +off64_t offset; +{ + struct cache_ent *move; + if(gc->head==0) + { + return(0); + } + move=gc->head; +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + while(move) + { + if((move->fd == fd) && (move->myaiocb.aio_offset==(off64_t)offset) && + ((size_t)size==move->myaiocb.aio_nbytes)) + { + return(move); + } + move=move->forward; + } +#else + while(move) + { + if((move->fd == fd) && (move->myaiocb64.aio_offset==(off64_t)offset) && + ((size_t)size==move->myaiocb64.aio_nbytes)) + { + return(move); + } + move=move->forward; + } +#endif +#else + while(move) + { + if((move->fd == fd) && (move->myaiocb.aio_offset==(off_t)offset) && + ((size_t)size==move->myaiocb.aio_nbytes)) + { + return(move); + } + move=move->forward; + } +#endif + return(0); +} + +/************************************************************************ + * This routine removes a specific cache entry from the cache, and + * releases all memory associated witht the cache entry (if not direct). 
+*************************************************************************/ + +void +takeoff_cache(gc,ce) +struct cache *gc; +struct cache_ent *ce; +{ + struct cache_ent *move; + long long found; + move=gc->head; + if(move==ce) /* Head of list */ + { + + gc->head=ce->forward; + if(gc->head) + gc->head->back=0; + else + gc->tail = 0; + if(!ce->direct) + { + free((void *)(ce->real_address)); + free((void *)ce); + } + gc->count--; + return; + } + found=0; + while(move) + { + if(move==ce) + { + if(move->forward) + { + move->forward->back=move->back; + } + if(move->back) + { + move->back->forward=move->forward; + } + found=1; + break; + } + else + { + move=move->forward; + } + } + if(gc->head == ce) + gc->tail = ce; + if(!found) + printf("Internal Error in takeoff cache\n"); + move=gc->head; + if(!ce->direct) + { + free((void *)(ce->real_address)); + free((void *)ce); + } + gc->count--; +} + +/************************************************************************ + * This routine is used to purge the entire cache. This is called when + * the cache contains data but the incomming read was not able to + * be satisfied from the cache. This indicates that the previous + * async read-ahead was not correct and a new pattern is emerging. 
+ ************************************************************************/ +void +del_cache(gc) +struct cache *gc; +{ + struct cache_ent *ce; + ssize_t ret; + ce=gc->head; + while(1) + { + ce=gc->head; + if(ce==0) + return; +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + while((ret = aio_cancel(0,&ce->myaiocb))==AIO_NOTCANCELED) +#else + while((ret = aio_cancel64(0,&ce->myaiocb64))==AIO_NOTCANCELED) +#endif +#else + while((ret = aio_cancel(0,&ce->myaiocb))==AIO_NOTCANCELED) +#endif + ; + +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ret = aio_return(&ce->myaiocb); +#else + ret = aio_return64(&ce->myaiocb64); +#endif +#else + ret = aio_return(&ce->myaiocb); +#endif + ce->direct=0; + takeoff_cache(gc,ce); /* remove from cache */ + } +} + +/************************************************************************ + * Like its sister async_read() this function performs async I/O for + * all buffers but it differs in that it expects the caller to + * request a pointer to the data to be returned instead of handing + * the function a location to put the data. This will allow the + * async I/O to be performed and does not require any bcopy to be + * done to put the data back into the location specified by the caller. 
+ ************************************************************************/ +int +async_read_no_copy(gc, fd, ubuffer, offset, size, stride, max, depth) +struct cache *gc; +long long fd; +char **ubuffer; +off64_t offset; +long long size; +long long stride; +off64_t max; +long long depth; +{ + off64_t a_offset,r_offset; + long long a_size; + struct cache_ent *ce,*first_ce=0; + long long i; + ssize_t retval=0; + ssize_t ret; + long long del_read=0; + long long start=0; + + a_offset=offset; + a_size = size; + /* + * Check to see if it can be completed from the cache + */ + if((ce=(struct cache_ent *)incache(gc,fd,offset,size))) + { +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) + { + async_suspend(ce); + } +#else + while((ret=aio_error64(&ce->myaiocb64))== EINPROGRESS) + { + async_suspend(ce); + } +#endif +#else + while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) + { + async_suspend(ce); + } +#endif + if(ret) + printf("aio_error 3: ret %d %d\n",ret,errno); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + if(ce->oldbuf != ce->myaiocb.aio_buf || + ce->oldfd != ce->myaiocb.aio_fildes || + ce->oldsize != ce->myaiocb.aio_nbytes) +#else + if(ce->oldbuf != ce->myaiocb64.aio_buf || + ce->oldfd != ce->myaiocb64.aio_fildes || + ce->oldsize != ce->myaiocb64.aio_nbytes) +#endif +#else + if(ce->oldbuf != ce->myaiocb.aio_buf || + ce->oldfd != ce->myaiocb.aio_fildes || + ce->oldsize != ce->myaiocb.aio_nbytes) +#endif + printf("It changed in flight\n"); + +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + retval=aio_return(&ce->myaiocb); +#else + retval=aio_return64(&ce->myaiocb64); +#endif +#else + retval=aio_return(&ce->myaiocb); +#endif + if(retval > 0) + { +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + *ubuffer=(char *)ce->myaiocb.aio_buf; +#else + *ubuffer=(char *)ce->myaiocb64.aio_buf; +#endif +#else + *ubuffer=(char *)ce->myaiocb.aio_buf; +#endif + }else + *ubuffer=0; +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + if(retval < 
ce->myaiocb.aio_nbytes) +#else + if(retval < ce->myaiocb64.aio_nbytes) +#endif +#else + if(retval < ce->myaiocb.aio_nbytes) +#endif + { + printf("aio_return error4: ret %d %d\n",retval,errno); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + printf("aio_return error4: fd %d offset %lld buffer %lx size %d Opcode %d\n", + ce->myaiocb.aio_fildes, + ce->myaiocb.aio_offset, + (long)(ce->myaiocb.aio_buf), + ce->myaiocb.aio_nbytes, + ce->myaiocb.aio_lio_opcode +#else + printf("aio_return error4: fd %d offset %lld buffer %lx size %d Opcode %d\n", + ce->myaiocb64.aio_fildes, + ce->myaiocb64.aio_offset, + (long)(ce->myaiocb64.aio_buf), + ce->myaiocb64.aio_nbytes, + ce->myaiocb64.aio_lio_opcode +#endif +#else + printf("aio_return error4: fd %d offset %d buffer %lx size %d Opcode %d\n", + ce->myaiocb.aio_fildes, + ce->myaiocb.aio_offset, + (long)(ce->myaiocb.aio_buf), + ce->myaiocb.aio_nbytes, + ce->myaiocb.aio_lio_opcode +#endif + ); + } + ce->direct=1; + takeoff_cache(gc,ce); /* do not delete buffer*/ + putoninuse(gc,ce); + }else + { + /* + * Clear the cache and issue the first request async() + */ + del_cache(gc); + del_read++; + first_ce=alloc_cache(gc,fd,offset,size,(long long)LIO_READ); /* allocate buffer */ + /*printf("allocated buffer/read %x offset %d\n",first_ce->myaiocb.aio_buf,offset);*/ +again: +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + first_ce->oldbuf=first_ce->myaiocb.aio_buf; + first_ce->oldfd=first_ce->myaiocb.aio_fildes; + first_ce->oldsize=first_ce->myaiocb.aio_nbytes; + ret=aio_read(&first_ce->myaiocb); +#else + first_ce->oldbuf=first_ce->myaiocb64.aio_buf; + first_ce->oldfd=first_ce->myaiocb64.aio_fildes; + first_ce->oldsize=first_ce->myaiocb64.aio_nbytes; + ret=aio_read64(&first_ce->myaiocb64); +#endif +#else + first_ce->oldbuf=first_ce->myaiocb.aio_buf; + first_ce->oldfd=first_ce->myaiocb.aio_fildes; + first_ce->oldsize=first_ce->myaiocb.aio_nbytes; + ret=aio_read(&first_ce->myaiocb); +#endif + if(ret!=0) + { + if(errno==EAGAIN) + goto again; + else + 
printf("error returned from aio_read(). Ret %d errno %d\n",ret,errno); + } + } + if(stride==0) /* User does not want read-ahead */ + goto out; + if(a_offset<0) /* Before beginning of file */ + goto out; + if(a_offset+size>max) /* After end of file */ + goto out; + if(depth >=(max_depth-1)) + depth=max_depth-1; + if(depth==0) + goto out; + if(gc->count > 1) + start=depth-1; + for(i=start;i max) + continue; + if((ce=incache(gc,fd,r_offset,a_size))) + continue; + ce=alloc_cache(gc,fd,r_offset,a_size,(long long)LIO_READ); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ce->oldbuf=ce->myaiocb.aio_buf; + ce->oldfd=ce->myaiocb.aio_fildes; + ce->oldsize=ce->myaiocb.aio_nbytes; + ret=aio_read(&ce->myaiocb); +#else + ce->oldbuf=ce->myaiocb64.aio_buf; + ce->oldfd=ce->myaiocb64.aio_fildes; + ce->oldsize=ce->myaiocb64.aio_nbytes; + ret=aio_read64(&ce->myaiocb64); +#endif +#else + ce->oldbuf=ce->myaiocb.aio_buf; + ce->oldfd=ce->myaiocb.aio_fildes; + ce->oldsize=ce->myaiocb.aio_nbytes; + ret=aio_read(&ce->myaiocb); +#endif + if(ret!=0) + { + takeoff_cache(gc,ce); + break; + } + } +out: + if(del_read) /* Wait for the first read to complete */ + { +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + while((ret=aio_error(&first_ce->myaiocb))== EINPROGRESS) + { + async_suspend(first_ce); + } +#else + while((ret=aio_error64(&first_ce->myaiocb64))== EINPROGRESS) + { + async_suspend(first_ce); + } +#endif +#else + while((ret=aio_error(&first_ce->myaiocb))== EINPROGRESS) + { + async_suspend(first_ce); + } +#endif + if(ret) + printf("aio_error 4: ret %d %d\n",ret,errno); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + if(first_ce->oldbuf != first_ce->myaiocb.aio_buf || + first_ce->oldfd != first_ce->myaiocb.aio_fildes || + first_ce->oldsize != first_ce->myaiocb.aio_nbytes) + printf("It changed in flight2\n"); + retval=aio_return(&first_ce->myaiocb); +#else + if(first_ce->oldbuf != first_ce->myaiocb64.aio_buf || + first_ce->oldfd != first_ce->myaiocb64.aio_fildes || + first_ce->oldsize != 
first_ce->myaiocb64.aio_nbytes) + printf("It changed in flight2\n"); + retval=aio_return64(&first_ce->myaiocb64); +#endif +#else + if(first_ce->oldbuf != first_ce->myaiocb.aio_buf || + first_ce->oldfd != first_ce->myaiocb.aio_fildes || + first_ce->oldsize != first_ce->myaiocb.aio_nbytes) + printf("It changed in flight2\n"); + retval=aio_return(&first_ce->myaiocb); +#endif +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + if(retval < first_ce->myaiocb.aio_nbytes) +#else + if(retval < first_ce->myaiocb64.aio_nbytes) +#endif +#else + if(retval < first_ce->myaiocb.aio_nbytes) +#endif + { + printf("aio_return error5: ret %d %d\n",retval,errno); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + printf("aio_return error5: fd %d offset %lld buffer %lx size %d Opcode %d\n", + first_ce->myaiocb.aio_fildes, + first_ce->myaiocb.aio_offset, + (long)(first_ce->myaiocb.aio_buf), + first_ce->myaiocb.aio_nbytes, + first_ce->myaiocb.aio_lio_opcode +#else + printf("aio_return error5: fd %d offset %lld buffer %lx size %d Opcode %d\n", + first_ce->myaiocb64.aio_fildes, + first_ce->myaiocb64.aio_offset, + (long)(first_ce->myaiocb64.aio_buf), + first_ce->myaiocb64.aio_nbytes, + first_ce->myaiocb64.aio_lio_opcode +#endif +#else + printf("aio_return error5: fd %d offset %ld buffer %lx size %d Opcode %d\n", + first_ce->myaiocb.aio_fildes, + first_ce->myaiocb.aio_offset, + (long)(first_ce->myaiocb.aio_buf), + first_ce->myaiocb.aio_nbytes, + first_ce->myaiocb.aio_lio_opcode +#endif + ); + } + if(retval > 0) + { +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + *ubuffer=(char *)first_ce->myaiocb.aio_buf; +#else + *ubuffer=(char *)first_ce->myaiocb64.aio_buf; +#endif +#else + *ubuffer=(char *)first_ce->myaiocb.aio_buf; +#endif + }else + *ubuffer=(char *)0; + first_ce->direct=1; /* do not delete the buffer */ + takeoff_cache(gc,first_ce); + putoninuse(gc,first_ce); + } + return((int)retval); +} + +/************************************************************************ + * The caller is now finished 
with the data that was provided so + * the library is now free to return the memory to the pool for later + * reuse. + ************************************************************************/ +void +async_release(gc) +struct cache *gc; +{ + takeoffinuse(gc); +} + + +/************************************************************************ + * Put the buffer on the inuse list. When the user is finished with + * the buffer it will call back into async_release and the items on the + * inuse list will be deallocated. + ************************************************************************/ +void +putoninuse(gc,entry) +struct cache *gc; +struct cache_ent *entry; +{ + if(gc->inuse_head) + entry->forward=gc->inuse_head; + else + entry->forward=0; + gc->inuse_head=entry; +} + +/************************************************************************ + * This is called when the application is finished with the data that + * was provided. The memory may now be returned to the pool. + ************************************************************************/ +void +takeoffinuse(gc) +struct cache *gc; +{ + struct cache_ent *ce; + if(gc->inuse_head==0) + printf("Takeoffinuse error\n"); + ce=gc->inuse_head; + gc->inuse_head=gc->inuse_head->forward; + + if(gc->inuse_head !=0) + printf("Error in take off inuse\n"); + free((void*)(ce->real_address)); + free(ce); +} + +/************************************************************************* + * This routine is a generic async writer assist funtion. It takes + * the same calling parameters as write() but also extends the + * interface to include: + * + * offset ..... offset in the file. + * depth ..... How much read-ahead do you want. 
+ * + *************************************************************************/ +size_t +async_write(gc,fd,buffer,size,offset,depth) +struct cache *gc; +long long fd,size; +char *buffer; +off64_t offset; +long long depth; +{ + struct cache_ent *ce; + size_t ret; + ce=allocate_write_buffer(gc,fd,offset,size,(long long)LIO_WRITE,depth,0LL,(char *)0,(char *)0); + ce->direct=0; /* not direct. Lib supplies buffer and must free it */ +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + mbcopy(buffer,(char *)(ce->myaiocb.aio_buf),(size_t)size); +#else + mbcopy(buffer,(char *)(ce->myaiocb64.aio_buf),(size_t)size); +#endif +#else + mbcopy(buffer,(char *)(ce->myaiocb.aio_buf),(size_t)size); +#endif + async_put_on_write_queue(gc,ce); + /* + printf("asw: fd %d offset %lld, size %d\n",ce->myaiocb64.aio_fildes, + ce->myaiocb64.aio_offset, + ce->myaiocb64.aio_nbytes); + */ + +again: +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ret=aio_write(&ce->myaiocb); +#else + ret=aio_write64(&ce->myaiocb64); +#endif +#else + ret=aio_write(&ce->myaiocb); +#endif + if(ret==-1) + { + if(errno==EAGAIN) + { + async_wait_for_write(gc); + goto again; + } + if(errno==0) + { + /* Compensate for bug in async library */ + async_wait_for_write(gc); + goto again; + } + else + { + printf("Error in aio_write: ret %d errno %d count %lld\n",ret,errno,gc->w_count); + /* + printf("aio_write_no_copy: fd %d buffer %x offset %lld size %d\n", + ce->myaiocb64.aio_fildes, + ce->myaiocb64.aio_buf, + ce->myaiocb64.aio_offset, + ce->myaiocb64.aio_nbytes); + */ + exit(177); + } + } + return((ssize_t)size); +} + +/************************************************************************* + * Allocate a write aiocb and write buffer of the size specified. Also + * put some extra buffer padding so that VX_DIRECT can do its job when + * needed. 
+ *************************************************************************/ + +struct cache_ent * +allocate_write_buffer(gc,fd,offset,size,op,w_depth,direct,buffer,free_addr) +struct cache *gc; +long long fd,size,op; +off64_t offset; +long long w_depth; +long long direct; +char *buffer,*free_addr; +{ + struct cache_ent *ce; + long temp; + if(fd==0LL) + { + printf("Setting up write buffer insane\n"); + exit(178); + } + if(gc->w_count > w_depth) + async_wait_for_write(gc); + ce=(struct cache_ent *)malloc((size_t)sizeof(struct cache_ent)); + if(ce == (struct cache_ent *)0) + { + printf("Malloc failed 1\n"); + exit(179); + } + bzero(ce,sizeof(struct cache_ent)); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ce->myaiocb.aio_fildes=(int)fd; + ce->myaiocb.aio_offset=(off64_t)offset; + if(!direct) + { + ce->real_address = (char *)malloc((size_t)(size+page_size)); + temp=(long)ce->real_address; + temp = (temp+page_size) & ~(page_size-1); + ce->myaiocb.aio_buf=(volatile void *)temp; + }else + { + ce->myaiocb.aio_buf=(volatile void *)buffer; + ce->real_address=(char *)free_addr; + } + if(ce->myaiocb.aio_buf == 0) +#else + ce->myaiocb64.aio_fildes=(int)fd; + ce->myaiocb64.aio_offset=(off64_t)offset; + if(!direct) + { + ce->real_address = (char *)malloc((size_t)(size+page_size)); + temp=(long)ce->real_address; + temp = (temp+page_size) & ~(page_size-1); + ce->myaiocb64.aio_buf=(volatile void *)temp; + } + else + { + ce->myaiocb64.aio_buf=(volatile void *)buffer; + ce->real_address=(char *)free_addr; + } + if(ce->myaiocb64.aio_buf == 0) +#endif +#else + ce->myaiocb.aio_fildes=(int)fd; + ce->myaiocb.aio_offset=(off_t)offset; + if(!direct) + { + ce->real_address = (char *)malloc((size_t)(size+page_size)); + temp=(long)ce->real_address; + temp = (temp+page_size) & ~(page_size-1); + ce->myaiocb.aio_buf=(volatile void *)temp; + } + else + { + ce->myaiocb.aio_buf=(volatile void *)buffer; + ce->real_address=(char *)free_addr; + } + if(ce->myaiocb.aio_buf == 0) +#endif + { + 
printf("Malloc failed 2\n"); + exit(180); + } +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ce->myaiocb.aio_reqprio=0; + ce->myaiocb.aio_nbytes=(size_t)size; + ce->myaiocb.aio_sigevent.sigev_notify=SIGEV_NONE; + ce->myaiocb.aio_lio_opcode=(int)op; +#else + ce->myaiocb64.aio_reqprio=0; + ce->myaiocb64.aio_nbytes=(size_t)size; + ce->myaiocb64.aio_sigevent.sigev_notify=SIGEV_NONE; + ce->myaiocb64.aio_lio_opcode=(int)op; +#endif +#else + ce->myaiocb.aio_reqprio=0; + ce->myaiocb.aio_nbytes=(size_t)size; + ce->myaiocb.aio_sigevent.sigev_notify=SIGEV_NONE; + ce->myaiocb.aio_lio_opcode=(int)op; +#endif + ce->fd=(int)fd; + return(ce); +} + +/************************************************************************* + * Put it on the outbound queue. + *************************************************************************/ + +void +async_put_on_write_queue(gc,ce) +struct cache *gc; +struct cache_ent *ce; +{ + ce->forward=0; + ce->back=gc->w_tail; + if(gc->w_tail) + gc->w_tail->forward = ce; + gc->w_tail= ce; + if(!gc->w_head) + gc->w_head=ce; + gc->w_count++; + return; +} + +/************************************************************************* + * Cleanup all outstanding writes + *************************************************************************/ +void +async_write_finish(gc) +struct cache *gc; +{ + while(gc->w_head) + { + /*printf("async_write_finish: Waiting for buffer %x to finish\n",gc->w_head->myaiocb64.aio_buf);*/ + async_wait_for_write(gc); + } +} + +/************************************************************************* + * Wait for an I/O to finish + *************************************************************************/ + +void +async_wait_for_write(gc) +struct cache *gc; +{ + struct cache_ent *ce; + size_t ret,retval; + if(gc->w_head==0) + return; + ce=gc->w_head; + gc->w_head=ce->forward; + gc->w_count--; + ce->forward=0; + if(ce==gc->w_tail) + gc->w_tail=0; + /*printf("Wait for buffer %x offset %lld size %d to finish\n", + 
ce->myaiocb64.aio_buf, + ce->myaiocb64.aio_offset, + ce->myaiocb64.aio_nbytes); + printf("write count %lld \n",gc->w_count); + */ +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) + { + async_suspend(ce); + } +#else + while((ret=aio_error64(&ce->myaiocb64))== EINPROGRESS) + { + async_suspend(ce); + } +#endif +#else + while((ret=aio_error(&ce->myaiocb))== EINPROGRESS) + { + async_suspend(ce); + } +#endif + if(ret) + { + printf("aio_error 5: ret %d %d\n",ret,errno); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + printf("fd %d offset %lld size %d\n", + ce->myaiocb.aio_fildes, + ce->myaiocb.aio_offset, + ce->myaiocb.aio_nbytes); +#else + printf("fd %d offset %lld size %d\n", + ce->myaiocb64.aio_fildes, + ce->myaiocb64.aio_offset, + ce->myaiocb64.aio_nbytes); +#endif +#else + printf("fd %d offset %lld size %d\n", + ce->myaiocb.aio_fildes, + ce->myaiocb.aio_offset, + ce->myaiocb.aio_nbytes); +#endif + exit(181); + } + +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + retval=aio_return(&ce->myaiocb); +#else +#if defined(__CrayX1__) + retval=aio_return64((aiocb64_t *)&ce->myaiocb64); +#else + retval=aio_return64((struct aiocb64 *)&ce->myaiocb64); +#endif + +#endif +#else + retval=aio_return(&ce->myaiocb); +#endif + if((int)retval < 0) + { + printf("aio_return error: %d\n",errno); + } + + if(!ce->direct) + { + /* printf("Freeing buffer %x\n",ce->real_address);*/ + free((void *)(ce->real_address)); + free((void *)ce); + } + +} + +/************************************************************************* + * This routine is a generic async writer assist funtion. It takes + * the same calling parameters as write() but also extends the + * interface to include: + * + * offset ..... offset in the file. + * depth ..... How much read-ahead do you want. + * free_addr .. address of memory to free after write is completed. 
+ * + *************************************************************************/ +size_t +async_write_no_copy(gc,fd,buffer,size,offset,depth,free_addr) +struct cache *gc; +long long fd,size; +char *buffer; +off64_t offset; +long long depth; +char *free_addr; +{ + struct cache_ent *ce; + size_t ret; + long long direct = 1; + ce=allocate_write_buffer(gc,fd,offset,size,(long long)LIO_WRITE,depth,direct,buffer,free_addr); + ce->direct=0; /* have library de-allocate the buffer */ + async_put_on_write_queue(gc,ce); + /* + printf("awnc: fd %d offset %lld, size %d\n",ce->myaiocb64.aio_fildes, + ce->myaiocb64.aio_offset, + ce->myaiocb64.aio_nbytes); + */ + +again: +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + ret=aio_write(&ce->myaiocb); +#else + ret=aio_write64(&ce->myaiocb64); +#endif +#else + ret=aio_write(&ce->myaiocb); +#endif + if(ret==-1) + { + if(errno==EAGAIN) + { + async_wait_for_write(gc); + goto again; + } + if(errno==0) + { + /* Compensate for bug in async library */ + async_wait_for_write(gc); + goto again; + } + else + { + printf("Error in aio_write: ret %d errno %d\n",ret,errno); +#ifdef _LARGEFILE64_SOURCE +#ifdef __LP64__ + printf("aio_write_no_copy: fd %d buffer %lx offset %lld size %d\n", + ce->myaiocb.aio_fildes, + (long)(ce->myaiocb.aio_buf), + ce->myaiocb.aio_offset, + ce->myaiocb.aio_nbytes); +#else + printf("aio_write_no_copy: fd %d buffer %lx offset %lld size %d\n", + ce->myaiocb64.aio_fildes, + (long)(ce->myaiocb64.aio_buf), + ce->myaiocb64.aio_offset, + ce->myaiocb64.aio_nbytes); +#endif +#else + printf("aio_write_no_copy: fd %d buffer %lx offset %ld size %d\n", + ce->myaiocb.aio_fildes, + (long)(ce->myaiocb.aio_buf), + ce->myaiocb.aio_offset, + ce->myaiocb.aio_nbytes); +#endif + exit(182); + } + } + else + { + return((ssize_t)size); + } +} + +void mbcopy(source, dest, len) +char *source,*dest; +size_t len; +{ + int i; + for(i=0;i +#endif +#include +#include +#include +#if defined(__AIX__) || defined(__FreeBSD__) || defined(__DragonFly__) 
+#include +#else +#include +#endif + +#if defined(OSV5) || defined(linux) || defined (__FreeBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__APPLE__) || defined(__DragonFly__) +#include +#endif + +#if defined(linux) || defined(__DragonFly__) || defined(macosx) +#include +#include +#endif + +#if (defined(solaris) && defined( __LP64__ )) || defined(__s390x__) || defined(FreeBSD) +/* If we are building for 64-bit Solaris, all functions that return pointers + * must be declared before they are used; otherwise the compiler will assume + * that they return ints and the top 32 bits of the pointer will be lost, + * causing segmentation faults. The following includes take care of this. + * It should be safe to add these for all other OSs too, but we're only + * doing it for Solaris now in case another OS turns out to be a special case. + */ +#include +#include +#include +#include +#include +#endif +/* Little Endian */ +#define ENDIAN_1 1 +/* Big Endian */ +#define ENDIAN_2 2 +/* Middle Endian */ +#define ENDIAN_3 3 +/* Middle Endian */ +#define ENDIAN_4 4 + +int junk, *junkp; + + +#ifdef HAVE_ANSIC_C +/************************************************************************/ +/* Here is the API... 
Enjoy */
+/************************************************************************/
+/* Create worksheet */
+int create_xls(char *);
+/* Args: Filename */
+/* */
+/* Close worksheet */
+void close_xls(int);
+/* Args: file descriptor */
+/* */
+/* Put a 16 bit integer in worksheet */
+void do_int(int,int,int,int);
+/* Args: file descriptor, */
+/* value, */
+/* row, */
+/* column */
+
+/* Put a double in 8 byte float */
+void do_float(int,double,int,int);
+/* Args: file descriptor, */
+/* value, */
+/* row, */
+/* column */
+/* Put a string in worksheet */
+void do_label(int,char *,int,int);
+/* Args: file descriptor, */
+/* string, */
+/* row, */
+/* column */
+/************************************************************************/
+
+char libbif_version[] = "Libbif Version $Revision$";
+void do_eof(int ); /* Used internally */
+void do_header(int ); /* Used internally */
+int endian(void);
+#endif
+
+/* Record opcodes and header constants stored by the do_*() writers below */
+#define BOF 0x9 /* Opcode of the beginning-of-file record (do_header) */
+#define INTEGER 0x2 /* Opcode of the 16 bit integer record (do_int) */
+#define FLOAT 0x3 /* Opcode of the 8 byte float record (do_float) */
+#define LABEL 0x4 /* Opcode of the string record (do_label) */
+#define EXCEL_VERS 0x2 /* Version field stored in the BOF record */
+#define WORKSHEET 0x10 /* File type field stored in the BOF record */
+
+/*
+ * Record layouts. Every field is a single char so the structs can be
+ * written to the file byte for byte. Note that, as used by the writers
+ * below, each hi_ field is emitted first and receives the LOW order byte
+ * of its 16 bit value; the lo_ field follows with the high order byte.
+ */
+struct bof_record{ /* Beginning of file */
+	char hi_opcode;
+	char lo_opcode;
+	char hi_length;
+	char lo_length;
+	char hi_version; /* Excel version */
+	char lo_version;
+	char hi_filetype;
+	char lo_filetype;
+	};
+struct int_record {
+	char hi_opcode; /* Type 2 of record */
+	char lo_opcode;
+	char hi_length;
+	char lo_length;
+	char hi_row;
+	char lo_row;
+	char hi_column;
+	char lo_column;
+	char rgbhi;
+	char rgbmed;
+	char rgblo;
+	char hi_data;
+	char lo_data;
+	};
+struct label_record {
+	char hi_opcode; /* Type 4 of record */
+	char lo_opcode;
+	char hi_length;
+	char lo_length;
+	char hi_row;
+	char lo_row;
+	char hi_column;
+	char lo_column;
+	char rgbhi;
+	char rgbmed;
+	char rgblo;
+	char string_length; /* One byte, so at most 255 characters */
+	char str_array[256];
+	};
+/*
+ * 'data' follows eleven single byte fields, so the compiler may insert
+ * padding in front of it. do_float() therefore writes the first 11 bytes
+ * and the 8 data bytes with two separate write() calls.
+ */
+struct float_record { /* Type 3 record */
+	char hi_opcode;
+	char lo_opcode;
+	char hi_length;
+	char lo_length;
+	char hi_row;
+	char lo_row;
+	char hi_column;
+	char lo_column;
+	char rgbhi;
+	char rgbmed;
+	char rgblo;
+	double data;
+	};
+/*
+ * Write the EOF and close the file
+ *
+ * Args: fd, the descriptor returned by create_xls().
+ * The return value of close() is not checked.
+ */
+#ifdef HAVE_ANSIC_C
+void
+close_xls(int fd)
+{
+#else
+close_xls(fd)
+int fd;
+{
+#endif
+	do_eof(fd);
+	close(fd);
+}
+
+/*
+ * Create xls worksheet. Create file and put the BOF record in it.
+ *
+ * Args: name, path of the worksheet file. Any existing file of that
+ * name is unlinked first.
+ * Returns the open file descriptor. If open() fails a message is
+ * printed and the whole process exits, so callers never see fd < 0.
+ */
+#ifdef HAVE_ANSIC_C
+int
+create_xls(char *name)
+{
+#else
+create_xls(name)
+char *name;
+{
+#endif
+	int fd;
+	unlink(name);
+#ifdef Windows
+	fd=open(name,O_BINARY|O_CREAT|O_RDWR,0666);
+#else
+	fd=open(name,O_CREAT|O_RDWR,0666);
+#endif
+	if(fd<0)
+	{
+		printf("Error opening file %s\n",name);
+		exit(-1);
+	}
+	do_header(fd);
+	return(fd);
+}
+
+/*
+ * Write the 8 byte BOF record: opcode BOF, payload length 4, then the
+ * version and file type words. 'junk' only absorbs the return value of
+ * write(); it is declared outside this part of the file.
+ */
+#ifdef HAVE_ANSIC_C
+void
+do_header(int fd) /* Stick the BOF at the beginning of the file */
+{
+#else
+do_header(fd)
+int fd;
+{
+#endif
+	struct bof_record bof;
+	bof.hi_opcode=BOF;
+	bof.lo_opcode = 0x0;
+	bof.hi_length=0x4;
+	bof.lo_length=0x0;
+	bof.hi_version=EXCEL_VERS;
+	bof.lo_version=0x0;
+	bof.hi_filetype=WORKSHEET;
+	bof.lo_filetype=0x0;
+	junk=write(fd,&bof,sizeof(struct bof_record));
+}
+
+/*
+ * Put an integer (16 bit) in the worksheet
+ *
+ * Args: fd, value, row, column. Only the low 16 bits of val, row and
+ * column are stored. The record is 13 bytes: a 4 byte header followed
+ * by the 9 payload bytes announced in hi_length.
+ */
+#ifdef HAVE_ANSIC_C
+void
+do_int(int fd,int val, int row, int column)
+{
+#else
+do_int(fd,val,row,column)
+int fd,val,row,column;
+{
+#endif
+	struct int_record intrec;
+	short s_row,s_column;
+	s_row=(short)row;
+	s_column=(short)column;
+	intrec.hi_opcode=INTEGER;
+	intrec.lo_opcode=0x00;
+	intrec.hi_length=0x09;
+	intrec.lo_length=0x00;
+	intrec.rgbhi=0x0;
+	intrec.rgbmed=0x0;
+	intrec.rgblo=0x0;
+	intrec.hi_row=(char)s_row&0xff; /* Low byte first */
+	intrec.lo_row=(char)(s_row>>8)&0xff;
+	intrec.hi_column=(char)(s_column&0xff);
+	intrec.lo_column=(char)(s_column>>8)&0xff;
+	intrec.hi_data=(val & 0xff);
+	intrec.lo_data=(val & 0xff00)>>8;
+	junk=write(fd,&intrec,13); /* All 13 fields are chars: no padding */
+}
+
+/* Note: This routine converts Big Endian to Little Endian
+ * and writes the record out.
+ */
+
+/*
+ * Put a double in the worksheet as 8 byte float in IEEE format.
+ *
+ * Args: fd (worksheet file descriptor), value, row, column.
+ * The file always holds the value in Little Endian byte order. The host
+ * byte order is probed once with endian(). Byte orders for which no
+ * conversion is known (ENDIAN_4 and the -1 "unsupported" result) store
+ * all-zero bytes and print a warning, so uninitialized stack bytes are
+ * never written to the file.
+ */
+#ifdef HAVE_ANSIC_C
+void
+do_float(int fd, double value, int row, int column)
+{
+#else
+do_float(fd, value, row, column)
+int fd;
+double value;
+int row,column;
+{
+#endif
+	struct float_record floatrec;
+	short s_row,s_column;
+	unsigned char *sptr,*dptr;
+	int order,i;
+	s_row=(short)row;
+	s_column=(short)column;
+	floatrec.hi_opcode=FLOAT;
+	floatrec.lo_opcode=0x00;
+	floatrec.hi_length=0xf;		/* 15 payload bytes: 7 + 8 */
+	floatrec.lo_length=0x00;
+	floatrec.rgbhi=0x0;
+	floatrec.rgbmed=0x0;
+	floatrec.rgblo=0x0;
+	floatrec.hi_row=(char)(s_row&0xff);
+	floatrec.lo_row=(char)((s_row>>8)&0xff);
+	floatrec.hi_column=(char)(s_column&0xff);
+	floatrec.lo_column=(char)((s_column>>8)&0xff);
+	sptr =(unsigned char *) &value;
+	dptr =(unsigned char *) &floatrec.data;
+
+	order=endian();			/* Probe the host byte order once */
+	if(order==ENDIAN_1)		/* Little Endian */
+	{
+		for(i=0;i<8;i++)	/* Already in file order, copy as is */
+			dptr[i]=sptr[i];
+	}
+	else if(order==ENDIAN_2)	/* Big Endian */
+	{
+		for(i=0;i<8;i++)	/* Reverse all 8 bytes */
+			dptr[i]=sptr[7-i];
+	}
+	else if(order==ENDIAN_3)	/* Middle Endian */
+	{
+		for(i=0;i<8;i++)	/* Swap the two 4 byte halves (ARM) */
+			dptr[i]=sptr[(i+4)&7];
+	}
+	else				/* ENDIAN_4 or unsupported architecture */
+	{
+		for(i=0;i<8;i++)
+			dptr[i]=0;
+		printf("Excel output not supported on this architecture.\n");
+	}
+	junk=write(fd,&floatrec,11); /* Don't write floatrec. Padding problems */
+	junk=write(fd,&floatrec.data,8); /* Write value separately */
+}
+
+/*
+ * Put a string as a label in the worksheet.
+ *
+ * Args: fd (worksheet file descriptor), string, row, column.
+ * At most 255 characters are stored because the length field is one
+ * byte; longer strings are truncated in the record only. The caller's
+ * string is never modified. The whole 256 byte text field is cleared
+ * first so no uninitialized bytes reach the file.
+ */
+#ifdef HAVE_ANSIC_C
+void
+do_label(int fd, char *string, int row, int column)
+{
+#else
+do_label(fd, string, row, column)
+int fd;
+char *string;
+int row,column;
+{
+#endif
+	struct label_record labelrec;
+	short s_row,s_column;
+	int i,len;
+	for(i=0;i<256;i++)		/* Clear the whole text field */
+		labelrec.str_array[i]=0;
+	s_row=(short)row;
+	s_column=(short)column;
+	len=0;				/* String length, clamped to 255 */
+	while(len<255 && string[len]!=0)
+		len++;
+	labelrec.hi_opcode=LABEL;
+	labelrec.lo_opcode=0x00;
+	labelrec.hi_length=0x08; /* 264 total bytes */
+	labelrec.lo_length=0x01;
+	labelrec.rgblo=0x0;
+	labelrec.rgbmed=0x0;
+	labelrec.rgbhi=0x0;
+	labelrec.hi_row=(char)(s_row&0xff);
+	labelrec.lo_row=(char)((s_row>>8)&0xff);
+	labelrec.hi_column=(char)(s_column&0xff);
+	labelrec.lo_column=(char)((s_column>>8)&0xff);
+	labelrec.string_length=(char)len; /* Length of what is really stored */
+	for(i=0;i<len;i++)
+		labelrec.str_array[i]=string[i];
+
+	junk=write(fd,&labelrec,sizeof(struct label_record));
+
+}
+
+/*
+ * Write the EOF in the file
+ *
+ * The record is 4 bytes: opcode 0x0a followed by three zero bytes.
+ */
+#ifdef HAVE_ANSIC_C
+void
+do_eof(int fd)
+{
+#else
+do_eof(fd)
+int fd;
+{
+#endif
+	char buf[]={0x0a,0x00,0x00,0x00};
+	junk=write(fd,buf,4);
+}
+
+/*
+ * Routine to determine the Endian-ness of the system. This
+ * is needed for Iozone to convert doubles (floats) into
+ * Little-endian format. This is needed for Excel to be
+ * able to interpret the file
+ *
+ * Returns ENDIAN_1 (little), ENDIAN_2 (big), ENDIAN_3 or ENDIAN_4
+ * (two middle endian layouts), or -1 when the in-memory byte pattern
+ * of the probe constants matches none of them.
+ */
+int
+endian(void)
+{
+	long long foo = 0x0102030405060708LL;
+	long foo1 = 0x012345678;
+	unsigned char *c,c1,c2,c3,c4,c5,c6,c7,c8;
+	c=(unsigned char *)&foo;	/* Look at foo one byte at a time */
+	c1=*c++;
+	c2=*c++;
+	c3=*c++;
+	c4=*c++;
+	c5=*c++;
+	c6=*c++;
+	c7=*c++;
+	c8=*c;
+
+	/*--------------------------------------------------------------*/
+	/* printf("%x %x %x %x %x %x %x %x\n",c1,c2,c3,c4,c5,c6,c7,c8); */
+	/*--------------------------------------------------------------*/
+
+	/* Little Endian format ? ( Intel ) */
+	if( (c1==0x08) && (c2==0x07) && (c3==0x06) && (c4==0x05) &&
+		(c5==0x04) && (c6==0x03) && (c7==0x02) && (c8==0x01) )
+		return(ENDIAN_1);
+	/* Big Endian format ? ( Sparc, Risc... */
+	if( (c1==0x01) && (c2==0x02) && (c3==0x03) && (c4==0x04) &&
+		(c5==0x05) && (c6==0x06) && (c7==0x07) && (c8==0x08) )
+		return(ENDIAN_2);
+	/* Middle Endian format ? ( ARM ... ) */
+	if( (c1==0x04) && (c2==0x03) && (c3==0x02) && (c4==0x01) &&
+		(c5==0x08) && (c6==0x07) && (c7==0x06) && (c8==0x05) )
+		return(ENDIAN_3);
+	c=(unsigned char *)&foo1;	/* Second probe: first 4 bytes of foo1 */
+	c1=*c++;
+	c2=*c++;
+	c3=*c++;
+	c4=*c++;
+	/* Another middle endian format ? ( PDP-11 ... ) */
+	if( (c1==0x34) && (c2==0x12) && (c3==0x78) && (c4==0x56))
+		return(ENDIAN_4);
+
+	return(-1);
+}
diff --git a/src/components/appio/tests/iozone/makefile b/src/components/appio/tests/iozone/makefile
new file mode 100644
index 0000000..22e088b
--- /dev/null
+++ b/src/components/appio/tests/iozone/makefile
@@ -0,0 +1,1461 @@
+#
+# Version $Revision$
+#
+# The makefile for building all versions of iozone for all supported
+# platforms
+#
+# Supports: hpux, hpux_no_ansi, hpux-10.1, hpux_no_ansi-10.1,
+# sppux, sppux-10.1, ghpux, sppux,
+# convex, FreeBSD, OpenBSD, OSFV3, OSFV4, OSFV5, SCO
+# SCO_Unixware_gcc,NetBSD,TRU64, Mac OS X
+
+CC = cc
+C89 = c89
+GCC = gcc
+CCS = /usr/ccs/bin/cc
+NACC = /opt/ansic/bin/cc
+CFLAGS =
+S10GCCFLAGS = -m64
+S10CCFLAGS = -m64
+FLAG64BIT = -m64
+
+# If your Linux kernel supports preadv and pwritev system calls
+# and you want iozone to use them, add -DHAVE_PREADV -DHAVE_PWRITEV
+# to CFLAGS
+
+# Default target: builds nothing, only lists the platform targets.
+# The AIX64 targets are built with -maix64 and Solaris10cc without
+# $(S10CCFLAGS), so they are listed as 64bit and 32bit respectively.
+all:
+	@echo ""
+	@echo "You must specify the target. "
+	@echo " -> AIX (32bit) <-"
+	@echo " -> AIX-LF (32bit) <-"
+	@echo " -> AIX64 (64bit) <-"
+	@echo " -> AIX64-LF (64bit) <-"
+	@echo " -> bsdi (32bit) <-"
+	@echo " -> convex (32bit) <-"
+	@echo " -> CrayX1 (32bit) <-"
+	@echo " -> dragonfly (32bit) <-"
+	@echo " -> freebsd (32bit) <-"
+	@echo " -> generic (32bit) <-"
+	@echo " -> ghpux (32bit) <-"
+	@echo " -> hpuxs-11.0 (simple) (32bit) <-"
+	@echo " -> hpux-11.0w (64bit) <-"
+	@echo " -> hpuxs-11.0w (64bit) <-"
+	@echo " -> hpux-11.0 (32bit) <-"
+	@echo " -> hpux-10.1 (32bit) <-"
+	@echo " -> hpux-10.20 (32bit) <-"
+	@echo " -> hpux (32bit) <-"
+	@echo " -> hpux_no_ansi (32bit) <-"
+	@echo " -> hpux_no_ansi-10.1 (32bit) <-"
+	@echo " -> IRIX (32bit) <-"
+	@echo " -> IRIX64 (64bit) <-"
+	@echo " -> linux (32bit) <-"
+	@echo " -> linux-arm (32bit) <-"
+	@echo " -> linux-AMD64 (64bit) <-"
+	@echo " -> linux-ia64 (64bit) <-"
+	@echo " -> linux-powerpc (32bit) <-"
+	@echo " -> linux-powerpc64 (64bit) <-"
+	@echo " -> linux-sparc (32bit) <-"
+	@echo " -> macosx (32bit) <-"
+	@echo " -> netbsd (32bit) <-"
+	@echo " -> openbsd (32bit) <-"
+	@echo " -> openbsd-threads (32bit) <-"
+	@echo " -> OSFV3 (64bit) <-"
+	@echo " -> OSFV4 (64bit) <-"
+	@echo " -> OSFV5 (64bit) <-"
+	@echo " -> linux-S390 (32bit) <-"
+	@echo " -> linux-S390X (64bit) <-"
+	@echo " -> SCO (32bit) <-"
+	@echo " -> SCO_Unixware_gcc (32bit) <-"
+	@echo " -> Solaris (32bit) <-"
+	@echo " -> Solaris-2.6 (32bit) <-"
+	@echo " -> Solaris7gcc (32bit) <-"
+	@echo " -> Solaris8-64 (64bit) <-"
+	@echo " -> Solaris8-64-VXFS (64bit) <-"
+	@echo " -> Solaris10 (32bit) <-"
+	@echo " -> Solaris10cc (32bit) <-"
+	@echo " -> Solaris10cc-64 (64bit) <-"
+	@echo " -> Solaris10gcc (32bit) <-"
+	@echo " -> Solaris10gcc-64 (64bit) <-"
+	@echo " -> sppux (32bit) <-"
+	@echo " -> sppux-10.1 (32bit) <-"
+	@echo " -> sppux_no_ansi-10.1 (32bit) <-"
+	@echo " -> SUA (32bit) <-"
+	@echo " -> TRU64 (64bit) <-"
+	@echo " -> UWIN (32bit) <-"
+	@echo " -> Windows (95/98/NT) (32bit) <-"
+	@echo ""
+
+clean:
+	rm -f *.o
iozone fileop pit_server
+
+# Copy the source tarball into the RPM source tree and build the package
+rpm:
+	cp ../../iozone*.tar /usr/src/red*/SO*
+	rpmbuild -ba spec.in
+
+
+#
+# Turn on the optimizer, largefiles, Posix async I/O and threads.
+#
+hpux-11.0: iozone_hpux-11.0.o libasync.o libbif.o
+	$(CC) +O3 +Oparallel $(LDFLAGS) iozone_hpux-11.0.o libasync.o \
+		libbif.o -lpthread -lrt -o iozone
+
+#
+# Turn on wide-mode, the optimizer, largefiles, Posix async I/O and threads.
+#
+hpux-11.0w: iozone_hpux-11.0w.o libasyncw.o libbif.o
+	$(CC) +DD64 +O3 $(LDFLAGS) iozone_hpux-11.0w.o libasyncw.o \
+		libbif.o -lpthread -lrt -o iozone
+
+
+#
+# Simple build with largefiles, Posix threads and Posix async I/O
+#
+hpuxs-11.0: iozone_hpuxs-11.0.o libasync.o libbif.o fileop_hpuxs-11.0.o pit_server.o
+	$(CC) -O $(LDFLAGS) iozone_hpuxs-11.0.o \
+		libasync.o libbif.o -lpthread -lrt -o iozone
+	$(CC) -O $(LDFLAGS) fileop_hpuxs-11.0.o -o fileop
+	$(CC) -O $(LDFLAGS) pit_server.o -o pit_server
+
+#
+# Simple build with wide-mode, largefiles, Posix threads and Posix async I/O
+#
+hpuxs-11.0w: iozone_hpuxs-11.0w.o libasyncw.o libbif.o
+	$(CC) -O +DD64 $(LDFLAGS) iozone_hpuxs-11.0w.o \
+		libasyncw.o libbif.o -lpthread -lrt -o iozone
+
+#
+# Simple 10.1 build with no threads, no largefiles, no async I/O
+#
+hpux-10.1: iozone_hpux-10.1.o libbif.o
+	$(C89) +e -O $(LDFLAGS) iozone_hpux-10.1.o libbif.o -o iozone
+
+hpux-10.20: iozone_hpux-10.20.o libbif.o
+	$(C89) +e -O $(LDFLAGS) iozone_hpux-10.20.o libbif.o -o iozone
+
+#
+# Simple generic HP build with no threads, no largefiles, no async I/O
+# libbif.o is linked below, so it is listed as a prerequisite like in
+# the other HP-UX targets.
+#
+hpux: iozone_hpux.o libbif.o
+	$(C89) +e -O $(LDFLAGS) iozone_hpux.o libbif.o -o iozone
+
+#
+# GNU HP build with no threads, no largefiles, no async I/O
+#
+ghpux: iozone_ghpux.o libbif.o
+	$(GCC) -O $(LDFLAGS) iozone_ghpux.o libbif.o -static -o iozone
+
+#
+# GNU Generic build with no threads, no largefiles, no async I/O
+#
+generic: iozone_generic.o libbif.o
+	$(CC) -O $(LDFLAGS) iozone_generic.o libbif.o -o iozone
+
+#
+# No ANSI 'C' compiler HP build with no threads,
no largefiles, no async I/O
+#
+hpux_no_ansi-10.1: iozone_hpux_no-10.1.o libbif.o
+	$(NACC) -O $(LDFLAGS) iozone_hpux_no-10.1.o libbif.o -o iozone
+
+#
+# No ANSI 'C' compiler HP build with no threads, no largefiles, no async I/O
+#
+hpux_no_ansi: iozone_hpux_no.o libbif.o
+	$(C89) -O $(LDFLAGS) iozone_hpux_no.o libbif.o -o iozone
+
+#
+# GNU 'C' compiler Linux build with threads, largefiles, async I/O
+#
+linux: iozone_linux.o libasync.o libbif.o fileop_linux.o pit_server.o
+	$(CC) -O3 $(LDFLAGS) iozone_linux.o libasync.o libbif.o -lpthread \
+		-lrt -o iozone
+	$(CC) -O3 -Dlinux fileop_linux.o -o fileop
+	$(CC) -O3 -Dlinux pit_server.o -o pit_server
+
+#
+# GNU 'C' compiler Linux build for powerpc chip with threads, largefiles, async I/O
+#
+linux-powerpc: iozone_linux-powerpc.o libbif.o libasync.o fileop_linux-ppc.o pit_server.o
+	$(CC) -O3 $(LDFLAGS) iozone_linux-powerpc.o libasync.o \
+		libbif.o -lpthread -lrt -o iozone
+	$(CC) -O3 -Dlinux fileop_linux-ppc.o -o fileop
+	$(CC) -O3 -Dlinux pit_server.o -o pit_server
+#
+# GNU 'C' compiler Linux build for sparc chip with threads, largefiles, async I/O
+#
+linux-sparc: iozone_linux-sparc.o libbif.o libasync.o fileop_linux.o pit_server.o
+	$(CC) -O3 $(LDFLAGS) iozone_linux-sparc.o libasync.o libbif.o \
+		-lpthread -lrt -o iozone
+	$(CC) -O3 -Dlinux fileop_linux.o -o fileop
+	$(CC) -O3 -Dlinux pit_server.o -o pit_server
+
+#
+# GNU 'C' compiler Linux build for ia64 with threads, largefiles, async I/O
+#
+linux-ia64: iozone_linux-ia64.o libbif.o libasync.o fileop_linux-ia64.o pit_server.o
+	$(CC) -O3 $(LDFLAGS) iozone_linux-ia64.o libbif.o libasync.o \
+		-lrt -lpthread -o iozone
+	$(CC) -O3 -Dlinux fileop_linux-ia64.o -o fileop
+	$(CC) -O3 -Dlinux pit_server.o -o pit_server
+
+#
+# GNU 'C' compiler Linux build for 64 bit powerpc chip with threads,
+# largefiles, async I/O. Every link step carries $(FLAG64BIT).
+#
+linux-powerpc64: iozone_linux-powerpc64.o libbif.o libasync.o fileop_linux-ppc64.o pit_server-linux-powerpc64.o
+	$(CC) -O3 -Dunix -DHAVE_ANSIC_C -DSHARED_MEM -DASYNC_IO \
+		-D_LARGEFILE64_SOURCE -Dlinux \
+		iozone_linux-powerpc64.o libasync.o libbif.o -lpthread \
+		-lrt $(FLAG64BIT) -o iozone
+	$(CC) -O3 -Dlinux fileop_linux-ppc64.o $(FLAG64BIT) -o fileop
+	$(CC) -O3 -Dlinux pit_server-linux-powerpc64.o $(FLAG64BIT) -o pit_server
+
+#
+# GNU 'C' compiler Linux build for arm with threads, largefiles, async I/O
+#
+linux-arm: iozone_linux-arm.o libbif.o libasync.o fileop_linux-arm.o pit_server.o
+	$(CC) -O3 $(LDFLAGS) iozone_linux-arm.o libbif.o libasync.o \
+		-lrt -lpthread -o iozone
+	$(CC) -O3 -Dlinux fileop_linux-arm.o -o fileop
+	$(CC) -O3 -Dlinux pit_server.o -o pit_server
+
+#
+# GNU 'C' compiler Linux build for AMD64 with threads, largefiles, async I/O
+#
+linux-AMD64: iozone_linux-AMD64.o libbif.o libasync.o fileop_linux-AMD64.o pit_server.o
+	$(CC) -O3 $(LDFLAGS) iozone_linux-AMD64.o libbif.o libasync.o \
+		-lrt -lpthread -o iozone
+	$(CC) -O3 -Dlinux fileop_linux-AMD64.o -o fileop
+	$(CC) -O3 -Dlinux pit_server.o -o pit_server
+
+#
+# GNU 'C' compiler Linux build with S/390, threads, largefiles, async I/O
+# The libraries follow the objects that use them, as in the other Linux
+# targets; a linker resolves symbols from left to right.
+#
+linux-S390: iozone_linux-s390.o libbif.o libasync.o fileop_linux-s390.o pit_server.o
+	$(CC) -O2 $(LDFLAGS) iozone_linux-s390.o \
+		libbif.o libasync.o -lpthread -lrt -o iozone
+	$(CC) -O3 -Dlinux fileop_linux-s390.o -o fileop
+	$(CC) -O3 -Dlinux pit_server.o -o pit_server
+
+#
+# GNU 'C' compiler Linux build with S/390X, threads, largefiles, async I/O
+# Same library ordering as linux-S390 above.
+#
+linux-S390X: iozone_linux-s390x.o libbif.o libasync.o fileop_linux-s390x.o pit_server.o
+	$(CC) -O2 $(LDFLAGS) iozone_linux-s390x.o \
+		libbif.o libasync.o -lpthread -lrt -o iozone
+	$(CC) -O3 -Dlinux fileop_linux-s390x.o -o fileop
+	$(CC) -O3 -Dlinux pit_server.o -o pit_server
+
+
+#
+# AIX
+# I would have built with ASYNC_IO but the AIX machine does not have
+# POSIX 1003.1b compliant async I/O header files. Has threads, no
+# largefile support.
+#
+AIX: iozone_AIX.o libbif.o fileop_AIX.o
+	$(CC) -O $(LDFLAGS) iozone_AIX.o libbif.o \
+		-lpthreads -o iozone
+	$(CC) -O -Dlinux fileop_AIX.o -o fileop
+
+#
+# AIX-LF
+# I would have built with ASYNC_IO but the AIX machine does not have
+# POSIX 1003.1b compliant async I/O header files. Has threads, and
+# largefile support.
+#
+AIX-LF: iozone_AIX-LF.o libbif.o fileop_AIX-LF.o pit_server.o
+	$(CC) -O $(LDFLAGS) iozone_AIX-LF.o libbif.o \
+		-lpthreads -o iozone
+	$(CC) -O fileop_AIX-LF.o -o fileop
+	$(CC) -O pit_server.o -o pit_server
+
+# AIX64
+# This version uses the 64 bit interfaces and is compiled as 64 bit code.
+# Has threads, async I/O but no largefile support.
+# iozone and fileop are linked with -maix64; pit_server is linked with
+# -maix32 (NOTE(review): presumably because pit_server.o comes from the
+# generic rule, which passes no 64 bit flag -- confirm).
+#
+AIX64: iozone_AIX64.o libbif.o fileop_AIX64.o libasync.o pit_server.o
+	$(GCC) -maix64 -O3 $(LDFLAGS) iozone_AIX64.o libasync.o \
+		libbif.o -lpthreads -o iozone
+	$(GCC) -maix64 -O3 $(LDFLAGS) -Dlinux fileop_AIX64.o -o fileop
+	$(GCC) -maix32 -O3 $(LDFLAGS) pit_server.o -o pit_server
+
+#
+# AIX64-LF
+# This version uses the 64 bit interfaces and is compiled as 64 bit code.
+# Has threads, async I/O and largefile support.
+# Same link layout as AIX64: -maix64 for iozone and fileop, -maix32 for
+# pit_server.
+#
+AIX64-LF: iozone_AIX64-LF.o libbif.o fileop_AIX64-LF.o libasync.o pit_server.o
+	$(GCC) -maix64 -O3 $(LDFLAGS) iozone_AIX64-LF.o libasync.o \
+		libbif.o -lpthreads -o iozone
+	$(GCC) -maix64 -O3 $(LDFLAGS) -Dlinux fileop_AIX64-LF.o -o fileop
+	$(GCC) -maix32 -O3 $(LDFLAGS) pit_server.o -o pit_server
+
+#
+# IRIX 32 bit build with threads, largefiles, async I/O
+# This would like to be in 64 bit mode but it hangs whenever in 64 bit mode.
+# This version uses the 64 bit interfaces but is compiled as 32 bit code
+# (note the -32 flag on the link line).
+#
+IRIX64: iozone_IRIX64.o libasyncw.o libbif.o
+	$(CC) -32 -O $(LDFLAGS) iozone_IRIX64.o libbif.o \
+		-lpthread libasyncw.o -o iozone
+
+#
+# IRIX 32 bit build with threads, No largefiles, and async I/O
+# This version uses the 32 bit interfaces and is compiled as 32 bit code
+#
+IRIX: iozone_IRIX.o libasync.o libbif.o
+	$(CC) -O -32 $(LDFLAGS) iozone_IRIX.o libbif.o -lpthread \
+		libasync.o -o iozone
+
+#
+# CrayX1: 32 bit build with threads, No largefiles, and async I/O
+# This version uses the 32 bit interfaces and is compiled as 32 bit code
+# NOTE(review): the prerequisite list names libasync.o but the link line
+# uses libasyncw.o. One of the two is presumably wrong; check which file
+# the iozone_CrayX1.o recipe produces before changing either.
+#
+CrayX1: iozone_CrayX1.o libasync.o libbif.o
+	$(CC) -O $(LDFLAGS) iozone_CrayX1.o libbif.o \
+		-lpthread libasyncw.o -o iozone
+
+#
+# SPP-UX 32 bit build with threads, No largefiles, and No async I/O,
+# pread extensions
+# For older SPP-UX machines with 9.05 compatibility
+#
+sppux: iozone_sppux.o libbif.o
+	$(NACC) -O $(LDFLAGS) iozone_sppux.o libbif.o \
+		-Wl,+parallel -lcnx_syscall -lpthread -lail -o iozone
+
+#
+# SPP-UX 32 bit build with threads, No largefiles, and No async I/O, pread
+# extensions
+# For Newer SPP-UX machines with 10.01 compatibility
+#
+sppux-10.1: iozone_sppux-10.1.o libbif.o
+	$(NACC) -O $(LDFLAGS) iozone_sppux-10.1.o libbif.o \
+		-lcnx_syscall -Wl,+parallel -lpthread -lail -o iozone
+
+#
+# SPP-UX 32 bit build with threads, No largefiles, and No async I/O, pread
+# extensions
+# For Newer SPP-UX machines with 10.01 compatibility, and no ansi 'C' compiler.
+#
+sppux_no_ansi-10.1: iozone_sppux_no-10.1.o libbif.o
+	$(CCS) -O $(LDFLAGS) iozone_sppux_no-10.1.o libbif.o \
+		-Wl,+parallel -lcnx_syscall \
+		-lpthread -lail -o iozone
+
+#
+# Convex 'C' series 32 bit build with No threads, No largefiles, and No async I/O
+# $(LDFLAGS) must be separated from the first object by a space, or a
+# non-empty LDFLAGS gets glued onto the object file name.
+#
+convex: iozone_convex.o libbif.o
+	$(CC) -O $(LDFLAGS) iozone_convex.o libbif.o -o iozone
+
+#
+# Solaris 32 bit build with threads, largefiles, and async I/O
+#
+Solaris: iozone_solaris.o libasync.o libbif.o fileop_Solaris.o pit_server.o
+	$(CC) -O $(LDFLAGS) iozone_solaris.o libasync.o libbif.o \
+		-lthread -lpthread -lposix4 -lnsl -laio -lsocket \
+		-o iozone
+	$(CC) -O fileop_Solaris.o -o fileop
+	$(CC) -O pit_server.o -lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o pit_server
+
+#
+# Solaris 7 gcc 32 bit build with threads, largefiles, and async I/O
+#
+Solaris7gcc: iozone_solaris7gcc.o libasync7.o libbif7.o
+	$(GCC) -O $(LDFLAGS) iozone_solaris7gcc.o libasync7.o libbif7.o \
+		-lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o iozone
+#
+# Solaris 10 32 bit build with threads, largefiles, and async I/O
+#
+Solaris10: iozone_solaris10.o libasync10.o libbif10.o fileop_Solaris10.o pit_server.o
+	$(CC) -O $(LDFLAGS) iozone_solaris10.o libasync10.o libbif10.o \
+		-lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o iozone
+	$(CC) -O fileop_Solaris10.o -o fileop
+	$(CC) -O pit_server.o -lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o pit_server
+
+#
+# Solaris 10 cc 32 bit build with threads, largefiles, and async I/O
+#
+Solaris10cc: iozone_solaris10cc.o libasync10cc.o libbif10cc.o fileop_Solaris10cc.o pit_server.o
+	$(CC) -O $(LDFLAGS) iozone_solaris10cc.o libasync10cc.o libbif10cc.o \
+		-lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o iozone
+	$(CC) -O fileop_Solaris10cc.o -o fileop
+	$(CC) -O pit_server.o -lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o pit_server
+
+#
+# Solaris 10 gcc 32 bit build with threads, largefiles, and async I/O
+#
+Solaris10gcc: iozone_solaris10gcc.o libasync10.o libbif10.o fileop_Solaris10gcc.o pit_server_solaris10gcc.o
+	$(GCC) -O $(LDFLAGS) iozone_solaris10gcc.o libasync10.o libbif10.o \
+		-lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o iozone
+	$(GCC) -O fileop_Solaris10gcc.o -o fileop
+	$(GCC) -O pit_server_solaris10gcc.o -lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o pit_server
+
+#
+# Solaris 10 gcc 64 bit build with threads, largefiles, and async I/O
+#
+Solaris10gcc-64: iozone_solaris10gcc-64.o libasync10-64.o libbif10-64.o fileop_Solaris10gcc-64.o pit_server_solaris10gcc-64.o
+	$(GCC) -O $(LDFLAGS) $(S10GCCFLAGS) iozone_solaris10gcc-64.o libasync10-64.o libbif10-64.o \
+		-lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o iozone
+	$(GCC) -O $(S10GCCFLAGS) fileop_Solaris10gcc-64.o -o fileop
+	$(GCC) -O $(S10GCCFLAGS) pit_server_solaris10gcc-64.o -lthread -lpthread -lposix4 \
+		-lnsl -laio -lsocket -o pit_server
+
+
+#
+# Solaris 10 cc 64 bit build with threads, largefiles, and async I/O
+# NOTE(review): pit_server.o is linked here with $(S10CCFLAGS) although
+# the generic pit_server.o rule compiles it without that flag -- confirm
+# this links on a real Solaris 10 system.
+#
+Solaris10cc-64: iozone_solaris10cc-64.o libasync10-64.o libbif10-64.o fileop_Solaris10cc-64.o pit_server.o
+	$(CC) -O $(LDFLAGS) $(S10CCFLAGS) iozone_solaris10cc-64.o libasync10-64.o libbif10-64.o \
+		-lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o iozone
+	$(CC) -O $(S10CCFLAGS) fileop_Solaris10cc-64.o -o fileop
+	$(CC) -O $(S10CCFLAGS) pit_server.o -lthread -lpthread -lposix4 \
+		-lnsl -laio -lsocket -o pit_server
+
+
+
+#
+# Solaris 2.6 (32 bit) build with no threads, no largefiles, and no async I/O
+#
+Solaris-2.6: iozone_solaris-2.6.o libbif.o
+	$(CC) -O $(LDFLAGS) iozone_solaris-2.6.o libbif.o \
+		-lnsl -laio -lsocket -o iozone
+
+#
+# Solaris 64 bit build with threads, largefiles, and async I/O
+#
+Solaris8-64: iozone_solaris8-64.o libasync.o libbif.o
+	$(CC) $(LDFLAGS) -fast -xtarget=generic64 -v iozone_solaris8-64.o \
+		libasync.o libbif.o -lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o iozone
+
+#
+# Solaris 64 bit build with threads, largefiles, async I/O, and Vxfs
+#
+# The link command is one logical line: every physical line except the
+# last must end in a backslash, otherwise make runs the object list as
+# a separate shell command.
+Solaris8-64-VXFS: iozone_solaris8-64-VXFS.o libasync.o libbif.o
+	$(CC) $(LDFLAGS) -fast -xtarget=generic64 -v -I/opt/VRTSvxfs/include/ \
+		iozone_solaris8-64-VXFS.o libasync.o libbif.o \
+		-lthread -lpthread -lposix4 -lnsl -laio \
+		-lsocket -o iozone
+
+#
+# Windows build requires Cygwin development environment. You
+# can get this from www.cygwin.com
+# No largefiles, No async I/O
+#
+Windows: iozone_windows.o libbif.o fileop_windows.o pit_server_win.o
+	$(GCC) -O $(LDFLAGS) iozone_windows.o libbif.o -o iozone
+	$(GCC) -O $(LDFLAGS) fileop_windows.o -o fileop
+	$(GCC) -O $(LDFLAGS) pit_server_win.o -o pit_server
+
+#
+# Windows build requires SUA development environment. You
+# can get this from Microsoft
+# No largefiles, No async I/O
+#
+SUA: iozone_sua.o libbif.o fileop_sua.o pit_server_sua.o
+	$(GCC) -O $(LDFLAGS) iozone_sua.o libbif.o -o iozone
+	$(GCC) -O $(LDFLAGS) fileop_sua.o -o fileop
+	$(GCC) -O $(LDFLAGS) pit_server_sua.o -o pit_server
+
+#
+# Uwin build requires UWIN development environment.
+# No threads, No largefiles, No async I/O
+#
+UWIN: iozone_uwin.o libbif.o
+	$(GCC) -O $(LDFLAGS) iozone_uwin.o libbif.o -o iozone
+
+#
+# GNU C compiler BSD/OS build with threads, largefiles, no async I/O
+#
+
+bsdi: iozone_bsdi.o libbif.o fileop_bsdi.o pit_server.o
+	$(CC) -O $(LDFLAGS) iozone_bsdi.o libbif.o -o iozone
+	$(CC) -O fileop_bsdi.o -o fileop
+	$(CC) -O pit_server.o -o pit_server
+
+#
+# GNU C compiler FreeBSD build with threads and async I/O
+# (links -lpthread and libasync.o), no largefiles
+#
+
+freebsd: iozone_freebsd.o libbif.o fileop_freebsd.o libasync.o pit_server.o
+	$(CC) $(LDFLAGS) iozone_freebsd.o libbif.o -lpthread libasync.o \
+		-o iozone
+	$(CC) -O fileop_freebsd.o -o fileop
+	$(CC) -O pit_server.o -o pit_server
+
+#
+# GNU C compiler DragonFly build with no threads, no largefiles
+#
+dragonfly: iozone_dragonfly.o libbif.o fileop_dragonfly.o pit_server.o
+	$(CC) $(LDFLAGS) iozone_dragonfly.o libbif.o -o iozone
+	$(CC) -O fileop_dragonfly.o -o fileop
+	$(CC) -O pit_server.o -o pit_server
+
+#
+# GNU C compiler MacosX build with no threads, no largefiles, no async I/O
+# libbif.o is linked below, so it is listed as a prerequisite like in
+# the other BSD style targets.
+#
+
+macosx: iozone_macosx.o libbif.o fileop_macosx.o pit_server.o
+	$(CC) -O $(LDFLAGS) iozone_macosx.o libbif.o -o iozone
+	$(CC) -O $(LDFLAGS) fileop_macosx.o -o fileop
+	$(CC) -O $(LDFLAGS) pit_server.o -o pit_server
+#
+#
+# GNU C compiler OpenBSD build with no threads, no largefiles, no async I/O
+#
+
+openbsd: iozone_openbsd.o libbif.o fileop_openbsd.o pit_server.o
+	$(CC) -O $(LDFLAGS) iozone_openbsd.o libbif.o -o iozone
+	$(CC) -O fileop_openbsd.o -o fileop
+	$(CC) -O pit_server.o -o pit_server
+
+#
+# GNU C compiler OpenBSD build with threads, no largefiles, no async I/O
+#
+
+openbsd-threads: iozone_openbsd-threads.o libbif.o
+	$(CC) -O $(LDFLAGS) -pthread iozone_openbsd-threads.o \
+		libbif.o -o iozone
+
+#
+# GNU C compiler OSFV3 build
+# Has threads and async I/O but no largefiles.
+#
+
+# Unlike the later OSF targets this one links -lpthreads (with an 's').
+OSFV3: iozone_OSFV3.o libbif.o libasync.o
+	$(CC) -O $(LDFLAGS) iozone_OSFV3.o libbif.o \
+		-lpthreads libasync.o -laio -o iozone
+
+#
+# GNU C compiler OSFV4 build
+# Has threads and async I/O but no largefiles.
+#
+
+OSFV4: iozone_OSFV4.o libbif.o libasync.o
+	$(CC) -O $(LDFLAGS) iozone_OSFV4.o libbif.o -lpthread \
+		libasync.o -laio -o iozone
+
+#
+# GNU C compiler OSFV5 build
+# Has threads and async I/O but no largefiles.
+#
+
+OSFV5: iozone_OSFV5.o libbif.o libasync.o
+	$(CC) -O $(LDFLAGS) iozone_OSFV5.o libbif.o -lpthread \
+		libasync.o -laio -o iozone
+
+#
+# GNU C compiler TRU64 build
+# Has threads and async I/O but no largefiles.
+#
+
+TRU64: iozone_TRU64.o libbif.o libasync.o
+	$(CC) -O $(LDFLAGS) iozone_TRU64.o libbif.o -lpthread \
+		libasync.o -laio -o iozone
+
+#
+# GNU Generic build with no threads, no largefiles, no async I/O
+# for SCO
+# Note: Be sure you have the latest patches for SCO's Openserver
+# or you will get warnings about timer problems.
+# Only iozone itself is built; it is linked with -lsocket and -s.
+#
+
+SCO: iozone_SCO.o libbif.o
+	$(GCC) -O $(LDFLAGS) iozone_SCO.o -lsocket -s libbif.o -o iozone
+
+
+#
+# GNU build with threads, largefiles, async I/O
+# for SCO Unixware 5 7.1.1 i386 x86at SCO UNIX SVR5
+# Note: Be sure you have the latest patches for SCO's Openserver
+# or you will get warnings about timer problems.
+#
+
+SCO_Unixware_gcc: iozone_SCO_Unixware_gcc.o libbif.o libasync.o
+	$(GCC) -O $(LDFLAGS) iozone_SCO_Unixware_gcc.o libbif.o libasync.o \
+		-lsocket -lthread -o iozone
+
+#
+# GNU C compiler NetBSD build with no threads, no largefiles, no async I/O
+#
+
+netbsd: iozone_netbsd.o libbif.o fileop_netbsd.o pit_server.o
+	$(CC) -O $(LDFLAGS) iozone_netbsd.o libbif.o -o iozone
+	$(CC) -O fileop_netbsd.o -o fileop
+	$(CC) -O pit_server.o -o pit_server
+
+#
+#
+# Now for the machine specific stuff
+#
+# Each iozone_<target>.o recipe also compiles libbif.c (and libasync.c
+# where async I/O is enabled) with the same flags. The helper objects
+# share the names libbif.o / libasync.o / libasyncw.o across targets, so
+# run "make clean" before switching to a different target.
+#
+
+iozone_hpux.o: iozone.c libbif.c
+	@echo ""
+	@echo "Building iozone for HP-UX (9.05)"
+	@echo ""
+	$(C89) +e -c -O -Dunix -D_HPUX_SOURCE -DHAVE_ANSIC_C -DNO_THREADS \
+		-DNAME='"hpux"' $(CFLAGS) iozone.c -o iozone_hpux.o
+	$(C89) +e -c -O -Dunix -D_HPUX_SOURCE -DHAVE_ANSIC_C -DNO_THREADS \
+		$(CFLAGS) libbif.c -o libbif.o
+
+iozone_hpux-11.0.o: iozone.c libasync.c libbif.c
+	@echo ""
+	@echo "Building iozone for HP-UX (11.0)"
+	@echo ""
+	$(CC) -c +O3 +Oparallel -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE \
+		-DNAME='"hpux-11.0"' -DHAVE_ANSIC_C -DASYNC_IO -DVXFS $(CFLAGS) \
+		iozone.c -o iozone_hpux-11.0.o
+	$(CC) -c +O3 +Oparallel -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE \
+		-DHAVE_ANSIC_C -DASYNC_IO -DVXFS $(CFLAGS) libasync.c -o libasync.o
+	$(CC) -c +O3 +Oparallel -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE \
+		-DHAVE_ANSIC_C -DASYNC_IO -DVXFS $(CFLAGS) libbif.c -o libbif.o
+
+iozone_hpux-11.0w.o: iozone.c libasync.c libbif.c
+	@echo ""
+	@echo "Building iozone for HP-UX (11.0w)"
+	@echo ""
+	$(CC) -c +DD64 +O3 -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE \
+		-DNAME='"hpux-11.0w"' -DHAVE_ANSIC_C -DASYNC_IO -DVXFS $(CFLAGS) iozone.c \
+		-o iozone_hpux-11.0w.o
+	$(CC) -c +DD64 +O3 -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE \
+		-DHAVE_ANSIC_C -DASYNC_IO -DVXFS $(CFLAGS) libasync.c -o libasyncw.o
+	$(CC) -c +DD64 +O3 -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE \
+		-DHAVE_ANSIC_C -DASYNC_IO -DVXFS $(CFLAGS) libbif.c -o libbif.o
+
+iozone_hpuxs-11.0.o: iozone.c libasync.c libbif.c
+	@echo ""
+	@echo "Building simple iozone for HP-UX (11.0)"
+	@echo ""
+	$(CC) -c -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE -DHAVE_ANSIC_C \
+		-DNAME='"hpuxs-11.0"' -DASYNC_IO -DVXFS -DHAVE_PREAD $(CFLAGS) iozone.c \
+		-o iozone_hpuxs-11.0.o
+	$(CC) -c -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE -DHAVE_ANSIC_C \
+		-DASYNC_IO -DVXFS $(CFLAGS) libasync.c -o libasync.o
+	$(CC) -c -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE -DHAVE_ANSIC_C \
+		-DASYNC_IO -DVXFS $(CFLAGS) libbif.c -o libbif.o
+
+fileop_hpuxs-11.0.o: fileop.c
+	@echo ""
+	@echo "Building simple fileop for HP-UX (11.0)"
+	@echo ""
+	$(CC) -c $(CFLAGS) fileop.c -o fileop_hpuxs-11.0.o
+
+# Built with $(GCC) like the rest of the Solaris10gcc-64 target, because
+# $(S10GCCFLAGS) holds gcc options.
+pit_server_solaris10gcc-64.o: pit_server.c
+	@echo ""
+	@echo "Building the pit_server"
+	@echo ""
+	$(GCC) -c $(CFLAGS) $(S10GCCFLAGS) pit_server.c -o pit_server_solaris10gcc-64.o
+
+# Generic pit_server object shared by most targets
+pit_server.o: pit_server.c
+	@echo ""
+	@echo "Building the pit_server"
+	@echo ""
+	$(CC) -c $(CFLAGS) pit_server.c -o pit_server.o
+
+pit_server-linux-powerpc64.o: pit_server.c
+	@echo ""
+	@echo "Building the pit_server"
+	@echo ""
+	$(CC) -c $(CFLAGS) $(FLAG64BIT) pit_server.c -o pit_server-linux-powerpc64.o
+
+pit_server_solaris10gcc.o: pit_server.c
+	@echo ""
+	@echo "Building the pit_server"
+	@echo ""
+	$(GCC) -c $(CFLAGS) pit_server.c -o pit_server_solaris10gcc.o
+
+
+pit_server_win.o: pit_server.c
+	@echo ""
+	@echo "Building the pit_server for Windows"
+	@echo ""
+	$(GCC) -c $(CFLAGS) -DWindows pit_server.c -o pit_server_win.o
+
+pit_server_sua.o: pit_server.c
+	@echo ""
+	@echo "Building the pit_server for Windows SUA"
+	@echo ""
+	$(GCC) -c $(CFLAGS) -D_SUA_ pit_server.c -o pit_server_sua.o
+
+iozone_hpuxs-11.0w.o: iozone.c libasync.c libbif.c
+	@echo ""
+	@echo "Building simple iozone for HP-UX (11.0w)"
+	@echo ""
+	$(CC) -c +DD64 -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE \
+		-DNAME='"hpuxs-11.0w"' -DHAVE_ANSIC_C -DASYNC_IO -DVXFS \
+		-DHAVE_PREAD $(CFLAGS) iozone.c -o iozone_hpuxs-11.0w.o
+	$(CC) -c +DD64 -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE \
+		-DHAVE_ANSIC_C -DASYNC_IO -DVXFS $(CFLAGS) libasync.c -o libasyncw.o
+	$(CC) -c +DD64 -Dunix -D_LARGEFILE64_SOURCE -D_HPUX_SOURCE \
+		-DHAVE_ANSIC_C -DASYNC_IO -DVXFS $(CFLAGS) libbif.c -o libbif.o
+
+iozone_hpux-10.1.o: iozone.c libbif.c
+	@echo ""
+	@echo "Building iozone for HP-UX (10.1)"
+	@echo ""
+	$(C89) +e -c -O -Dunix -D_HPUX_SOURCE -DHAVE_ANSIC_C -DNO_THREADS \
+		-DNAME='"hpux-10.1"' $(CFLAGS) iozone.c -o iozone_hpux-10.1.o
+	$(C89) +e -c -O -Dunix -D_HPUX_SOURCE -DHAVE_ANSIC_C -DNO_THREADS \
+		$(CFLAGS) libbif.c -o libbif.o
+
+iozone_hpux-10.20.o: iozone.c libbif.c
+	@echo ""
+	@echo "Building iozone for HP-UX (10.20)"
+	@echo ""
+	$(C89) +e -c -O -Dunix -D_HPUX_SOURCE -DHAVE_ANSIC_C -DNO_THREADS \
+		-DNAME='"hpux-10.20"' $(CFLAGS) iozone.c -o iozone_hpux-10.20.o
+	$(C89) +e -c -O -Dunix -D_HPUX_SOURCE -DHAVE_ANSIC_C -DNO_THREADS \
+		$(CFLAGS) libbif.c -o libbif.o
+
+# NAME carries the target name, as in every other rule here
+iozone_ghpux.o: iozone.c libbif.c
+	@echo ""
+	@echo "Building iozone for GCC HP-UX (9.05) "
+	@echo ""
+	$(GCC) -c -O -Dunix -D_HPUX_SOURCE -DHAVE_ANSIC_C -DNO_THREADS $(CFLAGS) iozone.c \
+		-DNAME='"ghpux"' -o iozone_ghpux.o
+	$(GCC) -c -O -Dunix -D_HPUX_SOURCE -DHAVE_ANSIC_C -DNO_THREADS \
+		$(CFLAGS) libbif.c -o libbif.o
+
+iozone_generic.o: iozone.c libbif.c
+	@echo ""
+	@echo "Building iozone Generic "
+	@echo ""
+	$(CC) -c -O -Dgeneric -Dunix -DHAVE_ANSIC_C -DNO_THREADS \
+		-DNAME='"Generic"' $(CFLAGS) iozone.c -o iozone_generic.o
+	$(CC) -c -O -Dgeneric -Dunix -DHAVE_ANSIC_C -DNO_THREADS \
+		$(CFLAGS) libbif.c -o libbif.o
+
+iozone_hpux_no.o: iozone.c libbif.c
+	@echo ""
+	@echo "Building iozone for HP-UX (9.05) without ansi compiler"
+	@echo ""
+	$(NACC) -c -O -Dunix -D_HPUX_SOURCE -DNO_THREADS iozone.c \
+		-DNAME='"hpux_no_ansi"' -o iozone_hpux_no.o
+	$(NACC) -c -O -Dunix -D_HPUX_SOURCE -DNO_THREADS \
+		libbif.c -o libbif.o
+
+# The recipe compiles libbif.c too, so it is listed as a prerequisite
+iozone_hpux_no-10.1.o: iozone.c libbif.c
+	@echo ""
+	@echo "Building iozone for HP-UX (10.1) without ansi compiler"
+ @echo "" + $(NACC) -c -O -Dunix -D_HPUX_SOURCE -DNO_THREADS iozone.c \ + -DNAME='"hpux_no_ansi_10.1"' -o iozone_hpux_no-10.1.o + $(NACC) -c -O -Dunix -D_HPUX_SOURCE -DNO_THREADS \ + libbif.c -o libbif.o + +iozone_linux-powerpc.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for Linux PowerPC" + @echo "" + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DDONT_HAVE_O_DIRECT \ + -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE $(CFLAGS) iozone.c \ + -DNAME='"linux-powerpc"' -o iozone_linux-powerpc.o + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DSHARED_MEM -Dlinux $(CFLAGS) libbif.c -o libbif.o + $(CC) -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c -o libasync.o + +iozone_linux-powerpc64.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for Linux PowerPC64" + @echo "" + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DNAME='"linux-powerpc64"' \ + -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE $(CFLAGS) iozone.c \ + $(FLAG64BIT) -o iozone_linux-powerpc64.o + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DSHARED_MEM -Dlinux $(CFLAGS) libbif.c $(FLAG64BIT) -o libbif.o + $(CC) -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c $(FLAG64BIT) -o libasync.o + + +iozone_linux-sparc.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for Linux Sparc" + @echo "" + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DDONT_HAVE_O_DIRECT \ + -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE $(CFLAGS) iozone.c \ + -DNAME='"linux-sparc"' -o iozone_linux-sparc.o + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DSHARED_MEM -Dlinux $(CFLAGS) libbif.c -o libbif.o + $(CC) -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c -o libasync.o + +iozone_linux.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for Linux" + @echo "" + 
$(CC) -Wall -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DHAVE_PREAD \ + -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE $(CFLAGS) iozone.c \ + -DNAME='"linux"' -o iozone_linux.o + $(CC) -Wall -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DSHARED_MEM -Dlinux $(CFLAGS) libbif.c -o libbif.o + $(CC) -Wall -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c -o libasync.o + +fileop_AIX.o: fileop.c + @echo "" + @echo "Building fileop for AIX" + @echo "" + $(CC) -c -O $(CFLAGS) fileop.c -o fileop_AIX.o + +fileop_AIX-LF.o: fileop.c + @echo "" + @echo "Building fileop for AIX-LF" + @echo "" + $(CC) -c -O $(CFLAGS) fileop.c -o fileop_AIX-LF.o + +fileop_AIX64.o: fileop.c + @echo "" + @echo "Building fileop for AIX64" + @echo "" + $(GCC) -maix64 -c -O3 $(CFLAGS) fileop.c -o fileop_AIX64.o + +fileop_AIX64-LF.o: fileop.c + @echo "" + @echo "Building fileop for AIX64-LF" + @echo "" + $(GCC) -maix64 -c -O3 $(CFLAGS) fileop.c -o fileop_AIX64-LF.o + +fileop_bsdi.o: fileop.c + @echo "" + @echo "Building fileop for BSDi" + @echo "" + $(CC) -c -O $(CFLAGS) fileop.c -o fileop_bsdi.o + +fileop_freebsd.o: fileop.c + @echo "" + @echo "Building fileop for FreeBSD" + @echo "" + $(CC) -c -O $(CFLAGS) fileop.c -o fileop_freebsd.o + +fileop_netbsd.o: fileop.c + @echo "" + @echo "Building fileop for NetBSD" + @echo "" + $(CC) -c -O $(CFLAGS) fileop.c -o fileop_netbsd.o + +fileop_Solaris.o: fileop.c + @echo "" + @echo "Building fileop for Solaris" + @echo "" + $(CC) -c -O $(CFLAGS) fileop.c -o fileop_Solaris.o + +fileop_Solaris10.o: fileop.c + @echo "" + @echo "Building fileop for Solaris10" + @echo "" + $(CC) -c -O $(CFLAGS) fileop.c -o fileop_Solaris10.o + +fileop_Solaris10cc.o: fileop.c + @echo "" + @echo "Building fileop for Solaris10cc" + @echo "" + $(CC) -c -O $(CFLAGS) fileop.c -o fileop_Solaris10cc.o + + +fileop_Solaris10gcc.o: fileop.c + @echo "" + @echo "Building fileop for Solaris10gcc" + @echo "" + $(GCC) -c -O $(CFLAGS) fileop.c 
-o fileop_Solaris10gcc.o + +fileop_Solaris10gcc-64.o: fileop.c + @echo "" + @echo "Building fileop for Solaris10gcc-64" + @echo "" + $(GCC) -c -O $(CFLAGS) $(S10GCCFLAGS) fileop.c -o fileop_Solaris10gcc-64.o + +fileop_Solaris10cc-64.o: fileop.c + @echo "" + @echo "Building fileop for Solaris10cc-64" + @echo "" + $(CC) -c -O $(CFLAGS) $(S10CCFLAGS) fileop.c -o fileop_Solaris10cc-64.o + + +fileop_linux.o: fileop.c + @echo "" + @echo "Building fileop for Linux" + @echo "" + $(CC) -Wall -c -O3 $(CFLAGS) fileop.c -o fileop_linux.o + +fileop_openbsd.o: fileop.c + @echo "" + @echo "Building fileop for OpenBSD" + @echo "" + $(CC) -Wall -c -O $(CFLAGS) fileop.c -o fileop_openbsd.o + +fileop_macosx.o: fileop.c + @echo "" + @echo "Building fileop for MAC OS X" + @echo "" + $(CC) -Wall -c -O -Dmacosx $(CFLAGS) fileop.c -o fileop_macosx.o + +fileop_linux-ia64.o: fileop.c + @echo "" + @echo "Building fileop for Linux-ia64" + @echo "" + $(CC) -Wall -c -O3 $(CFLAGS) fileop.c -o fileop_linux-ia64.o + +fileop_linux-ppc.o: fileop.c + @echo "" + @echo "Building fileop for Linux-powerpc" + @echo "" + $(CC) -Wall -c -O3 $(CFLAGS) fileop.c -o fileop_linux-ppc.o + +fileop_linux-ppc64.o: fileop.c + @echo "" + @echo "Building fileop for Linux-powerpc64" + @echo "" + $(CC) -Wall -c -O3 $(CFLAGS) $(FLAG64BIT) fileop.c -o fileop_linux-ppc64.o + +fileop_linux-AMD64.o: fileop.c + @echo "" + @echo "Building fileop for Linux-AMD64" + @echo "" + $(CC) -Wall -c -O3 $(CFLAGS) fileop.c -o fileop_linux-AMD64.o + +fileop_linux-arm.o: fileop.c + @echo "" + @echo "Building fileop for Linux-arm" + @echo "" + $(GCC) -Wall -c -O3 $(CFLAGS) fileop.c -o fileop_linux-arm.o + +fileop_linux-s390.o: fileop.c + @echo "" + @echo "Building fileop for Linux-S390" + @echo "" + $(GCC) -Wall -c -O3 $(CFLAGS) fileop.c -o fileop_linux-s390.o + +fileop_linux-s390x.o: fileop.c + @echo "" + @echo "Building fileop for Linux-s390x" + @echo "" + $(GCC) -Wall -c -O3 $(CFLAGS) fileop.c -o fileop_linux-s390x.o + +fileop_windows.o: 
fileop.c + @echo "" + @echo "Building fileop for Windows" + @echo "" + $(GCC) -Wall -c -O3 $(CFLAGS) -DWindows fileop.c -o fileop_windows.o + +fileop_sua.o: fileop.c + @echo "" + @echo "Building fileop for Windows SUA" + @echo "" + $(GCC) -Wall -c -O3 $(CFLAGS) -D_SUA_ fileop.c -o fileop_sua.o + +iozone_linux-ia64.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for Linux-ia64" + @echo "" + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DNAME='"linux-ia64"' \ + -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE $(CFLAGS) iozone.c \ + -o iozone_linux-ia64.o + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DSHARED_MEM -Dlinux $(CFLAGS) libbif.c -o libbif.o + $(CC) -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c -o libasync.o + +iozone_linux-arm.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for Linux-arm" + @echo "" + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DHAVE_PREAD \ + -DNAME='"linux-arm"' -DLINUX_ARM -DSHARED_MEM \ + -Dlinux -D_LARGEFILE64_SOURCE $(CFLAGS) iozone.c \ + -o iozone_linux-arm.o + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DSHARED_MEM -Dlinux $(CFLAGS) libbif.c -o libbif.o + $(CC) -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c -o libasync.o + +iozone_linux-AMD64.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for Linux-AMD64" + @echo "" + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DNAME='"linux-AMD64"' \ + -D__AMD64__ -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE \ + -DHAVE_PREAD $(CFLAGS) iozone.c -o iozone_linux-AMD64.o + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DSHARED_MEM -Dlinux $(CFLAGS) libbif.c -o libbif.o + $(CC) -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c -o libasync.o + +iozone_linux-s390.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building 
iozone for Linux-s390" + @echo "" + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DHAVE_PREAD \ + -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE $(CFLAGS) iozone.c \ + -DNAME='"linux-s390"' -o iozone_linux-s390.o + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DSHARED_MEM -Dlinux \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libbif.c -o libbif.o + $(CC) -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c -o libasync.o + +iozone_linux-s390x.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for Linux-s390x" + @echo "" + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DHAVE_PREAD \ + -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE $(CFLAGS) iozone.c \ + -DNAME='"linux-s390x"' -o iozone_linux-s390x.o + $(CC) -c -O3 -Dunix -DHAVE_ANSIC_C -DSHARED_MEM -Dlinux \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libbif.c -o libbif.o + $(CC) -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c -o libasync.o + + +iozone_AIX.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for AIX" + @echo "" + $(CC) -c -O -D__AIX__ -D_NO_PROTO -Dunix -DHAVE_ANSIC_C \ + -DNAME='"AIX"' -DSHARED_MEM $(CFLAGS) iozone.c -o iozone_AIX.o + $(CC) -c -O -D__AIX__ -D_NO_PROTO -Dunix -DHAVE_ANSIC_C \ + -DSHARED_MEM $(CFLAGS) libbif.c -o libbif.o + +iozone_AIX-LF.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for AIX with Large files" + @echo "" + $(CC) -c -O -D__AIX__ -D_NO_PROTO -Dunix -DHAVE_ANSIC_C \ + -DSHARED_MEM -D_LARGEFILE64_SOURCE -D_LARGE_FILES \ + -DNAME='"AIX-LF"' $(CFLAGS) iozone.c -o iozone_AIX-LF.o + $(CC) -c -O -D__AIX__ -D_NO_PROTO -Dunix -DHAVE_ANSIC_C \ + -DSHARED_MEM -D_LARGEFILE64_SOURCE -D_LARGE_FILES \ + $(CFLAGS) libbif.c -o libbif.o + +iozone_AIX64.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for AIX64" + @echo "" + $(GCC) -maix64 -c -O3 -D__AIX__ -D_NO_PROTO -Dunix -DHAVE_ANSIC_C \ + -DASYNC_IO -DNAME='"AIX64"' -DSHARED_MEM \ + $(CFLAGS) iozone.c -o iozone_AIX64.o + $(GCC) 
-maix64 -c -O3 -D__AIX__ -D_NO_PROTO -Dunix -DHAVE_ANSIC_C \ + -DASYNC_IO -DSHARED_MEM $(CFLAGS) libbif.c -o libbif.o + $(GCC) -maix64 -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + $(CFLAGS) libasync.c -o libasync.o + +iozone_AIX64-LF.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone for AIX64 with Large files" + @echo "" + $(GCC) -maix64 -c -O3 -D__AIX__ -D_NO_PROTO -Dunix -DHAVE_ANSIC_C \ + -DASYNC_IO -DNAME='"AIX64-LF"' -DSHARED_MEM \ + -D_LARGEFILE64_SOURCE -D_LARGE_FILES \ + $(CFLAGS) iozone.c -o iozone_AIX64-LF.o + $(GCC) -maix64 -c -O3 -D__AIX__ -D_NO_PROTO -Dunix -DHAVE_ANSIC_C \ + -DASYNC_IO -DSHARED_MEM -D_LARGEFILE64_SOURCE -D_LARGE_FILES \ + $(CFLAGS) libbif.c -o libbif.o + $(GCC) -maix64 -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE -D_LARGE_FILES \ + $(CFLAGS) libasync.c -o libasync.o + +iozone_solaris.o: iozone.c libasync.c libbif.c + @echo "" + @echo "Building iozone for Solaris" + @echo "" + $(CC) -c -O -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris"' $(CFLAGS) iozone.c -o iozone_solaris.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + $(CFLAGS) libasync.c -o libasync.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + $(CFLAGS) libbif.c -o libbif.o + +iozone_solaris7gcc.o: iozone.c libasync.c libbif.c + @echo "" + @echo "Building iozone for Solaris7gcc" + @echo "" + $(GCC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris7gcc"' $(CFLAGS) libasync.c -o libasync7.o + $(GCC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + $(CFLAGS) libbif.c -o libbif7.o + $(GCC) -c -O -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 
-Dsolaris \ + -DNAME='"Solaris7gcc"' $(CFLAGS) iozone.c -o iozone_solaris7gcc.o + +iozone_solaris10.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for Solaris10" + @echo "" + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + $(CFLAGS) libbif.c -o libbif10.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10"' $(CFLAGS) libasync.c -o libasync10.o + $(CC) -c -O -Dunix -DHAVE_ANSIC_C -DASYNC_IO -Dstudio11 \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10"' $(CFLAGS) iozone.c -o iozone_solaris10.o + +iozone_solaris10cc.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for Solaris10cc" + @echo "" + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + $(CFLAGS) libbif.c -o libbif10cc.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10"' $(CFLAGS) libasync.c -o libasync10cc.o + $(CC) -c -O -Dunix -DHAVE_ANSIC_C -DASYNC_IO -Dstudio11 \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10"' $(CFLAGS) iozone.c -o iozone_solaris10cc.o + +iozone_solaris10gcc.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for Solaris10gcc" + @echo "" + $(GCC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + $(CFLAGS) libbif.c -o libbif10.o + $(GCC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10gcc"' $(CFLAGS) libasync.c -o libasync10.o + $(GCC) -c -O -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10gcc"' $(CFLAGS) iozone.c -o iozone_solaris10gcc.o + +iozone_solaris10gcc-64.o: iozone.c 
libbif.c + @echo "" + @echo "Building iozone for Solaris10gcc-64" + @echo "" + $(GCC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + $(CFLAGS) $(S10GCCFLAGS) libbif.c -o libbif10-64.o + $(GCC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10gcc-64"' $(CFLAGS) $(S10GCCFLAGS) libasync.c -o libasync10-64.o + $(GCC) -c -O -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10gcc-64"' $(CFLAGS) $(S10GCCFLAGS) iozone.c -o iozone_solaris10gcc-64.o + +iozone_solaris10cc-64.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for Solaris10cc-64" + @echo "" + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + $(CFLAGS) $(S10CCFLAGS) libbif.c -o libbif10-64.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D__LP64__ \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10cc-64"' $(CFLAGS) $(S10CCFLAGS) libasync.c -o libasync10-64.o + $(CC) -c -O -Dunix -DHAVE_ANSIC_C -DASYNC_IO -Dstudio11 \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Dsolaris \ + -DNAME='"Solaris10cc-64"' $(CFLAGS) $(S10CCFLAGS) iozone.c -o iozone_solaris10cc-64.o + + +# +# -DSHARED_MEM -Dsolaris libasync.c -o libasync.o +# -DSHARED_MEM -Dsolaris iozone.c -o iozone_solaris.o +# + +iozone_solaris-2.6.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for Solaris-2.6" + @echo "" + $(CC) -c -O -Dunix -DHAVE_ANSIC_C -DNO_THREADS \ + -DNAME='"Solaris-2.6"' -Dsolaris $(CFLAGS) iozone.c -o iozone_solaris-2.6.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C \ + -Dsolaris $(CFLAGS) libbif.c -o libbif.o + +iozone_solaris8-64.o: iozone.c libasync.c libbif.c + @echo "" + @echo "Building iozone for Solaris8-64" + @echo "" + $(CC) -fast -xtarget=generic64 -v -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -D__LP64__ 
-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 \ + -DNAME='"Solaris8-64"' -Dsolaris -DHAVE_PREAD \ + $(CFLAGS) iozone.c -o iozone_solaris8-64.o + $(CC) -fast -xtarget=generic64 -v -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -D__LP64__ -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 \ + -Dsolaris -DHAVE_PREAD $(CFLAGS) libasync.c -o libasync.o + $(CC) -fast -xtarget=generic64 -v -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -D__LP64__ -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 \ + -Dsolaris -DHAVE_PREAD $(CFLAGS) libbif.c -o libbif.o + +iozone_solaris8-64-VXFS.o: iozone.c libasync.c libbif.c + @echo "" + @echo "Building iozone for Solaris8-64-VXFS" + @echo "" + $(CC) -fast -xtarget=generic64 -v -c -I/opt/VRTSvxfs/include/ -Dunix \ + -DVXFS -DHAVE_ANSIC_C -DASYNC_IO \ + -D__LP64__ -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 \ + -DNAME='"Solaris8-64"' -Dsolaris -DHAVE_PREAD \ + $(CFLAGS) iozone.c -o iozone_solaris8-64-VXFS.o + $(CC) -fast -xtarget=generic64 -v -c -I/opt/VRTSvxfs/include/ -Dunix \ + -DVXFS -DHAVE_ANSIC_C -DASYNC_IO \ + -D__LP64__ -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 \ + -Dsolaris -DHAVE_PREAD $(CFLAGS) libasync.c -o libasync.o + $(CC) -fast -xtarget=generic64 -v -c -I/opt/VRTSvxfs/include/ -Dunix \ + -DVXFS -DHAVE_ANSIC_C -DASYNC_IO \ + -D__LP64__ -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 \ + -Dsolaris -DHAVE_PREAD $(CFLAGS) libbif.c -o libbif.o + +iozone_windows.o: iozone.c libasync.c libbif.c fileop.c + @echo "" + @echo "Building iozone for Windows (No async I/O)" + @echo "" + $(GCC) -c -O -Dunix -DHAVE_ANSIC_C -DNO_MADVISE \ + -DWindows $(CFLAGS) -DDONT_HAVE_O_DIRECT iozone.c \ + -o iozone_windows.o + $(GCC) -c -O -Dunix -DHAVE_ANSIC_C -DNO_MADVISE \ + -DWindows $(CFLAGS) libbif.c -o libbif.o + + +# -D_SUA_ $(CFLAGS) -DDONT_HAVE_O_DIRECT iozone.c \ + +iozone_sua.o: iozone.c libasync.c libbif.c fileop.c + @echo "" + @echo "Building iozone for Windows SUA (No async I/O)" + @echo "" + $(GCC) -c -O -Dunix -DHAVE_ANSIC_C -D_XOPEN_SOURCE -DNO_MADVISE \ + 
-D_SUA_ $(CFLAGS) iozone.c \ + -DNAME='"Windows SUA"' -o iozone_sua.o + $(GCC) -c -O -Dunix -D_SUA_ -D_XOPEN_SOURCE -DHAVE_ANSIC_C \ + -DNO_MADVISE $(CFLAGS) libbif.c -o libbif.o + +iozone_uwin.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for UWIN (No threads, No async I/O)" + @echo "" + $(GCC) -c -O -DUWIN -Dunix -DHAVE_ANSIC_C -DNO_THREADS -DNO_MADVISE \ + -DNAME='"UWIN"' -DSHARED_MEM -DWindows $(CFLAGS) iozone.c -o iozone_uwin.o + $(GCC) -c -O -DUWIN -Dunix -DHAVE_ANSIC_C -DNO_THREADS -DNO_MADVISE \ + -DSHARED_MEM -DWindows $(CFLAGS) libbif.c -o libbif.o + +iozone_IRIX64.o: iozone.c libasync.c libbif.c + @echo "" + @echo "Building iozone for IRIX64" + @echo "" + $(CC) -32 -O -c -Dunix -DHAVE_ANSIC_C -D_LARGEFILE64_SOURCE -DASYNC_IO \ + -DNAME='"IRIX64"' -DIRIX64 -DSHARED_MEM $(CFLAGS) iozone.c -o iozone_IRIX64.o + $(CC) -32 -O -c -Dunix -DHAVE_ANSIC_C -D_LARGEFILE64_SOURCE -DASYNC_IO \ + -DIRIX64 -DSHARED_MEM $(CFLAGS) libasync.c -o libasyncw.o + $(CC) -32 -O -c -Dunix -DHAVE_ANSIC_C -D_LARGEFILE64_SOURCE -DASYNC_IO \ + -DIRIX64 -DSHARED_MEM $(CFLAGS) libbif.c -o libbif.o + +iozone_IRIX.o: iozone.c libasync.c libbif.c + @echo "" + @echo "Building iozone for IRIX" + @echo "" + $(CC) -O -32 -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -DNAME='"IRIX"' -DIRIX -DSHARED_MEM $(CFLAGS) iozone.c -o iozone_IRIX.o + $(CC) -O -32 -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -DIRIX -DSHARED_MEM $(CFLAGS) libasync.c -o libasync.o + $(CC) -O -32 -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO \ + -DIRIX -DSHARED_MEM $(CFLAGS) libbif.c -o libbif.o + +iozone_CrayX1.o: iozone.c libasync.c libbif.c + @echo "" + @echo "Building iozone for CrayX1" + @echo "" + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DNAME='"CrayX1"' -DIRIX64 -DSHARED_MEM -D__CrayX1__ \ + $(CFLAGS) iozone.c -o iozone_CrayX1.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DIRIX64 -DSHARED_MEM -D__CrayX1__ \ + $(CFLAGS) libasync.c -o libasyncw.o + $(CC) -O -c -Dunix 
-DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \ + -DIRIX64 -DSHARED_MEM -D__CrayX1__ $(CFLAGS) libbif.c \ + -o libbif.o + +iozone_sppux.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for SPP-UX using Convex compiler" + @echo "" + $(NACC) -c -O -Dunix -D_HPUX_SOURCE -D__convex_spp \ + -DNAME='"sppux"' -Wl,+parallel -DHAVE_ANSIC_C -DHAVE_PREAD \ + -DHAVE_PREADV $(CFLAGS) iozone.c -o iozone_sppux.o + $(NACC) -O -Dunix -D_HPUX_SOURCE -D__convex_spp \ + -Wl,+parallel -DHAVE_ANSIC_C -DHAVE_PREAD -DHAVE_PREADV \ + $(CFLAGS) libbif.c -o libbif.o + +iozone_sppux-10.1.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for SPP-UX using HP ansic compiler" + @echo "" + $(NACC) -c -O -Dunix -D_HPUX_SOURCE -D__convex_spp \ + -DHAVE_ANSIC_C -DHAVE_PREAD -DHAVE_PREADV $(CFLAGS) iozone.c \ + -DNAME='"sppux-10.1"' -Wl,+parallel -o iozone_sppux-10.1.o + $(NACC) -c -O -Dunix -D_HPUX_SOURCE -D__convex_spp \ + -DHAVE_ANSIC_C -DHAVE_PREAD -DHAVE_PREADV \ + $(CFLAGS) libbif.c -Wl,+parallel -o libbif.o + +iozone_sppux_no-10.1.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for SPP-UX no ANSI c compiler" + @echo "" + $(CCS) -c -O -Dunix -D_HPUX_SOURCE -D__convex_spp \ + -DNAME='"sppux_no_ansi_10.1"' -Wl,+parallel -DHAVE_PREAD \ + -DHAVE_PREADV $(CFLAGS) iozone.c -o iozone_sppux_no-10.1.o + $(CCS) -c -O -Dunix -D_HPUX_SOURCE -D__convex_spp \ + -Wl,+parallel -DHAVE_PREAD -DHAVE_PREADV $(CFLAGS) \ + libbif.c -o libbif.o + +iozone_convex.o: iozone.c libbif.c + @echo "" + @echo "Building iozone for Convex 'C' series" + @echo "" + $(CC) -c -O -Dunix -DNO_THREADS -Dbsd4_2 $(CFLAGS) iozone.c \ + -DNAME='"Convex"' -o iozone_convex.o + $(CC) -c -O -Dunix -DNO_THREADS -Dbsd4_2 \ + $(CFLAGS) libbif.c -o libbif.o + +iozone_bsdi.o: iozone.c libbif.c + @echo "" + @echo "Build iozone for BSD/OS" + @echo "" + $(CC) -c -O -Dunix -Dbsd4_4 -DHAVE_ANSIC_C \ + -DNAME='"bsdi"' $(CFLAGS) iozone.c -o iozone_bsdi.o + $(CC) -c -O -Dunix -Dbsd4_4 -DHAVE_ANSIC_C \ + $(CFLAGS) libbif.c -o 
libbif.o + +iozone_freebsd.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Build iozone for FreeBSD" + @echo "" + $(CC) -c ${CFLAGS} -DFreeBSD -Dunix -Dbsd4_4 -DHAVE_ANSIC_C -DASYNC_IO \ + -DHAVE_PREAD -DNAME='"freebsd"' -DSHARED_MEM \ + $(CFLAGS) iozone.c -o iozone_freebsd.o + $(CC) -c ${CFLAGS} -DFreeBSD -Dunix -Dbsd4_4 -DHAVE_ANSIC_C -DASYNC_IO \ + -DSHARED_MEM -DHAVE_PREAD $(CFLAGS) libbif.c \ + -o libbif.o + $(CC) -c ${CFLAGS} -DFreeBSD -Dunix -Dbsd4_4 -DHAVE_ANSIC_C -DASYNC_IO \ + -DSHARED_MEM -DHAVE_PREAD $(CFLAGS) libasync.c \ + -o libasync.o + +iozone_dragonfly.o: iozone.c libbif.c + @echo "" + @echo "Build iozone for DragonFly" + @echo "" + $(CC) -c ${CFLAGS} -D__DragonFly__ -Dunix -DHAVE_ANSIC_C -DNO_THREADS \ + -DNAME='"dragonfly"' -DSHARED_MEM -DHAVE_PREAD -DHAVE_PREADV \ + $(CFLAGS) iozone.c -o iozone_dragonfly.o + $(CC) -c ${CFLAGS} -D__DragonFly__ -Dunix -DHAVE_ANSIC_C -DNO_THREADS \ + -DSHARED_MEM -DHAVE_PREAD -DHAVE_PREADV $(CFLAGS) libbif.c \ + -o libbif.o + +iozone_macosx.o: iozone.c libbif.c + @echo "" + @echo "Build iozone for MacOSX" + @echo "" + $(CC) -c -O -Dunix -Dbsd4_2 -Dmacosx -DHAVE_ANSIC_C -DNO_THREADS \ + -DNAME='"macosx"' -DSHARED_MEM $(CFLAGS) iozone.c -o iozone_macosx.o + $(CC) -c -O -Dunix -Dbsd4_2 -Dmacosx -DHAVE_ANSIC_C -DNO_THREADS \ + -DSHARED_MEM $(CFLAGS) libbif.c -o libbif.o + +iozone_openbsd.o: iozone.c libbif.c + @echo "" + @echo "Build iozone for OpenBSD" + @echo "" + $(CC) -c -O -Dunix -Dbsd4_4 -DHAVE_ANSIC_C -DNO_THREADS \ + -DNAME='"openbsd"' -DSHARED_MEM $(CFLAGS) iozone.c -o iozone_openbsd.o + $(CC) -c -O -Dunix -Dbsd4_4 -DHAVE_ANSIC_C -DNO_THREADS \ + -DSHARED_MEM $(CFLAGS) libbif.c -o libbif.o + +iozone_openbsd-threads.o: iozone.c libbif.c + @echo "" + @echo "Build iozone for OpenBSD with threads" + @echo "" + $(CC) -c -O -pthread -Dunix -Dbsd4_4 -DHAVE_ANSIC_C \ + -DNAME='"openbsd-threads"' $(CFLAGS) iozone.c \ + -o iozone_openbsd-threads.o + $(CC) -c -O -pthread -Dunix -Dbsd4_4 -DHAVE_ANSIC_C \ + $(CFLAGS) 
libbif.c -o libbif.o + +iozone_OSFV3.o: iozone.c libbif.c + @echo "" + @echo "Build iozone for OSFV3" + @echo "" + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV3 \ + -DNAME='"OSFV3"' -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) iozone.c \ + -o iozone_OSFV3.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV3 \ + -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) libbif.c -o libbif.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV3 \ + -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) libasync.c -o libasync.o + +iozone_OSFV4.o: iozone.c libbif.c + @echo "" + @echo "Build iozone for OSFV4" + @echo "" + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV4 \ + -DNAME='"OSFV4"' -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) iozone.c \ + -o iozone_OSFV4.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV4 \ + -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) libbif.c -o libbif.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV4 \ + -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) libasync.c -o libasync.o + +iozone_OSFV5.o: iozone.c libbif.c + @echo "" + @echo "Build iozone for OSFV5" + @echo "" + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV5 \ + -DNAME='"OSFV5"' -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) iozone.c \ + -o iozone_OSFV5.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV5 \ + -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) libbif.c -o libbif.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV5 \ + -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) libasync.c -o libasync.o + +iozone_TRU64.o: iozone.c libbif.c + @echo "" + @echo "Build iozone for TRU64" + @echo "" + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV5 -DTRU64 -DHAVE_PREAD \ + -DNAME='"TRU64"' -DNO_PRINT_LLD -DOSF_64 -pthread $(CFLAGS) iozone.c \ + -o iozone_TRU64.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV5 -DHAVE_PREAD \ + -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) libbif.c -o libbif.o + $(CC) -O -c -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DOSFV5 -DHAVE_PREAD \ + -DNO_PRINT_LLD -DOSF_64 $(CFLAGS) libasync.c -o libasync.o + +iozone_SCO.o: iozone.c libbif.c + @echo "" + @echo 
"Building iozone SCO " + @echo "" + $(GCC) -c -O -DSCO -Dunix -DHAVE_ANSIC_C -DNO_THREADS -DNO_MADVISE \ + -DNAME='"SCO"' $(CFLAGS) iozone.c -o iozone_SCO.o + $(GCC) -c -O -DSCO -Dunix -DHAVE_ANSIC_C -DNO_THREADS -DNO_MADVISE \ + $(CFLAGS) libbif.c -o libbif.o + +iozone_SCO_Unixware_gcc.o: iozone.c libbif.c libasync.c + @echo "" + @echo "Building iozone SCO_Unixware_gcc " + @echo "" + $(GCC) -c -O -DSCO_Unixware_gcc -Dunix -DHAVE_ANSIC_C \ + -DASYNC_IO -D_LARGEFILE64_SOURCE $(CFLAGS) iozone.c \ + -DNAME='"SCO_Unixware_gcc"' -o iozone_SCO_Unixware_gcc.o + $(GCC) -c -O -DSCO_Unixware_gcc -Dunix -DHAVE_ANSIC_C \ + -DASYNC_IO -D_LARGEFILE64_SOURCE $(CFLAGS) libbif.c -o libbif.o + $(GCC) -c -O -DSCO_Unixware_gcc -Dunix -DHAVE_ANSIC_C \ + -DASYNC_IO -D_LARGEFILE64_SOURCE $(CFLAGS) libasync.c -o libasync.o + +iozone_netbsd.o: iozone.c libbif.c + @echo "" + @echo "Building iozone NetBSD " + @echo "" + $(CC) -c -O -Dunix -Dbsd4_4 -DHAVE_ANSIC_C -DNO_THREADS \ + -DNAME='"netbsd"' -DSHARED_MEM $(CFLAGS) iozone.c -o iozone_netbsd.o + $(CC) -c -O -Dunix -Dbsd4_4 -DHAVE_ANSIC_C -DNO_THREADS \ + -DSHARED_MEM $(CFLAGS) libbif.c -o libbif.o diff --git a/src/components/appio/tests/iozone/pit_server.c b/src/components/appio/tests/iozone/pit_server.c new file mode 100644 index 0000000..638e2f0 --- /dev/null +++ b/src/components/appio/tests/iozone/pit_server.c @@ -0,0 +1,831 @@ +/****************************************************************************** +* File: pit_server.c +* +* Description: Contains source code for an IPv6-capable 'PIT' server. +* This is a derivative of the tod6 (time-of-day) server that was written +* by John Wenker. +* ....... +* Author of tod6: John Wenker, Sr. Software Engineer, +* Performance Technologies, San Diego, USA +* ....... +* The program tod6 was a time of day server. It has beeen modified +* to provide a microsecond timestamp on request. Modified and adapted +* for PIT purposes by Don Capps. 
[ capps@iozone.org ] +* +* This server sends the current value of gettimeofday() in +* microseconds back to the client, as a numerical string. +* +* /etc/services should contain "PIT" with a specified port value. +* +******************************************************************************/ +/* +** System header files. +*/ +#include /* errno declaration & error codes. */ +#include /* getaddrinfo(3) et al. */ +#include /* sockaddr_in & sockaddr_in6 definition. */ +#include /* printf(3) et al. */ +#include /* exit(2). */ +#include /* String manipulation & memory functions. */ +#if defined(_SUA_) +#include /* poll(2) and related definitions. */ +#else +#include /* poll(2) and related definitions. */ +#endif +#include /* Socket functions (socket(2), bind(2), etc). */ +#include /* time(2) & ctime(3). */ +#include /* gettimeofday */ +#include /* getopt(3), read(2), etc. */ +/* Include for Cygnus development environment for Windows */ +#if defined (Windows) +#include +int errno; +#endif + +#if defined(_SUA_) +extern char *optarg, *opterr; +#endif + +/* +** Constants. +** +** Please remember to add PIT service to the /etc/services file. +*/ +#define DFLT_SERVICE "PIT" /* Programmable Interdimensional Timer */ + +#define INVALID_DESC -1 /* Invalid file descriptor. */ +#define MAXCONNQLEN 3 /* Max nbr of connection requests to queue. */ +#define MAXTCPSCKTS 2 /* One TCP socket for IPv4 & one for IPv6. */ +#define MAXUDPSCKTS 2 /* One UDP socket for IPv4 & one for IPv6. */ +#define VALIDOPTS "vh:p:" /* Valid command options. */ +/* +** Simple boolean type definition. +*/ +int false = 0; +int true = 1; +/* +** Prototypes for internal helper functions. +*/ +static int openSckt( const char *service, + const char *protocol, + int desc[ ], + size_t *descSize ); +static void pit( int tSckt[ ], + size_t tScktSize, + int uSckt[ ], + size_t uScktSize ); +/* +** Global data objects. +*/ +static char hostBfr[ NI_MAXHOST ]; /* For use w/getnameinfo(3). 
*/ +static const char *pgmName; /* Program name w/o dir prefix. */ +static char servBfr[ NI_MAXSERV ]; /* For use w/getnameinfo(3). */ +static int verbose = 0; /* Verbose mode indication. */ +struct timeval tm; /* Timeval structure, used with gettimeofday() */ +char timeStr[40]; /* String for time in microseconds */ +char service_name[20]; +int need; +/* +** Usage macro for command syntax violations. +*/ +#define USAGE \ + { \ + fprintf( stderr, \ + "Usage: %s [-v] -p service \n", \ + pgmName ); \ + exit( 127 ); \ + } /* End USAGE macro. */ +/* +** Macro to terminate the program if a system call error occurs. The system +** call must be one of the usual type that returns -1 on error. +*/ +#define CHK(expr) \ + do \ + { \ + if ( (expr) == -1 ) \ + { \ + fprintf( stderr, \ + "%s (line %d): System call ERROR - %s.\n", \ + pgmName, \ + __LINE__, \ + strerror( errno ) ); \ + exit( 1 ); \ + } /* End IF system call failed. */ \ + } while ( false ) +/****************************************************************************** +* Function: main +* +* Description: +* Set up a PIT server and handle network requests. This server +* handles both TCP and UDP requests. +* +* Parameters: +* The usual argc and argv parameters to a main() function. +* +* Return Value: +* This is a daemon program and never returns. However, in the degenerate +* case where no sockets are created, the function returns zero. +******************************************************************************/ +int main( int argc, + char *argv[ ] ) +{ + int opt; + int tSckt[ MAXTCPSCKTS ]; /* Array of TCP socket descriptors. */ + size_t tScktSize = MAXTCPSCKTS; /* Size of uSckt (# of elements). */ + int uSckt[ MAXUDPSCKTS ]; /* Array of UDP socket descriptors. */ + size_t uScktSize = MAXUDPSCKTS; /* Size of uSckt (# of elements). */ + + strcpy(service_name,DFLT_SERVICE); + /* + ** Set the program name (w/o directory prefix). + */ + pgmName = strrchr( argv[ 0 ], '/' ); + pgmName = pgmName == NULL ? 
argv[ 0 ] : pgmName + 1; + /* + ** Process command options. + */ + opterr = 0; /* Turns off "invalid option" error messages. */ + while ( ( opt = getopt( argc, argv, VALIDOPTS ) ) >= 0 ) + { + switch ( opt ) + { + case 'v': /* Verbose mode. */ + { + verbose = true; + break; + } + case 'p': /* Get the port number */ + { + strcpy(service_name,optarg); + need++; + break; + } + default: + { + USAGE; + } + } /* End SWITCH on command option. */ + } /* End WHILE processing options. */ + + if(need < 1) + { + USAGE; + exit; + } + /* + ** Open both a TCP and UDP socket, for both IPv4 & IPv6, on which to receive + ** service requests. + */ + if ( ( openSckt( service_name, "tcp", tSckt, &tScktSize ) < 0 ) || + ( openSckt( service_name, "udp", uSckt, &uScktSize ) < 0 ) ) + { + exit( 1 ); + } + /* + ** Run the Programmable Interdimensional Timer server. + */ + if ( ( tScktSize > 0 ) || ( uScktSize > 0 ) ) + { + pit( tSckt, /* pit() never returns. */ + tScktSize, + uSckt, + uScktSize ); + } + /* + ** Since pit() never returns, execution only gets here if no sockets were + ** created. + */ + if ( verbose ) + { + fprintf( stderr, + "%s: No sockets opened... terminating.\n", + pgmName ); + } + return 0; +} /* End main() */ +/****************************************************************************** +* Function: openSckt +* +* Description: +* Open passive (server) sockets for the indicated inet service & protocol. +* Notice in the last sentence that "sockets" is plural. During the interim +* transition period while everyone is switching over to IPv6, the server +* application has to open two sockets on which to listen for connections... +* one for IPv4 traffic and one for IPv6 traffic. +* +* Parameters: +* service - Pointer to a character string representing the well-known port +* on which to listen (can be a service name or a decimal number). +* protocol - Pointer to a character string representing the transport layer +* protocol (only "tcp" or "udp" are valid). 
+* desc - Pointer to an array into which the socket descriptors are +* placed when opened. +* descSize - This is a value-result parameter. On input, it contains the +* max number of descriptors that can be put into 'desc' (i.e. the +* number of elements in the array). Upon return, it will contain +* the number of descriptors actually opened. Any unused slots in +* 'desc' are set to INVALID_DESC. +* +* Return Value: +* 0 on success, -1 on error. +******************************************************************************/ +static int openSckt( const char *service, + const char *protocol, + int desc[ ], + size_t *descSize ) +{ + struct addrinfo *ai; + int aiErr; + struct addrinfo *aiHead; + struct addrinfo hints = { .ai_flags = AI_PASSIVE, /* Server mode. */ + .ai_family = PF_UNSPEC }; /* IPv4 or IPv6. */ + size_t maxDescs = *descSize; + /* + ** Initialize output parameters. When the loop completes, *descSize is 0. + */ + while ( *descSize > 0 ) + { + desc[ --( *descSize ) ] = INVALID_DESC; + } + /* + ** Check which protocol is selected (only TCP and UDP are valid). + */ + if ( strcmp( protocol, "tcp" ) == 0 ) /* TCP protocol. */ + { + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + } + else if ( strcmp( protocol, "udp" ) == 0 ) /* UDP protocol. */ + { + hints.ai_socktype = SOCK_DGRAM; + hints.ai_protocol = IPPROTO_UDP; + } + else /* Invalid protocol. */ + { + fprintf( stderr, + "%s (line %d): ERROR - Unknown transport " + "layer protocol \"%s\".\n", + pgmName, + __LINE__, + protocol ); + return -1; + } + /* + ** Look up the service's "well-known" port number. Notice that NULL is being + ** passed for the 'node' parameter, and that the AI_PASSIVE flag is set in + ** 'hints'. Thus, the program is requesting passive address information. + ** The network address is initialized to :: (all zeros) for IPv6 records, or + ** 0.0.0.0 for IPv4 records. 
+ */ + if ( ( aiErr = getaddrinfo( NULL, + service, + &hints, + &aiHead ) ) != 0 ) + { + fprintf( stderr, + "%s (line %d): ERROR - %s.\n", + pgmName, + __LINE__, + gai_strerror( aiErr ) ); + return -1; + } + /* + ** For each of the address records returned, attempt to set up a passive + ** socket. + */ + for ( ai = aiHead; + ( ai != NULL ) && ( *descSize < maxDescs ); + ai = ai->ai_next ) + { + if ( verbose ) + { + /* + ** Display the current address info. Start with the protocol- + ** independent fields first. + */ + fprintf( stderr, + "Setting up a passive socket based on the " + "following address info:\n" + " ai_flags = %#02X\n" + " ai_family = %d (PF_INET = %d, PF_INET6 = %d)\n" + " ai_socktype = %d (SOCK_STREAM = %d, SOCK_DGRAM = %d)\n" + " ai_protocol = %d (IPPROTO_TCP = %d, IPPROTO_UDP = %d)\n" + " ai_addrlen = %d (sockaddr_in = %lu, " + "sockaddr_in6 = %lu)\n", + ai->ai_flags, + ai->ai_family, + PF_INET, + PF_INET6, + ai->ai_socktype, + SOCK_STREAM, + SOCK_DGRAM, + ai->ai_protocol, + IPPROTO_TCP, + IPPROTO_UDP, + ai->ai_addrlen, + sizeof( struct sockaddr_in ), + sizeof( struct sockaddr_in6 ) ); + /* + ** Now display the protocol-specific formatted socket address. Note + ** that the program is requesting that getnameinfo(3) convert the + ** host & service into numeric strings. + */ + getnameinfo( ai->ai_addr, + ai->ai_addrlen, + hostBfr, + sizeof( hostBfr ), + servBfr, + sizeof( servBfr ), + NI_NUMERICHOST | NI_NUMERICSERV ); + switch ( ai->ai_family ) + { + case PF_INET: /* IPv4 address record. */ + { + struct sockaddr_in *p = (struct sockaddr_in*) ai->ai_addr; + fprintf( stderr, + " ai_addr = sin_family: %d (AF_INET = %d, " + "AF_INET6 = %d)\n" + " sin_addr: %s\n" + " sin_port: %s\n", + p->sin_family, + AF_INET, + AF_INET6, + hostBfr, + servBfr ); + break; + } /* End CASE of IPv4. */ + case PF_INET6: /* IPv6 address record. 
*/ + { + struct sockaddr_in6 *p = (struct sockaddr_in6*) ai->ai_addr; + fprintf( stderr, + " ai_addr = sin6_family: %d (AF_INET = %d, " + "AF_INET6 = %d)\n" + " sin6_addr: %s\n" + " sin6_port: %s\n" + " sin6_flowinfo: %d\n" + " sin6_scope_id: %d\n", + p->sin6_family, + AF_INET, + AF_INET6, + hostBfr, + servBfr, + p->sin6_flowinfo, + p->sin6_scope_id ); + break; + } /* End CASE of IPv6. */ + default: /* Can never get here, but just for completeness. */ + { + fprintf( stderr, + "%s (line %d): ERROR - Unknown protocol family (%d).\n", + pgmName, + __LINE__, + ai->ai_family ); + freeaddrinfo( aiHead ); + return -1; + } /* End DEFAULT case (unknown protocol family). */ + } /* End SWITCH on protocol family. */ + } /* End IF verbose mode. */ + /* + ** Create a socket using the info in the addrinfo structure. + */ + CHK( desc[ *descSize ] = socket( ai->ai_family, + ai->ai_socktype, + ai->ai_protocol ) ); + /* + ** Here is the code that prevents "IPv4 mapped addresses", as discussed + ** in Section 22.1.3.1. If an IPv6 socket was just created, then set the + ** IPV6_V6ONLY socket option. + */ + if ( ai->ai_family == PF_INET6 ) + { +#if defined( IPV6_V6ONLY ) + /* + ** Disable IPv4 mapped addresses. + */ + int v6Only = 1; + CHK( setsockopt( desc[ *descSize ], + IPPROTO_IPV6, + IPV6_V6ONLY, + &v6Only, + sizeof( v6Only ) ) ); +#else + /* + ** IPV6_V6ONLY is not defined, so the socket option can't be set and + ** thus IPv4 mapped addresses can't be disabled. Print a warning + ** message and close the socket. Design note: If the + ** #if...#else...#endif construct were removed, then this program + ** would not compile (because IPV6_V6ONLY isn't defined). That's an + ** acceptable approach; IPv4 mapped addresses are certainly disabled + ** if the program can't build! 
However, since this program is also + ** designed to work for IPv4 sockets as well as IPv6, I decided to + ** allow the program to compile when IPV6_V6ONLY is not defined, and + ** turn it into a run-time warning rather than a compile-time error. + ** IPv4 mapped addresses are still disabled because _all_ IPv6 traffic + ** is disabled (all IPv6 sockets are closed here), but at least this + ** way the server can still service IPv4 network traffic. + */ + fprintf( stderr, + "%s (line %d): WARNING - Cannot set IPV6_V6ONLY socket " + "option. Closing IPv6 %s socket.\n", + pgmName, + __LINE__, + ai->ai_protocol == IPPROTO_TCP ? "TCP" : "UDP" ); + CHK( close( desc[ *descSize ] ) ); + continue; /* Go to top of FOR loop w/o updating *descSize! */ +#endif /* IPV6_V6ONLY */ + } /* End IF this is an IPv6 socket. */ + /* + ** Bind the socket. Again, the info from the addrinfo structure is used. + */ + CHK( bind( desc[ *descSize ], + ai->ai_addr, + ai->ai_addrlen ) ); + /* + ** If this is a TCP socket, put the socket into passive listening mode + ** (listen is only valid on connection-oriented sockets). + */ + if ( ai->ai_socktype == SOCK_STREAM ) + { + CHK( listen( desc[ *descSize ], + MAXCONNQLEN ) ); + } + /* + ** Socket set up okay. Bump index to next descriptor array element. + */ + *descSize += 1; + } /* End FOR each address info structure returned. */ + /* + ** Dummy check for unused address records. + */ + if ( verbose && ( ai != NULL ) ) + { + fprintf( stderr, + "%s (line %d): WARNING - Some address records were " + "not processed due to insufficient array space.\n", + pgmName, + __LINE__ ); + } /* End IF verbose and some address records remain unprocessed. */ + /* + ** Clean up. 
+ */ + freeaddrinfo( aiHead ); + return 0; +} /* End openSckt() */ +/****************************************************************************** +* Function: pit +* +* Description: +* Listen on a set of sockets and send the current microsecond counter +* that was produced by gettimeofday(), to any clients. This function +* never returns. +* +* Parameters: +* tSckt - Array of TCP socket descriptors on which to listen. +* tScktSize - Size of the tSckt array (nbr of elements). +* uSckt - Array of UDP socket descriptors on which to listen. +* uScktSize - Size of the uSckt array (nbr of elements). +* +* Return Value: None. +******************************************************************************/ +static void pit( int tSckt[ ], + size_t tScktSize, + int uSckt[ ], + size_t uScktSize ) +{ + char bfr[ 256 ]; + ssize_t count; + struct pollfd *desc; + size_t descSize = tScktSize + uScktSize; + int idx; + int newSckt; + struct sockaddr *sadr; + socklen_t sadrLen; + struct sockaddr_storage sockStor; + int status; + size_t timeLen; + time_t timeVal; + ssize_t wBytes; + unsigned long long secs; + int ret; + /* + ** Allocate memory for the poll(2) array. + */ + desc = malloc( descSize * sizeof( struct pollfd ) ); + if ( desc == NULL ) + { + fprintf( stderr, + "%s (line %d): ERROR - %s.\n", + pgmName, + __LINE__, + strerror( ENOMEM ) ); + exit( 1 ); + } + /* + ** Initialize the poll(2) array. + */ + for ( idx = 0; idx < descSize; idx++ ) + { + desc[ idx ].fd = idx < tScktSize ? tSckt[ idx ] + : uSckt[ idx - tScktSize ]; + desc[ idx ].events = POLLIN; + desc[ idx ].revents = 0; + } + /* + ** Main PIT server loop. Handles both TCP & UDP requests. This is + ** an interative server, and all requests are handled directly within the + ** main loop. + */ + while ( true ) /* Do forever. */ + { + /* + ** Wait for activity on one of the sockets. The DO..WHILE construct is + ** used to restart the system call in the event the process is + ** interrupted by a signal. 
+ */ + do + { + status = poll( desc, + descSize, + -1 /* Wait indefinitely for input. */ ); + } while ( ( status < 0 ) && ( errno == EINTR ) ); + CHK( status ); /* Check for a bona fide system call error. */ + /* + ** Get the current time. + */ +#if defined(Windows) + LARGE_INTEGER freq,counter; + double wintime,bigcounter; + /* For Windows the time_of_day() is useless. It increments in 55 milli + * second increments. By using the Win32api one can get access to the + * high performance measurement interfaces. With this one can get back + * into the 8 to 9 microsecond resolution. + */ + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&counter); + bigcounter=(double)counter.HighPart *(double)0xffffffff + + (double)counter.LowPart; + wintime = (double)(bigcounter/(double)freq.LowPart); + secs = (long long)(wintime * 1000000); +#else + ret = gettimeofday( &tm,0 ); + secs = ((unsigned long long)tm.tv_sec * 1000000) + + (unsigned long long)tm.tv_usec; +#endif + + ret = sprintf(timeStr,"%llu",secs); + timeLen = strlen( timeStr ); + /* + ** Process sockets with input available. + */ + for ( idx = 0; idx < descSize; idx++ ) + { + switch ( desc[ idx ].revents ) + { + case 0: /* No activity on this socket; try the next. */ + continue; + case POLLIN: /* Network activity. Go process it. */ + break; + default: /* Invalid poll events. */ + { + fprintf( stderr, + "%s (line %d): ERROR - Invalid poll event (%#02X).\n", + pgmName, + __LINE__, + desc[ idx ].revents ); + exit( 1 ); + } + } /* End SWITCH on returned poll events. */ + /* + ** Determine if this is a TCP request or UDP request. + */ + if ( idx < tScktSize ) + { + /* + ** TCP connection requested. Accept it. Notice the use of + ** the sockaddr_storage data type. + */ + sadrLen = sizeof( sockStor ); + sadr = (struct sockaddr*) &sockStor; + CHK( newSckt = accept( desc[ idx ].fd, + sadr, + &sadrLen ) ); + CHK( shutdown( newSckt, /* Server never recv's anything. 
*/ + SHUT_RD ) ); + if ( verbose ) + { + /* + ** Display the socket address of the remote client. Begin with + ** the address-independent fields. + */ + fprintf( stderr, + "Sockaddr info for new TCP client:\n" + " sa_family = %d (AF_INET = %d, AF_INET6 = %d)\n" + " addr len = %d (sockaddr_in = %lu, " + "sockaddr_in6 = %lu)\n", + sadr->sa_family, + AF_INET, + AF_INET6, + sadrLen, + sizeof( struct sockaddr_in ), + sizeof( struct sockaddr_in6 ) ); + /* + ** Display the address-specific fields. + */ + getnameinfo( sadr, + sadrLen, + hostBfr, + sizeof( hostBfr ), + servBfr, + sizeof( servBfr ), + NI_NUMERICHOST | NI_NUMERICSERV ); + /* + ** Notice that we're switching on an address family now, not a + ** protocol family. + */ + switch ( sadr->sa_family ) + { + case AF_INET: /* IPv4 address. */ + { + struct sockaddr_in *p = (struct sockaddr_in*) sadr; + fprintf( stderr, + " sin_addr = sin_family: %d\n" + " sin_addr: %s\n" + " sin_port: %s\n", + p->sin_family, + hostBfr, + servBfr ); + break; + } /* End CASE of IPv4. */ + case AF_INET6: /* IPv6 address. */ + { + struct sockaddr_in6 *p = (struct sockaddr_in6*) sadr; + fprintf( stderr, + " sin6_addr = sin6_family: %d\n" + " sin6_addr: %s\n" + " sin6_port: %s\n" + " sin6_flowinfo: %d\n" + " sin6_scope_id: %d\n", + p->sin6_family, + hostBfr, + servBfr, + p->sin6_flowinfo, + p->sin6_scope_id ); + break; + } /* End CASE of IPv6. */ + default: /* Can never get here, but for completeness. */ + { + fprintf( stderr, + "%s (line %d): ERROR - Unknown address " + "family (%d).\n", + pgmName, + __LINE__, + sadr->sa_family ); + break; + } /* End DEFAULT case (unknown address family). */ + } /* End SWITCH on address family. */ + } /* End IF verbose mode. */ + /* + ** Send the PIT to the client. + */ + wBytes = timeLen; + while ( wBytes > 0 ) + { + do + { + count = write( newSckt, + timeStr, + wBytes ); + } while ( ( count < 0 ) && ( errno == EINTR ) ); + CHK( count ); /* Check for an error. 
*/ + wBytes -= count; + } /* End WHILE there is data to send. */ + CHK( close( newSckt ) ); + } /* End IF this was a TCP connection request. */ + else + { + /* + ** This is a UDP socket, and a datagram is available. The funny + ** thing about UDP requests is that this server doesn't require any + ** client input; but it can't send the PIT unless it knows a client + ** wants the data, and the only way that can occur with UDP is if + ** the server receives a datagram from the client. Thus, the + ** server must receive _something_, but the content of the datagram + ** is irrelevant. Read in the datagram. Again note the use of + ** sockaddr_storage to receive the address. + */ + sadrLen = sizeof( sockStor ); + sadr = (struct sockaddr*) &sockStor; + CHK( count = recvfrom( desc[ idx ].fd, + bfr, + sizeof( bfr ), + 0, + sadr, + &sadrLen ) ); + /* + ** Display whatever was received on stdout. + */ + if ( verbose ) + { + ssize_t rBytes = count; + fprintf( stderr, + "%s: UDP datagram received (%ld bytes).\n", + pgmName, + count ); + while ( count > 0 ) + { + fputc( bfr[ rBytes - count-- ], + stdout ); + } + if ( bfr[ rBytes-1 ] != '\n' ) + fputc( '\n', stdout ); /* Newline also flushes stdout. */ + /* + ** Display the socket address of the remote client. Address- + ** independent fields first. + */ + fprintf( stderr, + "Remote client's sockaddr info:\n" + " sa_family = %d (AF_INET = %d, AF_INET6 = %d)\n" + " addr len = %d (sockaddr_in = %lu, " + "sockaddr_in6 = %lu)\n", + sadr->sa_family, + AF_INET, + AF_INET6, + sadrLen, + sizeof( struct sockaddr_in ), + sizeof( struct sockaddr_in6 ) ); + /* + ** Display the address-specific information. + */ + getnameinfo( sadr, + sadrLen, + hostBfr, + sizeof( hostBfr ), + servBfr, + sizeof( servBfr ), + NI_NUMERICHOST | NI_NUMERICSERV ); + switch ( sadr->sa_family ) + { + case AF_INET: /* IPv4 address. 
*/ + { + struct sockaddr_in *p = (struct sockaddr_in*) sadr; + fprintf( stderr, + " sin_addr = sin_family: %d\n" + " sin_addr: %s\n" + " sin_port: %s\n", + p->sin_family, + hostBfr, + servBfr ); + break; + } /* End CASE of IPv4 address. */ + case AF_INET6: /* IPv6 address. */ + { + struct sockaddr_in6 *p = (struct sockaddr_in6*) sadr; + fprintf( stderr, + " sin6_addr = sin6_family: %d\n" + " sin6_addr: %s\n" + " sin6_port: %s\n" + " sin6_flowinfo: %d\n" + " sin6_scope_id: %d\n", + p->sin6_family, + hostBfr, + servBfr, + p->sin6_flowinfo, + p->sin6_scope_id ); + break; + } /* End CASE of IPv6 address. */ + default: /* Can never get here, but for completeness. */ + { + fprintf( stderr, + "%s (line %d): ERROR - Unknown address " + "family (%d).\n", + pgmName, + __LINE__, + sadr->sa_family ); + break; + } /* End DEFAULT case (unknown address family). */ + } /* End SWITCH on address family. */ + } /* End IF verbose mode. */ + /* + ** Send the PIT to the client. + */ + wBytes = timeLen; + while ( wBytes > 0 ) + { + do + { + count = sendto( desc[ idx ].fd, + timeStr, + wBytes, + 0, + sadr, /* Address & address length */ + sadrLen ); /* received in recvfrom(). */ + } while ( ( count < 0 ) && ( errno == EINTR ) ); + CHK( count ); /* Check for a bona fide error. */ + wBytes -= count; + } /* End WHILE there is data to send. */ + } /* End ELSE a UDP datagram is available. */ + desc[ idx ].revents = 0; /* Clear the returned poll events. */ + } /* End FOR each socket descriptor. */ + } /* End WHILE forever. */ +} /* End pit() */ + diff --git a/src/components/appio/tests/iozone/read_telemetry b/src/components/appio/tests/iozone/read_telemetry new file mode 100644 index 0000000..1cfd252 --- /dev/null +++ b/src/components/appio/tests/iozone/read_telemetry @@ -0,0 +1,29 @@ +# +# +# The format is: +# +# All fields are space delimited. +# A # symbol in column 1 indicates a comment. +# First field: Byte offset within the file. +# Second field: Size in bytes of the I/O operation. 
+# Third field: Number of milliseconds to delay before I/O operation. +# +# This is an example of sequential 64k reader with 2 milliseconds +# before each read. +# +0 65536 2 +65536 65536 2 +131072 65536 2 +196608 65536 2 +262144 65536 2 +327680 65536 2 +393216 65536 2 +458752 65536 2 +524288 65536 2 +589824 65536 2 +655360 65536 2 +720896 65536 2 +786432 65536 2 +851968 65536 2 +917504 65536 2 +983040 65536 2 diff --git a/src/components/appio/tests/iozone/report.pl b/src/components/appio/tests/iozone/report.pl new file mode 100755 index 0000000..f9b9b30 --- /dev/null +++ b/src/components/appio/tests/iozone/report.pl @@ -0,0 +1,150 @@ +#!/usr/bin/perl +# +# arguments: one of more report files +# +# Christian Mautner , 2005-10-31 +# +# This script is based loosely on the Generate_Graph set +# of scripts that come with iozone, but is a complete re-write +# +# The main reason to write this was the need to compare the behaviour of +# two or more different setups, for tuning filesystems or +# comparing different pieces of hardware. +# +# This script is in the public domain, too short and too trivial +# to deserve a copyright. +# +# Simply run iozone like, for example, ./iozone -a -g 4G > config1.out (if your machine has 4GB) +# and then run perl report.pl config1.out +# or get another report from another box into config2.out and run +# perl report.pl config1.out config2.out +# the look in the report_* directory for .png +# +# If you don't like png or the graphic size, search for "set terminal" in this file and put whatever gnuplot +# terminal you want. 
Note I've also noticed that gnuplot switched the set terminal png syntax +# a while back, you might need "set terminal png small size 900,700" +# + + +@Reports=@ARGV; + +die "usage: $0 [...]\n" if not @Reports or grep (m|^-|, @Reports); + +die "report files must be in current directory" if grep (m|/|, @Reports); + +%columns=( + 'write' =>3, + 'read' =>5, + 'rewrite' =>4, + 'reread' =>6, + 'randread' =>7, + 'randwrite' =>8, + 'bkwdread' =>9, + 'recrewrite'=>10, + 'strideread'=>11, + 'fwrite' =>12, + 'frewrite' =>13, + 'fread' =>14, + 'freread' =>15, + ); + +# +# create output directory. the name is the concatenation +# of all report file names (minus the file extension, plus +# prefix report_) +# +$outdir="report_".join("_",map{/([^\.]+)(\..*)?/ && $1}(@Reports)); + +print STDERR "Output directory: $outdir "; + +if ( -d $outdir ) +{ + print STDERR "(removing old directory) "; + system "rm -rf $outdir"; +} + +mkdir $outdir or die "cannot make directory $outdir"; + +print STDERR "done.\nPreparing data files..."; + +foreach $report (@Reports) +{ + open(I, $report) or die "cannot open $report for reading"; + $report=~/^([^\.]+)/; + $datafile="$1.dat"; + push @datafiles, $datafile; + open(O, ">$outdir/$datafile") or die "cannot open $outdir/$datafile for writing"; + open(O2, ">$outdir/2d-$datafile") or die "cannot open $outdir/$datafile for writing"; + while() + { + next unless ( /^[\s\d]+$/ ); + @split = split(); + next unless ( @split >= 8 ); + print O; + print O2 if $split[1] == 16384 or $split[0] == $split[1]; + } + close I, O, O2; +} + +print STDERR "done.\nGenerating graphs:"; + +foreach $column (keys %columns) +{ + print STDERR " $column"; + + open(G, ">$outdir/$column.do") or die "cannot open $outdir/$column.do for writing"; + print G qq{ +set title "Iozone performance: $column" +set grid lt 2 lw 1 +set surface +set parametric +set xtics +set ytics +set logscale x 2 +set logscale y 2 +set autoscale z +#set xrange [2.**5:2.**24] +set xlabel "File size in KBytes" 
+set ylabel "Record size in Kbytes" +set zlabel "Kbytes/sec" +set style data lines +set dgrid3d 80,80,3 +#set terminal png small picsize 900 700 +set terminal png small size 900 700 +set output "$column.png" +}; + + print G "splot ". join(", ", map{qq{"$_" using 1:2:$columns{$column} title "$_"}}(@datafiles)); + + print G "\n"; + + close G; + + open(G, ">$outdir/2d-$column.do") or die "cannot open $outdir/$column.do for writing"; + print G qq{ +set title "Iozone performance: $column" +#set terminal png small picsize 450 350 +set terminal png small size 450 350 +set logscale x +set xlabel "File size in KBytes" +set ylabel "Kbytes/sec" +set output "2d-$column.png" +}; + + print G "plot ". join(", ", map{qq{"2d-$_" using 1:$columns{$column} title "$_" with lines}}(@datafiles)); + + print G "\n"; + + close G; + + if ( system("cd $outdir && gnuplot $column.do && gnuplot 2d-$column.do") ) + { + print STDERR "(failed) "; + } + else + { + print STDERR "(ok) "; + } +} + +print STDERR "done.\n"; diff --git a/src/components/appio/tests/iozone/spec.in b/src/components/appio/tests/iozone/spec.in new file mode 100644 index 0000000..b385b58 --- /dev/null +++ b/src/components/appio/tests/iozone/spec.in @@ -0,0 +1,107 @@ +Summary: Iozone Filesystem Benchmark +Name: iozone +Version: 3 +Release: 398 +License: Freeware +Group: Applications/Engineering +Source: %{name}%{version}_%{release}.tar +Buildroot: /var/tmp/%{name}-buildroot + +%description +IOzone is a filesystem benchmark tool. The benchmark generates and +measures a variety of file operations. Iozone has been ported to many machines and runs under many operating systems. + +Iozone is useful for performing a broad filesystem analysis of a vendors +computer platform. The benchmark tests file I/O performance for the following +operations: Read, write, re-read, re-write, read backwards, read strided, +fread, fwrite, random read, pread ,mmap, aio_read, aio_write. 
+ + +## +## PREP +## +%prep + +## +## SETUP and PATCH +## +%setup -n iozone3_398/src/current + + +## +## BUILD +## +## +## BUILD +## +%build +%ifarch %{ix86} + make linux +%else + %ifarch x86_64 + make linux-AMD64 + %else + %ifarch ia64 + make linux-ia64 + %else + %ifarch ppc + make linux-powerpc + %else + %ifarch ppc64 + make linux-powerpc64 + %else + %ifarch s390 + make linux-S390 + %else + %ifarch s390x + make linux-S390X + %else + %ifarch %(arm) + make linux-arm + %else + echo "No idea how to build for your arch..." + exit 1 + %endif + %endif + %endif + %endif + %endif + %endif + %endif +%endif + +## +## INSTALL +## +%install +mkdir -p $RPM_BUILD_ROOT/opt/iozone/bin +cp $RPM_BUILD_DIR/iozone3_398/src/current/iozone $RPM_BUILD_ROOT/opt/iozone/bin/ +cp $RPM_BUILD_DIR/iozone3_398/src/current/fileop $RPM_BUILD_ROOT/opt/iozone/bin/ +cp $RPM_BUILD_DIR/iozone3_398/src/current/pit_server $RPM_BUILD_ROOT/opt/iozone/bin/ +cp $RPM_BUILD_DIR/iozone3_398/src/current/Generate_Graphs $RPM_BUILD_ROOT/opt/iozone/bin/ +cp $RPM_BUILD_DIR/iozone3_398/src/current/gengnuplot.sh $RPM_BUILD_ROOT/opt/iozone/bin/ +cp $RPM_BUILD_DIR/iozone3_398/src/current/gnu3d.dem $RPM_BUILD_ROOT/opt/iozone/bin/ + +mkdir -p $RPM_BUILD_ROOT/opt/iozone/docs +cp $RPM_BUILD_DIR/iozone3_398/docs/IOzone_msword_98.pdf $RPM_BUILD_ROOT/opt/iozone/docs/ +cp $RPM_BUILD_DIR/iozone3_398/docs/Run_rules.doc $RPM_BUILD_ROOT/opt/iozone/docs/ +cp $RPM_BUILD_DIR/iozone3_398/docs/IOzone_msword_98.doc $RPM_BUILD_ROOT/opt/iozone/docs/ +cp $RPM_BUILD_DIR/iozone3_398/docs/Iozone_ps.gz $RPM_BUILD_ROOT/opt/iozone/docs/ +cp $RPM_BUILD_DIR/iozone3_398/src/current/Gnuplot.txt $RPM_BUILD_ROOT/opt/iozone/docs/ + +mkdir -p $RPM_BUILD_ROOT/opt/iozone/man/man1 +cp $RPM_BUILD_DIR/iozone3_398/docs/iozone.1 $RPM_BUILD_ROOT/opt/iozone/man/man1/ + + +## +## FILES +## +%files +%attr(755,root,root) /opt/ + + +## +## CLEAN +## +%clean +rm -rf $RPM_BUILD_ROOT diff --git a/src/components/appio/tests/iozone/write_telemetry 
b/src/components/appio/tests/iozone/write_telemetry new file mode 100644 index 0000000..8b789d1 --- /dev/null +++ b/src/components/appio/tests/iozone/write_telemetry @@ -0,0 +1,29 @@ +# +# +# The format is: +# +# All fields are space delimited. +# A # symbol in column 1 indicates a comment. +# First field: Byte offset within the file. +# Second field: Size in bytes of the I/O operation. +# Third field: Number of milliseconds to delay before I/O operation. +# +# This is an example of sequential 64k writer with 2 milliseconds +# before each write. +# +0 65536 2 +65536 65536 2 +131072 65536 2 +196608 65536 2 +262144 65536 2 +327680 65536 2 +393216 65536 2 +458752 65536 2 +524288 65536 2 +589824 65536 2 +655360 65536 2 +720896 65536 2 +786432 65536 2 +851968 65536 2 +917504 65536 2 +983040 65536 2 diff --git a/src/components/bgpm/CNKunit/Rules.CNKunit b/src/components/bgpm/CNKunit/Rules.CNKunit new file mode 100644 index 0000000..d9dee31 --- /dev/null +++ b/src/components/bgpm/CNKunit/Rules.CNKunit @@ -0,0 +1,7 @@ +# $Id$ + +COMPSRCS += components/bgpm/CNKunit/linux-CNKunit.c +COMPOBJS += linux-CNKunit.o + +linux-CNKunit.o: components/bgpm/CNKunit/linux-CNKunit.c components/bgpm/CNKunit/linux-CNKunit.h $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/bgpm/CNKunit/linux-CNKunit.c -o linux-CNKunit.o diff --git a/src/components/bgpm/CNKunit/linux-CNKunit.c b/src/components/bgpm/CNKunit/linux-CNKunit.c new file mode 100644 index 0000000..0f233e3 --- /dev/null +++ b/src/components/bgpm/CNKunit/linux-CNKunit.c @@ -0,0 +1,517 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-CNKunit.c + * @author Heike Jagode + * jagode@eecs.utk.edu + * Mods: < your name here > + * < your email address > + * BGPM / CNKunit component + * + * Tested version of bgpm (early access) + * + * @brief + * This file has the source code for a component that enables PAPI-C to + * access hardware monitoring counters for BG/Q 
through the bgpm library. + */ + +#include "linux-CNKunit.h" + +/* Declare our vector in advance */ +papi_vector_t _CNKunit_vector; + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized + */ +int +CNKUNIT_init_thread( hwd_context_t * ctx ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_init_thread\n" ); +#endif + + ( void ) ctx; + return PAPI_OK; +} + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +int +CNKUNIT_init_component( int cidx ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_init_component\n" ); +#endif + + _CNKunit_vector.cmp_info.CmpIdx = cidx; +#ifdef DEBUG_BGQ + printf( "CNKUNIT_init_component cidx = %d\n", cidx ); +#endif + + return ( PAPI_OK ); +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +int +CNKUNIT_init_control_state( hwd_control_state_t * ptr ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_init_control_state\n" ); +#endif + int retval; + + CNKUNIT_control_state_t * this_state = ( CNKUNIT_control_state_t * ) ptr; + + this_state->EventGroup = Bgpm_CreateEventSet(); + retval = _check_BGPM_error( this_state->EventGroup, "Bgpm_CreateEventSet" ); + if ( retval < 0 ) return retval; + + return PAPI_OK; +} + + +/* + * + */ +int +CNKUNIT_start( hwd_context_t * ctx, hwd_control_state_t * ptr ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_start\n" ); +#endif + ( void ) ctx; + int retval; + CNKUNIT_control_state_t * this_state = ( CNKUNIT_control_state_t * ) ptr; + + retval = Bgpm_Apply( this_state->EventGroup ); + retval = _check_BGPM_error( retval, "Bgpm_Apply" ); + if ( retval < 0 ) return retval; + + /* Bgpm_Apply() does an implicit reset; + 
hence no need to use Bgpm_ResetStart */ + retval = Bgpm_Start( this_state->EventGroup ); + retval = _check_BGPM_error( retval, "Bgpm_Start" ); + if ( retval < 0 ) return retval; + + return ( PAPI_OK ); +} + + +/* + * + */ +int +CNKUNIT_stop( hwd_context_t * ctx, hwd_control_state_t * ptr ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_stop\n" ); +#endif + ( void ) ctx; + int retval; + CNKUNIT_control_state_t * this_state = ( CNKUNIT_control_state_t * ) ptr; + + retval = Bgpm_Stop( this_state->EventGroup ); + retval = _check_BGPM_error( retval, "Bgpm_Stop" ); + if ( retval < 0 ) return retval; + + return ( PAPI_OK ); +} + + +/* + * + */ +int +CNKUNIT_read( hwd_context_t * ctx, hwd_control_state_t * ptr, + long_long ** events, int flags ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_read\n" ); +#endif + ( void ) ctx; + ( void ) flags; + int i, numEvts; + CNKUNIT_control_state_t * this_state = ( CNKUNIT_control_state_t * ) ptr; + + numEvts = Bgpm_NumEvents( this_state->EventGroup ); + if ( numEvts == 0 ) { +#ifdef DEBUG_BGPM + printf ("Error: ret value is %d for BGPM API function Bgpm_NumEvents.\n", numEvts ); +#endif + //return ( EXIT_FAILURE ); + } + + for ( i = 0; i < numEvts; i++ ) + this_state->counts[i] = _common_getEventValue( i, this_state->EventGroup ); + + *events = this_state->counts; + + return ( PAPI_OK ); +} + + +/* + * + */ +int +CNKUNIT_shutdown_thread( hwd_context_t * ctx ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_shutdown_thread\n" ); +#endif + ( void ) ctx; + return ( PAPI_OK ); +} + + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +int +CNKUNIT_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_ctl\n" ); +#endif + ( void ) ctx; + ( void ) code; + ( void ) option; + return ( PAPI_OK ); +} + + + +/* + * + */ +int +CNKUNIT_update_control_state( hwd_control_state_t 
* ptr, + NativeInfo_t * native, int count, + hwd_context_t * ctx ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_update_control_state: count = %d\n", count ); +#endif + ( void ) ctx; + int retval, index, i; + CNKUNIT_control_state_t * this_state = ( CNKUNIT_control_state_t * ) ptr; + + // Delete and re-create BGPM eventset + retval = _common_deleteRecreate( &this_state->EventGroup ); + if ( retval < 0 ) return retval; + + // otherwise, add the events to the eventset + for ( i = 0; i < count; i++ ) { + index = ( native[i].ni_event ) + OFFSET; + + native[i].ni_position = i; + +#ifdef DEBUG_BGQ + printf("CNKUNIT_update_control_state: ADD event: i = %d, index = %d\n", i, index ); +#endif + + /* Add events to the BGPM eventGroup */ + retval = Bgpm_AddEvent( this_state->EventGroup, index ); + retval = _check_BGPM_error( retval, "Bgpm_AddEvent" ); + if ( retval < 0 ) return retval; + } + + return ( PAPI_OK ); +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +int +CNKUNIT_set_domain( hwd_control_state_t * cntrl, int domain ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_set_domain\n" ); +#endif + int found = 0; + ( void ) cntrl; + + if ( PAPI_DOM_USER & domain ) + found = 1; + + if ( PAPI_DOM_KERNEL & domain ) + found = 1; + + if ( PAPI_DOM_OTHER & domain ) + found = 1; + + if ( !found ) + return ( PAPI_EINVAL ); + + return ( PAPI_OK ); +} + + +/* + * + */ +int +CNKUNIT_reset( hwd_context_t * ctx, hwd_control_state_t * ptr ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_reset\n" ); +#endif + ( void ) ctx; + int retval; + CNKUNIT_control_state_t * this_state = ( 
CNKUNIT_control_state_t * ) ptr; + + /* we can't simply call Bgpm_Reset() since PAPI doesn't have the + restriction that an EventSet has to be stopped before resetting is + possible. However, BGPM does have this restriction. + Hence we need to stop, reset and start */ + retval = Bgpm_Stop( this_state->EventGroup ); + retval = _check_BGPM_error( retval, "Bgpm_Stop" ); + if ( retval < 0 ) return retval; + + retval = Bgpm_ResetStart( this_state->EventGroup ); + retval = _check_BGPM_error( retval, "Bgpm_ResetStart" ); + if ( retval < 0 ) return retval; + + return ( PAPI_OK ); +} + + +/* + * PAPI Cleanup Eventset + * + * Destroy and re-create the BGPM / CNKunit EventSet + */ +int +CNKUNIT_cleanup_eventset( hwd_control_state_t * ctrl ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_cleanup_eventset\n" ); +#endif + int retval; + + CNKUNIT_control_state_t * this_state = ( CNKUNIT_control_state_t * ) ctrl; + + // create a new empty bgpm eventset + // reason: bgpm doesn't permit to remove events from an eventset; + // hence we delete the old eventset and create a new one + retval = _common_deleteRecreate( &this_state->EventGroup ); // HJ try to use delete() only + if ( retval < 0 ) return retval; + + return ( PAPI_OK ); +} + + +/* + * Native Event functions + */ +int +CNKUNIT_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ +#ifdef DEBUG_BGQ +// printf( "CNKUNIT_ntv_enum_events\n" ); +#endif + + switch ( modifier ) { + case PAPI_ENUM_FIRST: + *EventCode = 0; + + return ( PAPI_OK ); + break; + + case PAPI_ENUM_EVENTS: + { + int index = ( *EventCode ) + OFFSET; + + if ( index < CNKUNIT_MAX_COUNTERS ) { + *EventCode = *EventCode + 1; + return ( PAPI_OK ); + } else + return ( PAPI_ENOEVNT ); + + break; + } + default: + return ( PAPI_EINVAL ); + } + return ( PAPI_EINVAL ); +} + + +/* + * + */ +int +CNKUNIT_ntv_name_to_code( const char *name, unsigned int *event_code ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_ntv_name_to_code\n" ); +#endif + int ret; + + /* Return event id 
matching a given event label string */ + ret = Bgpm_GetEventIdFromLabel ( name ); + + if ( ret <= 0 ) { +#ifdef DEBUG_BGPM + printf ("Error: ret value is %d for BGPM API function '%s'.\n", + ret, "Bgpm_GetEventIdFromLabel" ); +#endif + return PAPI_ENOEVNT; + } + else if ( ret < OFFSET || ret > CNKUNIT_MAX_COUNTERS ) // not a CNKUnit event + return PAPI_ENOEVNT; + else + *event_code = ( ret - OFFSET ) ; + + return PAPI_OK; +} + + +/* + * + */ +int +CNKUNIT_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ +#ifdef DEBUG_BGQ + //printf( "CNKUNIT_ntv_code_to_name\n" ); +#endif + int index; + + index = ( EventCode ) + OFFSET; + + if ( index >= MAX_COUNTERS ) + return PAPI_ENOEVNT; + + strncpy( name, Bgpm_GetEventIdLabel( index ), len ); + //printf("----%s----\n", name); + + if ( name == NULL ) { +#ifdef DEBUG_BGPM + printf ("Error: ret value is NULL for BGPM API function Bgpm_GetEventIdLabel.\n" ); +#endif + return PAPI_ENOEVNT; + } + + return ( PAPI_OK ); +} + + +/* + * + */ +int +CNKUNIT_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ +#ifdef DEBUG_BGQ + //printf( "CNKUNIT_ntv_code_to_descr\n" ); +#endif + int retval, index; + + index = ( EventCode ) + OFFSET; + + retval = Bgpm_GetLongDesc( index, name, &len ); + retval = _check_BGPM_error( retval, "Bgpm_GetLongDesc" ); + if ( retval < 0 ) return retval; + + return ( PAPI_OK ); +} + + +/* + * + */ +int +CNKUNIT_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) +{ +#ifdef DEBUG_BGQ + printf( "CNKUNIT_ntv_code_to_bits\n" ); +#endif + ( void ) EventCode; + ( void ) bits; + return ( PAPI_OK ); +} + + +/* + * + */ +papi_vector_t _CNKunit_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "bgpm/CNKUnit", + .short_name = "CNKUnit", + .description = "Blue Gene/Q CNKUnit component", + .num_native_events = CNKUNIT_MAX_COUNTERS-OFFSET+1, + .num_cntrs = CNKUNIT_MAX_COUNTERS, + .num_mpx_cntrs = 
CNKUNIT_MAX_COUNTERS, + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + + .hardware_intr_sig = PAPI_INT_SIGNAL, + .hardware_intr = 1, + + .kernel_multiplex = 0, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + + } + , + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( CNKUNIT_context_t ), + .control_state = sizeof ( CNKUNIT_control_state_t ), + .reg_value = sizeof ( CNKUNIT_register_t ), + .reg_alloc = sizeof ( CNKUNIT_reg_alloc_t ), + } + , + /* function pointers in this component */ + .init_thread = CNKUNIT_init_thread, + .init_component = CNKUNIT_init_component, + .init_control_state = CNKUNIT_init_control_state, + .start = CNKUNIT_start, + .stop = CNKUNIT_stop, + .read = CNKUNIT_read, + .shutdown_thread = CNKUNIT_shutdown_thread, + .cleanup_eventset = CNKUNIT_cleanup_eventset, + .ctl = CNKUNIT_ctl, + + .update_control_state = CNKUNIT_update_control_state, + .set_domain = CNKUNIT_set_domain, + .reset = CNKUNIT_reset, + + .ntv_name_to_code = CNKUNIT_ntv_name_to_code, + .ntv_enum_events = CNKUNIT_ntv_enum_events, + .ntv_code_to_name = CNKUNIT_ntv_code_to_name, + .ntv_code_to_descr = CNKUNIT_ntv_code_to_descr, + .ntv_code_to_bits = CNKUNIT_ntv_code_to_bits +}; diff --git a/src/components/bgpm/CNKunit/linux-CNKunit.h b/src/components/bgpm/CNKunit/linux-CNKunit.h new file mode 100644 index 0000000..9a43c2b --- /dev/null +++ b/src/components/bgpm/CNKunit/linux-CNKunit.h @@ -0,0 +1,67 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-CNKunit.h + * @author Heike Jagode + * jagode@eecs.utk.edu + * Mods: < your name here > + * < your email address > + * BGPM / CNKunit component + * + * Tested version of bgpm (early access) + * + * 
@brief + * This file has the source code for a component that enables PAPI-C to + * access hardware monitoring counters for BG/Q through the bgpm library. + */ + +#ifndef _PAPI_CNKUNIT_H +#define _PAPI_CNKUNIT_H + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" +#include "extras.h" +#include "../../../linux-bgq-common.h" + + +/************************* DEFINES SECTION *********************************** + *******************************************************************************/ + +/* this number assumes that there will never be more events than indicated */ +#define CNKUNIT_MAX_COUNTERS PEVT_CNKUNIT_LAST_EVENT +#define OFFSET ( PEVT_NWUNIT_LAST_EVENT + 1 ) + + +/** Structure that stores private information of each event */ +typedef struct CNKUNIT_register +{ + unsigned int selector; + /* Signifies which counter slot is being used */ + /* Indexed from 1 as 0 has a special meaning */ +} CNKUNIT_register_t; + + +typedef struct CNKUNIT_reg_alloc +{ + CNKUNIT_register_t ra_bits; +} CNKUNIT_reg_alloc_t; + + +typedef struct CNKUNIT_control_state +{ + int EventGroup; + long long counts[CNKUNIT_MAX_COUNTERS]; +} CNKUNIT_control_state_t; + + +typedef struct CNKUNIT_context +{ + CNKUNIT_control_state_t state; +} CNKUNIT_context_t; + + +#endif /* _PAPI_CNKUNIT_H */ diff --git a/src/components/bgpm/IOunit/Rules.IOunit b/src/components/bgpm/IOunit/Rules.IOunit new file mode 100644 index 0000000..6c73702 --- /dev/null +++ b/src/components/bgpm/IOunit/Rules.IOunit @@ -0,0 +1,7 @@ +# $Id$ + +COMPSRCS += components/bgpm/IOunit/linux-IOunit.c +COMPOBJS += linux-IOunit.o + +linux-IOunit.o: components/bgpm/IOunit/linux-IOunit.c components/bgpm/IOunit/linux-IOunit.h $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/bgpm/IOunit/linux-IOunit.c -o linux-IOunit.o diff --git a/src/components/bgpm/IOunit/linux-IOunit.c b/src/components/bgpm/IOunit/linux-IOunit.c new file mode 100644 index 0000000..429f070 --- /dev/null +++ 
b/src/components/bgpm/IOunit/linux-IOunit.c @@ -0,0 +1,684 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-IOunit.c + * @author Heike Jagode + * jagode@eecs.utk.edu + * Mods: < your name here > + * < your email address > + * BGPM / IOunit component + * + * Tested version of bgpm (early access) + * + * @brief + * This file has the source code for a component that enables PAPI-C to + * access hardware monitoring counters for BG/Q through the bgpm library. + */ + +#include "linux-IOunit.h" + +/* Declare our vector in advance */ +papi_vector_t _IOunit_vector; + +/* prototypes */ +void user_signal_handler_IOUNIT( int hEvtSet, uint64_t address, uint64_t ovfVector, const ucontext_t *pContext ); + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized + */ +int +IOUNIT_init_thread( hwd_context_t * ctx ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_init_thread\n" ); +#endif + + ( void ) ctx; + return PAPI_OK; +} + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +int +IOUNIT_init_component( int cidx ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_init_component\n" ); +#endif + + _IOunit_vector.cmp_info.CmpIdx = cidx; +#ifdef DEBUG_BGQ + printf( "IOUNIT_init_component cidx = %d\n", cidx ); +#endif + + return ( PAPI_OK ); +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +int +IOUNIT_init_control_state( hwd_control_state_t * ptr ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_init_control_state\n" ); +#endif + int retval; + + IOUNIT_control_state_t * this_state = ( IOUNIT_control_state_t * ) ptr; + 
+ this_state->EventGroup = Bgpm_CreateEventSet(); + retval = _check_BGPM_error( this_state->EventGroup, "Bgpm_CreateEventSet" ); + if ( retval < 0 ) return retval; + + // initialize overflow flag to OFF (0) + this_state->overflow = 0; + this_state->overflow_count = 0; + + return PAPI_OK; +} + + +/* + * + */ +int +IOUNIT_start( hwd_context_t * ctx, hwd_control_state_t * ptr ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_start\n" ); +#endif + ( void ) ctx; + int retval; + IOUNIT_control_state_t * this_state = ( IOUNIT_control_state_t * ) ptr; + + retval = Bgpm_ResetStart( this_state->EventGroup ); + retval = _check_BGPM_error( retval, "Bgpm_ResetStart" ); + if ( retval < 0 ) return retval; + + return ( PAPI_OK ); +} + + +/* + * + */ +int +IOUNIT_stop( hwd_context_t * ctx, hwd_control_state_t * ptr ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_stop\n" ); +#endif + ( void ) ctx; + int retval; + IOUNIT_control_state_t * this_state = ( IOUNIT_control_state_t * ) ptr; + + retval = Bgpm_Stop( this_state->EventGroup ); + retval = _check_BGPM_error( retval, "Bgpm_Stop" ); + if ( retval < 0 ) return retval; + + return ( PAPI_OK ); +} + + +/* + * + */ +int +IOUNIT_read( hwd_context_t * ctx, hwd_control_state_t * ptr, + long_long ** events, int flags ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_read\n" ); +#endif + ( void ) ctx; + ( void ) flags; + int i, numEvts; + IOUNIT_control_state_t * this_state = ( IOUNIT_control_state_t * ) ptr; + + numEvts = Bgpm_NumEvents( this_state->EventGroup ); + if ( numEvts == 0 ) { +#ifdef DEBUG_BGPM + printf ("Error: ret value is %d for BGPM API function Bgpm_NumEvents.\n", numEvts ); +#endif + //return ( EXIT_FAILURE ); + } + + for ( i = 0; i < numEvts; i++ ) + this_state->counts[i] = _common_getEventValue( i, this_state->EventGroup ); + + *events = this_state->counts; + + return ( PAPI_OK ); +} + + +/* + * + */ +int +IOUNIT_shutdown_thread( hwd_context_t * ctx ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_shutdown_thread\n" ); +#endif + + ( void ) ctx; + return 
( PAPI_OK ); +} + + +/* + * user_signal_handler + * + * This function is used when hardware overflows are working or when + * software overflows are forced + */ +void +user_signal_handler_IOUNIT( int hEvtSet, uint64_t address, uint64_t ovfVector, const ucontext_t *pContext ) +{ +#ifdef DEBUG_BGQ + printf( "user_signal_handler_IOUNIT\n" ); +#endif + ( void ) address; + int retval; + unsigned i; + int isHardware = 1; + int cidx = _IOunit_vector.cmp_info.CmpIdx; + long_long overflow_bit = 0; + caddr_t address1; + _papi_hwi_context_t ctx; + ctx.ucontext = ( hwd_ucontext_t * ) pContext; + ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 ); + EventSetInfo_t *ESI; + ESI = thread->running_eventset[cidx]; + // Get the indices of all events which have overflowed. + unsigned ovfIdxs[BGPM_MAX_OVERFLOW_EVENTS]; + unsigned len = BGPM_MAX_OVERFLOW_EVENTS; + + retval = Bgpm_GetOverflowEventIndices( hEvtSet, ovfVector, ovfIdxs, &len ); + if ( retval < 0 ) { +#ifdef DEBUG_BGPM + printf ( "Error: ret value is %d for BGPM API function Bgpm_GetOverflowEventIndices.\n", + retval ); +#endif + return; + } + + if ( thread == NULL ) { + PAPIERROR( "thread == NULL in user_signal_handler!" 
); + return; + } + + if ( ESI == NULL ) { + PAPIERROR( "ESI == NULL in user_signal_handler!"); + return; + } + + if ( ESI->overflow.flags == 0 ) { + PAPIERROR( "ESI->overflow.flags == 0 in user_signal_handler!"); + return; + } + + for ( i = 0; i < len; i++ ) { + uint64_t hProf; + Bgpm_GetEventUser1( hEvtSet, ovfIdxs[i], &hProf ); + if ( hProf ) { + overflow_bit ^= 1 << ovfIdxs[i]; + break; + } + + } + + if ( ESI->overflow.flags & PAPI_OVERFLOW_FORCE_SW ) { +#ifdef DEBUG_BGQ + printf("OVERFLOW_SOFTWARE\n"); +#endif + address1 = GET_OVERFLOW_ADDRESS( ctx ); + _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address1, NULL, 0, 0, &thread, cidx ); + return; + } + else if ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) { +#ifdef DEBUG_BGQ + printf("OVERFLOW_HARDWARE\n"); +#endif + address1 = GET_OVERFLOW_ADDRESS( ctx ); + _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address1, &isHardware, overflow_bit, 0, &thread, cidx ); + } + else { +#ifdef DEBUG_BGQ + printf("OVERFLOW_NONE\n"); +#endif + PAPIERROR( "ESI->overflow.flags is set to something other than PAPI_OVERFLOW_HARDWARE or PAPI_OVERFLOW_FORCE_SW (%#x)", thread->running_eventset[cidx]->overflow.flags); + } +} + + +/* + * Set Overflow + * + * This is commented out in BG/L/P - need to explore and complete... + * However, with true 64-bit counters in BG/Q and all counters for PAPI + * always starting from a true zero (we don't allow write...), the possibility + * for overflow is remote at best... 
+ */ +int +IOUNIT_set_overflow( EventSetInfo_t * ESI, int EventIndex, int threshold ) +{ +#ifdef DEBUG_BGQ + printf("BEGIN IOUNIT_set_overflow\n"); +#endif + IOUNIT_control_state_t * this_state = ( IOUNIT_control_state_t * ) ESI->ctl_state; + int retval; + int evt_idx; + + evt_idx = ESI->EventInfoArray[EventIndex].pos[0]; + SUBDBG( "Hardware counter %d (vs %d) used in overflow, threshold %d\n", + evt_idx, EventIndex, threshold ); +#ifdef DEBUG_BGQ + printf( "Hardware counter %d (vs %d) used in overflow, threshold %d\n", + evt_idx, EventIndex, threshold ); +#endif + /* If this counter isn't set to overflow, it's an error */ + if ( threshold == 0 ) { + /* Remove the signal handler */ + retval = _papi_hwi_stop_signal( _IOunit_vector.cmp_info.hardware_intr_sig ); + if ( retval != PAPI_OK ) + return ( retval ); + } + else { + this_state->overflow = 1; + this_state->overflow_count++; + this_state->overflow_list[this_state->overflow_count-1].threshold = threshold; + this_state->overflow_list[this_state->overflow_count-1].EventIndex = evt_idx; + +#ifdef DEBUG_BGQ + printf( "IOUNIT_set_overflow: Enable the signal handler\n" ); +#endif + /* Enable the signal handler */ + retval = _papi_hwi_start_signal( _IOunit_vector.cmp_info.hardware_intr_sig, + NEED_CONTEXT, + _IOunit_vector.cmp_info.CmpIdx ); + if ( retval != PAPI_OK ) + return ( retval ); + + retval = _common_set_overflow_BGPM( this_state->EventGroup, + this_state->overflow_list[this_state->overflow_count-1].EventIndex, + this_state->overflow_list[this_state->overflow_count-1].threshold, + user_signal_handler_IOUNIT ); + if ( retval < 0 ) return retval; + } + + return ( PAPI_OK ); +} + + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +int +IOUNIT_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_ctl\n" ); +#endif + + ( 
void ) ctx; + ( void ) code; + ( void ) option; + return ( PAPI_OK ); +} + + +/* + * + */ +int +IOUNIT_update_control_state( hwd_control_state_t * ptr, + NativeInfo_t * native, int count, + hwd_context_t * ctx ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_update_control_state: count = %d\n", count ); +#endif + ( void ) ctx; + int retval, index, i, k; + IOUNIT_control_state_t * this_state = ( IOUNIT_control_state_t * ) ptr; + + // Delete and re-create BGPM eventset + retval = _common_deleteRecreate( &this_state->EventGroup ); + if ( retval < 0 ) return retval; + +#ifdef DEBUG_BGQ + printf( "IOUNIT_update_control_state: EventGroup=%d, overflow = %d\n", + this_state->EventGroup, this_state->overflow ); +#endif + + // otherwise, add the events to the eventset + for ( i = 0; i < count; i++ ) { + index = ( native[i].ni_event ) + OFFSET; + + native[i].ni_position = i; + +#ifdef DEBUG_BGQ + printf("IOUNIT_update_control_state: ADD event: i = %d, index = %d\n", i, index ); +#endif + + /* Add events to the BGPM eventGroup */ + retval = Bgpm_AddEvent( this_state->EventGroup, index ); + retval = _check_BGPM_error( retval, "Bgpm_AddEvent" ); + if ( retval < 0 ) return retval; + } + + // since update_control_state trashes overflow settings, this puts things + // back into balance for BGPM + if ( 1 == this_state->overflow ) { + for ( k = 0; k < this_state->overflow_count; k++ ) { + retval = _common_set_overflow_BGPM( this_state->EventGroup, + this_state->overflow_list[k].EventIndex, + this_state->overflow_list[k].threshold, + user_signal_handler_IOUNIT ); + if ( retval < 0 ) return retval; + } + } + + return ( PAPI_OK ); +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * 
PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +int +IOUNIT_set_domain( hwd_control_state_t * cntrl, int domain ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_set_domain\n" ); +#endif + int found = 0; + ( void ) cntrl; + + if ( PAPI_DOM_USER & domain ) + found = 1; + + if ( PAPI_DOM_KERNEL & domain ) + found = 1; + + if ( PAPI_DOM_OTHER & domain ) + found = 1; + + if ( !found ) + return ( PAPI_EINVAL ); + + return ( PAPI_OK ); +} + + +/* + * + */ +int +IOUNIT_reset( hwd_context_t * ctx, hwd_control_state_t * ptr ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_reset\n" ); +#endif + ( void ) ctx; + int retval; + IOUNIT_control_state_t * this_state = ( IOUNIT_control_state_t * ) ptr; + + /* we can't simply call Bgpm_Reset() since PAPI doesn't have the + restriction that an EventSet has to be stopped before resetting is + possible. However, BGPM does have this restriction. + Hence we need to stop, reset and start */ + retval = Bgpm_Stop( this_state->EventGroup ); + retval = _check_BGPM_error( retval, "Bgpm_Stop" ); + if ( retval < 0 ) return retval; + + retval = Bgpm_ResetStart( this_state->EventGroup ); + retval = _check_BGPM_error( retval, "Bgpm_ResetStart" ); + if ( retval < 0 ) return retval; + + return ( PAPI_OK ); +} + + +/* + * PAPI Cleanup Eventset + * + * Destroy and re-create the BGPM / IOunit EventSet + */ +int +IOUNIT_cleanup_eventset( hwd_control_state_t * ctrl ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_cleanup_eventset\n" ); +#endif + int retval; + + IOUNIT_control_state_t * this_state = ( IOUNIT_control_state_t * ) ctrl; + + // create a new empty bgpm eventset + // reason: bgpm doesn't permit to remove events from an eventset; + // hence we delete the old eventset and create a new one + retval = _common_deleteRecreate( &this_state->EventGroup ); // HJ try to use delete() only + if ( retval < 0 ) return retval; + + // set overflow flag to OFF (0) + this_state->overflow = 0; + this_state->overflow_count = 0; 
+ + return ( PAPI_OK ); +} + + +/* + * Native Event functions + */ +int +IOUNIT_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ +#ifdef DEBUG_BGQ + //printf( "IOUNIT_ntv_enum_events\n" ); +#endif + + switch ( modifier ) { + case PAPI_ENUM_FIRST: + *EventCode = 0; + + return ( PAPI_OK ); + break; + + case PAPI_ENUM_EVENTS: + { + int index = ( *EventCode ) + OFFSET; + + if ( index < IOUNIT_MAX_EVENTS ) { + *EventCode = *EventCode + 1; + return ( PAPI_OK ); + } else + return ( PAPI_ENOEVNT ); + + break; + } + default: + return ( PAPI_EINVAL ); + } + return ( PAPI_EINVAL ); +} + + +/* + * + */ +int +IOUNIT_ntv_name_to_code( const char *name, unsigned int *event_code ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_ntv_name_to_code\n" ); +#endif + int ret; + + /* Return event id matching a given event label string */ + ret = Bgpm_GetEventIdFromLabel ( name ); + + if ( ret <= 0 ) { +#ifdef DEBUG_BGPM + printf ("Error: ret value is %d for BGPM API function '%s'.\n", + ret, "Bgpm_GetEventIdFromLabel" ); +#endif + return PAPI_ENOEVNT; + } + else if ( ret < OFFSET || ret > IOUNIT_MAX_EVENTS ) // not an IOUnit event + return PAPI_ENOEVNT; + else + *event_code = ( ret - OFFSET ) ; + + return PAPI_OK; +} + + +/* + * + */ +int +IOUNIT_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ +#ifdef DEBUG_BGQ + //printf( "IOUNIT_ntv_code_to_name\n" ); +#endif + int index; + + index = ( EventCode ) + OFFSET; + + if ( index >= MAX_COUNTERS ) + return PAPI_ENOEVNT; + + strncpy( name, Bgpm_GetEventIdLabel( index ), len ); + + if ( name == NULL ) { +#ifdef DEBUG_BGPM + printf ("Error: ret value is NULL for BGPM API function Bgpm_GetEventIdLabel.\n" ); +#endif + return PAPI_ENOEVNT; + } + + return ( PAPI_OK ); +} + + +/* + * + */ +int +IOUNIT_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ +#ifdef DEBUG_BGQ + //printf( "IOUNIT_ntv_code_to_descr\n" ); +#endif + int retval, index; + + index = ( EventCode ) + OFFSET; + + retval = Bgpm_GetLongDesc( index, 
name, &len ); + retval = _check_BGPM_error( retval, "Bgpm_GetLongDesc" ); + if ( retval < 0 ) return retval; + + return ( PAPI_OK ); +} + + +/* + * + */ +int +IOUNIT_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) +{ +#ifdef DEBUG_BGQ + printf( "IOUNIT_ntv_code_to_bits\n" ); +#endif + ( void ) EventCode; + ( void ) bits; + return ( PAPI_OK ); +} + + +/* + * + */ +papi_vector_t _IOunit_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "bgpm/IOUnit", + .short_name = "IOUnit", + .description = "Blue Gene/Q IOUnit component", + .num_native_events = IOUNIT_MAX_EVENTS-OFFSET+1, + .num_cntrs = IOUNIT_MAX_COUNTERS, + .num_mpx_cntrs = IOUNIT_MAX_COUNTERS, + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + + .hardware_intr_sig = PAPI_INT_SIGNAL, + .hardware_intr = 1, + + .kernel_multiplex = 0, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + } + , + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( IOUNIT_context_t ), + .control_state = sizeof ( IOUNIT_control_state_t ), + .reg_value = sizeof ( IOUNIT_register_t ), + .reg_alloc = sizeof ( IOUNIT_reg_alloc_t ), + } + , + /* function pointers in this component */ + .init_thread = IOUNIT_init_thread, + .init_component = IOUNIT_init_component, + .init_control_state = IOUNIT_init_control_state, + .start = IOUNIT_start, + .stop = IOUNIT_stop, + .read = IOUNIT_read, + .shutdown_thread = IOUNIT_shutdown_thread, + .set_overflow = IOUNIT_set_overflow, + .cleanup_eventset = IOUNIT_cleanup_eventset, + .ctl = IOUNIT_ctl, + + .update_control_state = IOUNIT_update_control_state, + .set_domain = IOUNIT_set_domain, + .reset = IOUNIT_reset, + + .ntv_name_to_code = IOUNIT_ntv_name_to_code, + 
.ntv_enum_events = IOUNIT_ntv_enum_events, + .ntv_code_to_name = IOUNIT_ntv_code_to_name, + .ntv_code_to_descr = IOUNIT_ntv_code_to_descr, + .ntv_code_to_bits = IOUNIT_ntv_code_to_bits +}; diff --git a/src/components/bgpm/IOunit/linux-IOunit.h b/src/components/bgpm/IOunit/linux-IOunit.h new file mode 100644 index 0000000..65062d6 --- /dev/null +++ b/src/components/bgpm/IOunit/linux-IOunit.h @@ -0,0 +1,76 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-IOunit.h + * @author Heike Jagode + * jagode@eecs.utk.edu + * Mods: < your name here > + * < your email address > + * BGPM / IOunit component + * + * Tested version of bgpm (early access) + * + * @brief + * This file has the source code for a component that enables PAPI-C to + * access hardware monitoring counters for BG/Q through the bgpm library. + */ + +#ifndef _PAPI_IOUNIT_H +#define _PAPI_IOUNIT_H + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" +#include "extras.h" +#include "../../../linux-bgq-common.h" + + +/************************* DEFINES SECTION *********************************** + *******************************************************************************/ + +/* this number assumes that there will never be more events than indicated */ +#define IOUNIT_MAX_COUNTERS UPC_C_IOSRAM_NUM_COUNTERS +#define IOUNIT_MAX_EVENTS PEVT_IOUNIT_LAST_EVENT +#define OFFSET ( PEVT_L2UNIT_LAST_EVENT + 1 ) + + +/** Structure that stores private information of each event */ +typedef struct IOUNIT_register +{ + unsigned int selector; + /* Signifies which counter slot is being used */ + /* Indexed from 1 as 0 has a special meaning */ +} IOUNIT_register_t; + + +typedef struct IOUNIT_reg_alloc +{ + IOUNIT_register_t ra_bits; +} IOUNIT_reg_alloc_t; + +typedef struct IOUNIT_overflow +{ + int threshold; + int EventIndex; +} IOUNIT_overflow_t; + +typedef struct IOUNIT_control_state +{ + int EventGroup; + 
int overflow; // overflow enable + int overflow_count; + IOUNIT_overflow_t overflow_list[512]; + long long counts[IOUNIT_MAX_COUNTERS]; +} IOUNIT_control_state_t; + + +typedef struct IOUNIT_context +{ + IOUNIT_control_state_t state; +} IOUNIT_context_t; + + +#endif /* _PAPI_IOUNIT_H */ diff --git a/src/components/bgpm/L2unit/Rules.L2unit b/src/components/bgpm/L2unit/Rules.L2unit new file mode 100644 index 0000000..31e1bb5 --- /dev/null +++ b/src/components/bgpm/L2unit/Rules.L2unit @@ -0,0 +1,7 @@ +# $Id$ + +COMPSRCS += components/bgpm/L2unit/linux-L2unit.c +COMPOBJS += linux-L2unit.o + +linux-L2unit.o: components/bgpm/L2unit/linux-L2unit.c components/bgpm/L2unit/linux-L2unit.h $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/bgpm/L2unit/linux-L2unit.c -o linux-L2unit.o diff --git a/src/components/bgpm/L2unit/linux-L2unit.c b/src/components/bgpm/L2unit/linux-L2unit.c new file mode 100644 index 0000000..6813a08 --- /dev/null +++ b/src/components/bgpm/L2unit/linux-L2unit.c @@ -0,0 +1,729 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-L2unit.c + * @author Heike Jagode + * jagode@eecs.utk.edu + * Mods: < your name here > + * < your email address > + * BGPM / L2unit component + * + * Tested version of bgpm (early access) + * + * @brief + * This file has the source code for a component that enables PAPI-C to + * access hardware monitoring counters for BG/Q through the bgpm library. 
+ */
+
+#include "linux-L2unit.h"
+
+/* Declare our vector in advance */
+papi_vector_t _L2unit_vector;
+
+/* prototypes */
+void user_signal_handler_L2UNIT( int hEvtSet, uint64_t address, uint64_t ovfVector, const ucontext_t *pContext );
+
+/*****************************************************************************
+ ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS *************
+ *****************************************************************************/
+
+/*
+ * This is called whenever a thread is initialized; this component keeps no per-thread state, so ctx is ignored
+ */
+int
+L2UNIT_init_thread( hwd_context_t * ctx )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_init_thread\n" );
+#endif
+
+    ( void ) ctx;
+    return PAPI_OK;
+}
+
+
+/* Initialize hardware counters, setup the function vector table
+ * and get hardware information, this routine is called when the
+ * PAPI process is initialized (IE PAPI_library_init).  Here it only records the component index in _L2unit_vector.
+ */
+int
+L2UNIT_init_component( int cidx )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_init_component\n" );
+#endif
+
+    _L2unit_vector.cmp_info.CmpIdx = cidx;
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_init_component cidx = %d\n", cidx );
+#endif
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Control of counters (Reading/Writing/Starting/Stopping/Setup)
+ * functions.  Creates the BGPM event set for this control state and clears the overflow/applied flags.
+ */
+int
+L2UNIT_init_control_state( hwd_control_state_t * ptr )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_init_control_state\n" );
+#endif
+    int retval;
+
+    L2UNIT_control_state_t * this_state = ( L2UNIT_control_state_t * ) ptr;
+
+    this_state->EventGroup = Bgpm_CreateEventSet();
+    retval = _check_BGPM_error( this_state->EventGroup, "Bgpm_CreateEventSet" );
+    if ( retval < 0 ) return retval;
+
+    // initialize overflow flag to OFF (0)
+    this_state->overflow = 0;
+    this_state->overflow_count = 0;
+    // initialized BGPM eventGroup flag to NOT applied yet (0)
+    this_state->bgpm_eventset_applied = 0;
+
+    return PAPI_OK;
+}
+
+
+/*
+ * Apply the BGPM event group, remember that it has been applied, and start counting.
+ */
+int
+L2UNIT_start( hwd_context_t * ctx, hwd_control_state_t * ptr )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_start\n" );
+#endif
+    ( void ) ctx;
+    int retval;
+    L2UNIT_control_state_t * this_state = ( L2UNIT_control_state_t * ) ptr;
+
+    retval = Bgpm_Apply( this_state->EventGroup );
+    retval = _check_BGPM_error( retval, "Bgpm_Apply" );
+    if ( retval < 0 ) return retval;
+
+    // set flag to 1: BGPM eventGroup HAS BEEN applied
+    this_state->bgpm_eventset_applied = 1;
+
+    /* Bgpm_Apply() does an implicit reset;
+     hence no need to use Bgpm_ResetStart */
+    retval = Bgpm_Start( this_state->EventGroup );
+    retval = _check_BGPM_error( retval, "Bgpm_Start" );
+    if ( retval < 0 ) return retval;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Stop counting on the BGPM event group of this control state.
+ */
+int
+L2UNIT_stop( hwd_context_t * ctx, hwd_control_state_t * ptr )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_stop\n" );
+#endif
+    ( void ) ctx;
+    int retval;
+    L2UNIT_control_state_t * this_state = ( L2UNIT_control_state_t * ) ptr;
+
+    retval = Bgpm_Stop( this_state->EventGroup );
+    retval = _check_BGPM_error( retval, "Bgpm_Stop" );
+    if ( retval < 0 ) return retval;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Read every event of the event group into this_state->counters and return that array through *events.
+ */
+int
+L2UNIT_read( hwd_context_t * ctx, hwd_control_state_t * ptr,
+             long_long ** events, int flags )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_read\n" );
+#endif
+    ( void ) ctx;
+    ( void ) flags;
+    int i, numEvts;
+    L2UNIT_control_state_t * this_state = ( L2UNIT_control_state_t * ) ptr;
+
+    numEvts = Bgpm_NumEvents( this_state->EventGroup );
+    if ( numEvts == 0 ) {
+#ifdef DEBUG_BGPM
+        printf ("Error: ret value is %d for BGPM API function Bgpm_NumEvents.\n", numEvts );
+#endif
+        //return ( EXIT_FAILURE );
+    }
+
+    for ( i = 0; i < numEvts; i++ )
+        this_state->counters[i] = _common_getEventValue( i, this_state->EventGroup );
+
+    *events = this_state->counters;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Per-thread shutdown hook; nothing is released here and ctx is ignored.
+ */
+int
+L2UNIT_shutdown_thread( hwd_context_t * ctx )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_shutdown_thread\n" );
+#endif
+
+    ( void ) ctx;
+    return ( PAPI_OK );
+}
+
+
+
+
+/*
+ * user_signal_handler
+ *
+ * This function is used when hardware overflows are 
working or when
+ * software overflows are forced.  It maps the BGPM overflow vector to a PAPI overflow bit and hands it to _papi_hwi_dispatch_overflow_signal().
+ */
+void
+user_signal_handler_L2UNIT( int hEvtSet, uint64_t address, uint64_t ovfVector, const ucontext_t *pContext )
+{
+#ifdef DEBUG_BGQ
+    printf( "user_signal_handler_L2UNIT\n" );
+#endif
+    ( void ) address;
+    int retval;
+    unsigned i;
+    int isHardware = 1;
+    int cidx = _L2unit_vector.cmp_info.CmpIdx;
+    long_long overflow_bit = 0;
+    caddr_t address1;
+    _papi_hwi_context_t ctx;
+    ctx.ucontext = ( hwd_ucontext_t * ) pContext;
+    ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 );
+    EventSetInfo_t *ESI;
+    /* ESI is fetched further down, once thread is known to be non-NULL */
+    // Get the indices of all events which have overflowed.
+    unsigned ovfIdxs[BGPM_MAX_OVERFLOW_EVENTS];
+    unsigned len = BGPM_MAX_OVERFLOW_EVENTS;
+
+    retval = Bgpm_GetOverflowEventIndices( hEvtSet, ovfVector, ovfIdxs, &len );
+    if ( retval < 0 ) {
+#ifdef DEBUG_BGPM
+        printf ( "Error: ret value is %d for BGPM API function Bgpm_GetOverflowEventIndices.\n",
+                 retval );
+#endif
+        return;
+    }
+
+    if ( thread == NULL ) {
+        PAPIERROR( "thread == NULL in user_signal_handler!" );
+        return;
+    }
+    ESI = thread->running_eventset[cidx];    /* safe: thread was checked just above */
+    if ( ESI == NULL ) {
+        PAPIERROR( "ESI == NULL in user_signal_handler!");
+        return;
+    }
+
+    if ( ESI->overflow.flags == 0 ) {
+        PAPIERROR( "ESI->overflow.flags == 0 in user_signal_handler!");
+        return;
+    }
+
+    for ( i = 0; i < len; i++ ) {
+        uint64_t hProf = 0;    /* stays 0 if Bgpm_GetEventUser1 does not fill it in */
+        Bgpm_GetEventUser1( hEvtSet, ovfIdxs[i], &hProf );
+        if ( hProf ) {
+            overflow_bit ^= ( long_long ) 1 << ovfIdxs[i];    /* shift in the width of the mask, not of int */
+            break;
+        }
+
+    }
+
+    if ( ESI->overflow.flags & PAPI_OVERFLOW_FORCE_SW ) {
+#ifdef DEBUG_BGQ
+        printf("OVERFLOW_SOFTWARE\n");
+#endif
+        address1 = GET_OVERFLOW_ADDRESS( ctx );
+        _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address1, NULL, 0, 0, &thread, cidx );
+        return;
+    }
+    else if ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) {
+#ifdef DEBUG_BGQ
+        printf("OVERFLOW_HARDWARE\n");
+#endif
+        address1 = GET_OVERFLOW_ADDRESS( ctx );
+        _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address1, &isHardware, overflow_bit, 0, &thread, cidx );
+    }
+    else {
+#ifdef DEBUG_BGQ
+        printf("OVERFLOW_NONE\n");
+#endif
+        PAPIERROR( "ESI->overflow.flags is set to something other than PAPI_OVERFLOW_HARDWARE or PAPI_OVERFLOW_FORCE_SW (%#x)", thread->running_eventset[cidx]->overflow.flags);
+    }
+}
+
+
+/*
+ * Set Overflow
+ *
+ * This is commented out in BG/L/P - need to explore and complete...
+ * However, with true 64-bit counters in BG/Q and all counters for PAPI
+ * always starting from a true zero (we don't allow write...), the possibility
+ * for overflow is remote at best... 
+ */
+int
+L2UNIT_set_overflow( EventSetInfo_t * ESI, int EventIndex, int threshold )
+{
+#ifdef DEBUG_BGQ
+    printf("BEGIN L2UNIT_set_overflow\n");
+#endif
+    L2UNIT_control_state_t * this_state = ( L2UNIT_control_state_t * ) ESI->ctl_state;
+    int retval;
+    int evt_idx;
+
+    /*
+     * In case a BGPM eventGroup HAS BEEN applied or attached before
+     * overflow is set, delete the eventGroup and create a new empty one,
+     * and rebuild as it was prior to deletion
+     */
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_set_overflow: bgpm_eventset_applied = %d, threshold = %d\n",
+            this_state->bgpm_eventset_applied, threshold );
+#endif
+    if ( 1 == this_state->bgpm_eventset_applied && 0 != threshold ) {
+        retval = _common_deleteRecreate( &this_state->EventGroup );
+        if ( retval < 0 ) return retval;
+        retval = _common_rebuildEventgroup( this_state->count,
+                                            this_state->EventGroup_local,
+                                            &this_state->EventGroup );
+        if ( retval < 0 ) return retval;
+
+        /* set BGPM eventGroup flag back to NOT applied yet (0)
+         * because the eventGroup has been recreated from scratch */
+        this_state->bgpm_eventset_applied = 0;
+    }
+
+    evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
+    SUBDBG( "Hardware counter %d (vs %d) used in overflow, threshold %d\n",
+            evt_idx, EventIndex, threshold );
+#ifdef DEBUG_BGQ
+    printf( "Hardware counter %d (vs %d) used in overflow, threshold %d\n",
+            evt_idx, EventIndex, threshold );
+#endif
+    /* A threshold of 0 turns overflow off for this component */
+    if ( threshold == 0 ) {
+        /* Remove the signal handler */
+        retval = _papi_hwi_stop_signal( _L2unit_vector.cmp_info.hardware_intr_sig );
+        if ( retval != PAPI_OK )
+            return ( retval );
+    }
+    else {
+        this_state->overflow = 1;
+        this_state->overflow_count++;    /* NOTE(review): not bounded against overflow_list[512] - confirm callers cannot exceed it */
+        this_state->overflow_list[this_state->overflow_count-1].threshold = threshold;
+        this_state->overflow_list[this_state->overflow_count-1].EventIndex = evt_idx;
+
+#ifdef DEBUG_BGQ
+        printf( "L2UNIT_set_overflow: Enable the signal handler\n" );
+#endif
+        /* Enable the signal handler */
+        retval = _papi_hwi_start_signal( _L2unit_vector.cmp_info.hardware_intr_sig,
+                                         NEED_CONTEXT,
+                                         _L2unit_vector.cmp_info.CmpIdx );
+        if ( retval != PAPI_OK )
+            return ( retval );
+
+        retval = _common_set_overflow_BGPM( this_state->EventGroup,
+                                            this_state->overflow_list[this_state->overflow_count-1].EventIndex,
+                                            this_state->overflow_list[this_state->overflow_count-1].threshold,
+                                            user_signal_handler_L2UNIT );
+        if ( retval < 0 ) return retval;
+    }
+
+    return ( PAPI_OK );
+}
+
+
+
+/* This function sets various options in the component
+ * The valid codes being passed in are PAPI_SET_DEFDOM,
+ * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT.  This implementation ignores all of them and returns PAPI_OK.
+ */
+int
+L2UNIT_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_ctl\n" );
+#endif
+
+    ( void ) ctx;
+    ( void ) code;
+    ( void ) option;
+    return ( PAPI_OK );
+}
+
+
+/*
+ * PAPI Cleanup Eventset
+ * Destroy and re-create the BGPM / L2unit EventSet
+ */
+int
+L2UNIT_cleanup_eventset( hwd_control_state_t * ctrl )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_cleanup_eventset\n" );
+#endif
+    int retval;
+
+    L2UNIT_control_state_t * this_state = ( L2UNIT_control_state_t * ) ctrl;
+
+    // create a new empty bgpm eventset
+    // reason: bgpm doesn't permit to remove events from an eventset;
+    // hence we delete the old eventset and create a new one
+    retval = _common_deleteRecreate( &this_state->EventGroup );
+    if ( retval < 0 ) return retval;
+
+    // set overflow flag to OFF (0)
+    this_state->overflow = 0;
+    this_state->overflow_count = 0;
+    // set BGPM eventGroup flag back to NOT applied yet (0)
+    this_state->bgpm_eventset_applied = 0;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Rebuild the BGPM event group from native[0..count-1] (BGPM id = native code + OFFSET), then re-arm any stored overflow thresholds.
+ */
+int
+L2UNIT_update_control_state( hwd_control_state_t * ptr,
+                             NativeInfo_t * native, int count,
+                             hwd_context_t * ctx )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_update_control_state: count = %d\n", count );
+#endif
+
+    ( void ) ctx;
+    int retval, index, i, k;
+    L2UNIT_control_state_t * this_state = ( L2UNIT_control_state_t * ) ptr;
+
+    // Delete and re-create BGPM eventset
+    retval = _common_deleteRecreate( &this_state->EventGroup );
+    if ( retval < 0 ) return retval;
+
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_update_control_state: EventGroup=%d, overflow = %d\n",
+            this_state->EventGroup, this_state->overflow );
+#endif
+
+
+    // otherwise, add the events to the eventset
+    for ( i = 0; i < count; i++ ) {
+        index = ( native[i].ni_event ) + OFFSET;
+
+        native[i].ni_position = i;
+
+#ifdef DEBUG_BGQ
+        printf("L2UNIT_update_control_state: ADD event: i = %d, index = %d\n", i, index );
+#endif
+
+        this_state->EventGroup_local[i] = index;
+
+
+        /* Add events to the BGPM eventGroup */
+        retval = Bgpm_AddEvent( this_state->EventGroup, index );
+        retval = _check_BGPM_error( retval, "Bgpm_AddEvent" );
+        if ( retval < 0 ) return retval;
+    }
+
+    // store how many events we added to an EventSet
+    this_state->count = count;
+
+    // since update_control_state trashes overflow settings, this puts things
+    // back into balance for BGPM
+    if ( 1 == this_state->overflow ) {
+        for ( k = 0; k < this_state->overflow_count; k++ ) {
+            retval = _common_set_overflow_BGPM( this_state->EventGroup,
+                                                this_state->overflow_list[k].EventIndex,
+                                                this_state->overflow_list[k].threshold,
+                                                user_signal_handler_L2UNIT );
+            if ( retval < 0 ) return retval;
+        }
+    }
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * This function has to set the bits needed to count different domains
+ * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER
+ * By default return PAPI_EINVAL if none of those are specified
+ * and PAPI_OK with success
+ * PAPI_DOM_USER is only user context is counted
+ * PAPI_DOM_KERNEL is only the Kernel/OS context is counted
+ * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses)
+ * PAPI_DOM_ALL is all of the domains
+ */
+int
+L2UNIT_set_domain( hwd_control_state_t * cntrl, int domain )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_set_domain\n" );
+#endif
+    int found = 0;
+    ( void ) cntrl;
+
+    if ( PAPI_DOM_USER & domain )
+        found = 1;
+
+    if ( PAPI_DOM_KERNEL & domain )
+        found = 1;
+
+    if ( PAPI_DOM_OTHER & domain )
+        found = 1;
+
+    if ( !found )
+        return ( PAPI_EINVAL );
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Reset the counters of the event group by stopping it and then calling Bgpm_ResetStart.
+ */
+int
+L2UNIT_reset( hwd_context_t * ctx, hwd_control_state_t * ptr )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_reset\n" );
+#endif
+    ( void ) ctx;
+    int retval;
+    L2UNIT_control_state_t * this_state = ( L2UNIT_control_state_t * ) ptr;
+
+    /* we can't simply call Bgpm_Reset() since PAPI doesn't have the
+     restriction that an EventSet has to be stopped before resetting is
+     possible. However, BGPM does have this restriction.
+     Hence we need to stop, reset and start */
+    retval = Bgpm_Stop( this_state->EventGroup );
+    retval = _check_BGPM_error( retval, "Bgpm_Stop" );
+    if ( retval < 0 ) return retval;
+
+    retval = Bgpm_ResetStart( this_state->EventGroup );
+    retval = _check_BGPM_error( retval, "Bgpm_ResetStart" );
+    if ( retval < 0 ) return retval;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Native Event functions
+ */
+int
+L2UNIT_ntv_enum_events( unsigned int *EventCode, int modifier )
+{
+#ifdef DEBUG_BGQ
+    //printf( "L2UNIT_ntv_enum_events, EventCode = %#x\n", *EventCode );
+#endif
+
+    switch ( modifier ) {
+    case PAPI_ENUM_FIRST:
+        *EventCode = 0;
+
+        return ( PAPI_OK );
+        break;
+
+    case PAPI_ENUM_EVENTS:
+    {
+        int index = ( *EventCode ) + OFFSET;
+
+        if ( index < L2UNIT_MAX_EVENTS ) {
+            *EventCode = *EventCode + 1;
+            return ( PAPI_OK );
+        } else
+            return ( PAPI_ENOEVNT );
+
+        break;
+    }
+    default:
+        return ( PAPI_EINVAL );
+    }
+    return ( PAPI_EINVAL );
+}
+
+
+/*
+ * Translate a BGPM event label into this component's native code (BGPM event id minus OFFSET); PAPI_ENOEVNT if unknown or outside the L2 range.
+ */
+int
+L2UNIT_ntv_name_to_code( const char *name, unsigned int *event_code )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_ntv_name_to_code\n" );
+#endif
+    int ret;
+
+    /* Return event id matching a given event label string */
+    ret = Bgpm_GetEventIdFromLabel ( name );
+
+    if ( ret <= 0 ) {
+#ifdef DEBUG_BGPM
+        printf ("Error: ret value is %d for BGPM API function '%s'.\n",
+                ret, "Bgpm_GetEventIdFromLabel" );
+#endif
+        return PAPI_ENOEVNT;
+    }
+    else if ( ret < OFFSET || ret > L2UNIT_MAX_EVENTS ) // not a L2Unit event
+        return PAPI_ENOEVNT;
+    else
+        *event_code = ( ret - OFFSET );
+
+    return PAPI_OK;
+}
+
+
+/*
+ * Copy the BGPM label of native event EventCode into name (at most len bytes, always NUL-terminated); PAPI_ENOEVNT if there is no label or no usable buffer.
+ */
+int
+L2UNIT_ntv_code_to_name( unsigned int EventCode, char *name, int len )
+{
+#ifdef DEBUG_BGQ
+    //printf( "L2UNIT_ntv_code_to_name\n" );
+#endif
+    int index = ( EventCode ) + OFFSET;
+    const char *label;
+
+    if ( index >= MAX_COUNTERS )
+        return PAPI_ENOEVNT;
+
+    label = Bgpm_GetEventIdLabel( index );
+    /* validate the label and the destination before copying, not after */
+    if ( label == NULL || name == NULL || len <= 0 ) {
+#ifdef DEBUG_BGPM
+        printf ("Error: ret value is NULL for BGPM API function Bgpm_GetEventIdLabel.\n" );
+#endif
+        return PAPI_ENOEVNT;
+    }
+    strncpy( name, label, len - 1 );
+    name[len - 1] = '\0';    /* strncpy does not terminate when the label fills the buffer */
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Fetch the BGPM long description of native event EventCode into name (len is passed to BGPM by address).
+ */
+int
+L2UNIT_ntv_code_to_descr( unsigned int EventCode, char *name, int len )
+{
+#ifdef DEBUG_BGQ
+    //printf( "L2UNIT_ntv_code_to_descr\n" );
+#endif
+    int retval, index;
+
+    index = ( EventCode ) + OFFSET;
+
+    retval = Bgpm_GetLongDesc( index, name, &len );
+    retval = _check_BGPM_error( retval, "Bgpm_GetLongDesc" );
+    if ( retval < 0 ) return retval;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * No register bits are produced by this component; both arguments are ignored.
+ */
+int
+L2UNIT_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits )
+{
+#ifdef DEBUG_BGQ
+    printf( "L2UNIT_ntv_code_to_bits\n" );
+#endif
+    ( void ) EventCode;
+    ( void ) bits;
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Component descriptor for bgpm/L2Unit: capabilities, private structure sizes and entry points.
+ */
+papi_vector_t _L2unit_vector = {
+    .cmp_info = {
+                 /* default component information (unspecified values are initialized to 0) */
+                 .name = "bgpm/L2Unit",
+                 .short_name = "L2Unit",
+                 .description = "Blue Gene/Q L2Unit component",
+                 .num_cntrs = L2UNIT_MAX_COUNTERS,
+                 .num_native_events = L2UNIT_MAX_EVENTS-OFFSET+1,
+                 .num_mpx_cntrs = L2UNIT_MAX_COUNTERS,
+                 .default_domain = PAPI_DOM_USER,
+                 .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL,
+                 .default_granularity = PAPI_GRN_THR,
+                 .available_granularities = PAPI_GRN_THR,
+
+                 .hardware_intr_sig = PAPI_INT_SIGNAL,
+                 .hardware_intr = 1,
+
+                 .kernel_multiplex = 0,
+
+                 /* component specific cmp_info initializations */
+                 .fast_real_timer = 0,
+                 .fast_virtual_timer = 0,
+                 .attach = 0,
+                 .attach_must_ptrace = 0,
+                 }
+    ,
+
+    /* sizes of framework-opaque component-private structures */
+    .size = {
+             .context = sizeof ( L2UNIT_context_t ),
+             .control_state = sizeof ( L2UNIT_control_state_t ),
+             .reg_value = sizeof ( L2UNIT_register_t ),
+             .reg_alloc = sizeof ( L2UNIT_reg_alloc_t ),
+             }
+    ,
+    /* function pointers in this component */
+    .init_thread = L2UNIT_init_thread,
+    .init_component = L2UNIT_init_component,
+    .init_control_state = L2UNIT_init_control_state,
+    .start = L2UNIT_start,
+    .stop = L2UNIT_stop,
+    .read = L2UNIT_read,
+    .shutdown_thread = L2UNIT_shutdown_thread,
+    .set_overflow = L2UNIT_set_overflow,
+    .cleanup_eventset = L2UNIT_cleanup_eventset,
+    .ctl = L2UNIT_ctl,
+
+    .update_control_state = L2UNIT_update_control_state,
+    .set_domain = L2UNIT_set_domain,
+    .reset = L2UNIT_reset,
+
+    .ntv_name_to_code = L2UNIT_ntv_name_to_code,
+    .ntv_enum_events = L2UNIT_ntv_enum_events,
+    .ntv_code_to_name = L2UNIT_ntv_code_to_name,
+    .ntv_code_to_descr = L2UNIT_ntv_code_to_descr,
+    .ntv_code_to_bits = L2UNIT_ntv_code_to_bits
+};
diff --git a/src/components/bgpm/L2unit/linux-L2unit.h b/src/components/bgpm/L2unit/linux-L2unit.h
new file mode 100644
index 0000000..41690b4
--- /dev/null
+++ b/src/components/bgpm/L2unit/linux-L2unit.h
@@ -0,0 +1,81 @@
+/****************************/
+/* THIS IS OPEN SOURCE CODE */
+/****************************/
+
+/**
+ * @file    linux-L2unit.h
+ * @author  Heike Jagode
+ *          jagode@eecs.utk.edu
+ * Mods:    < your name here >
+ *          < your email address >
+ * BGPM / L2unit component
+ *
+ * Tested version of bgpm (early access)
+ *
+ * @brief
+ *  This file has the source code for a component that enables PAPI-C to
+ *  access hardware monitoring counters for BG/Q through the bgpm library. 
+ */
+
+#ifndef _PAPI_L2UNIT_H
+#define _PAPI_L2UNIT_H
+
+#include "papi.h"
+#include "papi_internal.h"
+#include "papi_vector.h"
+#include "papi_memory.h"
+#include "extras.h"
+#include "../../../linux-bgq-common.h"
+
+
+/************************* DEFINES SECTION ***********************************
+ *******************************************************************************/
+
+/* this number assumes that there will never be more events than indicated */
+#define L2UNIT_MAX_COUNTERS UPC_L2_NUM_COUNTERS
+#define L2UNIT_MAX_EVENTS PEVT_L2UNIT_LAST_EVENT
+#define OFFSET ( PEVT_PUNIT_LAST_EVENT + 1 )
+
+/* Stores private information for each event */
+typedef struct L2UNIT_register
+{
+    unsigned int selector;
+    /* Signifies which counter slot is being used */
+    /* Indexed from 1 as 0 has a special meaning  */
+} L2UNIT_register_t;
+
+
+/* Used when doing register allocation */
+typedef struct L2UNIT_reg_alloc
+{
+    L2UNIT_register_t ra_bits;
+} L2UNIT_reg_alloc_t;
+
+typedef struct L2UNIT_overflow
+{
+    int threshold;     /* overflow threshold handed to _common_set_overflow_BGPM */
+    int EventIndex;    /* event position handed to _common_set_overflow_BGPM */
+} L2UNIT_overflow_t;
+
+/* Holds control flags */
+typedef struct L2UNIT_control_state
+{
+    int EventGroup;               /* BGPM event set handle from Bgpm_CreateEventSet */
+    int EventGroup_local[512];    /* BGPM event ids added to EventGroup, kept so the group can be rebuilt */
+    int count;                    /* number of events stored by update_control_state */
+    long long counters[L2UNIT_MAX_COUNTERS];    /* values filled in by L2UNIT_read */
+    int overflow;         // overflow enable
+    int overflow_count;   /* number of entries used in overflow_list */
+    L2UNIT_overflow_t overflow_list[512];
+    int bgpm_eventset_applied;    // BGPM eventGroup applied yes or no flag
+} L2UNIT_control_state_t;
+
+
+/* Holds per-thread information */
+typedef struct L2UNIT_context
+{
+    L2UNIT_control_state_t state;
+} L2UNIT_context_t;
+
+
+#endif /* _PAPI_L2UNIT_H */
diff --git a/src/components/bgpm/NWunit/Rules.NWunit b/src/components/bgpm/NWunit/Rules.NWunit
new file mode 100644
index 0000000..4dcfd5e
--- /dev/null
+++ b/src/components/bgpm/NWunit/Rules.NWunit
@@ -0,0 +1,7 @@
+# $Id$
+
+COMPSRCS += components/bgpm/NWunit/linux-NWunit.c
+COMPOBJS += linux-NWunit.o
+
+linux-NWunit.o: components/bgpm/NWunit/linux-NWunit.c components/bgpm/NWunit/linux-NWunit.h 
$(HEADERS)
+	$(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/bgpm/NWunit/linux-NWunit.c -o linux-NWunit.o
diff --git a/src/components/bgpm/NWunit/linux-NWunit.c b/src/components/bgpm/NWunit/linux-NWunit.c
new file mode 100644
index 0000000..fad123f
--- /dev/null
+++ b/src/components/bgpm/NWunit/linux-NWunit.c
@@ -0,0 +1,517 @@
+/****************************/
+/* THIS IS OPEN SOURCE CODE */
+/****************************/
+
+/**
+ * @file    linux-NWunit.c
+ * @author  Heike Jagode
+ *          jagode@eecs.utk.edu
+ * Mods:    < your name here >
+ *          < your email address >
+ * BGPM / NWunit component
+ *
+ * Tested version of bgpm (early access)
+ *
+ * @brief
+ *  This file has the source code for a component that enables PAPI-C to
+ *  access hardware monitoring counters for BG/Q through the bgpm library.
+ */
+
+#include "linux-NWunit.h"
+
+/* Declare our vector in advance */
+papi_vector_t _NWunit_vector;
+
+/*****************************************************************************
+ ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS *************
+ *****************************************************************************/
+
+/*
+ * This is called whenever a thread is initialized; this component keeps no per-thread state, so ctx is ignored
+ */
+int
+NWUNIT_init_thread( hwd_context_t * ctx )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_init_thread\n" );
+#endif
+
+    ( void ) ctx;
+    return PAPI_OK;
+}
+
+
+/* Initialize hardware counters, setup the function vector table
+ * and get hardware information, this routine is called when the
+ * PAPI process is initialized (IE PAPI_library_init).  Here it only records the component index in _NWunit_vector.
+ */
+int
+NWUNIT_init_component( int cidx )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_init_component\n" );
+#endif
+
+    _NWunit_vector.cmp_info.CmpIdx = cidx;
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_init_component cidx = %d\n", cidx );
+#endif
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Control of counters (Reading/Writing/Starting/Stopping/Setup)
+ * functions.  Creates the BGPM event set for this control state.
+ */
+int
+NWUNIT_init_control_state( hwd_control_state_t * ptr )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_init_control_state\n" );
+#endif
+    int retval;
+
+    NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr;
+
+    this_state->EventGroup = Bgpm_CreateEventSet();
+    retval = _check_BGPM_error( this_state->EventGroup, "Bgpm_CreateEventSet" );
+    if ( retval < 0 ) return retval;
+
+    return PAPI_OK;
+}
+
+
+/*
+ * Attach the event group with UPC_NW_ALL_LINKS, then reset and start counting.
+ */
+int
+NWUNIT_start( hwd_context_t * ctx, hwd_control_state_t * ptr )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_start\n" );
+#endif
+
+    ( void ) ctx;
+    int retval;
+    NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr;
+
+    retval = Bgpm_Attach( this_state->EventGroup, UPC_NW_ALL_LINKS, 0);
+    retval = _check_BGPM_error( retval, "Bgpm_Attach" );
+    if ( retval < 0 ) return retval;
+
+    retval = Bgpm_ResetStart( this_state->EventGroup );
+    retval = _check_BGPM_error( retval, "Bgpm_ResetStart" );
+    if ( retval < 0 ) return retval;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Stop counting on the BGPM event group of this control state.
+ */
+int
+NWUNIT_stop( hwd_context_t * ctx, hwd_control_state_t * ptr )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_stop\n" );
+#endif
+    ( void ) ctx;
+    int retval;
+    NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr;
+
+    retval = Bgpm_Stop( this_state->EventGroup );
+    retval = _check_BGPM_error( retval, "Bgpm_Stop" );
+    if ( retval < 0 ) return retval;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Read every event of the event group into this_state->counts and return that array through *events.
+ */
+int
+NWUNIT_read( hwd_context_t * ctx, hwd_control_state_t * ptr,
+             long_long ** events, int flags )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_read\n" );
+#endif
+    ( void ) ctx;
+    ( void ) flags;
+    int i, numEvts;
+    NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr;
+
+    numEvts = Bgpm_NumEvents( this_state->EventGroup );
+    if ( numEvts == 0 ) {
+#ifdef DEBUG_BGPM
+        printf ("Error: ret value is %d for BGPM API function Bgpm_NumEvents.\n", numEvts );
+#endif
+        //return ( EXIT_FAILURE );
+    }
+
+    for ( i = 0; i < numEvts; i++ )
+        this_state->counts[i] = _common_getEventValue( i, this_state->EventGroup );
+
+    *events = this_state->counts;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Per-thread shutdown hook; nothing is released here and ctx is ignored.
+ */
+int
+NWUNIT_shutdown_thread( hwd_context_t * ctx )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_shutdown_thread\n" );
+#endif
+
+    ( void ) ctx;
+    return ( PAPI_OK );
+}
+
+
+/* This function sets various options in the component
+ * The valid codes being passed in are PAPI_SET_DEFDOM,
+ * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT.  This implementation ignores all of them and returns PAPI_OK.
+ */
+int
+NWUNIT_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_ctl\n" );
+#endif
+
+    ( void ) ctx;
+    ( void ) code;
+    ( void ) option;
+    return ( PAPI_OK );
+}
+
+
+//int NWUNIT_ntv_code_to_bits ( unsigned int EventCode, hwd_register_t * bits );
+
+
+/*
+ * Rebuild the BGPM event group from native[0..count-1] (BGPM id = native code + OFFSET) and record each event's position.
+ */
+int
+NWUNIT_update_control_state( hwd_control_state_t * ptr,
+                             NativeInfo_t * native, int count,
+                             hwd_context_t * ctx )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_update_control_state: count = %d\n", count );
+#endif
+    ( void ) ctx;
+    int retval, index, i;
+    NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr;
+
+    // Delete and re-create BGPM eventset
+    retval = _common_deleteRecreate( &this_state->EventGroup );
+    if ( retval < 0 ) return retval;
+
+    // otherwise, add the events to the eventset
+    for ( i = 0; i < count; i++ ) {
+        index = ( native[i].ni_event ) + OFFSET;
+
+        native[i].ni_position = i;
+
+#ifdef DEBUG_BGQ
+        printf("NWUNIT_update_control_state: ADD event: i = %d, index = %d\n", i, index );
+#endif
+
+        /* Add events to the BGPM eventGroup */
+        retval = Bgpm_AddEvent( this_state->EventGroup, index );
+        retval = _check_BGPM_error( retval, "Bgpm_AddEvent" );
+        if ( retval < 0 ) return retval;
+    }
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * This function has to set the bits needed to count different domains
+ * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER
+ * By default return PAPI_EINVAL if none of those are specified
+ * and PAPI_OK with success
+ * PAPI_DOM_USER is only user context is counted
+ * PAPI_DOM_KERNEL is 
only the Kernel/OS context is counted
+ * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses)
+ * PAPI_DOM_ALL is all of the domains.  Note that this implementation only validates the request; no BGPM state is changed.
+ */
+int
+NWUNIT_set_domain( hwd_control_state_t * cntrl, int domain )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_set_domain\n" );
+#endif
+    int found = 0;
+    ( void ) cntrl;
+
+    if ( PAPI_DOM_USER & domain )
+        found = 1;
+
+    if ( PAPI_DOM_KERNEL & domain )
+        found = 1;
+
+    if ( PAPI_DOM_OTHER & domain )
+        found = 1;
+
+    if ( !found )
+        return ( PAPI_EINVAL );
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Reset the counters of the event group by stopping it and then calling Bgpm_ResetStart.
+ */
+int
+NWUNIT_reset( hwd_context_t * ctx, hwd_control_state_t * ptr )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_reset\n" );
+#endif
+    ( void ) ctx;
+    int retval;
+    NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ptr;
+
+    /* we can't simply call Bgpm_Reset() since PAPI doesn't have the
+     restriction that an EventSet has to be stopped before resetting is
+     possible. However, BGPM does have this restriction.
+     Hence we need to stop, reset and start */
+    retval = Bgpm_Stop( this_state->EventGroup );
+    retval = _check_BGPM_error( retval, "Bgpm_Stop" );
+    if ( retval < 0 ) return retval;
+
+    retval = Bgpm_ResetStart( this_state->EventGroup );
+    retval = _check_BGPM_error( retval, "Bgpm_ResetStart" );
+    if ( retval < 0 ) return retval;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * PAPI Cleanup Eventset
+ *
+ * Destroy and re-create the BGPM / NWunit EventSet
+ */
+int
+NWUNIT_cleanup_eventset( hwd_control_state_t * ctrl )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_cleanup_eventset\n" );
+#endif
+    int retval;
+
+    NWUNIT_control_state_t * this_state = ( NWUNIT_control_state_t * ) ctrl;
+
+    // create a new empty bgpm eventset
+    // reason: bgpm doesn't permit to remove events from an eventset;
+    // hence we delete the old eventset and create a new one
+    retval = _common_deleteRecreate( &this_state->EventGroup ); // HJ try to use delete() only
+    if ( retval < 0 ) return retval;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Native Event 
functions
+ */
+int
+NWUNIT_ntv_enum_events( unsigned int *EventCode, int modifier )
+{
+    //printf( "NWUNIT_ntv_enum_events\n" );
+
+    switch ( modifier ) {
+    case PAPI_ENUM_FIRST:
+        *EventCode = 0;
+
+        return ( PAPI_OK );
+        break;
+
+    case PAPI_ENUM_EVENTS:
+    {
+        int index = ( *EventCode ) + OFFSET;
+
+        if ( index < NWUNIT_MAX_EVENTS ) {
+            *EventCode = *EventCode + 1;
+            return ( PAPI_OK );
+        } else
+            return ( PAPI_ENOEVNT );
+
+        break;
+    }
+    default:
+        return ( PAPI_EINVAL );
+    }
+    return ( PAPI_EINVAL );
+}
+
+
+/*
+ * Translate a BGPM event label into this component's native code (BGPM event id minus OFFSET); PAPI_ENOEVNT if unknown or outside the NW range.
+ */
+int
+NWUNIT_ntv_name_to_code( const char *name, unsigned int *event_code )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_ntv_name_to_code\n" );
+#endif
+    int ret;
+
+    /* Return event id matching a given event label string */
+    ret = Bgpm_GetEventIdFromLabel ( name );
+
+    if ( ret <= 0 ) {
+#ifdef DEBUG_BGPM
+        printf ("Error: ret value is %d for BGPM API function '%s'.\n",
+                ret, "Bgpm_GetEventIdFromLabel" );
+#endif
+        return PAPI_ENOEVNT;
+    }
+    else if ( ret < OFFSET || ret > NWUNIT_MAX_EVENTS ) // not a NWUnit event
+        return PAPI_ENOEVNT;
+    else
+        *event_code = ( ret - OFFSET ) ;
+
+    return PAPI_OK;
+}
+
+
+/*
+ * Copy the BGPM label of native event EventCode into name (at most len bytes, always NUL-terminated); PAPI_ENOEVNT if there is no label or no usable buffer.
+ */
+int
+NWUNIT_ntv_code_to_name( unsigned int EventCode, char *name, int len )
+{
+#ifdef DEBUG_BGQ
+    //printf( "NWUNIT_ntv_code_to_name\n" );
+#endif
+    int index = ( EventCode ) + OFFSET;
+    const char *label;
+
+    if ( index >= MAX_COUNTERS )
+        return PAPI_ENOEVNT;
+
+    label = Bgpm_GetEventIdLabel( index );
+    /* validate the label and the destination before copying, not after */
+    if ( label == NULL || name == NULL || len <= 0 ) {
+#ifdef DEBUG_BGPM
+        printf ("Error: ret value is NULL for BGPM API function Bgpm_GetEventIdLabel.\n" );
+#endif
+        return PAPI_ENOEVNT;
+    }
+    strncpy( name, label, len - 1 );
+    name[len - 1] = '\0';    /* strncpy does not terminate when the label fills the buffer */
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Fetch the BGPM long description of native event EventCode into name (len is passed to BGPM by address).
+ */
+int
+NWUNIT_ntv_code_to_descr( unsigned int EventCode, char *name, int len )
+{
+#ifdef DEBUG_BGQ
+    //printf( "NWUNIT_ntv_code_to_descr\n" );
+#endif
+    int retval, index;
+
+    index = ( EventCode ) + OFFSET;
+
+    retval = Bgpm_GetLongDesc( index, name, &len );
+    retval = _check_BGPM_error( retval, "Bgpm_GetLongDesc" );
+    if ( retval < 0 ) return retval;
+
+    return ( PAPI_OK );
+}
+
+
+/*
+ * No register bits are produced by this component; both arguments are ignored.
+ */
+int
+NWUNIT_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits )
+{
+#ifdef DEBUG_BGQ
+    printf( "NWUNIT_ntv_code_to_bits\n" );
+#endif
+    ( void ) EventCode;
+    ( void ) bits;
+    return ( PAPI_OK );
+}
+
+
+/*
+ * Component descriptor for bgpm/NWUnit: capabilities, private structure sizes and entry points (no set_overflow entry is registered).
+ */
+papi_vector_t _NWunit_vector = {
+    .cmp_info = {
+                 /* default component information (unspecified values are initialized to 0) */
+                 .name = "bgpm/NWUnit",
+                 .short_name = "NWUnit",
+                 .description = "Blue Gene/Q NWUnit component",
+                 .num_cntrs = NWUNIT_MAX_COUNTERS,
+                 .num_native_events = NWUNIT_MAX_EVENTS-OFFSET+1,
+                 .num_mpx_cntrs = NWUNIT_MAX_COUNTERS,
+                 .default_domain = PAPI_DOM_USER,
+                 .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL,
+                 .default_granularity = PAPI_GRN_THR,
+                 .available_granularities = PAPI_GRN_THR,
+
+                 .hardware_intr_sig = PAPI_INT_SIGNAL,
+                 .hardware_intr = 1,
+
+                 .kernel_multiplex = 0,
+
+                 /* component specific cmp_info initializations */
+                 .fast_real_timer = 0,
+                 .fast_virtual_timer = 0,
+                 .attach = 0,
+                 .attach_must_ptrace = 0,
+
+                 }
+    ,
+
+    /* sizes of framework-opaque component-private structures */
+    .size = {
+             .context = sizeof ( NWUNIT_context_t ),
+             .control_state = sizeof ( NWUNIT_control_state_t ),
+             .reg_value = sizeof ( NWUNIT_register_t ),
+             .reg_alloc = sizeof ( NWUNIT_reg_alloc_t ),
+             }
+    ,
+    /* function pointers in this component */
+    .init_thread = NWUNIT_init_thread,
+    .init_component = NWUNIT_init_component,
+    .init_control_state = NWUNIT_init_control_state,
+    .start = NWUNIT_start,
+    .stop = NWUNIT_stop,
+    .read = NWUNIT_read,
+    .shutdown_thread = NWUNIT_shutdown_thread,
+    .cleanup_eventset = NWUNIT_cleanup_eventset,
+    .ctl = NWUNIT_ctl,
+
+    .update_control_state = NWUNIT_update_control_state,
+    .set_domain = NWUNIT_set_domain,
+    .reset = NWUNIT_reset,
+
+    .ntv_name_to_code = NWUNIT_ntv_name_to_code,
+    .ntv_enum_events = NWUNIT_ntv_enum_events,
+    .ntv_code_to_name = NWUNIT_ntv_code_to_name,
+    .ntv_code_to_descr = NWUNIT_ntv_code_to_descr,
+    .ntv_code_to_bits = NWUNIT_ntv_code_to_bits
+};
diff --git a/src/components/bgpm/NWunit/linux-NWunit.h b/src/components/bgpm/NWunit/linux-NWunit.h
new file mode 100644
index 0000000..2a4925a
--- /dev/null
+++ b/src/components/bgpm/NWunit/linux-NWunit.h
@@ -0,0 +1,69 @@
+/****************************/
+/* THIS IS OPEN SOURCE CODE */
+/****************************/
+
+/**
+ * @file    linux-NWunit.h
+ * @author  Heike Jagode
+ *          jagode@eecs.utk.edu
+ * Mods:    < your name here >
+ *          < your email address >
+ * BGPM / NWunit component
+ *
+ * Tested version of bgpm (early access)
+ *
+ * @brief
+ *  This file has the source code for a component that enables PAPI-C to
+ *  access hardware monitoring counters for BG/Q through the bgpm library.
+ */
+
+#ifndef _PAPI_NWUNIT_H
+#define _PAPI_NWUNIT_H
+
+#include "papi.h"
+#include "papi_internal.h"
+#include "papi_vector.h"
+#include "papi_memory.h"
+#include "extras.h"
+#include "../../../linux-bgq-common.h"
+
+
+/************************* DEFINES SECTION ***********************************
+ *******************************************************************************/
+
+/* this number assumes that there will never be more events than indicated */
+//#define NWUNIT_MAX_COUNTERS UPC_NW_ALL_LINKCTRS
+#define NWUNIT_MAX_COUNTERS UPC_NW_NUM_CTRS
+#define NWUNIT_MAX_EVENTS PEVT_NWUNIT_LAST_EVENT
+#define OFFSET ( PEVT_IOUNIT_LAST_EVENT + 1 )
+
+
+/** Structure that stores private information of each event */
+typedef struct NWUNIT_register
+{
+    unsigned int selector;
+    /* Signifies which counter slot is being used */
+    /* Indexed from 1 as 0 has a special meaning  */
+} NWUNIT_register_t;
+
+
+typedef struct NWUNIT_reg_alloc
+{
+    NWUNIT_register_t ra_bits;
+} NWUNIT_reg_alloc_t;
+
+
+typedef struct NWUNIT_control_state
+{
+    int EventGroup;    /* BGPM event set handle from Bgpm_CreateEventSet */
+    long long counts[NWUNIT_MAX_COUNTERS];    /* values filled in by NWUNIT_read */
+} NWUNIT_control_state_t;
+
+
+typedef struct NWUNIT_context
+{
+    NWUNIT_control_state_t state;
+} NWUNIT_context_t;
+
+
+#endif /* _PAPI_NWUNIT_H */ diff --git a/src/components/bgpm/README b/src/components/bgpm/README new file mode 100644 index 0000000..c6f8739 --- /dev/null +++ b/src/components/bgpm/README @@ -0,0 +1,23 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Dan Terpstra +* terpstra@icl.utk.edu +* @defgroup papi_components Components +* @brief Component Specific Readme file: BGPM +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +bgpm/ +Five new components have been added to PAPI to support hardware performance monitoring for the BG/Q platform; in particular the BG/Q network, the I/O system, the Compute Node Kernel in addition to the processing core. There are no specific component configure scripts for L2unit, IOunit, NWunit, CNKunit. In order to configure PAPI for BG/Q, use the following configure options at the papi/src level: + % ./configure --prefix=< your_choice > \ + --with-OS=bgq \ + --with-bgpm_installdir=/bgsys/drivers/ppcfloor \ + CC=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gcc \ + F77=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gfortran \ + --with-components="bgpm/L2unit bgpm/CNKunit bgpm/IOunit bgpm/NWunit" + +*/ diff --git a/src/components/coretemp/Rules.coretemp b/src/components/coretemp/Rules.coretemp new file mode 100644 index 0000000..0fbb116 --- /dev/null +++ b/src/components/coretemp/Rules.coretemp @@ -0,0 +1,7 @@ +# $Id$ + +COMPSRCS += components/coretemp/linux-coretemp.c +COMPOBJS += linux-coretemp.o + +linux-coretemp.o: components/coretemp/linux-coretemp.c components/coretemp/linux-coretemp.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/coretemp/linux-coretemp.c -o linux-coretemp.o diff --git a/src/components/coretemp/linux-coretemp.c b/src/components/coretemp/linux-coretemp.c new file mode 100644 index 0000000..e059cae --- /dev/null +++ b/src/components/coretemp/linux-coretemp.c @@ -0,0 +1,703 @@ +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include 
"papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#include "linux-coretemp.h" + +/* this is what I found on my core2 machine + * but I have not explored this widely yet*/ +#define REFRESH_LAT 4000 + +#define INVALID_RESULT -1000000L + +papi_vector_t _coretemp_vector; + +/* temporary event */ +struct temp_event { + char name[PAPI_MAX_STR_LEN]; + char units[PAPI_MIN_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + char location[PAPI_MAX_STR_LEN]; + char path[PATH_MAX]; + int stone; + long count; + struct temp_event *next; +}; + + +static CORETEMP_native_event_entry_t * _coretemp_native_events; +static int num_events = 0; +static int is_initialized = 0; + +/***************************************************************************/ +/****** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *******/ +/***************************************************************************/ + +static struct temp_event* root = NULL; +static struct temp_event *last = NULL; + +/* Append a new temp_event carrying copies of the given strings to the + * root/last list. Returns PAPI_OK on success, PAPI_ENOMEM when the node + * cannot be allocated, or PAPI_ECMP when the list is in an inconsistent state. 
+ */ +static int +insert_in_list(char *name, char *units, + char *description, char *filename) { + + + struct temp_event *temp; + + + /* new_event path, events->d_name */ + temp = (struct temp_event *) papi_calloc(1, sizeof(struct temp_event)); + if (temp==NULL) { + PAPIERROR("out of memory!"); + /* We should also free any previously allocated data */ + return PAPI_ENOMEM; + } + + temp->next = NULL; + + if (root == NULL) { + root = temp; + } + else if (last) { + last->next = temp; + } + else { + /* Because this is a function, it is possible */ + /* we are called with root!=NULL but no last */ + /* so add this to keep coverity happy */ + /* temp came from papi_calloc(), so release it with papi_free() */ + papi_free(temp); + PAPIERROR("This shouldn't be possible\n"); + + return PAPI_ECMP; + } + + last = temp; + + snprintf(temp->name, PAPI_MAX_STR_LEN, "%s", name); + snprintf(temp->units, PAPI_MIN_STR_LEN, "%s", units); + snprintf(temp->description, PAPI_MAX_STR_LEN, "%s", description); + snprintf(temp->path, PATH_MAX, "%s", filename); + + return PAPI_OK; +} + 
+/* + * find all coretemp information reported by the kernel + * + * Scans base_dir for hwmon* entries and registers every in/temp/fan + * *_input file found (indices 0-31) through insert_in_list(). + * Returns the number of events found, 0 if base_dir cannot be opened, + * or PAPI_ECMP if an event could not be inserted. + */ +static int +generateEventList(char *base_dir) +{ + char path[PATH_MAX],filename[PATH_MAX]; + char modulename[PAPI_MIN_STR_LEN], + location[PAPI_MIN_STR_LEN], + units[PAPI_MIN_STR_LEN], + description[PAPI_MAX_STR_LEN], + name[PAPI_MAX_STR_LEN]; + DIR *dir,*d; + FILE *fff; + int count = 0; + struct dirent *hwmonx; + int i,pathnum; + +#define NUM_PATHS 2 + char paths[NUM_PATHS][PATH_MAX]={ + "device","." + }; + + /* Open "/sys/class/hwmon" */ + dir = opendir(base_dir); + if ( dir == NULL ) { + SUBDBG("Can't find %s, are you sure the coretemp module is loaded?\n", + base_dir); + return 0; + } + + /* Iterate each /sys/class/hwmonX/device directory */ + while( (hwmonx = readdir(dir) ) ) { + if ( !strncmp("hwmon", hwmonx->d_name, 5) ) { + + /* Found a hwmon directory */ + + /* Sometimes the files are in ./, sometimes in device/ */ + for(pathnum=0;pathnum < NUM_PATHS;pathnum++) { + + snprintf(path, PATH_MAX, "%s/%s/%s", + base_dir, hwmonx->d_name,paths[pathnum]); + + SUBDBG("Trying to open %s\n",path); + d = opendir(path); + if (d==NULL) { + continue; + } + + /* Get the name of the module */ + + snprintf(filename, PAPI_MAX_STR_LEN, "%s/name",path); + fff=fopen(filename,"r"); + if (fff==NULL) { + snprintf(modulename, PAPI_MIN_STR_LEN, "Unknown"); + } else { + if (fgets(modulename,PAPI_MIN_STR_LEN,fff)!=NULL) { + modulename[strlen(modulename)-1]='\0'; + } else { + /* nothing readable: never leave modulename uninitialized */ + snprintf(modulename, PAPI_MIN_STR_LEN, "Unknown"); + } + fclose(fff); + } + + SUBDBG("Found module %s\n",modulename); + + /******************************************************/ + /* Try handling all events starting with in (voltage) */ + /******************************************************/ + + + /* arbitrary maximum */ + /* the problem is the numbering can be sparse */ + /* should probably go back to dirent listing */ + + for(i=0;i<32;i++) { + + /* Try looking for a location label */ + snprintf(filename, PAPI_MAX_STR_LEN, "%s/in%d_label", + path,i); + fff=fopen(filename,"r"); + if (fff==NULL) { + strncpy(location,"?",PAPI_MIN_STR_LEN); + } + else { + if 
(fgets(location,PAPI_MIN_STR_LEN,fff)!=NULL) { + location[strlen(location)-1]='\0'; + } else { + /* unreadable label: do not reuse a stale or uninitialized one */ + strncpy(location,"?",PAPI_MIN_STR_LEN); + } + fclose(fff); + } + + /* Look for input voltage */ + snprintf(filename, PAPI_MAX_STR_LEN, "%s/in%d_input", + path,i); + fff=fopen(filename,"r"); + if (fff==NULL) continue; + fclose(fff); + + snprintf(name, PAPI_MAX_STR_LEN, "%s:in%i_input", + hwmonx->d_name, i); + snprintf(units, PAPI_MIN_STR_LEN, "V"); + snprintf(description, PAPI_MAX_STR_LEN, "%s, %s module, label %s", + units,modulename, + location); + + if (insert_in_list(name,units,description,filename)!=PAPI_OK) { + goto done_error; + } + + count++; + + } + + /************************************************************/ + /* Try handling all events starting with temp (temperature) */ + /************************************************************/ + + for(i=0;i<32;i++) { + + /* Try looking for a location label */ + snprintf(filename, PAPI_MAX_STR_LEN, "%s/temp%d_label", + path,i); + fff=fopen(filename,"r"); + if (fff==NULL) { + strncpy(location,"?",PAPI_MIN_STR_LEN); + } + else { + if (fgets(location,PAPI_MIN_STR_LEN,fff)!=NULL) { + location[strlen(location)-1]='\0'; + } else { + /* unreadable label: do not reuse a stale or uninitialized one */ + strncpy(location,"?",PAPI_MIN_STR_LEN); + } + fclose(fff); + } + + /* Look for input temperature */ + snprintf(filename, PAPI_MAX_STR_LEN, "%s/temp%d_input", + path,i); + fff=fopen(filename,"r"); + if (fff==NULL) continue; + fclose(fff); + + snprintf(name, PAPI_MAX_STR_LEN, "%s:temp%i_input", + hwmonx->d_name, i); + snprintf(units, PAPI_MIN_STR_LEN, "degrees C"); + snprintf(description, PAPI_MAX_STR_LEN, "%s, %s module, label %s", + units,modulename, + location); + + if (insert_in_list(name,units,description,filename)!=PAPI_OK) { + goto done_error; + } + + count++; + } + + /************************************************************/ + /* Try handling all events starting with fan (fan) */ + /************************************************************/ + + for(i=0;i<32;i++) { + + /* Try looking for a location label */ + snprintf(filename, PAPI_MAX_STR_LEN, "%s/fan%d_label", + path,i); + 
fff=fopen(filename,"r"); + if (fff==NULL) { + strncpy(location,"?",PAPI_MIN_STR_LEN); + } + else { + if (fgets(location,PAPI_MIN_STR_LEN,fff)!=NULL) { + location[strlen(location)-1]='\0'; + } else { + /* unreadable label: do not reuse a stale or uninitialized one */ + strncpy(location,"?",PAPI_MIN_STR_LEN); + } + fclose(fff); + } + + /* Look for input fan */ + snprintf(filename, PAPI_MAX_STR_LEN, "%s/fan%d_input", + path,i); + fff=fopen(filename,"r"); + if (fff==NULL) continue; + fclose(fff); + + snprintf(name, PAPI_MAX_STR_LEN, "%s:fan%i_input", + hwmonx->d_name, i); + snprintf(units, PAPI_MIN_STR_LEN, "RPM"); + snprintf(description, PAPI_MAX_STR_LEN, "%s, %s module, label %s", + units,modulename, + location); + + if (insert_in_list(name,units,description,filename)!=PAPI_OK) { + goto done_error; + } + + count++; + + } + closedir(d); + } + } + } + + closedir(dir); + return count; + +done_error: + closedir(d); + closedir(dir); + return PAPI_ECMP; +} + +/* Read the current value of native event index from its sysfs file. + * Returns the stored value for events marked "stone", or INVALID_RESULT + * when the file cannot be opened or read. 
+ */ +static long long +getEventValue( int index ) +{ + char buf[PAPI_MAX_STR_LEN]; + FILE* fp; + long long result; /* matches strtoll() and the return type */ + + if (_coretemp_native_events[index].stone) { + return _coretemp_native_events[index].value; + } + + fp = fopen(_coretemp_native_events[index].path, "r"); + if (fp==NULL) { + return INVALID_RESULT; + } + + if (fgets(buf, PAPI_MAX_STR_LEN, fp)==NULL) { + result=INVALID_RESULT; + } + else { + result=strtoll(buf, NULL, 10); + } + fclose(fp); + + return result; +} + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized + */ +static int +_coretemp_init_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + return PAPI_OK; +} + + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +static int +_coretemp_init_component( int cidx ) +{ + int i = 0; + struct temp_event 
*t,*last; + + if ( is_initialized ) + return (PAPI_OK ); + + is_initialized = 1; + + /* This is the preferred method, all coretemp sensors are symlinked here + * see $(kernel_src)/Documentation/hwmon/sysfs-interface */ + + num_events = generateEventList("/sys/class/hwmon"); + + if ( num_events < 0 ) { + strncpy(_coretemp_vector.cmp_info.disabled_reason, + "Cannot open /sys/class/hwmon",PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + + if ( num_events == 0 ) { + strncpy(_coretemp_vector.cmp_info.disabled_reason, + "No coretemp events found",PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + + t = root; + + _coretemp_native_events = (CORETEMP_native_event_entry_t*) + papi_calloc(num_events, sizeof(CORETEMP_native_event_entry_t)); + + /* Without the table nothing below can run: release the temporary list, + * report zero events so later loops over num_events stay off the NULL table */ + if ( _coretemp_native_events == NULL ) { + while ( t != NULL ) { + last = t; + t = t->next; + papi_free(last); + } + root = NULL; + num_events = 0; + strncpy(_coretemp_vector.cmp_info.disabled_reason, + "Could not allocate memory for the event table",PAPI_MAX_STR_LEN); + return PAPI_ENOMEM; + } + + /* Copy every list node into the table, freeing the node once copied */ + do { + strncpy(_coretemp_native_events[i].name,t->name,PAPI_MAX_STR_LEN); + _coretemp_native_events[i].name[PAPI_MAX_STR_LEN-1] = '\0'; + strncpy(_coretemp_native_events[i].path,t->path,PATH_MAX); + _coretemp_native_events[i].path[PATH_MAX-1] = '\0'; + strncpy(_coretemp_native_events[i].units,t->units,PAPI_MIN_STR_LEN); + _coretemp_native_events[i].units[PAPI_MIN_STR_LEN-1] = '\0'; + strncpy(_coretemp_native_events[i].description,t->description,PAPI_MAX_STR_LEN); + _coretemp_native_events[i].description[PAPI_MAX_STR_LEN-1] = '\0'; + _coretemp_native_events[i].stone = 0; + _coretemp_native_events[i].resources.selector = i + 1; + last = t; + t = t->next; + papi_free(last); + i++; + } while (t != NULL); + root = NULL; + + /* Export the total number of events available */ + _coretemp_vector.cmp_info.num_native_events = num_events; + + /* Export the component id */ + _coretemp_vector.cmp_info.CmpIdx = cidx; + + return PAPI_OK; +} + + + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +static int +_coretemp_init_control_state( hwd_control_state_t * ctl) +{ + int i; + + CORETEMP_control_state_t *coretemp_ctl = (CORETEMP_control_state_t *) ctl; + + for ( i=0; i < num_events; i++ ) { + coretemp_ctl->counts[i] = 
getEventValue(i); + } + + /* Set last access time for caching results */ + coretemp_ctl->lastupdate = PAPI_get_real_usec(); + + return PAPI_OK; +} + +static int +_coretemp_start( hwd_context_t *ctx, hwd_control_state_t *ctl) +{ + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + +static int +_coretemp_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long ** events, int flags) +{ + (void) flags; + (void) ctx; + + CORETEMP_control_state_t* control = (CORETEMP_control_state_t*) ctl; + long long now = PAPI_get_real_usec(); + int i; + + /* Only read the values from the kernel if enough time has passed */ + /* since the last read. Otherwise return cached values. */ + + if ( now - control->lastupdate > REFRESH_LAT ) { + for ( i = 0; i < num_events; i++ ) { + control->counts[i] = getEventValue( i ); + } + control->lastupdate = now; + } + + /* Pass back a pointer to our results */ + *events = control->counts; + + return PAPI_OK; +} + +static int +_coretemp_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + /* read values */ + CORETEMP_control_state_t* control = (CORETEMP_control_state_t*) ctl; + int i; + + for ( i = 0; i < num_events; i++ ) { + control->counts[i] = getEventValue( i ); + } + + return PAPI_OK; +} + +/* Shutdown a thread */ +static int +_coretemp_shutdown_thread( hwd_context_t * ctx ) +{ + ( void ) ctx; + return PAPI_OK; +} + + +/* + * Clean up what was setup in coretemp_init_component(). 
+ */ +static int +_coretemp_shutdown_component( ) +{ + if ( is_initialized ) { + is_initialized = 0; + papi_free(_coretemp_native_events); + _coretemp_native_events = NULL; + } + return PAPI_OK; +} + + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +static int +_coretemp_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + ( void ) ctx; + ( void ) code; + ( void ) option; + + return PAPI_OK; +} + + +static int +_coretemp_update_control_state( hwd_control_state_t *ptr, + NativeInfo_t * native, int count, + hwd_context_t * ctx ) +{ + int i, index; + ( void ) ctx; + ( void ) ptr; + + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event; + native[i].ni_position = _coretemp_native_events[index].resources.selector - 1; + } + return PAPI_OK; +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +static int +_coretemp_set_domain( hwd_control_state_t * cntl, int domain ) +{ + (void) cntl; + if ( PAPI_DOM_ALL != domain ) + return PAPI_EINVAL; + + return PAPI_OK; +} + + +static int +_coretemp_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + + +/* + * Native Event functions + */ +static int +_coretemp_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + + int index; + + switch ( modifier ) { + + case PAPI_ENUM_FIRST: + + if (num_events==0) { + return PAPI_ENOEVNT; + } + *EventCode = 0; + + return PAPI_OK; + + + case 
PAPI_ENUM_EVENTS: + + index = *EventCode; + + if ( index < num_events - 1 ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + + default: + return PAPI_EINVAL; + } + return PAPI_EINVAL; +} + +/* + * + */ +static int +_coretemp_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode; + + if ( index >= 0 && index < num_events ) { + strncpy( name, _coretemp_native_events[index].name, len ); + return PAPI_OK; + } + return PAPI_ENOEVNT; +} + +/* + * + */ +static int +_coretemp_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode; + + if ( index >= 0 && index < num_events ) { + strncpy( name, _coretemp_native_events[index].description, len ); + return PAPI_OK; + } + return PAPI_ENOEVNT; +} + +static int +_coretemp_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info) +{ + + int index = EventCode; + + if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT; + + strncpy( info->symbol, _coretemp_native_events[index].name, sizeof(info->symbol)); + strncpy( info->long_descr, _coretemp_native_events[index].description, sizeof(info->long_descr)); + strncpy( info->units, _coretemp_native_events[index].units, sizeof(info->units)); + info->units[sizeof(info->units)-1] = '\0'; + + return PAPI_OK; +} + + + +/* + * + */ +papi_vector_t _coretemp_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "coretemp", + .short_name = "coretemp", + .description = "Linux hwmon temperature and other info", + .version = "4.2.1", + .num_mpx_cntrs = CORETEMP_MAX_COUNTERS, + .num_cntrs = CORETEMP_MAX_COUNTERS, + .default_domain = PAPI_DOM_ALL, + .available_domains = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + 
.fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + } + , + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( CORETEMP_context_t ), + .control_state = sizeof ( CORETEMP_control_state_t ), + .reg_value = sizeof ( CORETEMP_register_t ), + .reg_alloc = sizeof ( CORETEMP_reg_alloc_t ), + } + , + /* function pointers in this component */ + .init_thread = _coretemp_init_thread, + .init_component = _coretemp_init_component, + .init_control_state = _coretemp_init_control_state, + .start = _coretemp_start, + .stop = _coretemp_stop, + .read = _coretemp_read, + .shutdown_thread = _coretemp_shutdown_thread, + .shutdown_component = _coretemp_shutdown_component, + .ctl = _coretemp_ctl, + + .update_control_state = _coretemp_update_control_state, + .set_domain = _coretemp_set_domain, + .reset = _coretemp_reset, + + .ntv_enum_events = _coretemp_ntv_enum_events, + .ntv_code_to_name = _coretemp_ntv_code_to_name, + .ntv_code_to_descr = _coretemp_ntv_code_to_descr, + .ntv_code_to_info = _coretemp_ntv_code_to_info, +}; diff --git a/src/components/coretemp/linux-coretemp.h b/src/components/coretemp/linux-coretemp.h new file mode 100644 index 0000000..475cc91 --- /dev/null +++ b/src/components/coretemp/linux-coretemp.h @@ -0,0 +1,89 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-coretemp.h + * CVS: $Id$ + * @author James Ralph + * ralph@eecs.utk.edu + * + * @ingroup papi_components + * + * @brief coretemp component + * This file has the source code for a component that enables PAPI-C to access + * hardware monitoring sensors through the coretemp sysfs interface. This code + * will dynamically create a native events table for all the sensors that can + * be found under /sys/class/hwmon/hwmon[0-9]+. + * + * Notes: + * - Based heavily upon the lm-sensors component by Heike Jagode. 
+ */ + +#ifndef _PAPI_CORETEMP_H +#define _PAPI_CORETEMP_H + +#include +#include + + + +/************************* DEFINES SECTION *********************************** + *******************************************************************************/ +/* this number assumes that there will never be more events than indicated */ +#define CORETEMP_MAX_COUNTERS 512 + +/** Structure that stores private information of each event */ +typedef struct CORETEMP_register +{ + /* This is used by the framework. It likes it to be !=0 to do something */ + unsigned int selector; + /* These are the only information needed to locate a libsensors event */ + int subfeat_nr; +} CORETEMP_register_t; + +/* + * The following structures mimic the ones used by other components. It is more + * convenient to use them like that as programming with PAPI makes specific + * assumptions for them. + */ + + + +/** This structure is used to build the table of events */ +typedef struct CORETEMP_native_event_entry +{ + char name[PAPI_MAX_STR_LEN]; + char units[PAPI_MIN_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + char path[PATH_MAX]; + int stone; /* some counters are set in stone, a max temperature is just that... 
*/ + long value; + CORETEMP_register_t resources; +} CORETEMP_native_event_entry_t; + +typedef struct CORETEMP_reg_alloc +{ + CORETEMP_register_t ra_bits; +} CORETEMP_reg_alloc_t; + + +typedef struct CORETEMP_control_state +{ + long long counts[CORETEMP_MAX_COUNTERS]; // used for caching + long long lastupdate; +} CORETEMP_control_state_t; + + +typedef struct CORETEMP_context +{ + CORETEMP_control_state_t state; +} CORETEMP_context_t; + + + +/************************* GLOBALS SECTION *********************************** + *******************************************************************************/ + + +#endif /* _PAPI_CORETEMP_H */ diff --git a/src/components/coretemp/tests/Makefile b/src/components/coretemp/tests/Makefile new file mode 100644 index 0000000..37723b2 --- /dev/null +++ b/src/components/coretemp/tests/Makefile @@ -0,0 +1,23 @@ +NAME=coretemp +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = coretemp_basic coretemp_pretty + +coretemp_tests: $(TESTS) + +coretemp_basic: coretemp_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o coretemp_basic coretemp_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +coretemp_pretty: coretemp_pretty.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o coretemp_pretty coretemp_pretty.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o + + + + + diff --git a/src/components/coretemp/tests/coretemp_basic.c b/src/components/coretemp/tests/coretemp_basic.c new file mode 100644 index 0000000..544f6b7 --- /dev/null +++ b/src/components/coretemp/tests/coretemp_basic.c @@ -0,0 +1,142 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Vince Weaver + * + * test case for coretemp component + * + * + * @brief + * Tests basic coretemp functionality + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 + +int main (int argc, 
char **argv) +{ + + int retval,cid,numcmp,coretemp_cid=-1; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int total_events=0; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Trying all coretemp events\n"); + } + + numcmp = PAPI_num_components(); + + /* Locate the coretemp component among all registered components */ + for(cid=0; cid < numcmp; cid++) { + + if ( (cmpinfo = PAPI_get_component_info(cid)) == NULL) { + test_fail(__FILE__, __LINE__,"PAPI_get_component_info failed\n", 0); + } + + if (strstr(cmpinfo->name,"coretemp")) { + coretemp_cid=cid; + if (!TESTS_QUIET) { + printf("Found coretemp component at cid %d\n", coretemp_cid); + } + + if (cmpinfo->disabled) { + if (!TESTS_QUIET) fprintf(stderr,"Coretemp component disabled: %s\n", + cmpinfo->disabled_reason); + test_skip(__FILE__, __LINE__, + "Component disabled\n", 0); + } + } + } + + if (coretemp_cid==-1) { + test_skip(__FILE__,__LINE__,"No coretemp component found",0); + } + + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, coretemp_cid ); + + /* Start and stop a one-event set for every native event the component exposes */ + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + if (!TESTS_QUIET) printf("%s ",event_name); + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()",retval); + 
} + + if (!TESTS_QUIET) printf(" value: %lld\n",values[0]); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_cleanup_eventset()",retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_destroy_eventset()",retval); + } + + total_events++; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, coretemp_cid ); + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No coretemp events found",0); + } + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/components/coretemp/tests/coretemp_pretty.c b/src/components/coretemp/tests/coretemp_pretty.c new file mode 100644 index 0000000..0ed6f13 --- /dev/null +++ b/src/components/coretemp/tests/coretemp_pretty.c @@ -0,0 +1,267 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Vince Weaver + * + * test case that displays "pretty" coretemp output + * + * @brief + * Shows "pretty" coretemp output + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 + +int main (int argc, char **argv) +{ + + int retval,cid,coretemp_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + PAPI_event_info_t evinfo; + double temperature; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Trying all coretemp events\n"); + } + + numcmp = PAPI_num_components(); + + /* Locate the coretemp component among all registered components */ + for(cid=0; cid < numcmp; cid++) { + + if ( (cmpinfo = PAPI_get_component_info(cid)) == NULL) { + test_fail(__FILE__, __LINE__,"PAPI_get_component_info failed\n", 0); + } + + if (strstr(cmpinfo->name,"coretemp")) { + coretemp_cid=cid; + if (!TESTS_QUIET) printf("Found coretemp component at cid %d\n", + coretemp_cid); + if (cmpinfo->disabled) { + 
if (!TESTS_QUIET) fprintf(stderr,"Coretemp component disabled: %s\n", + cmpinfo->disabled_reason); + test_skip(__FILE__, __LINE__, + "Component disabled\n", 0); + } + if (cmpinfo->num_native_events==0) { + test_skip(__FILE__,__LINE__,"No coretemp events found",0); + } + break; + } + } + + + + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, coretemp_cid ); + + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + retval = PAPI_get_event_info(code,&evinfo); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, + "Error getting event info\n",retval); + } + + /****************************/ + /* Print Temperature Inputs */ + /****************************/ + if (strstr(event_name,"temp")) { + + /* Only print inputs */ + if (strstr(event_name,"_input")) { + + if (!TESTS_QUIET) printf("%s ",event_name); + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()",retval); + } + + temperature=(values[0]/1000.0); + + if (!TESTS_QUIET) printf("\tvalue: %.2lf %s\n", + temperature, + evinfo.long_descr + ); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_cleanup_eventset()",retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + 
"PAPI_destroy_eventset()",retval); + } + } + } + + /****************************/ + /* Print Voltage Inputs */ + /****************************/ + /* the component names voltage events "hwmonN:inM_input", so match ":in" */ + if (strstr(event_name,":in")) { + + /* Only print inputs */ + if (strstr(event_name,"_input")) { + + if (!TESTS_QUIET) printf("%s ",event_name); + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()",retval); + } + + temperature=(values[0]/1000.0); + + if (!TESTS_QUIET) printf("\tvalue: %.2lf %s\n", + temperature, + evinfo.long_descr + ); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_cleanup_eventset()",retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_destroy_eventset()",retval); + } + } + } + /********************/ + /* Print Fan Inputs */ + /********************/ + else if (strstr(event_name,"fan")) { + + /* Only print inputs */ + if (strstr(event_name,"_input")) { + + if (!TESTS_QUIET) printf("%s ",event_name); + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_stop( EventSet, values); + 
if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()",retval); + } + + if (!TESTS_QUIET) printf("\tvalue: %lld %s\n",values[0], + evinfo.long_descr); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_cleanup_eventset()",retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_destroy_eventset()",retval); + } + } + + } + else { + /* Skip unknown */ + } + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, coretemp_cid ); + } + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/components/coretemp_freebsd/README b/src/components/coretemp_freebsd/README new file mode 100644 index 0000000..1a59f87 --- /dev/null +++ b/src/components/coretemp_freebsd/README @@ -0,0 +1,17 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Dan Terpstra +* terpstra@icl.utk.edu +* @defgroup papi_components Components +* @brief Component Specific Readme file: CoreTemp FreeBSD +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +coretemp_freebsd/ +This component is intended to access CPU On-Die Thermal Sensors in the Intel Core architecture in a FreeBSD machine using the coretemp.ko kernel module. The returned values represent Kelvin degrees. 
+ +*/ \ No newline at end of file diff --git a/src/components/coretemp_freebsd/Rules.coretemp_freebsd b/src/components/coretemp_freebsd/Rules.coretemp_freebsd new file mode 100644 index 0000000..42d7aae --- /dev/null +++ b/src/components/coretemp_freebsd/Rules.coretemp_freebsd @@ -0,0 +1,7 @@ +# $Id$ + +COMPSRCS += components/coretemp_freebsd/coretemp_freebsd.c +COMPOBJS += coretemp_freebsd.o + +coretemp_freebsd.o: components/coretemp_freebsd/coretemp_freebsd.c components/coretemp_freebsd/coretemp_freebsd.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/coretemp_freebsd/coretemp_freebsd.c -o coretemp_freebsd.o diff --git a/src/components/coretemp_freebsd/coretemp_freebsd.c b/src/components/coretemp_freebsd/coretemp_freebsd.c new file mode 100644 index 0000000..3902d01 --- /dev/null +++ b/src/components/coretemp_freebsd/coretemp_freebsd.c @@ -0,0 +1,488 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file coretemp_freebsd.c + * @author Joachim Protze + * joachim.protze@zih.tu-dresden.de + * @author Vince Weaver + * vweaver1@eecs.utk.edu + * @author Harald Servat + * harald.servat@gmail.com + * + * @ingroup papi_components + * + * @brief + * This component is intended to access CPU On-Die Thermal Sensors in + * the Intel Core architecture in a FreeBSD machine using the coretemp.ko + * kernel module. + */ + +#include +#include +#include + +#include +#include +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#define CORETEMP_MAX_COUNTERS 32 /* Can we tune this dynamically? 
*/ +#define TRUE (1==1) +#define FALSE (1!=1) +/* Fully parenthesized so the expansion is safe for any argument expression */ +#define UNREFERENCED(x) ((void)(x)) + +/* Structure that stores private information for each event */ +typedef struct coretemp_register +{ + int mib[4]; + /* Access to registers through these MIBs + sysctl (3) call */ + + unsigned int selector; + /**< Signifies which counter slot is being used */ + /**< Indexed from 1 as 0 has a special meaning */ +} coretemp_register_t; + +/** This structure is used to build the table of events */ +typedef struct coretemp_native_event_entry +{ + coretemp_register_t resources; /**< Per counter resources */ + char name[PAPI_MAX_STR_LEN]; /**< Name of the counter */ + char description[PAPI_MAX_STR_LEN]; /**< Description of the counter */ +} coretemp_native_event_entry_t; + +/* This structure is used when doing register allocation + it possibly is not necessary when there are no + register constraints */ +typedef struct coretemp_reg_alloc +{ + coretemp_register_t ra_bits; +} coretemp_reg_alloc_t; + +/* Holds control flags, usually out-of-band configuration of the hardware */ +typedef struct coretemp_control_state +{ + int added[CORETEMP_MAX_COUNTERS]; /**< TRUE for every slot whose event is part of the eventset */ + long_long counters[CORETEMP_MAX_COUNTERS]; /**< Copy of counts, used for caching */ +} coretemp_control_state_t; + +/* Holds per-thread information */ +typedef struct coretemp_context +{ + coretemp_control_state_t state; +} coretemp_context_t; + +/** This table contains the native events */ +static coretemp_native_event_entry_t *coretemp_native_table; + +/** number of events in the table*/ +static int CORETEMP_NUM_EVENTS = 0; + + +/********************************************************************/ +/* Below are the functions required by the PAPI component interface */ +/********************************************************************/ + +/** This is called whenever a thread is initialized */ +int coretemp_init_thread (hwd_context_t * ctx) +{ + int mib[4]; + size_t len; + UNREFERENCED(ctx); + + SUBDBG("coretemp_init_thread %p...\n", 
ctx); + +#if 0 + /* what does this do? VMW */ + + len = 4; + if (sysctlnametomib ("dev.coretemp.0.%driver", mib, &len) == -1) + return PAPI_ECMP; +#endif + + return PAPI_OK; +} + + +/** Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + * + * Counts the dev.coretemp.N sysctl nodes (at most CORETEMP_MAX_COUNTERS, + * the size of the per-eventset arrays) and builds one native event per + * sensor, caching the MIB of dev.cpu.N.temperature for later reads. + * + * @return PAPI_OK on success (also when no sensor is found), + * PAPI_ENOMEM if the event table cannot be allocated, + * PAPI_ECMP if a temperature sysctl name cannot be resolved. + */ +int coretemp_init_component () +{ + int ret; + int i; + int mib[4]; + size_t len; + char tmp[128]; + + SUBDBG("coretemp_init_component...\n"); + + /* Count the number of cores (counters) that have sensors allocated. + Stop at CORETEMP_MAX_COUNTERS: the control state cannot hold more. */ + i = 0; + CORETEMP_NUM_EVENTS = 0; + snprintf (tmp, sizeof(tmp), "dev.coretemp.%d.%%driver", i); + len = 4; + ret = sysctlnametomib (tmp, mib, &len); + while (ret != -1 && CORETEMP_NUM_EVENTS < CORETEMP_MAX_COUNTERS) + { + CORETEMP_NUM_EVENTS++; + i++; + snprintf (tmp, sizeof(tmp), "dev.coretemp.%d.%%driver", i); + len = 4; + ret = sysctlnametomib (tmp, mib, &len); + } + + if (CORETEMP_NUM_EVENTS == 0) + return PAPI_OK; + + /* Allocate memory for the our event table */ + coretemp_native_table = (coretemp_native_event_entry_t *) + papi_malloc (sizeof (coretemp_native_event_entry_t) * CORETEMP_NUM_EVENTS); + if (coretemp_native_table == NULL) + { + perror( "malloc():Could not get memory for coretemp events table" ); + /* No table: make sure nobody indexes it through a stale count */ + CORETEMP_NUM_EVENTS = 0; + return PAPI_ENOMEM; + } + + /* Allocate native events internal structures */ + for (i = 0; i < CORETEMP_NUM_EVENTS; i++) + { + /* Event name */ + snprintf (coretemp_native_table[i].name, sizeof(coretemp_native_table[i].name), "CORETEMP_CPU_%d", i); + + /* Event description */ + snprintf (coretemp_native_table[i].description, sizeof(coretemp_native_table[i].description), "CPU On-Die Thermal Sensor #%d", i); + + /* Event extra bits -> save MIB to faster access later */ + snprintf (tmp, sizeof(tmp), "dev.cpu.%d.temperature", i); + len = 4; + if (sysctlnametomib (tmp, coretemp_native_table[i].resources.mib, &len) == -1) + { + /* Release the half-built table instead of leaking it */ + papi_free (coretemp_native_table); + coretemp_native_table = NULL; + CORETEMP_NUM_EVENTS = 0; + return PAPI_ECMP; + } + + coretemp_native_table[i].resources.selector = i+1; + } + + return PAPI_OK; +} + + +/** Setup the counter control structure */ +int coretemp_init_control_state (hwd_control_state_t * ctrl) +{ + int i; + + 
SUBDBG("coretemp_init_control_state... %p\n", ctrl); + coretemp_control_state_t *c = (coretemp_control_state_t *) ctrl; + + for (i = 0; i < CORETEMP_MAX_COUNTERS; i++) + c->added[i] = FALSE; + + return PAPI_OK; +} + + +/** Enumerate Native Events + @param EventCode is the event of interest + @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS + @return PAPI_OK when *EventCode was set to a valid event, + PAPI_ENOEVNT when there is no (further) event, + PAPI_EINVAL for any other modifier +*/ +int coretemp_ntv_enum_events (unsigned int *EventCode, int modifier) +{ + + switch ( modifier ) + { + /* return EventCode of first event */ + case PAPI_ENUM_FIRST: + /* An empty table has no first event to hand out */ + if ( CORETEMP_NUM_EVENTS == 0 ) + return PAPI_ENOEVNT; + *EventCode = 0; + return PAPI_OK; + + /* return EventCode of the event following the passed-in one */ + case PAPI_ENUM_EVENTS: + /* Compare as unsigned so a huge code cannot turn into a negative index */ + if ( CORETEMP_NUM_EVENTS > 0 && + *EventCode < (unsigned int) ( CORETEMP_NUM_EVENTS - 1 ) ) + { + *EventCode = *EventCode + 1; + return PAPI_OK; + } + return PAPI_ENOEVNT; + + default: + return PAPI_EINVAL; + } + + return PAPI_EINVAL; +} + +/** Takes a native event code and passes back the name + @param EventCode is the native event code + @param name is a pointer for the name to be copied to + @param len is the size of the string + @return PAPI_OK, PAPI_EINVAL for a missing/empty buffer, + PAPI_ENOEVNT when EventCode is not in the event table + */ +int coretemp_ntv_code_to_name (unsigned int EventCode, char *name, int len) +{ + if ( name == NULL || len <= 0 ) + return PAPI_EINVAL; + + /* Reject codes outside the table (or a table that was never built) */ + if ( coretemp_native_table == NULL || + EventCode >= (unsigned int) CORETEMP_NUM_EVENTS ) + return PAPI_ENOEVNT; + + strncpy( name, coretemp_native_table[EventCode].name, len ); + /* strncpy does not terminate when the source fills the buffer */ + name[len - 1] = '\0'; + + return PAPI_OK; +} + +/** Takes a native event code and passes back the event description + @param EventCode is the native event code + @param name is a pointer for the description to be copied to + @param len is the size of the string + @return PAPI_OK, PAPI_EINVAL for a missing/empty buffer, + PAPI_ENOEVNT when EventCode is not in the event table + */ +int coretemp_ntv_code_to_descr (unsigned int EventCode, char *name, int len) +{ + if ( name == NULL || len <= 0 ) + return PAPI_EINVAL; + + /* Reject codes outside the table (or a table that was never built) */ + if ( coretemp_native_table == NULL || + EventCode >= (unsigned int) CORETEMP_NUM_EVENTS ) + return PAPI_ENOEVNT; + + strncpy( name, coretemp_native_table[EventCode].description, len ); + /* strncpy does not terminate when the source fills the buffer */ + name[len - 1] = '\0'; + + return PAPI_OK; +} + +/** This takes an event and returns the bits that would be written + out to the hardware device (this is very much tied to CPU-type support */ +int coretemp_ntv_code_to_bits (unsigned int EventCode, hwd_register_t * bits) +{ + UNREFERENCED(EventCode); + UNREFERENCED(bits); + + return PAPI_OK; +} + +/** 
Triggered by eventset operations like add or remove */ +int coretemp_update_control_state( hwd_control_state_t * ptr, + NativeInfo_t * native, int count, hwd_context_t * ctx ) +{ + int i, index; + coretemp_control_state_t *c = (coretemp_control_state_t *) ptr; + UNREFERENCED(ctx); + + SUBDBG("coretemp_update_control_state %p %p...\n", ptr, ctx); + + for (i = 0; i < count; i++) + { + index = native[i].ni_event; + native[i].ni_position = coretemp_native_table[index].resources.selector - 1; + c->added[native[i].ni_position] = TRUE; + + SUBDBG ("\nnative[%i].ni_position = coretemp_native_table[%i].resources.selector-1 = %i;\n", + i, index, native[i].ni_position ); + } + + return PAPI_OK; +} + +/** Triggered by PAPI_start() */ +int coretemp_start (hwd_context_t * ctx, hwd_control_state_t * ctrl) +{ + UNREFERENCED(ctx); + UNREFERENCED(ctrl); + + SUBDBG( "coretemp_start %p %p...\n", ctx, ctrl ); + + /* Nothing to be done */ + + return PAPI_OK; +} + + +/** Triggered by PAPI_stop() */ +int coretemp_stop (hwd_context_t * ctx, hwd_control_state_t * ctrl) +{ + UNREFERENCED(ctx); + UNREFERENCED(ctrl); + + SUBDBG("coretemp_stop %p %p...\n", ctx, ctrl); + + /* Nothing to be done */ + + return PAPI_OK; +} + + +/** Triggered by PAPI_read() */ +int coretemp_read (hwd_context_t * ctx, hwd_control_state_t * ctrl, + long_long ** events, int flags) +{ + int i; + coretemp_control_state_t *c = (coretemp_control_state_t *) ctrl; + UNREFERENCED(ctx); + UNREFERENCED(flags); + + SUBDBG("coretemp_read... %p %d\n", ctx, flags); + + for (i = 0; i < CORETEMP_MAX_COUNTERS; i++) + if (c->added[i]) + { + int tmp; + size_t len = sizeof(tmp); + + if (sysctl (coretemp_native_table[i].resources.mib, 4, &tmp, &len, NULL, 0) == -1) + c->counters[i] = 0; + else + c->counters[i] = tmp/10; + /* Coretemp module returns temperature in tenths of kelvin + Kelvin are useful to avoid negative values... but will have + negative temperatures ??? 
*/ + } + + *events = c->counters; + + return PAPI_OK; +} + +/** Triggered by PAPI_write(), but only if the counters are running */ +/* otherwise, the updated state is written to ESI->hw_start */ +int coretemp_write (hwd_context_t * ctx, hwd_control_state_t * ctrl, + long_long events[] ) +{ + UNREFERENCED(ctx); + UNREFERENCED(events); + UNREFERENCED(ctrl); + + SUBDBG("coretemp_write... %p %p\n", ctx, ctrl); + + /* These sensor counters cannot be written */ + + return PAPI_OK; +} + + +/** Triggered by PAPI_reset */ +int coretemp_reset(hwd_context_t * ctx, hwd_control_state_t * ctrl) +{ + UNREFERENCED(ctx); + UNREFERENCED(ctrl); + + SUBDBG("coretemp_reset ctx=%p ctrl=%p...\n", ctx, ctrl); + + /* These sensors cannot be reset */ + + return PAPI_OK; +} + +/** Triggered by PAPI_shutdown(). + Releases the native event table and clears the event count so that no + stale pointer or count survives the shutdown. Always returns PAPI_OK. */ +int coretemp_shutdown_component (void) +{ + + /* The format string must not contain a conversion without an argument */ + SUBDBG( "coretemp_shutdown_component...\n" ); + + /* Last chance to clean up */ + if (coretemp_native_table != NULL) + { + papi_free (coretemp_native_table); + coretemp_native_table = NULL; + } + CORETEMP_NUM_EVENTS = 0; + + return PAPI_OK; +} + + + +/** This function sets various options in the component + @param ctx unused + @param code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT + @param option unused + */ +int coretemp_ctl (hwd_context_t * ctx, int code, _papi_int_option_t * option) +{ + UNREFERENCED(ctx); + UNREFERENCED(code); + UNREFERENCED(option); + + SUBDBG( "coretemp_ctl... %p %d %p\n", ctx, code, option ); + + /* FIXME. This should maybe set up more state, such as which counters are active and */ + /* counter mappings. 
*/ + + return PAPI_OK; +} + +/** This function has to set the bits needed to count different domains + In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + By default return PAPI_EINVAL if none of those are specified + and PAPI_OK with success + PAPI_DOM_USER is only user context is counted + PAPI_DOM_KERNEL is only the Kernel/OS context is counted + PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + PAPI_DOM_ALL is all of the domains + */ +int coretemp_set_domain (hwd_control_state_t * cntrl, int domain) +{ + UNREFERENCED(cntrl); + + SUBDBG ("coretemp_set_domain... %p %d\n", cntrl, domain); + + /* Accepted as soon as the request shares any bit with PAPI_DOM_ALL; + nothing is stored, the control state is not touched */ + if (PAPI_DOM_ALL & domain) + { + SUBDBG( " PAPI_DOM_ALL \n" ); + return PAPI_OK; + } + return PAPI_EINVAL ; + +} + + +/** Vector that points to entry points for our component */ +papi_vector_t _coretemp_freebsd_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "coretemp_freebsd", + .short_name = "coretemp", + .version = "5.0", + /* both counter limits use the same constant that sizes the + added[] / counters[] arrays of the control state */ + .num_mpx_cntrs = CORETEMP_MAX_COUNTERS, + .num_cntrs = CORETEMP_MAX_COUNTERS, + .default_domain = PAPI_DOM_ALL, + .available_domains = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + } + , + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( coretemp_context_t ), + .control_state = sizeof ( coretemp_control_state_t ), + .reg_value = sizeof ( coretemp_register_t ), + .reg_alloc = sizeof ( coretemp_reg_alloc_t ), + } + , + /* function pointers in this component */ + .init_thread = coretemp_init_thread, + .init_component = coretemp_init_component, + .init_control_state = coretemp_init_control_state, + .start = coretemp_start, + .stop = coretemp_stop, + .read = coretemp_read, + 
.write = coretemp_write, + .shutdown_component = coretemp_shutdown_component, + .ctl = coretemp_ctl, + + .update_control_state = coretemp_update_control_state, + .set_domain = coretemp_set_domain, + .reset = coretemp_reset, + + .ntv_enum_events = coretemp_ntv_enum_events, + .ntv_code_to_name = coretemp_ntv_code_to_name, + .ntv_code_to_descr = coretemp_ntv_code_to_descr, + .ntv_code_to_bits = coretemp_ntv_code_to_bits, +}; + diff --git a/src/components/coretemp_freebsd/coretemp_freebsd.h b/src/components/coretemp_freebsd/coretemp_freebsd.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/components/coretemp_freebsd/coretemp_freebsd.h diff --git a/src/components/cuda/README b/src/components/cuda/README new file mode 100644 index 0000000..3f3b655 --- /dev/null +++ b/src/components/cuda/README @@ -0,0 +1,87 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Asim YarKhan yarkhan@icl.utk.edu +* @author: Heike McCraw mccraw@icl.utk.edu +* @defgroup papi_components Components +* @brief Component Specific Readme file: CUDA +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +cuda/ + +CUDA component update: Support for CUPTI metrics (early release) + + +Known problems and limitations in early release of metric support +----------------------------------------------------------------- + +* Only sets of metrics and events that can be gathered in a single + pass are supported. Transparent multi-pass support is expected +* All metrics are returned as long long integers, which means that + CUPTI double precision values will be truncated, possibly severely. +* The NVLink metrics have been disabled for this alpha release. + + +General information +------------------- + +The PAPI CUDA component is a hardware performance counter +measurement technology for the NVIDIA CUDA platform which provides +access to the hardware counters inside the GPU. PAPI CUDA is based on +CUPTI support in the NVIDIA driver library. 
In any environment where +the CUPTI-enabled driver is installed, the PAPI CUDA component should +be able to provide detailed performance counter information regarding +events on the GPU kernels. + +NOTE: When adding CUDA related events or metrics to the CUDA +component, each event can be added within a user-specified CUDA +context. If the event is outside its context or in no context, a +default CUDA context will be created for the event. + +NOTE: In order to disable and destroy the CUDA eventGroup properly, +the user has to call PAPI_cleanup_eventset( EventSet ) before calling +PAPI_shutdown() in the application. This is important since it also +frees the performance monitoring hardware on the GPU. + + +How to install PAPI with the CUDA component? +-------------------------------------------- + +This PAPI CUDA component has been developed and tested using CUDA +version 8.0 and the associated CUPTI library. CUPTI is released with +the CUDA Tools SDK. + +This component uses the CUDA location from the environment (or looks +for /opt/cuda by default). Please set CUDA_DIR and CUPTI_DIR during +builds so that the component can find the required header files. + +Configure PAPI with CUDA enabled. + % cd src + % ./configure --prefix=some_location --with-components="cuda" + +Build with CUDA_DIR and CUPTI_DIR specified + % export CUDA_DIR=/opt/cuda + % export CUPTI_DIR=/opt/cuda/extras/CUPTI + % make + +Testing the component requires that libraries for PAPI, CUDA, CUPTI +can be found or are statically linked into the executable. You may +need to add the library directories as shown here. + + % export LD_LIBRARY_PATH=${CUDA_DIR}/lib64:${CUPTI_DIR}/lib64:${LD_LIBRARY_PATH} + +Test by running from the src directory + % ./components/cuda/tests/simpleMultiGPU + +For general information on how to create and run components, the user +is referred to the INSTALL.txt section "CREATING AND RUNNING +COMPONENTS". + +To find a list of CUDA supported events. 
+ % utils/papi_native_avail | grep CUDA + +*/ diff --git a/src/components/cuda/Rules.cuda b/src/components/cuda/Rules.cuda new file mode 100644 index 0000000..ad08063 --- /dev/null +++ b/src/components/cuda/Rules.cuda @@ -0,0 +1,20 @@ +# $Id$ + +CUDA_DIR ?= /opt/cuda +CUPTI_DIR ?= $(CUDA_DIR)/extras/CUPTI + +COMPSRCS += components/cuda/linux-cuda.c +COMPOBJS += linux-cuda.o +CFLAGS += -I$(CUDA_DIR)/include -I$(CUPTI_DIR)/include -g +LDFLAGS += -L$(CUPTI_DIR)/lib64 -lcupti $(LDL) -Wl,-rpath=$(CUPTI_DIR)/lib64 -g + +linux-cuda.o: components/cuda/linux-cuda.c $(HEADERS) cuda_sampling + $(CC) -E $(LIBCFLAGS) $(OPTFLAGS) -c components/cuda/linux-cuda.c -o linux-cuda.pre + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/cuda/linux-cuda.c -o linux-cuda.o + +cuda_sampling: + cd components/cuda/sampling; $(MAKE); + +native_clean: + cd components/cuda/sampling; $(MAKE) clean + diff --git a/src/components/cuda/linux-cuda.c b/src/components/cuda/linux-cuda.c new file mode 100644 index 0000000..11a13db --- /dev/null +++ b/src/components/cuda/linux-cuda.c @@ -0,0 +1,1312 @@ +/** + * @file linux-cuda.c + * @author Asim YarKhan yarkhan@icl.utk.edu (updated in 2017 to support CUDA metrics) + * @author Asim YarKhan yarkhan@icl.utk.edu (updated in 2015 for multiple CUDA contexts/devices) + * @author Heike Jagode (First version, in collaboration with Robert Dietrich, TU Dresden) jagode@icl.utk.edu + * + * @ingroup papi_components + * + * @brief This implements a PAPI component that enables PAPI-C to + * access hardware monitoring counters for NVIDIA CUDA GPU devices + * through the CUPTI library. + * + * The open source software license for PAPI conforms to the BSD + * License template. 
+ */ + +#include +#include +#include + +#include "papi.h" +#include "papi_memory.h" +#include "papi_internal.h" +#include "papi_vector.h" + +/* this number assumes that there will never be more events than indicated; + it is the length of the fixed-size arrays in the structures below */ +#define PAPICUDA_MAX_COUNTERS 512 + +// #define PAPICUDA_KERNEL_REPLAY_MODE + +/* Contains device list, pointer to device description, and the list of available events */ +typedef struct papicuda_context { + int deviceCount; + struct papicuda_device_desc *deviceArray; + uint32_t availEventSize; + CUpti_ActivityKind *availEventKind; + int *availEventDeviceNum; + uint32_t *availEventIDArray; + uint32_t *availEventIsBeingMeasuredInEventset; + struct papicuda_name_desc *availEventDesc; +} papicuda_context_t; + +/* Store the name and description for an event */ +typedef struct papicuda_name_desc { + char name[PAPI_MAX_STR_LEN]; + char description[PAPI_2MAX_STR_LEN]; +} papicuda_name_desc_t; + +/* For a device, store device description */ +typedef struct papicuda_device_desc { + CUdevice cuDev; + int deviceNum; + char deviceName[PAPI_MIN_STR_LEN]; + uint32_t maxDomains; /* number of domains per device */ + CUpti_EventDomainID *domainIDArray; /* Array[maxDomains] of domain IDs */ + uint32_t *domainIDNumEvents; /* Array[maxDomains] of num of events in that domain */ +} papicuda_device_desc_t; + +/* Control structure tracks array of active contexts, records active events and their values */ +typedef struct papicuda_control { + uint32_t countOfActiveCUContexts; + struct papicuda_active_cucontext_s *arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]; + uint32_t activeEventCount; + int activeEventIndex[PAPICUDA_MAX_COUNTERS]; + long long activeEventValues[PAPICUDA_MAX_COUNTERS]; + uint64_t cuptiStartTimestampNs; + uint64_t cuptiReadTimestampNs; +} papicuda_control_t; + +/* For each active context, which CUDA events are being measured, context eventgroups containing events */ +typedef struct papicuda_active_cucontext_s { + CUcontext cuCtx; + int deviceNum; + uint32_t 
conMetricsCount; + CUpti_EventID conMetrics[PAPICUDA_MAX_COUNTERS]; + CUpti_MetricValue conMetricValues[PAPICUDA_MAX_COUNTERS]; + uint32_t conEventsCount; + CUpti_EventID conEvents[PAPICUDA_MAX_COUNTERS]; + uint64_t conEventValues[PAPICUDA_MAX_COUNTERS]; + CUpti_EventGroupSets *eventGroupPasses; +} papicuda_active_cucontext_t; + +// file handles used to access cuda libraries with dlopen +static void *dl1 = NULL; +static void *dl2 = NULL; +static void *dl3 = NULL; + +/* The PAPI side (external) variable as a global */ +papi_vector_t _cuda_vector; + +/* Global variable for hardware description, event and metric lists */ +static papicuda_context_t *global_papicuda_context = NULL; + +/* This global variable points to the head of the control state list */ +static papicuda_control_t *global_papicuda_control = NULL; + +/* Macros for error checking... each arg is only referenced/evaluated once */ +#define CHECK_PRINT_EVAL( checkcond, str, evalthis ) \ + do { \ + int _cond = (checkcond); \ + if (_cond) { \ + SUBDBG("error: condition %s failed: %s.\n", #checkcond, str); \ + evalthis; \ + } \ + } while (0) + +#define CUDA_CALL( call, handleerror ) \ + do { \ + cudaError_t _status = (call); \ + if (_status != cudaSuccess) { \ + SUBDBG("error: function %s failed with error %d.\n", #call, _status); \ + handleerror; \ + } \ + } while (0) + +#define CU_CALL( call, handleerror ) \ + do { \ + CUresult _status = (call); \ + if (_status != CUDA_SUCCESS) { \ + SUBDBG("error: function %s failed with error %d.\n", #call, _status); \ + handleerror; \ + } \ + } while (0) + + +#define CUPTI_CALL(call, handleerror) \ + do { \ + CUptiResult _status = (call); \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + (*cuptiGetResultStringPtr)(_status, &errstr); \ + SUBDBG("error: function %s failed with error %s.\n", #call, errstr); \ + handleerror; \ + } \ + } while (0) + +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define ALIGN_BUFFER(buffer, align) \ + (((uintptr_t) 
(buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer)) + +/* Function prototypes */ +static int papicuda_cleanup_eventset(hwd_control_state_t * ctrl); + +/* ****** CHANGE PROTOTYPES TO DECLARE CUDA LIBRARY SYMBOLS AS WEAK ********** + * This is done so that a version of PAPI built with the cuda component can * + * be installed on a system which does not have the cuda libraries installed. * + * * + * If this is done without these prototypes, then all papi services on the * + * system without the cuda libraries installed will fail. The PAPI libraries * + * contain references to the cuda libraries which are not installed. The * + * load of PAPI commands fails because the cuda library references can not be * + * resolved. * + * * + * This also defines pointers to the cuda library functions that we call. * + * These function pointers will be resolved with dlopen/dlsym calls at * + * component initialization time. The component then calls the cuda library * + * functions through these function pointers. 
* + *******************************************************************************/ +void (*_dl_non_dynamic_init) (void) __attribute__ ((weak)); + +#define CUAPIWEAK __attribute__( ( weak ) ) +#define DECLARECUFUNC(funcname, funcsig) CUresult CUAPIWEAK funcname funcsig; CUresult( *funcname##Ptr ) funcsig; +DECLARECUFUNC(cuCtxGetCurrent, (CUcontext *)); +DECLARECUFUNC(cuCtxSetCurrent, (CUcontext)); +DECLARECUFUNC(cuDeviceGet, (CUdevice *, int)); +DECLARECUFUNC(cuDeviceGetCount, (int *)); +DECLARECUFUNC(cuDeviceGetName, (char *, int, CUdevice)); +DECLARECUFUNC(cuInit, (unsigned int)); +DECLARECUFUNC(cuCtxPopCurrent, (CUcontext * pctx)); +DECLARECUFUNC(cuCtxPushCurrent, (CUcontext pctx)); +DECLARECUFUNC(cuCtxSynchronize, ()); + +#define CUDAAPIWEAK __attribute__( ( weak ) ) +#define DECLARECUDAFUNC(funcname, funcsig) cudaError_t CUDAAPIWEAK funcname funcsig; cudaError_t( *funcname##Ptr ) funcsig; +DECLARECUDAFUNC(cudaGetDevice, (int *)); +DECLARECUDAFUNC(cudaSetDevice, (int)); +DECLARECUDAFUNC(cudaFree, (void *)); + +#define CUPTIAPIWEAK __attribute__( ( weak ) ) +#define DECLARECUPTIFUNC(funcname, funcsig) CUptiResult CUPTIAPIWEAK funcname funcsig; CUptiResult( *funcname##Ptr ) funcsig; +/* CUptiResult CUPTIAPIWEAK cuptiDeviceEnumEventDomains( CUdevice, size_t *, CUpti_EventDomainID * ); */ +/* CUptiResult( *cuptiDeviceEnumEventDomainsPtr )( CUdevice, size_t *, CUpti_EventDomainID * ); */ +DECLARECUPTIFUNC(cuptiDeviceEnumMetrics, (CUdevice device, size_t * arraySizeBytes, CUpti_MetricID * metricArray)); +DECLARECUPTIFUNC(cuptiDeviceGetEventDomainAttribute, (CUdevice device, CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, size_t * valueSize, void *value)); +DECLARECUPTIFUNC(cuptiDeviceGetNumMetrics, (CUdevice device, uint32_t * numMetrics)); +DECLARECUPTIFUNC(cuptiEventGroupGetAttribute, (CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, size_t * valueSize, void *value)); +DECLARECUPTIFUNC(cuptiEventGroupReadEvent, (CUpti_EventGroup 
eventGroup, CUpti_ReadEventFlags flags, CUpti_EventID event, size_t * eventValueBufferSizeBytes, uint64_t * eventValueBuffer)); +DECLARECUPTIFUNC(cuptiEventGroupSetAttribute, (CUpti_EventGroup eventGroup, CUpti_EventGroupAttribute attrib, size_t valueSize, void *value)); +DECLARECUPTIFUNC(cuptiEventGroupSetDisable, (CUpti_EventGroupSet * eventGroupSet)); +DECLARECUPTIFUNC(cuptiEventGroupSetEnable, (CUpti_EventGroupSet * eventGroupSet)); +DECLARECUPTIFUNC(cuptiEventGroupSetsCreate, (CUcontext context, size_t eventIdArraySizeBytes, CUpti_EventID * eventIdArray, CUpti_EventGroupSets ** eventGroupPasses)); +DECLARECUPTIFUNC(cuptiEventGroupSetsDestroy, (CUpti_EventGroupSets * eventGroupSets)); +DECLARECUPTIFUNC(cuptiGetTimestamp, (uint64_t * timestamp)); +DECLARECUPTIFUNC(cuptiMetricEnumEvents, (CUpti_MetricID metric, size_t * eventIdArraySizeBytes, CUpti_EventID * eventIdArray)); +DECLARECUPTIFUNC(cuptiMetricGetAttribute, (CUpti_MetricID metric, CUpti_MetricAttribute attrib, size_t * valueSize, void *value)); +DECLARECUPTIFUNC(cuptiMetricGetNumEvents, (CUpti_MetricID metric, uint32_t * numEvents)); +DECLARECUPTIFUNC(cuptiMetricGetValue, (CUdevice device, CUpti_MetricID metric, size_t eventIdArraySizeBytes, CUpti_EventID * eventIdArray, size_t eventValueArraySizeBytes, uint64_t * eventValueArray, uint64_t timeDuration, CUpti_MetricValue * metricValue)); +DECLARECUPTIFUNC(cuptiSetEventCollectionMode, (CUcontext context, CUpti_EventCollectionMode mode)); +DECLARECUPTIFUNC(cuptiDeviceEnumEventDomains, (CUdevice, size_t *, CUpti_EventDomainID *)); +DECLARECUPTIFUNC(cuptiDeviceGetNumEventDomains, (CUdevice, uint32_t *)); +DECLARECUPTIFUNC(cuptiEventDomainEnumEvents, (CUpti_EventDomainID, size_t *, CUpti_EventID *)); +DECLARECUPTIFUNC(cuptiEventDomainGetAttribute, (CUpti_EventDomainID eventDomain, CUpti_EventDomainAttribute attrib, size_t * valueSize, void *value)); +DECLARECUPTIFUNC(cuptiEventDomainGetNumEvents, (CUpti_EventDomainID, uint32_t *)); 
+DECLARECUPTIFUNC(cuptiEventGetAttribute, (CUpti_EventID, CUpti_EventAttribute, size_t *, void *)); +DECLARECUPTIFUNC(cuptiEventGroupAddEvent, (CUpti_EventGroup, CUpti_EventID)); +DECLARECUPTIFUNC(cuptiEventGroupCreate, (CUcontext, CUpti_EventGroup *, uint32_t)); +DECLARECUPTIFUNC(cuptiEventGroupDestroy, (CUpti_EventGroup)); +DECLARECUPTIFUNC(cuptiEventGroupDisable, (CUpti_EventGroup)); +DECLARECUPTIFUNC(cuptiEventGroupEnable, (CUpti_EventGroup)); +DECLARECUPTIFUNC(cuptiEventGroupReadAllEvents, (CUpti_EventGroup, CUpti_ReadEventFlags, size_t *, uint64_t *, size_t *, CUpti_EventID *, size_t *)); +DECLARECUPTIFUNC(cuptiEventGroupResetAllEvents, (CUpti_EventGroup)); +DECLARECUPTIFUNC(cuptiGetResultString, (CUptiResult result, const char **str)); +DECLARECUPTIFUNC(cuptiEnableKernelReplayMode, ( CUcontext context )); +DECLARECUPTIFUNC(cuptiDisableKernelReplayMode, ( CUcontext context )); + + +/***************************************************************************** + ******** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT ********* + *****************************************************************************/ + +/* + * Link the necessary CUDA libraries to use the cuda component. If any of them can not be found, then + * the CUDA component will just be disabled. This is done at runtime so that a version of PAPI built + * with the CUDA component can be installed and used on systems which have the CUDA libraries installed + * and on systems where these libraries are not installed. 
+ */ +static int papicuda_linkCudaLibraries() +{ + /* Resolves every CUDA driver, CUDA runtime and CUPTI entry point used by + this component through dlopen()/dlsym() and stores it in the matching + ...Ptr function pointer. Returns PAPI_OK when all of them were found, + PAPI_ENOSUPP as soon as a library or a symbol is missing. */ +#define DLSYM_AND_CHECK( dllib, name ) dlsym( dllib, name ); if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); } + + /* Attempt to guess if we were statically linked to libc, if so bail */ + if(_dl_non_dynamic_init != NULL) { + strncpy(_cuda_vector.cmp_info.disabled_reason, "The CUDA component does not support statically linking to libc.", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + /* Need to link in the cuda libraries, if not found disable the component */ + dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL); + CHECK_PRINT_EVAL(!dl1, "CUDA library libcuda.so not found.", return (PAPI_ENOSUPP)); + cuCtxGetCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxGetCurrent"); + cuCtxSetCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxSetCurrent"); + cuDeviceGetPtr = DLSYM_AND_CHECK(dl1, "cuDeviceGet"); + cuDeviceGetCountPtr = DLSYM_AND_CHECK(dl1, "cuDeviceGetCount"); + cuDeviceGetNamePtr = DLSYM_AND_CHECK(dl1, "cuDeviceGetName"); + cuInitPtr = DLSYM_AND_CHECK(dl1, "cuInit"); + cuCtxPopCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxPopCurrent"); + cuCtxPushCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxPushCurrent"); + cuCtxSynchronizePtr = DLSYM_AND_CHECK(dl1, "cuCtxSynchronize"); + + dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE); + CHECK_PRINT_EVAL(!dl2, "CUDA runtime library libcudart.so not found.", return (PAPI_ENOSUPP)); + cudaGetDevicePtr = DLSYM_AND_CHECK(dl2, "cudaGetDevice"); + cudaSetDevicePtr = DLSYM_AND_CHECK(dl2, "cudaSetDevice"); + cudaFreePtr = DLSYM_AND_CHECK(dl2, "cudaFree"); + + dl3 = dlopen("libcupti.so", RTLD_NOW | RTLD_GLOBAL); + /* Name the library that actually failed to load */ + CHECK_PRINT_EVAL(!dl3, "CUPTI library libcupti.so not found.", return (PAPI_ENOSUPP)); + /* The macro DLSYM_AND_CHECK results in the expansion example below */ + /* cuptiDeviceEnumEventDomainsPtr = dlsym( dl3, "cuptiDeviceEnumEventDomains" ); */ + /* 
if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); } */ + cuptiDeviceEnumMetricsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceEnumMetrics"); + cuptiDeviceGetEventDomainAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceGetEventDomainAttribute"); + cuptiDeviceGetNumMetricsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceGetNumMetrics"); + cuptiEventGroupGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupGetAttribute"); + cuptiEventGroupReadEventPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupReadEvent"); + cuptiEventGroupSetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetAttribute"); + cuptiEventGroupSetDisablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetDisable"); + cuptiEventGroupSetEnablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetEnable"); + cuptiEventGroupSetsCreatePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetsCreate"); + cuptiEventGroupSetsDestroyPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetsDestroy"); + cuptiGetTimestampPtr = DLSYM_AND_CHECK(dl3, "cuptiGetTimestamp"); + cuptiMetricEnumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiMetricEnumEvents"); + cuptiMetricGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetAttribute"); + cuptiMetricGetNumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetNumEvents"); + cuptiMetricGetValuePtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetValue"); + cuptiSetEventCollectionModePtr = DLSYM_AND_CHECK(dl3, "cuptiSetEventCollectionMode"); + cuptiDeviceEnumEventDomainsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceEnumEventDomains"); + cuptiDeviceGetNumEventDomainsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceGetNumEventDomains"); + cuptiEventDomainEnumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventDomainEnumEvents"); + cuptiEventDomainGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventDomainGetAttribute"); + cuptiEventDomainGetNumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventDomainGetNumEvents"); + cuptiEventGetAttributePtr = DLSYM_AND_CHECK(dl3, 
"cuptiEventGetAttribute"); + cuptiEventGroupAddEventPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupAddEvent"); + cuptiEventGroupCreatePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupCreate"); + cuptiEventGroupDestroyPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupDestroy"); + cuptiEventGroupDisablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupDisable"); + cuptiEventGroupEnablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupEnable"); + cuptiEventGroupReadAllEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupReadAllEvents"); + cuptiEventGroupResetAllEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupResetAllEvents"); + cuptiGetResultStringPtr = DLSYM_AND_CHECK(dl3, "cuptiGetResultString"); + cuptiEnableKernelReplayModePtr = DLSYM_AND_CHECK(dl3, "cuptiEnableKernelReplayMode"); + /* The disable pointer must resolve the disable symbol, not the enable one */ + cuptiDisableKernelReplayModePtr = DLSYM_AND_CHECK(dl3, "cuptiDisableKernelReplayMode"); + return (PAPI_OK); +} + + +static int papicuda_add_native_events(papicuda_context_t * gctxt) +{ + SUBDBG("Entering\n"); + CUresult cuErr; + int deviceNum; + uint32_t domainNum, eventNum; + papicuda_device_desc_t *mydevice; + char tmpStr[PAPI_MIN_STR_LEN]; + tmpStr[PAPI_MIN_STR_LEN - 1] = '\0'; + size_t tmpSizeBytes; + int ii; + uint32_t maxEventSize; + + /* How many CUDA devices do we have? 
*/ + cuErr = (*cuDeviceGetCountPtr) (&gctxt->deviceCount); + if(cuErr == CUDA_ERROR_NOT_INITIALIZED) { + /* If CUDA not initilaized, initialized CUDA and retry the device list */ + /* This is required for some of the PAPI tools, that do not call the init functions */ + if(((*cuInitPtr) (0)) != CUDA_SUCCESS) { + strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA cannot be found and initialized (cuInit failed).", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + CU_CALL((*cuDeviceGetCountPtr) (&gctxt->deviceCount), return (PAPI_EMISC)); + } + + if(gctxt->deviceCount == 0) { + strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA initialized but no CUDA devices found.", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + SUBDBG("Found %d devices\n", gctxt->deviceCount); + + /* allocate memory for device information */ + gctxt->deviceArray = (papicuda_device_desc_t *) papi_calloc(gctxt->deviceCount, sizeof(papicuda_device_desc_t)); + CHECK_PRINT_EVAL(!gctxt->deviceArray, "ERROR CUDA: Could not allocate memory for CUDA device structure", return (PAPI_ENOMEM)); + + /* For each device, get domains and domain-events counts */ + maxEventSize = 0; + for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { + mydevice = &gctxt->deviceArray[deviceNum]; + /* Get device id, name, numeventdomains for each device */ + CU_CALL((*cuDeviceGetPtr) (&mydevice->cuDev, deviceNum), return (PAPI_EMISC)); + CU_CALL((*cuDeviceGetNamePtr) (mydevice->deviceName, PAPI_MIN_STR_LEN - 1, mydevice->cuDev), return (PAPI_EMISC)); + mydevice->deviceName[PAPI_MIN_STR_LEN - 1] = '\0'; + CUPTI_CALL((*cuptiDeviceGetNumEventDomainsPtr) (mydevice->cuDev, &mydevice->maxDomains), return (PAPI_EMISC)); + /* Allocate space to hold domain IDs */ + mydevice->domainIDArray = (CUpti_EventDomainID *) papi_calloc(mydevice->maxDomains, sizeof(CUpti_EventDomainID)); + CHECK_PRINT_EVAL(!mydevice->domainIDArray, "ERROR CUDA: Could not allocate memory for CUDA device domains", return (PAPI_ENOMEM)); + /* Put domain ids 
into allocated space */ + size_t domainarraysize = mydevice->maxDomains * sizeof(CUpti_EventDomainID); + CUPTI_CALL((*cuptiDeviceEnumEventDomainsPtr) (mydevice->cuDev, &domainarraysize, mydevice->domainIDArray), return (PAPI_EMISC)); + /* Allocate space to hold domain event counts */ + mydevice->domainIDNumEvents = (uint32_t *) papi_calloc(mydevice->maxDomains, sizeof(uint32_t)); + CHECK_PRINT_EVAL(!mydevice->domainIDNumEvents, "ERROR CUDA: Could not allocate memory for domain event counts", return (PAPI_ENOMEM)); + /* For each domain, get event counts in domainNumEvents[] */ + for(domainNum = 0; domainNum < mydevice->maxDomains; domainNum++) { + CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum]; + /* Get num events in domain */ + // SUBDBG( "Device %d:%d calling cuptiEventDomainGetNumEventsPtr with domainID %d \n", deviceNum, mydevice->cuDev, domainID ); + CUPTI_CALL((*cuptiEventDomainGetNumEventsPtr) (domainID, &mydevice->domainIDNumEvents[domainNum]), return (PAPI_EMISC)); + /* Keep track of overall number of events */ + maxEventSize += mydevice->domainIDNumEvents[domainNum]; + } + } + + /* Create space for metrics */ + for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { + uint32_t maxMetrics; + mydevice = &gctxt->deviceArray[deviceNum]; + // CUPTI_CALL((*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics), return (PAPI_EMISC)); + if ( (*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics) != CUPTI_SUCCESS ) + maxMetrics = 0; + maxEventSize += maxMetrics; + } + + /* Allocate space for all events and descriptors */ + gctxt->availEventKind = (CUpti_ActivityKind *) papi_calloc(maxEventSize, sizeof(CUpti_ActivityKind)); + CHECK_PRINT_EVAL(!gctxt->availEventKind, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM)); + gctxt->availEventDeviceNum = (int *) papi_calloc(maxEventSize, sizeof(int)); + CHECK_PRINT_EVAL(!gctxt->availEventDeviceNum, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM)); + 
gctxt->availEventIDArray = (CUpti_EventID *) papi_calloc(maxEventSize, sizeof(CUpti_EventID)); + CHECK_PRINT_EVAL(!gctxt->availEventIDArray, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM)); + gctxt->availEventIsBeingMeasuredInEventset = (uint32_t *) papi_calloc(maxEventSize, sizeof(uint32_t)); + CHECK_PRINT_EVAL(!gctxt->availEventIsBeingMeasuredInEventset, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM)); + gctxt->availEventDesc = (papicuda_name_desc_t *) papi_calloc(maxEventSize, sizeof(papicuda_name_desc_t)); + CHECK_PRINT_EVAL(!gctxt->availEventDesc, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM)); + + /* Record the events and descriptions */ + uint32_t idxEventArray = 0; + for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { + mydevice = &gctxt->deviceArray[deviceNum]; + // SUBDBG( "For device %d %d maxdomains %d \n", deviceNum, mydevice->cuDev, mydevice->maxDomains ); + /* Get and store event IDs, names, descriptions into the large arrays allocated */ + for(domainNum = 0; domainNum < mydevice->maxDomains; domainNum++) { + /* Get domain id */ + CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum]; + uint32_t domainNumEvents = mydevice->domainIDNumEvents[domainNum]; + // SUBDBG( "For device %d domain %d domainID %d numEvents %d\n", mydevice->cuDev, domainNum, domainID, domainNumEvents ); + /* Allocate temp space for eventIDs for this domain */ + CUpti_EventID *domainEventIDArray = (CUpti_EventID *) papi_calloc(domainNumEvents, sizeof(CUpti_EventID)); + CHECK_PRINT_EVAL(!domainEventIDArray, "ERROR CUDA: Could not allocate memory for events", return (PAPI_ENOMEM)); + /* Load the domain eventIDs in temp space */ + size_t domainEventArraySize = domainNumEvents * sizeof(CUpti_EventID); + CUPTI_CALL((*cuptiEventDomainEnumEventsPtr) (domainID, &domainEventArraySize, domainEventIDArray), return (PAPI_EMISC)); + /* For each event, get and store name and description */ + for(eventNum = 0; eventNum < 
domainNumEvents; eventNum++) { + /* Record the event IDs in native event array */ + CUpti_EventID myeventCuptiEventId = domainEventIDArray[eventNum]; + gctxt->availEventKind[idxEventArray] = CUPTI_ACTIVITY_KIND_EVENT; + gctxt->availEventIDArray[idxEventArray] = myeventCuptiEventId; + gctxt->availEventDeviceNum[idxEventArray] = deviceNum; + /* Get event name */ + tmpSizeBytes = PAPI_MIN_STR_LEN - 1 * sizeof(char); + CUPTI_CALL((*cuptiEventGetAttributePtr) (myeventCuptiEventId, CUPTI_EVENT_ATTR_NAME, &tmpSizeBytes, tmpStr), return (PAPI_EMISC)); + /* Save a full path for the event, filling spaces with underscores */ + // snprintf( gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "%s:%d:%s", mydevice->deviceName, deviceNum, tmpStr ); + snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "event:%s:device=%d", tmpStr, deviceNum); + gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN - 1] = '\0'; + char *nameTmpPtr = gctxt->availEventDesc[idxEventArray].name; + for(ii = 0; ii < (int) strlen(nameTmpPtr); ii++) + if(nameTmpPtr[ii] == ' ') + nameTmpPtr[ii] = '_'; + /* Save description in the native event array */ + tmpSizeBytes = PAPI_2MAX_STR_LEN - 1 * sizeof(char); + CUPTI_CALL((*cuptiEventGetAttributePtr) (myeventCuptiEventId, CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &tmpSizeBytes, gctxt->availEventDesc[idxEventArray].description), return (PAPI_EMISC)); + gctxt->availEventDesc[idxEventArray].description[PAPI_2MAX_STR_LEN - 1] = '\0'; + // SUBDBG( "Event ID:%d Name:%s Desc:%s\n", gctxt->availEventIDArray[idxEventArray], gctxt->availEventDesc[idxEventArray].name, gctxt->availEventDesc[idxEventArray].description ); + /* Increment index past events in this domain to start of next domain */ + idxEventArray++; + } + papi_free(domainEventIDArray); + } + } + + /* Retrieve and store metric information for each device */ + SUBDBG("Checking for metrics\n"); + for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { + uint32_t maxMetrics, i; 
+ CUpti_MetricID *metricIdList = NULL; + mydevice = &gctxt->deviceArray[deviceNum]; + // CUPTI_CALL((*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics), return (PAPI_EMISC)); + if ( (*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics) != CUPTI_SUCCESS ) { + maxMetrics = 0; + continue; + } + SUBDBG("Device %d: Checking each of the (maxMetrics) %d metrics\n", deviceNum, maxMetrics); + size_t size = maxMetrics * sizeof(CUpti_EventID); + metricIdList = (CUpti_MetricID *) papi_calloc(maxMetrics, sizeof(CUpti_EventID)); + CHECK_PRINT_EVAL(metricIdList == NULL, "Out of memory", return (PAPI_ENOMEM)); + CUPTI_CALL((*cuptiDeviceEnumMetricsPtr) (mydevice->cuDev, &size, metricIdList), return (PAPI_EMISC)); + for(i = 0; i < maxMetrics; i++) { + gctxt->availEventIDArray[idxEventArray] = metricIdList[i]; + gctxt->availEventKind[idxEventArray] = CUPTI_ACTIVITY_KIND_METRIC; + gctxt->availEventDeviceNum[idxEventArray] = deviceNum; + size = PAPI_MIN_STR_LEN; + CUPTI_CALL((*cuptiMetricGetAttributePtr) (metricIdList[i], CUPTI_METRIC_ATTR_NAME, &size, (uint8_t *) tmpStr), return (PAPI_EMISC)); + // FIXME SOMEDAY: For this release the nvlink metrics are not functioning so skip them + if(strstr(tmpStr, "nvlink")!=NULL) continue; + // FIXME SOMEDAY: For this release the nvlink metrics are not functioning so skip them + if(size >= PAPI_MIN_STR_LEN) + gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN - 1] = '\0'; + snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "metric:%s:device=%d", tmpStr, deviceNum); + size = PAPI_2MAX_STR_LEN; + CUPTI_CALL((*cuptiMetricGetAttributePtr) (metricIdList[i], CUPTI_METRIC_ATTR_LONG_DESCRIPTION, &size, (uint8_t *) gctxt->availEventDesc[idxEventArray].description), return (PAPI_EMISC)); + if(size >= PAPI_2MAX_STR_LEN) + gctxt->availEventDesc[idxEventArray].description[PAPI_2MAX_STR_LEN - 1] = '\0'; + // SUBDBG( "For device %d availEvent[%d] %s\n", mydevice->cuDev, idxEventArray, 
gctxt->availEventDesc[idxEventArray].name); + idxEventArray++; + } + papi_free(metricIdList); + } + gctxt->availEventSize = idxEventArray; + + /* return 0 if everything went OK */ + return 0; +} + + +/* + This routine tries to convert all CUPTI values to long long values. + If the CUPTI value is an integer type, it is cast to long long. If + the CUPTI value is a percent, it is multiplied by 100 to return the + integer percentage. If the CUPTI value is a double, the value + is cast to long long... this can be a severe truncation. + */ +static int papicuda_convert_metric_value_to_long_long(CUpti_MetricValue metricValue, CUpti_MetricValueKind valueKind, long long int *papiValue) +{ + union { + long long ll; + double fp; + } tmpValue; + + SUBDBG("Try to convert the CUPTI metric value kind (index %d) to PAPI value (long long or double)\n", valueKind); + switch (valueKind) { + case CUPTI_METRIC_VALUE_KIND_DOUBLE: + SUBDBG("Metric double %f\n", metricValue.metricValueDouble); + tmpValue.ll = (long long)(metricValue.metricValueDouble); + //CHECK_PRINT_EVAL(tmpValue.fp - metricValue.metricValueDouble > 1e-6, "Error converting metric\n", return (PAPI_EMISC)); + break; + case CUPTI_METRIC_VALUE_KIND_UINT64: + SUBDBG("Metric uint64 = %llu\n", (unsigned long long) metricValue.metricValueUint64); + tmpValue.ll = (long long) (metricValue.metricValueUint64); + CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueUint64 > 1e-6, "Error converting metric\n", return (PAPI_EMISC)); + break; + case CUPTI_METRIC_VALUE_KIND_INT64: + SUBDBG("Metric int64 = %lld\n", (long long) metricValue.metricValueInt64); + tmpValue.ll = (long long) (metricValue.metricValueInt64); + CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueInt64 > 1e-6, "Error converting metric\n", return (PAPI_EMISC)); + break; + case CUPTI_METRIC_VALUE_KIND_PERCENT: + SUBDBG("Metric percent = %f%%\n", metricValue.metricValuePercent); + tmpValue.ll = (long long)(metricValue.metricValuePercent*100); + 
//CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValuePercent > 1e-6, "Error converting metric\n", return (PAPI_EMISC)); + break; + case CUPTI_METRIC_VALUE_KIND_THROUGHPUT: + SUBDBG("Metric throughput %llu bytes/sec\n", (unsigned long long) metricValue.metricValueThroughput); + tmpValue.ll = (long long) (metricValue.metricValueThroughput); + CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueThroughput > 1e-6, "Error converting metric\n", return (PAPI_EMISC)); + break; + case CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL: + SUBDBG("Metric utilization level %u\n", (unsigned int) metricValue.metricValueUtilizationLevel); + tmpValue.ll = (long long) (metricValue.metricValueUtilizationLevel); + CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueUtilizationLevel > 1e-6, "Error converting metric\n", return (PAPI_EMISC)); + break; + default: + CHECK_PRINT_EVAL(1, "ERROR: unsupported metric value kind", return (PAPI_EINVAL)); + exit(-1); + } + *papiValue = tmpValue.ll; + return (PAPI_OK); +} + + +/* **************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + **************************************************************************** */ + +/* + * This is called whenever a thread is initialized. + */ +static int papicuda_init_thread(hwd_context_t * ctx) +{ + (void) ctx; + SUBDBG("Entering\n"); + return PAPI_OK; +} + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +/* NOTE: only called by main thread (not by every thread) !!! Starting + in CUDA 4.0, multiple CPU threads can access the same CUDA + context. This is a much easier programming model then pre-4.0 as + threads - using the same context - can share memory, data, + etc. It's possible to create a different context for each + thread. 
That's why CUDA context creation is done in + CUDA_init_component() (called only by main thread) rather than + CUDA_init() or CUDA_init_control_state() (both called by each + thread). */ +static int papicuda_init_component(int cidx) +{ + SUBDBG("Entering with cidx: %d\n", cidx); + int rv; + + /* link in all the cuda libraries and resolve the symbols we need to use */ + if(papicuda_linkCudaLibraries() != PAPI_OK) { + SUBDBG("Dynamic link of CUDA libraries failed, component will be disabled.\n"); + SUBDBG("See disable reason in papi_component_avail output for more details.\n"); + return (PAPI_ENOSUPP); + } + + /* Create the structure */ + if(!global_papicuda_context) + global_papicuda_context = (papicuda_context_t *) papi_calloc(1, sizeof(papicuda_context_t)); + + /* Get list of all native CUDA events supported */ + rv = papicuda_add_native_events(global_papicuda_context); + if(rv != 0) + return (rv); + + /* Export some information */ + _cuda_vector.cmp_info.CmpIdx = cidx; + _cuda_vector.cmp_info.num_native_events = global_papicuda_context->availEventSize; + _cuda_vector.cmp_info.num_cntrs = _cuda_vector.cmp_info.num_native_events; + _cuda_vector.cmp_info.num_mpx_cntrs = _cuda_vector.cmp_info.num_native_events; + + return (PAPI_OK); +} + + +/* Setup a counter control state. + * In general a control state holds the hardware info for an + * EventSet. 
+ */ +static int papicuda_init_control_state(hwd_control_state_t * ctrl) +{ + SUBDBG("Entering\n"); + (void) ctrl; + papicuda_context_t *gctxt = global_papicuda_context; + + CHECK_PRINT_EVAL(!gctxt, "Error: The PAPI CUDA component needs to be initialized first", return (PAPI_ENOINIT)); + /* If no events were found during the initial component initialization, return error */ + if(global_papicuda_context->availEventSize <= 0) { + strncpy(_cuda_vector.cmp_info.disabled_reason, "ERROR CUDA: No events exist", PAPI_MAX_STR_LEN); + return (PAPI_EMISC); + } + /* If it does not exist, create the global structure to hold CUDA contexts and active events */ + if(!global_papicuda_control) { + global_papicuda_control = (papicuda_control_t *) papi_calloc(1, sizeof(papicuda_control_t)); + global_papicuda_control->countOfActiveCUContexts = 0; + global_papicuda_control->activeEventCount = 0; + } + return PAPI_OK; +} + +/* Triggered by eventset operations like add or remove. For CUDA, + * needs to be called multiple times from each seperate CUDA context + * with the events to be measured from that context. For each + * context, create eventgroups for the events. 
+ */ +/* Note: NativeInfo_t is defined in papi_internal.h */ +static int papicuda_update_control_state(hwd_control_state_t * ctrl, NativeInfo_t * nativeInfo, int nativeCount, hwd_context_t * ctx) +{ + SUBDBG("Entering with nativeCount %d\n", nativeCount); + (void) ctx; + // (void) ctrl; + papicuda_control_t *gctrl = global_papicuda_control; + papicuda_context_t *gctxt = global_papicuda_context; + int currDeviceNum; + CUcontext currCuCtx; + int eventContextIdx; + CUcontext eventCuCtx; + int index, ii; + uint32_t numEvents, ee, cc; + + /* Return if no events */ + if(nativeCount == 0) + return (PAPI_OK); + + /* Get deviceNum, initialize context if needed via free, get context */ + // CU_CALL( (*cuCtxGetCurrentPtr)(&currCuCtx), return(PAPI_EMISC)); + CUDA_CALL((*cudaGetDevicePtr) (&currDeviceNum), return (PAPI_EMISC)); + SUBDBG("currDeviceNum %d \n", currDeviceNum); + CUDA_CALL((*cudaFreePtr) (NULL), return (PAPI_EMISC)); + CU_CALL((*cuCtxGetCurrentPtr) (&currCuCtx), return (PAPI_EMISC)); + SUBDBG("currDeviceNum %d cuCtx %p \n", currDeviceNum, currCuCtx); + + /* Handle user request of events to be monitored */ + for(ii = 0; ii < nativeCount; ii++) { + /* Get the PAPI event index from the user */ + index = nativeInfo[ii].ni_event; +#ifdef DEBUG + char *eventName = gctxt->availEventDesc[index].name; +#endif + int eventDeviceNum = gctxt->availEventDeviceNum[index]; + + /* if this event is already added continue to next ii, if not, mark it as being added */ + if(gctxt->availEventIsBeingMeasuredInEventset[index] == 1) { + SUBDBG("Skipping event %s which is already added\n", eventName); + continue; + } else + gctxt->availEventIsBeingMeasuredInEventset[index] = 1; + + /* Find context/control in papicuda, creating it if does not exist */ + for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { + CHECK_PRINT_EVAL(cc >= PAPICUDA_MAX_COUNTERS, "Exceeded hardcoded maximum number of contexts (PAPICUDA_MAX_COUNTERS)", return (PAPI_EMISC)); + 
if(gctrl->arrayOfActiveCUContexts[cc]->deviceNum == eventDeviceNum) { + eventCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; + SUBDBG("Event %s device %d already has a cuCtx %p registered\n", eventName, eventDeviceNum, eventCuCtx); + if(eventCuCtx != currCuCtx) + CU_CALL((*cuCtxPushCurrentPtr) (eventCuCtx), return (PAPI_EMISC)); + break; + } + } + // Create context if it does not exit + if(cc == gctrl->countOfActiveCUContexts) { + SUBDBG("Event %s device %d does not have a cuCtx registered yet...\n", eventName, eventDeviceNum); + if(currDeviceNum != eventDeviceNum) { + CUDA_CALL((*cudaSetDevicePtr) (eventDeviceNum), return (PAPI_EMISC)); + CUDA_CALL((*cudaFreePtr) (NULL), return (PAPI_EMISC)); + CU_CALL((*cuCtxGetCurrentPtr) (&eventCuCtx), return (PAPI_EMISC)); + } else { + eventCuCtx = currCuCtx; + } + gctrl->arrayOfActiveCUContexts[cc] = papi_calloc(1, sizeof(papicuda_active_cucontext_t)); + CHECK_PRINT_EVAL(gctrl->arrayOfActiveCUContexts[cc] == NULL, "Memory allocation for new active context failed", return (PAPI_ENOMEM)); + gctrl->arrayOfActiveCUContexts[cc]->deviceNum = eventDeviceNum; + gctrl->arrayOfActiveCUContexts[cc]->cuCtx = eventCuCtx; + gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses = NULL; + gctrl->arrayOfActiveCUContexts[cc]->conMetricsCount = 0; + gctrl->arrayOfActiveCUContexts[cc]->conEventsCount = 0; + gctrl->countOfActiveCUContexts++; + SUBDBG("Added a new context deviceNum %d cuCtx %p ... 
now countOfActiveCUContexts is %d\n", eventDeviceNum, eventCuCtx, gctrl->countOfActiveCUContexts); + } + eventContextIdx = cc; + + papicuda_active_cucontext_t *eventctrl = gctrl->arrayOfActiveCUContexts[eventContextIdx]; + switch (gctxt->availEventKind[index]) { + case CUPTI_ACTIVITY_KIND_METRIC: + SUBDBG("Need to add metric %d %s \n", index, eventName); + /* For the metric, find list of events required */ + CUpti_MetricID metricId = gctxt->availEventIDArray[index]; + CUPTI_CALL((*cuptiMetricGetNumEventsPtr) (metricId, &numEvents), return (PAPI_EINVAL)); + size_t sizeBytes = numEvents * sizeof(CUpti_EventID); + CUpti_EventID *eventIdArray = papi_malloc(sizeBytes); + CHECK_PRINT_EVAL(eventIdArray == NULL, "Malloc failed", return (PAPI_ENOMEM)); + CUPTI_CALL((*cuptiMetricEnumEventsPtr) (metricId, &sizeBytes, eventIdArray), return (PAPI_EINVAL)); + SUBDBG("For metric %s, append the list of %d required events\n", eventName, numEvents); + for(ee = 0; ee < numEvents; ee++) { + eventctrl->conEvents[eventctrl->conEventsCount] = eventIdArray[ee]; + eventctrl->conEventsCount++; + SUBDBG("For metric %s, appended event %d - %d %d to this context (conEventsCount %d)\n", eventName, ee, eventIdArray[ee], eventctrl->conEvents[eventctrl->conEventsCount], eventctrl->conEventsCount); + if (eventctrl->conEventsCount >= PAPICUDA_MAX_COUNTERS) { + SUBDBG("Num events (generated by metric) exceeded PAPICUDA_MAX_COUNTERS\n"); + return(PAPI_EINVAL); + } + } + eventctrl->conMetrics[eventctrl->conMetricsCount] = metricId; + eventctrl->conMetricsCount++; + if (eventctrl->conMetricsCount >= PAPICUDA_MAX_COUNTERS) { + SUBDBG("Num metrics exceeded PAPICUDA_MAX_COUNTERS\n"); + return(PAPI_EINVAL); + } + break; + + case CUPTI_ACTIVITY_KIND_EVENT: + SUBDBG("Need to add event %d %s to the context\n", index, eventName); + /* lookup cuptieventid for this event index */ + CUpti_EventID eventId = gctxt->availEventIDArray[index]; + eventctrl->conEvents[eventctrl->conEventsCount] = eventId; + 
eventctrl->conEventsCount++; + break; + + default: + CHECK_PRINT_EVAL(1, "Unknown CUPTI measure", return (PAPI_EMISC)); + break; + } + + if (eventctrl->conEventsCount >= PAPICUDA_MAX_COUNTERS) { + SUBDBG("Num events exceeded PAPICUDA_MAX_COUNTERS\n"); + return(PAPI_EINVAL); + } + + /* Record index of this active event back into the nativeInfo structure */ + nativeInfo[ii].ni_position = gctrl->activeEventCount; + /* record added event at the higher level */ + CHECK_PRINT_EVAL(gctrl->activeEventCount == PAPICUDA_MAX_COUNTERS - 1, "Exceeded maximum num of events (PAPI_MAX_COUNTERS)", return (PAPI_EMISC)); + gctrl->activeEventIndex[gctrl->activeEventCount] = index; + // gctrl->activeEventContextIdx[gctrl->activeEventCount] = eventContextIdx; + gctrl->activeEventValues[gctrl->activeEventCount] = 0; + gctrl->activeEventCount++; + + /* Create/recreate eventgrouppass structures for the added event and context */ + SUBDBG("Create eventGroupPasses for context (destroy pre-existing) (nativeCount %d, conEventsCount %d) \n", gctrl->activeEventCount, eventctrl->conEventsCount); + if(eventctrl->conEventsCount > 0) { + // SUBDBG("Destroy prevous eventGroupPasses for the context \n"); + if(eventctrl->eventGroupPasses != NULL) + CUPTI_CALL((*cuptiEventGroupSetsDestroyPtr) (eventctrl->eventGroupPasses), return (PAPI_EMISC)); + eventctrl->eventGroupPasses = NULL; + size_t sizeBytes = (eventctrl->conEventsCount) * sizeof(CUpti_EventID); + // SUBDBG("About to create eventGroupPasses for the context (sizeBytes %zu) \n", sizeBytes); +#ifdef PAPICUDA_KERNEL_REPLAY_MODE + CUPTI_CALL((*cuptiEnableKernelReplayModePtr) (eventCuCtx), return (PAPI_ECMP)); + CUPTI_CALL((*cuptiEventGroupSetsCreatePtr) (eventCuCtx, sizeBytes, eventctrl->conEvents, &eventctrl->eventGroupPasses), return (PAPI_ECMP)); +#else + CUPTI_CALL((*cuptiSetEventCollectionModePtr)(eventCuCtx,CUPTI_EVENT_COLLECTION_MODE_KERNEL), return(PAPI_ECMP)); + CUPTI_CALL((*cuptiEventGroupSetsCreatePtr) (eventCuCtx, sizeBytes, 
eventctrl->conEvents, &eventctrl->eventGroupPasses), return (PAPI_EMISC)); + if (eventctrl->eventGroupPasses->numSets > 1) { + SUBDBG("Error occured: The combined CUPTI events require more than 1 pass... try different events\n"); + papicuda_cleanup_eventset(ctrl); + return(PAPI_ECOMBO); + } else { + SUBDBG("Created eventGroupPasses for context total-events %d in-this-context %d passes-requied %d) \n", gctrl->activeEventCount, eventctrl->conEventsCount, eventctrl->eventGroupPasses->numSets); + } + +#endif + } + + if(eventCuCtx != currCuCtx) + CU_CALL((*cuCtxPopCurrentPtr) (&eventCuCtx), return (PAPI_EMISC)); + + } + return (PAPI_OK); +} + +/* Triggered by PAPI_start(). + * For CUDA component, switch to each context and start all eventgroups. +*/ +static int papicuda_start(hwd_context_t * ctx, hwd_control_state_t * ctrl) +{ + SUBDBG("Entering\n"); + (void) ctx; + (void) ctrl; + papicuda_control_t *gctrl = global_papicuda_control; + // papicuda_context_t *gctxt = global_papicuda_context; + uint32_t ii, gg, cc, ss; + int saveDeviceNum = -1; + + SUBDBG("Reset all active event values\n"); + for(ii = 0; ii < gctrl->activeEventCount; ii++) + gctrl->activeEventValues[ii] = 0; + + SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n"); + CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); + CUPTI_CALL((*cuptiGetTimestampPtr) (&gctrl->cuptiStartTimestampNs), return (PAPI_EMISC)); + for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { + int eventDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum; + CUcontext eventCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; + SUBDBG("Set to device %d cuCtx %p \n", eventDeviceNum, eventCuCtx); + // CUDA_CALL( (*cudaSetDevicePtr)(eventDeviceNum), return(PAPI_EMISC)); + if(eventDeviceNum != saveDeviceNum) + CU_CALL((*cuCtxPushCurrentPtr) (eventCuCtx), return (PAPI_EMISC)); + CUpti_EventGroupSets *eventEventGroupPasses = 
gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses; + for (ss=0; ssnumSets; ss++) { + CUpti_EventGroupSet groupset = eventEventGroupPasses->sets[ss]; + for(gg = 0; gg < groupset.numEventGroups; gg++) { + CUpti_EventGroup group = groupset.eventGroups[gg]; + uint32_t one = 1; + CUPTI_CALL((*cuptiEventGroupSetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES, sizeof(uint32_t), &one), return (PAPI_EMISC)); + } + CUPTI_CALL((*cuptiEventGroupSetEnablePtr) (&groupset), return (PAPI_EMISC)); + } + if(eventDeviceNum != saveDeviceNum) + CU_CALL((*cuCtxPopCurrentPtr) (&eventCuCtx), return (PAPI_EMISC)); + } + + return (PAPI_OK); +} + + +/* Triggered by PAPI_read(). For CUDA component, switch to each + * context, read all the eventgroups, and put the values in the + * correct places. */ +static int papicuda_read(hwd_context_t * ctx, hwd_control_state_t * ctrl, long long **values, int flags) +{ + SUBDBG("Entering\n"); + (void) ctx; + (void) ctrl; + (void) flags; + papicuda_control_t *gctrl = global_papicuda_control; + papicuda_context_t *gctxt = global_papicuda_context; + uint32_t gg, ii, jj, ee, instanceK, cc, rr, ss; + int saveDeviceNum; + size_t eventIdsSize = PAPICUDA_MAX_COUNTERS * sizeof(CUpti_EventID); + uint64_t readEventValueBuffer[PAPICUDA_MAX_COUNTERS]; + CUpti_EventID readEventIDArray[PAPICUDA_MAX_COUNTERS]; + + // Get read time stamp + CUPTI_CALL((*cuptiGetTimestampPtr) (&gctrl->cuptiReadTimestampNs), return (PAPI_EMISC)); + uint64_t durationNs = gctrl->cuptiReadTimestampNs - gctrl->cuptiStartTimestampNs; + gctrl->cuptiStartTimestampNs = gctrl->cuptiReadTimestampNs; + + SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n"); + CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); + for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { + int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum; + CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; + 
SUBDBG("Set to device %d cuCtx %p \n", currDeviceNum, currCuCtx); + if(currDeviceNum != saveDeviceNum) + CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC)); + else + CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC)); + + size_t numEventIDsRead = 0; + CU_CALL((*cuCtxSynchronizePtr) (), return (PAPI_EMISC)); + CUpti_EventGroupSets *currEventGroupPasses = gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses; + uint32_t numEvents, numInstances, numTotalInstances; + size_t sizeofuint32num = sizeof(uint32_t); + CUpti_EventDomainID groupDomainID; + size_t groupDomainIDSize = sizeof(groupDomainID); + CUdevice cudevice = gctxt->deviceArray[currDeviceNum].cuDev; + + /* Since we accumulate the eventValues in a buffer, it needs to be cleared for each context */ + for(ee = 0; ee < PAPICUDA_MAX_COUNTERS; ee++) + readEventValueBuffer[ee] = 0; + + for (ss=0; ssnumSets; ss++) { + CUpti_EventGroupSet groupset = currEventGroupPasses->sets[ss]; + SUBDBG("Read events in this context\n"); + for(gg = 0; gg < groupset.numEventGroups; gg++) { + CUpti_EventGroup group = groupset.eventGroups[gg]; + CUPTI_CALL((*cuptiEventGroupGetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID, &groupDomainIDSize, &groupDomainID), return (PAPI_EMISC)); + CUPTI_CALL((*cuptiDeviceGetEventDomainAttributePtr) (cudevice, groupDomainID, CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT, &sizeofuint32num, &numTotalInstances), return (PAPI_EMISC)); + CUPTI_CALL((*cuptiEventGroupGetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT, &sizeofuint32num, &numInstances), return (PAPI_EMISC)); + CUPTI_CALL((*cuptiEventGroupGetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS, &sizeofuint32num, &numEvents), return (PAPI_EMISC)); + eventIdsSize = PAPICUDA_MAX_COUNTERS * sizeof(CUpti_EventID); + CUpti_EventID eventIds[PAPICUDA_MAX_COUNTERS]; + CUPTI_CALL((*cuptiEventGroupGetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_EVENTS, &eventIdsSize, eventIds), return (PAPI_EMISC)); + 
SUBDBG("Context %d eventgroup %d domain numTotalInstaces %u numInstances %u numEvents %u\n", cc, gg, numTotalInstances, numInstances, numEvents); + size_t valuesSize = sizeof(uint64_t) * numInstances; + uint64_t *values = (uint64_t *) papi_malloc(valuesSize); + CHECK_PRINT_EVAL(values == NULL, "Out of memory", return (PAPI_ENOMEM)); + /* For each event in this group, read the per-instance values and accumulate their sum (normalization happens once, after all groups have been read) */ + for(ee = 0; ee < numEvents; ee++) { + CUPTI_CALL((*cuptiEventGroupReadEventPtr) (group, CUPTI_EVENT_READ_FLAG_NONE, eventIds[ee], &valuesSize, values), return (PAPI_EMISC)); + // sum collect event values from all instances + uint64_t valuesum = 0; + for(instanceK = 0; instanceK < numInstances; instanceK++) + valuesum += values[instanceK]; + // It seems that the same event can occur multiple times in eventIds, so we need to accumulate values in older valueBuffers if needed + // Scan thru readEvents looking for a match, break if found, if not found, increment numEventIDsRead + for(rr = 0; rr < numEventIDsRead; rr++) + if(readEventIDArray[rr] == eventIds[ee]) + break; + /* If the event was not found, increment the numEventIDsRead */ + if(rr == numEventIDsRead) + numEventIDsRead++; + readEventIDArray[rr] = eventIds[ee]; + readEventValueBuffer[rr] += valuesum; + size_t tmpStrSize = PAPI_MIN_STR_LEN - 1 * sizeof(char); + char tmpStr[PAPI_MIN_STR_LEN]; + CUPTI_CALL((*cuptiEventGetAttributePtr) (eventIds[ee], CUPTI_EVENT_ATTR_NAME, &tmpStrSize, tmpStr), return (PAPI_EMISC)); + SUBDBG("Read context %d eventgroup %d numEventIDsRead %lu device %d event %d/%d %d name %s value %lu (rr %d id %d val %lu) \n", cc, gg, numEventIDsRead, currDeviceNum, ee, numEvents, eventIds[ee], tmpStr, valuesum, rr, + readEventIDArray[rr], readEventValueBuffer[rr]); + } + papi_free(values); + } + } + + // normalize every accumulated value (slot ii -- not the one-past-the-end slot numEventIDsRead) to represent the total number of domain instances on the device; NOTE(review): numTotalInstances/numInstances hold the values of the last group read above -- confirm all groups of a context share them + for(ii = 0; ii < numEventIDsRead; ii++) + readEventValueBuffer[ii] = (readEventValueBuffer[ii] * 
numTotalInstances) / numInstances; + + /* For this pushed device and context, figure out the event and metric values and record them into the arrays */ + SUBDBG("For this device and context, match read values against active events by scanning activeEvents array and matching associated availEventIDs\n"); + for(jj = 0; jj < gctrl->activeEventCount; jj++) { + int index = gctrl->activeEventIndex[jj]; + /* If the device/context does not match the current context, move to next */ + if(gctxt->availEventDeviceNum[index] != currDeviceNum) + continue; + uint32_t eventId = gctxt->availEventIDArray[index]; + switch (gctxt->availEventKind[index]) { + case CUPTI_ACTIVITY_KIND_EVENT: + SUBDBG("Searching for activeEvent %s eventId %u\n", gctxt->availEventDesc[index].name, eventId); + for(ii = 0; ii < numEventIDsRead; ii++) { + SUBDBG("Look at readEventIDArray[%u/%zu] with id %u\n", ii, numEventIDsRead, readEventIDArray[ii]); + if(readEventIDArray[ii] == eventId) { + gctrl->activeEventValues[jj] += (long long) readEventValueBuffer[ii]; + SUBDBG("Matched read-eventID %d:%d eventName %s value %ld activeEvent %d value %lld \n", jj, (int) eventId, gctxt->availEventDesc[index].name, readEventValueBuffer[ii], index, gctrl->activeEventValues[jj]); + break; + } + } + break; + + case CUPTI_ACTIVITY_KIND_METRIC: + SUBDBG("For the metric, find list of events required to calculate this metric value\n"); + CUpti_MetricID metricId = gctxt->availEventIDArray[index]; + int metricDeviceNum = gctxt->availEventDeviceNum[index]; + CUdevice cudevice = gctxt->deviceArray[metricDeviceNum].cuDev; + uint32_t numEvents, ee; + CUPTI_CALL((*cuptiMetricGetNumEventsPtr) (metricId, &numEvents), return (PAPI_EINVAL)); + SUBDBG("Metric %s needs %d events\n", gctxt->availEventDesc[index].name, numEvents); + size_t eventIdArraySizeBytes = numEvents * sizeof(CUpti_EventID); + CUpti_EventID *eventIdArray = papi_malloc(eventIdArraySizeBytes); + CHECK_PRINT_EVAL(eventIdArray == NULL, "Malloc failed", return 
(PAPI_ENOMEM)); + size_t eventValueArraySizeBytes = numEvents * sizeof(uint64_t); + uint64_t *eventValueArray = papi_malloc(eventValueArraySizeBytes); + CHECK_PRINT_EVAL(eventValueArray == NULL, "Malloc failed", return (PAPI_ENOMEM)); + CUPTI_CALL((*cuptiMetricEnumEventsPtr) (metricId, &eventIdArraySizeBytes, eventIdArray), return (PAPI_EINVAL)); + // Match metrics for the users events + for(ee = 0; ee < numEvents; ee++) { + for(ii = 0; ii < numEventIDsRead; ii++) { + if(eventIdArray[ee] == readEventIDArray[ii]) { + SUBDBG("Matched metric %s, found %d/%d events with eventId %d\n", gctxt->availEventDesc[index].name, ee, numEvents, readEventIDArray[ii]); + eventValueArray[ee] = readEventValueBuffer[ii]; + break; + } + } + CHECK_PRINT_EVAL(ii == numEventIDsRead, "Could not find required event for metric", return (PAPI_EINVAL)); + } + + // Use CUPTI to calculate a metric. Return all metric values mapped into long long values. + CUpti_MetricValue metricValue; + CUpti_MetricValueKind valueKind; + size_t valueKindSize = sizeof(valueKind); + CUPTI_CALL((*cuptiMetricGetAttributePtr) (metricId, CUPTI_METRIC_ATTR_VALUE_KIND, &valueKindSize, &valueKind), return (PAPI_EMISC)); + CUPTI_CALL((*cuptiMetricGetValuePtr) (cudevice, metricId, eventIdArraySizeBytes, eventIdArray, eventValueArraySizeBytes, eventValueArray, durationNs, &metricValue), return (PAPI_EMISC)); + int retval = papicuda_convert_metric_value_to_long_long(metricValue, valueKind, &(gctrl->activeEventValues[jj])); + if(retval != PAPI_OK) + return (retval); + papi_free(eventIdArray); + papi_free(eventValueArray); + break; + + default: + SUBDBG("Not handled"); + break; + } + } + + /* Pop the pushed context */ + if(currDeviceNum != saveDeviceNum) + CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC)); + } + *values = gctrl->activeEventValues; + return (PAPI_OK); +} + +/* Triggered by PAPI_stop() */ +static int papicuda_stop(hwd_context_t * ctx, hwd_control_state_t * ctrl) +{ + SUBDBG("Entering\n"); + (void) 
ctx; + (void) ctrl; + papicuda_control_t *gctrl = global_papicuda_control; + uint32_t cc, ss; + int saveDeviceNum; + + SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n"); + CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); + for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { + int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum; + CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; + SUBDBG("Set to device %d cuCtx %p \n", currDeviceNum, currCuCtx); + if(currDeviceNum != saveDeviceNum) + CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC)); + else + CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC)); + CUpti_EventGroupSets *currEventGroupPasses = gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses; + for (ss=0; ssnumSets; ss++) { + CUpti_EventGroupSet groupset = currEventGroupPasses->sets[ss]; + CUPTI_CALL((*cuptiEventGroupSetDisablePtr) (&groupset), return (PAPI_EMISC)); + } + /* Pop the pushed context */ + if(currDeviceNum != saveDeviceNum) + CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC)); + + } + return (PAPI_OK); +} + + +/* + * Disable and destroy the CUDA eventGroup + */ +static int papicuda_cleanup_eventset(hwd_control_state_t * ctrl) +{ + SUBDBG("Entering\n"); + (void) ctrl; + papicuda_control_t *gctrl = global_papicuda_control; + // papicuda_active_cucontext_t *currctrl; + uint32_t cc; + int saveDeviceNum; + + SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n"); + CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); + for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { + CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; + int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum; + CUpti_EventGroupSets *currEventGroupPasses = gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses; + if(currDeviceNum != saveDeviceNum) + 
CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC)); + else + CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC)); + //CUPTI_CALL((*cuptiEventGroupSetsDestroyPtr) (currEventGroupPasses), return (PAPI_EMISC)); + (*cuptiEventGroupSetsDestroyPtr) (currEventGroupPasses); + gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses = NULL; + papi_free( gctrl->arrayOfActiveCUContexts[cc] ); + /* Pop the pushed context */ + if(currDeviceNum != saveDeviceNum) + CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC)); + } + /* Record that there are no active contexts or events */ + gctrl->countOfActiveCUContexts = 0; + gctrl->activeEventCount = 0; + return (PAPI_OK); +} + + +/* Called at thread shutdown. Does nothing in the CUDA component. */ +int papicuda_shutdown_thread(hwd_context_t * ctx) +{ + SUBDBG("Entering\n"); + (void) ctx; + + return (PAPI_OK); +} + +/* Triggered by PAPI_shutdown() and frees memory allocated in the CUDA component. */ +static int papicuda_shutdown_component(void) +{ + SUBDBG("Entering\n"); + papicuda_control_t *gctrl = global_papicuda_control; + papicuda_context_t *gctxt = global_papicuda_context; + int deviceNum; + uint32_t cc; + /* Free context */ + if(gctxt) { + for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { + papicuda_device_desc_t *mydevice = &gctxt->deviceArray[deviceNum]; + papi_free(mydevice->domainIDArray); + papi_free(mydevice->domainIDNumEvents); + } + papi_free(gctxt->availEventIDArray); + papi_free(gctxt->availEventDeviceNum); + papi_free(gctxt->availEventKind); + papi_free(gctxt->availEventIsBeingMeasuredInEventset); + papi_free(gctxt->availEventDesc); + papi_free(gctxt->deviceArray); + papi_free(gctxt); + global_papicuda_context = gctxt = NULL; + } + /* Free control */ + if(gctrl) { + for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { +#ifdef PAPICUDA_KERNEL_REPLAY_MODE + CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; + 
CUPTI_CALL((*cuptiDisableKernelReplayModePtr) (currCuCtx), return (PAPI_EMISC)); +#endif + if(gctrl->arrayOfActiveCUContexts[cc] != NULL) + papi_free(gctrl->arrayOfActiveCUContexts[cc]); + } + papi_free(gctrl); + global_papicuda_control = gctrl = NULL; + } + // close the dynamic libraries needed by this component (opened in the init substrate call) + dlclose(dl1); + dlclose(dl2); + dlclose(dl3); + return (PAPI_OK); +} + + +/* Triggered by PAPI_reset() but only if the EventSet is currently + * running. If the eventset is not currently running, then the saved + * value in the EventSet is set to zero without calling this + * routine. */ +static int papicuda_reset(hwd_context_t * ctx, hwd_control_state_t * ctrl) +{ + (void) ctx; + (void) ctrl; + papicuda_control_t *gctrl = global_papicuda_control; + uint32_t gg, ii, cc, ss; + int saveDeviceNum; + + SUBDBG("Reset all active event values\n"); + for(ii = 0; ii < gctrl->activeEventCount; ii++) + gctrl->activeEventValues[ii] = 0; + + SUBDBG("Save current context, then switch to each active device/context and reset\n"); + CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); + for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { + CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; + int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum; + if(currDeviceNum != saveDeviceNum) + CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC)); + else + CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC)); + CUpti_EventGroupSets *currEventGroupPasses = gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses; + for (ss=0; ssnumSets; ss++) { + CUpti_EventGroupSet groupset = currEventGroupPasses->sets[ss]; + for(gg = 0; gg < groupset.numEventGroups; gg++) { + CUpti_EventGroup group = groupset.eventGroups[gg]; + CUPTI_CALL((*cuptiEventGroupResetAllEventsPtr) (group), return (PAPI_EMISC)); + } + CUPTI_CALL((*cuptiEventGroupSetEnablePtr) (&groupset), return (PAPI_EMISC)); + } + 
if(currDeviceNum != saveDeviceNum) + CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC)); + } + return (PAPI_OK); +} + + +/* This function sets various options in the component - Does nothing in the CUDA component. + @param[in] ctx -- hardware context + @param[in] code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT + @param[in] option -- options to be set +*/ +static int papicuda_ctrl(hwd_context_t * ctx, int code, _papi_int_option_t * option) +{ + SUBDBG("Entering\n"); + (void) ctx; + (void) code; + (void) option; + return (PAPI_OK); +} + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +static int papicuda_set_domain(hwd_control_state_t * ctrl, int domain) +{ + SUBDBG("Entering\n"); + (void) ctrl; + if((PAPI_DOM_USER & domain) || (PAPI_DOM_KERNEL & domain) || (PAPI_DOM_OTHER & domain) || (PAPI_DOM_ALL & domain)) + return (PAPI_OK); + else + return (PAPI_EINVAL); + return (PAPI_OK); +} + + +/* Enumerate Native Events. 
+ * @param EventCode is the event of interest + * @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS (any other value yields PAPI_EINVAL; PAPI_ENOEVNT marks the end of the list) + */ +static int papicuda_ntv_enum_events(unsigned int *EventCode, int modifier) +{ + // SUBDBG( "Entering (get next event after %u)\n", *EventCode ); + switch (modifier) { + case PAPI_ENUM_FIRST: + *EventCode = 0; + return (PAPI_OK); + break; + case PAPI_ENUM_EVENTS: /* test "size > 0" first: with an empty table "size - 1" wraps around in the unsigned comparison and the enumeration would never report PAPI_ENOEVNT */ + if(global_papicuda_context->availEventSize > 0 && *EventCode < global_papicuda_context->availEventSize - 1) { + *EventCode = *EventCode + 1; + return (PAPI_OK); + } else + return (PAPI_ENOEVNT); + break; + default: + return (PAPI_EINVAL); + } + return (PAPI_OK); +} + + +/* Takes a native event code and passes back the name + * @param EventCode is the native event code; PAPI_EINVAL is returned if it is not below availEventSize + * @param name is a pointer for the name to be copied to; the result is always NUL-terminated (truncated if needed) + * @param len is the size of the name string; must be positive, otherwise PAPI_EINVAL is returned + */ +static int papicuda_ntv_code_to_name(unsigned int EventCode, char *name, int len) +{ + // SUBDBG( "Entering EventCode %d\n", EventCode ); + unsigned int index = EventCode; + papicuda_context_t *gctxt = global_papicuda_context; + if(index >= gctxt->availEventSize || len <= 0) { + return (PAPI_EINVAL); + } + strncpy(name, gctxt->availEventDesc[index].name, len); + name[len - 1] = '\0'; /* strncpy leaves the buffer unterminated when the source fills it */ + // SUBDBG( "Exit: EventCode %d: Name %s\n", EventCode, name ); + return (PAPI_OK); +} + + +/* Takes a native event code and passes back the event description + * @param EventCode is the native event code; PAPI_EINVAL is returned if it is not below availEventSize + * @param name is a pointer for the description to be copied to; the result is always NUL-terminated (truncated if needed) + * @param len is the size of the name buffer; must be positive, otherwise PAPI_EINVAL is returned + */ +static int papicuda_ntv_code_to_descr(unsigned int EventCode, char *name, int len) +{ + // SUBDBG( "Entering\n" ); + unsigned int index = EventCode; + papicuda_context_t *gctxt = global_papicuda_context; + if(index >= gctxt->availEventSize || len <= 0) { + return (PAPI_EINVAL); + } + strncpy(name, gctxt->availEventDesc[index].description, len); + name[len - 1] = '\0'; /* strncpy leaves the buffer unterminated when the source fills it */ + return (PAPI_OK); +} + + +/* Vector that points to entry points for the component */ +papi_vector_t _cuda_vector = { + .cmp_info = { + /* default 
component information (unspecified values are initialized to 0) */ + .name = "cuda", + .short_name = "cuda", + .version = "5.1", + .description = "CUDA events and metrics via NVIDIA CuPTI interfaces", + .num_mpx_cntrs = PAPICUDA_MAX_COUNTERS, + .num_cntrs = PAPICUDA_MAX_COUNTERS, + .default_domain = PAPI_DOM_USER, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + } + , + /* sizes of framework-opaque component-private structures... these are all unused in this component */ + .size = { + .context = 1, /* sizeof( papicuda_context_t ), */ + .control_state = 1, /* sizeof( papicuda_control_t ), */ + .reg_value = 1, /* sizeof( papicuda_register_t ), */ + .reg_alloc = 1, /* sizeof( papicuda_reg_alloc_t ), */ + } + , + /* function pointers in this component */ + .start = papicuda_start, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ + .stop = papicuda_stop, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ + .read = papicuda_read, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl, long_long ** events, int flags ) */ + .reset = papicuda_reset, /* ( hwd_context_t * ctx, hwd_control_state_t * ctrl ) */ + .cleanup_eventset = papicuda_cleanup_eventset, /* ( hwd_control_state_t * ctrl ) */ + + .init_component = papicuda_init_component, /* ( int cidx ) */ + .init_thread = papicuda_init_thread, /* ( hwd_context_t * ctx ) */ + .init_control_state = papicuda_init_control_state, /* ( hwd_control_state_t * ctrl ) */ + .update_control_state = papicuda_update_control_state, /* ( hwd_control_state_t * ptr, NativeInfo_t * native, int count, hwd_context_t * ctx ) */ + + .ctl = papicuda_ctrl, /* ( hwd_context_t * ctx, int code, _papi_int_option_t * option ) */ + .set_domain = papicuda_set_domain, /* 
( hwd_control_state_t * cntrl, int domain ) */ + .ntv_enum_events = papicuda_ntv_enum_events, /* ( unsigned int *EventCode, int modifier ) */ + .ntv_code_to_name = papicuda_ntv_code_to_name, /* ( unsigned int EventCode, char *name, int len ) */ + .ntv_code_to_descr = papicuda_ntv_code_to_descr, /* ( unsigned int EventCode, char *name, int len ) */ + .shutdown_thread = papicuda_shutdown_thread, /* ( hwd_context_t * ctx ) */ + .shutdown_component = papicuda_shutdown_component, /* ( void ) */ +}; diff --git a/src/components/cuda/sampling/Makefile b/src/components/cuda/sampling/Makefile new file mode 100644 index 0000000..8b158c1 --- /dev/null +++ b/src/components/cuda/sampling/Makefile @@ -0,0 +1,29 @@ +# include ../Makefile.cuda + +CUDA_DIR ?= /opt/cuda +CUPTI_DIR ?= $(CUDA_DIR)/extras/CUPTI +CFG = -I$(CUDA_DIR)/include -I$(CUPTI_DIR)/include +LDG = -L$(CUDA_DIR)/lib64 -L$(CUPTI_DIR)/lib64 +NVCC = $(CUDA_DIR)/bin/nvcc + +all: + @make lib + @make papi_sampling + @make sample + @make cpy + +lib: activity.c + @gcc -fPIC -shared -o libactivity.so $(CFG) $(LDG) activity.c -lcuda -lcupti + +papi_sampling: gpu_activity.c + @echo 'char* ld_prld="LD_PRELOAD='`pwd`'/sampling/libactivity.so";' > path.h + @gcc -I. 
gpu_activity.c -o ../../../utils/papi_sampling_cuda + +sample: test/matmul.cu + @$(NVCC) test/matmul.cu $(CFG) $(LDG) -o test/matmul -lcuda + +cpy: + cp ../../../utils/papi_sampling_cuda test + +clean: + rm libactivity.so ../../../utils/papi_sampling_cuda test/matmul test/sass* test/papi_sampling_cuda diff --git a/src/components/cuda/sampling/README b/src/components/cuda/sampling/README new file mode 100644 index 0000000..6e0089c --- /dev/null +++ b/src/components/cuda/sampling/README @@ -0,0 +1,75 @@ +######################################## +Author: Sangamesh Ragate +Date : Nov 12th 2015 +email : sragate@vols.utk.edu +INNOVATIVE COMPUTING LABORATORY, UTK +####################################### + + +Description : This utility helps configure CUPTI for performing PC_SAMPLING + on MAXWELL GPUs and other GPUs that have PC_SAMPLING support. It is a + standalone tool that works like nvprof and can be used to get the + PC sampling result on any cuda application without rebuilding + the application. + +************************************************************************************************************ +To Compile: + + This utility is compiled automatically when the cuda component for PAPI is compiled. + The papi_sampling_cuda executable is generated in the src/utils directory of PAPI. + +************************************************************************************************************ +To run the utility: + + ./papi_sampling_cuda [-d <device_id>] [-s <sampling_period>] cuda_app [its arguments] + + -d : This switch is optional and is used to supply the GPU device ID, which should + be an integer >= 0. Default is GPU device ID 0. + -s : This switch is optional and is used to supply the PC sampling period. + Range is from 0 to 5, refer to "enum CUpti_ActivityPCSamplingPeriod" in the + CUPTI user manual. Default is set to 5. + + cuda_app : this is the cuda application for which PC SAMPLING is performed. + All the arguments that come next belong to the cuda_app. 
+ + +************************************************************************************************************ +Example to try: + + After successful compilation of PAPI with the cuda component + > cd src/components/cuda/sampling/test + + + NOTE: Make sure the papi and cuda shared libraries are in the LD_LIBRARY_PATH before you run the test. + matmul is a cuda application which performs a 512x512 matrix multiplication + + Try: + > ./papi_sampling_cuda matmul + > ./papi_sampling_cuda -d 0 -s 0 matmul + > ./papi_sampling_cuda -d 0 -s 5 matmul + > ./papi_sampling_cuda -d 0 matmul + > ./papi_sampling_cuda -s 2 matmul + + +************************************************************************************************************ +Output: + + >Kernel activity record : This gives information about the cuda kernel that was launched for PC SAMPLING + >Activity Kind record : This gives the id, context, module id, function index and name of the sampled function + >PC_SAMPLING record : Kernel identification, PC value, samples, stall reason + >Source locator record : This is generated if cuda_app is compiled using "-lineinfo" in nvcc + >STALL SUMMARY : This gives the histogram of stall reason vs. number of samples due to the + corresponding stall. + + NOTE: To better understand the output generated, the user should be familiar with the "Activity API" + records of CUPTI, more specifically the KERNEL, SOURCE_LOCATOR and PC_SAMPLING activity records described in the + CUPTI manual. +************************************************************************************************************ +Additional Feature: + + The utility also generates a SASS dump that can be used to trace a stall back to the source code line in the + CUDA application. To get the source code line info, recompile only your cuda_app using the "-lineinfo" flag of + nvcc. 
+ + +************************************************************************************************************ diff --git a/src/components/cuda/sampling/activity.c b/src/components/cuda/sampling/activity.c new file mode 100644 index 0000000..40a725e --- /dev/null +++ b/src/components/cuda/sampling/activity.c @@ -0,0 +1,300 @@ +/* + * Author : Sangamesh Ragate + * Date : 18th Nov 2015 + * ICl-UTK + * Description : This is the shared library that sets up the environent + * for the cuda application by creating the context and keeping it ready + * to perform PC sampling of the cuda application as soon as it launces the kernel + */ + + + +#include +#include +#include +#include +#include + +static CUpti_SubscriberHandle g_subscriber; + + +#define RUNTIME_API_CALL(apiFuncCall) \ +do { \ + cudaError_t _status = apiFuncCall; \ + if (_status != cudaSuccess) { \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status));\ + exit(-1); \ + } \ +} while (0) + +#define CUPTI_CALL(call) \ +do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + exit(-1); \ + } \ +} while (0) + +#define BUF_SIZE (32 * 16384) +#define ALIGN_SIZE (8) + +static char* stall_name[12]; +static int val[12]={0}; + + +static const char * +getStallReasonString(CUpti_ActivityPCSamplingStallReason reason,unsigned int samples) +{ + switch (reason) { + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_INVALID: + stall_name[0]="Stall_invalid"; + val[0] += samples; + return "Invalid"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_NONE: + stall_name[1]="Stall_none"; + val[1] += samples; + return "Selected"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_INST_FETCH: + stall_name[2]="Stall_inst_fetch"; + val[2] += samples; + return "Instruction fetch"; + case 
CUPTI_ACTIVITY_PC_SAMPLING_STALL_EXEC_DEPENDENCY: + stall_name[3]="Stall_exec_dependency"; + val[3] += samples; + return "Execution dependency"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_DEPENDENCY: + stall_name[4]="Stall_mem_dependency"; + val[4] += samples; + return "Memory dependency"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_TEXTURE: + stall_name[5]="Stall_texture"; + val[5] += samples; + return "Texture"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_SYNC: + stall_name[6]="Stall_sync"; + val[6] += samples; + return "Sync"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_CONSTANT_MEMORY_DEPENDENCY: + stall_name[7]="Stall_const_mem_dependency"; + val[7] += samples; + return "Constant memory dependency"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_PIPE_BUSY: + stall_name[8]="Stall_pipe_busy"; + val[8] += samples; + return "Pipe busy"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_MEMORY_THROTTLE: + stall_name[9]="Stall_memory_throttle"; + val[9] += samples; + return "Memory throttle"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_NOT_SELECTED: + stall_name[10]="Stall_warp_not_selected"; + val[10] += samples; + return "Warp Not selected"; + case CUPTI_ACTIVITY_PC_SAMPLING_STALL_OTHER: + stall_name[11]="Stall_other"; + val[11] += samples; + return "Other"; + default: + break; + } + + return NULL; +} + +static void +printActivity(CUpti_Activity *record) +{ + switch (record->kind) { + case CUPTI_ACTIVITY_KIND_SOURCE_LOCATOR: + { + CUpti_ActivitySourceLocator *sourceLocator = (CUpti_ActivitySourceLocator *)record; + printf("Source Locator Id %d, File %s Line %d\n", sourceLocator->id, sourceLocator->fileName, sourceLocator->lineNumber); + break; + } + case CUPTI_ACTIVITY_KIND_PC_SAMPLING: + { + CUpti_ActivityPCSampling *psRecord = (CUpti_ActivityPCSampling *)record; + printf("source %u, functionId %u, pc 0x%x, corr %u, samples %u, stallreason %s\n", + psRecord->sourceLocatorId, + psRecord->functionId, + psRecord->pcOffset, + psRecord->correlationId, + psRecord->samples, + 
getStallReasonString(psRecord->stallReason,psRecord->samples)); + break; + } + case CUPTI_ACTIVITY_KIND_PC_SAMPLING_RECORD_INFO: + { + CUpti_ActivityPCSamplingRecordInfo *pcsriResult = + (CUpti_ActivityPCSamplingRecordInfo *)(void *)record; + + printf("\n\n************** PC_SAMPLING_RECORD_SUMMARY ************************\n"); + printf("corr %u, totalSamples %llu, droppedSamples %llu, sampling period %llu\n", + pcsriResult->correlationId, + (unsigned long long)pcsriResult->totalSamples, + (unsigned long long)pcsriResult->droppedSamples, + (unsigned long long)pcsriResult->samplingPeriodInCycles); + break; + } + case CUPTI_ACTIVITY_KIND_FUNCTION: + { + CUpti_ActivityFunction *fResult = + (CUpti_ActivityFunction *)record; + + printf("\n\n************************************ ACTIVITY_KIND_FUNCTION_SUMMARY **********************************\n"); + printf("id %u, ctx %u, moduleId %u, functionIndex %u, name %s\n", + fResult->id, + fResult->contextId, + fResult->moduleId, + fResult->functionIndex, + fResult->name); + printf("\n\n\n\n**************************************************************************************************\n"); + break; + } + case CUPTI_ACTIVITY_KIND_KERNEL: + { + CUpti_ActivityKernel3 *kernel = (CUpti_ActivityKernel3 *)record; + printf("\n\n************************************** KERNEL_RECORD_SUMMARY **********************************\n"); + printf("Kernel %s , device %d, context %d, correlation %d, stream %d,[start-end][%ld-%ld]\n\n",kernel->name, + kernel->deviceId,kernel->contextId,kernel->correlationId,kernel->streamId,kernel->start,kernel->end); + break; + } + + default: + printf("\n"); + break; + } +} + +static void CUPTIAPI +bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) +{ + *size = BUF_SIZE + ALIGN_SIZE; + *buffer = (uint8_t*) calloc(1, *size); + *maxNumRecords = 0; + if (*buffer == NULL) { + printf("Error: out of memory\n"); + exit(-1); + } +} + +static void CUPTIAPI +bufferCompleted(CUcontext ctx, uint32_t 
streamId, uint8_t *buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + do { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if(status == CUPTI_SUCCESS) { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) { + break; + } + else { + CUPTI_CALL(status); + } + } while (1); + + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) { + printf("Dropped %u activity records\n", (unsigned int)dropped); + } + printf("\n\n\n\n\n\n"); + printf("************* STALL SUMMARY ********************\n"); + int i; + for(i=0;i<12;++i) + if(stall_name[i] != NULL) + printf("%s = %d \n",stall_name[i],val[i]); + printf("*************************************************\n\n"); + free(buffer); /* allocated with calloc in bufferRequested; all records have been printed, so release it here instead of leaking one buffer per completion */ + +} + +#define DUMP_CUBIN 1 + +void CUPTIAPI dumpCudaModule(CUpti_CallbackId cbid, void *resourceDescriptor) +{ +#if DUMP_CUBIN + const char *pCubin; + size_t cubinSize; + + + //dump the cubin at MODULE_LOADED_STARTING + CUpti_ModuleResourceData *moduleResourceData = (CUpti_ModuleResourceData *)resourceDescriptor; + #endif + + if (cbid == CUPTI_CBID_RESOURCE_MODULE_LOADED) { + #if DUMP_CUBIN + // You can use nvdisasm to dump the SASS from the cubin. 
+ // Try nvdisasm -b -fun sass_to_source.cubin + pCubin = moduleResourceData->pCubin; + cubinSize = moduleResourceData->cubinSize; + + FILE *cubin; + cubin = fopen("sass_source_map.cubin", "wb"); + fwrite(pCubin, sizeof(uint8_t), cubinSize, cubin); + fclose(cubin); + #endif + }else if (cbid == CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING) { + // You can dump the cubin either at MODULE_LOADED or MODULE_UNLOAD_STARTING + } +} + +static void +handleResource(CUpti_CallbackId cbid, const CUpti_ResourceData *resourceData) +{ + if (cbid == CUPTI_CBID_RESOURCE_MODULE_LOADED) { + dumpCudaModule(cbid, resourceData->resourceDescriptor); + }else if (cbid == CUPTI_CBID_RESOURCE_MODULE_UNLOAD_STARTING) { + dumpCudaModule(cbid, resourceData->resourceDescriptor); + } +} + + +static void CUPTIAPI +traceCallback(void *userdata, CUpti_CallbackDomain domain, + CUpti_CallbackId cbid, const void *cbdata) +{ + if (domain == CUPTI_CB_DOMAIN_RESOURCE) { + handleResource(cbid, (CUpti_ResourceData *)cbdata); + } +} + + +__attribute__((constructor)) void +initTrace() +{ + //get the arguments from the environment variables + int deviceId, sampRate; + + CUcontext cuCtx; + deviceId = atoi(getenv("GPU_DEVICE_ID")); + cuInit(0); + cuCtxCreate(&cuCtx,0,deviceId); + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_PC_SAMPLING)); + //CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_BRANCH)); + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL)); + CUPTI_CALL(cuptiSubscribe(&g_subscriber, (CUpti_CallbackFunc)traceCallback, NULL)); + CUPTI_CALL(cuptiEnableDomain(1, g_subscriber, CUPTI_CB_DOMAIN_RESOURCE)); + CUpti_ActivityPCSamplingConfig config; + sampRate=atoi(getenv("PC_SAMPLING_RATE")); + config.samplingPeriod= sampRate; + CUPTI_CALL(cuptiActivityConfigurePCSampling(cuCtx, &config)); +} + +__attribute__((destructor)) void +finiTrace() +{ +// printf("FLushing CUPTI \n"); + 
CUPTI_CALL(cuptiActivityFlushAll(0)); +} + diff --git a/src/components/cuda/sampling/gpu_activity.c b/src/components/cuda/sampling/gpu_activity.c new file mode 100644 index 0000000..1ec2355 --- /dev/null +++ b/src/components/cuda/sampling/gpu_activity.c @@ -0,0 +1,111 @@ +/* + * Author: Sangamesh Ragate + * Date : 18th Nov 2015 + * ICL-UTK + * Description : This is the parent process that Preloads the libactivity.so + * and launches the cuda application for performing PC-Sampling + */ + + +#include +#include +#include +#include + +#include + +#include "path.h" + + + + +int main(int argc,char** argv){ + + + int pid; + if(argc < 2){ + printf("please supply Cuda app to be profiled \n"); + exit(-1); + } + + + //get CUDA device ID and sampling rate from args + char *samp, *device; + extern char* optarg; + extern int optind; + int c,tmp,index; + + //set default samp and device + device="0"; + samp="5"; + index=1; + while((c=getopt(argc,argv,"d:s:"))!=-1){ + switch(c){ + case 'd': + device = optarg; + tmp=atoi(device); + index = optind; + if(tmp < 0){ + printf("GPU device ID not valid \n"); + exit(-1); + } + break; + case 's': + samp = optarg; + tmp=atoi(samp); + //record index for argument forming for cuda app + index = optind; + if(tmp < 0 || tmp > 5){ + printf("PC sampling rate not valid \n"); + exit(-1); + } + break; + case '?': + printf("Switch not recognized by papi_sampling_cuda utility \n"); + break; + } + } + + + //form the arg list for the cuda app + char** var; + var=&argv[index]; + + + char* ld_lib; + char env1[1024]; + char env2[256]; + char env3[256]; + int status; + + //get the shared library load path + strcpy(env1,"LD_LIBRARY_PATH="); + ld_lib=getenv("LD_LIBRARY_PATH"); + if(ld_lib == NULL){ + printf("Error loading CUDA shared libraries: LD_LIBRARY_PATH=NULL \n"); + exit(-1); + } + strcat(env1,ld_lib); + + strcpy(env2,"GPU_DEVICE_ID="); + strcat(env2,device); + + strcpy(env3,"PC_SAMPLING_RATE="); + strcat(env3,samp); + + //form the env variable + char* 
env[]={env1,env2,env3,ld_prld, NULL}; + + printf("\n\n\n\n"); + printf("***************** PAPI_SAMPLING_CUDA utility **********************\n"); + pid=fork(); + if(pid==0){ + execve(var[0],var,env); + }else if(pid==-1){ + printf("Profile fork failed \n"); + exit(-1); + }else{ + wait(&status); + } + return 0; +} diff --git a/src/components/cuda/sampling/path.h.in b/src/components/cuda/sampling/path.h.in new file mode 100644 index 0000000..13c9c5c --- /dev/null +++ b/src/components/cuda/sampling/path.h.in @@ -0,0 +1,2 @@ +char* ld_prld="LD_PRELOAD=@CURR_DIR@/sampling/libactivity.so"; + diff --git a/src/components/cuda/sampling/test/matmul.cu b/src/components/cuda/sampling/test/matmul.cu new file mode 100644 index 0000000..003004e --- /dev/null +++ b/src/components/cuda/sampling/test/matmul.cu @@ -0,0 +1,207 @@ +//This is a matrix multiplication program in CUDA without any optimizations +//like tiling, using shared memory etc + +#include +#include +#include +#include + + +__global__ void MatrixMulKernel(float* Md, float* Nd, float* Pd, int width) +{ + + //2D thread ID + int bx=blockIdx.x; + int by=blockIdx.y; + int tdx=threadIdx.x; + int tdy=threadIdx.y; + + int tx=bx*blockDim.x+tdx; + int ty=by*blockDim.y+tdy; + + //Pvalue stores the Pd element that is computed by the thread + float Pvalue=0; + for(int k=0;k>>(Md,Nd,Pd,width); + +// error=cudaDeviceSynchronize(); + error =cudaEventRecord(stop,NULL); + if(error!=cudaSuccess){ + printf("cuda event stop record failed with error=%s\n",cudaGetErrorString(error)); + exit(-1); + } + + error = cudaEventSynchronize(stop); + if(error!=cudaSuccess){ + printf("cuda event sync failed :%s\n",cudaGetErrorString(error)); + exit(-1); + } + + + + float msecTotal=0.0f; + error = cudaEventElapsedTime(&msecTotal,start,stop); + if(error!=cudaSuccess){ + printf("cuda elapsed time calculation failed \n"); + exit(-1); + } + + float msecPerMatrixMul = msecTotal; + double flopsPerMatrixMul = 2*width*width*width; + double 
gigaFlops=(flopsPerMatrixMul*1.0e-9f)/(msecPerMatrixMul/1000.0f); + printf("Performance= %.2f GFlop/s, Time= %.3f msec, Size= %.0f Ops, WorkgroupSize= %u threads/block\n", + gigaFlops, + msecPerMatrixMul, + flopsPerMatrixMul, + width * width); + + + + error=cudaMemcpy(P,Pd,size,cudaMemcpyDeviceToHost); + if(error!=cudaSuccess){ + printf("Device memoory copy back for Pd failed \n"); + exit(-1); + } + + printf("Very slow Host Matrix Mult \n"); + float temp; + // initialization of host data + for (int i = 0; i < width; ++i) { + for ( int j = 0; j < width; ++j) { + temp=0; + for(int k=0; k + * + * test case for Example component + * + * + * @brief + * This file is a very simple HelloWorld C example which serves (together + * with its Makefile) as a guideline on how to add tests to components. + * The papi configure and papi Makefile will take care of the compilation + * of the component tests (if all tests are added to a directory named + * 'tests' in the specific component dir). + * See components/README for more details. + * + * The string "Hello World!" is mangled and then restored. + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 +#define PAPI 1 + +// Prototypes +__global__ void helloWorld(char*); + + +// Host function +int main(int argc, char** argv) +{ +#ifdef PAPI + int retval, i; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + /* REPLACE THE EVENT NAME 'PAPI_FP_OPS' WITH A CUDA EVENT + FOR THE CUDA DEVICE YOU ARE RUNNING ON. 
+ RUN papi_native_avail to get a list of CUDA events that are + supported on your machine */ + //char *EventName[] = { "PAPI_FP_OPS" }; + char const *EventName[] = { "cuda:::event:elapsed_cycles_sm:device=0" }; + int events[NUM_EVENTS]; + int eventCount = 0; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if( retval != PAPI_VER_CURRENT ) { + if (!quiet) printf("PAPI init failed\n"); + test_fail(__FILE__,__LINE__, + "PAPI_library_init failed", 0 ); + } + + if (!quiet) { + printf( "PAPI_VERSION : %4d %6d %7d\n", + PAPI_VERSION_MAJOR( PAPI_VERSION ), + PAPI_VERSION_MINOR( PAPI_VERSION ), + PAPI_VERSION_REVISION( PAPI_VERSION ) ); + } + + /* convert PAPI native events to PAPI code */ + for( i = 0; i < NUM_EVENTS; i++ ){ + retval = PAPI_event_name_to_code( (char *)EventName[i], &events[i] ); + if( retval != PAPI_OK ) { + fprintf( stderr, "PAPI_event_name_to_code failed\n" ); + continue; + } + eventCount++; + if (!quiet) printf( "Name %s --- Code: %#x\n", EventName[i], events[i] ); + } + + /* if we did not find any valid events, just report test failed. */ + if (eventCount == 0) { + if (!quiet) printf( "Test FAILED: no valid events found.\n"); + test_skip(__FILE__,__LINE__,"No events found",0); + return 1; + } + + retval = PAPI_create_eventset( &EventSet ); + if( retval != PAPI_OK ) { + if (!quiet) printf( "PAPI_create_eventset failed\n" ); + test_fail(__FILE__,__LINE__,"Cannot create eventset",retval); + } + + // If multiple GPUs/contexts were being used, + // you need to switch to each device before adding its events + // e.g. 
cudaSetDevice( 0 ); + retval = PAPI_add_events( EventSet, events, eventCount ); + if( retval != PAPI_OK ) { + fprintf( stderr, "PAPI_add_events failed\n" ); + } + + retval = PAPI_start( EventSet ); + if( retval != PAPI_OK ) { + fprintf( stderr, "PAPI_start failed\n" ); + } +#endif + + + int j; + + // desired output + char str[] = "Hello World!"; + + // mangle contents of output + // the null character is left intact for simplicity + for(j = 0; j < 12; j++) { + str[j] -= j; + //printf("str=%s\n", str); + } + + + // allocate memory on the device + char *d_str; + size_t size = sizeof(str); + cudaMalloc((void**)&d_str, size); + + // copy the string to the device + cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice); + + // set the grid and block sizes + dim3 dimGrid(2); // one block per word + dim3 dimBlock(6); // one thread per character + + // invoke the kernel + helloWorld<<< dimGrid, dimBlock >>>(d_str); + + // retrieve the results from the device + cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost); + + // free up the allocated memory on the device + cudaFree(d_str); + + if (!quiet) printf("END: %s\n", str); + + +#ifdef PAPI + retval = PAPI_stop( EventSet, values ); + if( retval != PAPI_OK ) + fprintf( stderr, "PAPI_stop failed\n" ); + + retval = PAPI_cleanup_eventset(EventSet); + if( retval != PAPI_OK ) + fprintf(stderr, "PAPI_cleanup_eventset failed\n"); + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) + fprintf(stderr, "PAPI_destroy_eventset failed\n"); + + PAPI_shutdown(); + + for( i = 0; i < eventCount; i++ ) + if (!quiet) printf( "%12lld \t\t --> %s \n", values[i], EventName[i] ); +#endif + + test_pass(__FILE__); + + return 0; +} + + +// Device kernel +__global__ void +helloWorld(char* str) +{ + // determine where in the thread grid we are + int idx = blockIdx.x * blockDim.x + threadIdx.x; + // unmangle output + str[idx] += idx; +} + diff --git a/src/components/cuda/tests/Makefile b/src/components/cuda/tests/Makefile new file mode 
100644 index 0000000..bd6f425 --- /dev/null +++ b/src/components/cuda/tests/Makefile @@ -0,0 +1,47 @@ +NAME=cuda +include ../../Makefile_comp_tests.target + +CUDA_DIR ?= /opt/cuda +CUPTI_DIR ?= $(CUDA_DIR)/extras/CUPTI +CUDRV_DIR ?= $(CUDA_DIR) + +TESTS = HelloWorld simpleMultiGPU simpleMultiGPU_no_counters +cuda_tests: $(TESTS) + +CUDA_DIR ?= $(CUDA_PATH) +NVCC = $(CUDA_DIR)/bin/nvcc +NVCFLAGS = -g -ccbin='$(CC)' +INCLUDE += -I$(CUDA_DIR)/include -I$(CUPTI_DIR)/include +CUDALIBS = -L$(CUDRV_DIR)/lib64 -L$(CUDA_DIR)/lib64 -L$(CUPTI_DIR)/lib64 -lcudart -lcupti -lcuda +PAPILIB += -L../../../libpfm4/lib -lpfm +default: $(TESTS) + +%.o:%.cu + $(NVCC) $(INCLUDE) $(NVCFLAGS) -c -o $@ $< + +HelloWorld: HelloWorld.o $(UTILOBJS) + $(NVCC) $(NVCFLAGS) -o HelloWorld HelloWorld.o $(UTILOBJS) $(PAPILIB) $(CUDALIBS) $(LDFLAGS) + +simpleMultiGPU: simpleMultiGPU.cu $(UTILOBJS) + $(NVCC) $(NVCFLAGS) $(INCLUDE) -DPAPI -g -o $@ $+ $(UTILOBJS) $(PAPILIB) $(CUDALIBS) $(LDFLAGS) + +simpleMultiGPU_no_counters: simpleMultiGPU.cu $(UTILOBJS) + $(NVCC) $(NVCFLAGS) $(INCLUDE) -DNO_COUNTERS -g -o $@ $+ $(UTILOBJS) $(PAPILIB) $(CUDALIBS) $(LDFLAGS) + +simpleMultiGPU_cupti_only: simpleMultiGPU.cu $(UTILOBJS) + $(NVCC) $(NVCFLAGS) $(INCLUDE) -DCUPTI_ONLY -g -o $@ $+ $(UTILOBJS) $(PAPILIB) $(CUDALIBS) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o cuda_ld_preload_example.so + +# Extra example to show LD_PRELOAD can be used to insert PAPI CUDA into a pre-existing binary +cuda_ld_preload_example.so: simpleMultiGPU_no_counters cuda_ld_preload_example.c + gcc -Wall -fPIC -shared -o cuda_ld_preload_example.so $(LFLAGS) $(INCLUDE) cuda_ld_preload_example.c ../../../libpapi.so + +run: + ( cd ~/icl/papi/papi/src/ && make ) + ( make HelloWorld ) + ./HelloWorld + ( cd ~/icl/papi/papi/src/ && make ) + ( make simpleMultiGPU ) + ./simpleMultiGPU diff --git a/src/components/cuda/tests/cuda_ld_preload_example.README b/src/components/cuda/tests/cuda_ld_preload_example.README new file mode 100644 index 
0000000..373c0db --- /dev/null +++ b/src/components/cuda/tests/cuda_ld_preload_example.README @@ -0,0 +1,56 @@ +Example of using LD_PRELOAD with the CUDA component. +Asim YarKhan (2015) + +A short example of using LD_PRELOAD on a Linux system to intercept +function calls and PAPI-enable an un-instrumented CUDA binary. + +Several CUDA events (e.g. SM PM counters) require a CUcontext handle +to be provided since they are context switched. This means that we +cannot use a PAPI_attach from an external process to measure those +events in a preexisting executable. These events can only be measured +from within the CUcontext, that is, within the CUDA enabled code we +are trying to measure. If the user is unable to change the source +code, they may be able to use LD_PRELOAD's ability to trap functions +and measure the events from within the executable. + +This example is designed to work with the simpleMultiGPU_no_counters +binary in the PAPI CUDA component tests directory. We use ltrace to +figure out where to attach the PAPI start, PAPI eventset management +and PAPI_stop. Please note that this is a rough example; return codes +are not checked and other changes may be required to make sure that +the calls are intercepted at the right moment. + +First, the library calls in the simpleMultiGPU_no_counters binary +were traced using ltrace. Note in the ltrace output that the CUDA C +APIs are different from the CUDA calls visible to nvcc. Then figure +out an appropriate place to attach the PAPI calls. The initialization is +attached to the first entry to cudaSetDevice. Each cudaSetDevice is +also used to set up the PAPI events for that device. It was harder to +figure out where to attach the PAPI_start. After running some tests, +I attached it to the 17th invocation of gettimeofday (kind of +arbitrary! Sorry! May need tweaking). The PAPI_stop was attached to +the first invocation of cudaFreeHost. + + +[Note: There are other events that do not require a CUcontext. 
The PM +counter for TEX, L2, and FB are not context switched so it would be +possible to sample these values from any context as long as the +context is on the same CUDA device. These events could be measured +using a PAPI_attach from another process using the same CUDA device.] + + +-------------------------------------------------- +How to use this example... please read carefully to make sense of the following. + +Build: +make cuda_ld_preload_example.so + +Trace the executable using ltrace to figure out where to intercept the calls: +# Do the tracing with a small example! +# ( export PAPI_DIR=`pwd`/../../.. && export LIBPFM_LIBDIR=`pwd`/../../../libpfm4/lib && export LD_LIBRARY_PATH=./:${PAPI_DIR}:${LIBPFM_LIBDIR}:${LD_LIBRARY_PATH} && ltrace --output ltrace.out --library /usr/lib64/libcuda.so.1 ./simpleMultiGPU_no_counters ) +# ( export PAPI_DIR=`pwd`/../../.. && export LIBPFM_LIBDIR=`pwd`/../../../libpfm4/lib && export LD_LIBRARY_PATH=./:${PAPI_DIR}:${LIBPFM_LIBDIR}:${LD_LIBRARY_PATH} && LD_PRELOAD=./cuda_ld_preload_example.so ltrace ./simpleMultiGPU_no_counters ) + +Run using dynamic linking to find the correct libraries: +( export PAPI_DIR=`pwd`/../../.. && export LIBPFM_LIBDIR=`pwd`/../../../libpfm4/lib && export LD_LIBRARY_PATH=./:${PAPI_DIR}:${LIBPFM_LIBDIR}:${LD_LIBRARY_PATH} && LD_PRELOAD=./cuda_ld_preload_example.so ./simpleMultiGPU_no_counters ) + +make cuda_ld_preload_example.so && ( export PAPI_DIR=`pwd`/../../.. && export LIBPFM_LIBDIR=`pwd`/../../../libpfm4/lib && export LD_LIBRARY_PATH=./:${PAPI_DIR}:${LIBPFM_LIBDIR}:${LD_LIBRARY_PATH} && LD_PRELOAD=./cuda_ld_preload_example.so ./simpleMultiGPU_no_counters ) diff --git a/src/components/cuda/tests/cuda_ld_preload_example.c b/src/components/cuda/tests/cuda_ld_preload_example.c new file mode 100644 index 0000000..f67aac7 --- /dev/null +++ b/src/components/cuda/tests/cuda_ld_preload_example.c @@ -0,0 +1,110 @@ +/* + Example of using LD_PRELOAD with the CUDA component. 
+ Asim YarKhan + + This is designed to work with the simpleMultiGPU_no_counters binary + in the PAPI CUDA component tests directory. First trace the library + calls in the simpleMultiGPU_no_counters binary using ltrace. Note in + the ltrace output that the CUDA C APIs are different from the CUDA + calls visible to nvcc. Then figure out an appropriate place to attach + the PAPI calls. The initialization is attached to the first entry + to cudaSetDevice. Each cudaSetDevice is also used to set up the PAPI + events for that device. It was harder to figure out where to attach + the PAPI_start. After running some tests, I attached it to the 17th + invocation of gettimeofday (kind of arbitrary! Sorry!). The + PAPI_stop was attached to the first invocation of cudaFreeHost. + +*/ + +#define _GNU_SOURCE + +#include +#include + +#include "papi.h" + +#define MAXDEVICES 5 +int EventSet = PAPI_NULL; +int devseen[MAXDEVICES] = {0}; + +static void *dl1; +int (*PAPI_library_init_ptr)(int version); /**< initialize the PAPI library */ +int (*PAPI_create_eventset_ptr)(int *EventSet); /**< create a new empty PAPI event set */ +int (*PAPI_add_named_event_ptr)(int EventSet, char *EventName); /**< add an event by name to a PAPI event set */ +int (*PAPI_start_ptr)(int EventSet); /**< start counting hardware events in an event set */ +int (*PAPI_stop_ptr)(int EventSet, long long * values); /**< stop counting hardware events in an event set and return current events */ + + +int cudaSetDevice(int devnum, int n1, int n2, int n3, void *ptr1) +{ + static int onetime = 0; + int retval, retval_cudaSetDevice; + //printf("cudaSetDevice wrapper %d\n", devnum); + if ( onetime==0 ) { + onetime=1; + // Load the papi library dynamically and read the relevant functions + dl1 = dlopen( "libpapi.so", RTLD_NOW | RTLD_GLOBAL ); + if ( dl1==NULL ) printf("Intercept cudaSetDevice: Cannot load libpapi.so\n"); + PAPI_library_init_ptr = dlsym( dl1, "PAPI_library_init" ); + PAPI_create_eventset_ptr = dlsym( dl1, 
"PAPI_create_eventset" ); + PAPI_add_named_event_ptr = dlsym( dl1, "PAPI_add_named_event" ); + PAPI_start_ptr = dlsym( dl1, "PAPI_start" ); + PAPI_stop_ptr = dlsym( dl1, "PAPI_stop" ); + // Start using PAPI + printf("Intercept cudaSetDevice: Initializing PAPI on device %d\n", devnum); + retval = (PAPI_library_init_ptr)( PAPI_VER_CURRENT ); + if( retval != PAPI_VER_CURRENT ) fprintf( stdout, "PAPI_library_init failed\n" ); + printf( "PAPI version: %d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ), PAPI_VERSION_MINOR( PAPI_VERSION ), PAPI_VERSION_REVISION( PAPI_VERSION ) ); + retval = (PAPI_create_eventset_ptr)( &EventSet ); + if( retval != PAPI_OK ) fprintf( stdout, "PAPI_create_eventset failed\n" ); + } + int (*original_function)(int devnum, int n1, int n2, int n3, void *ptr1); + original_function = dlsym(RTLD_NEXT, "cudaSetDevice"); + retval_cudaSetDevice = (*original_function)( devnum, n1, n2, n3, ptr1 ); + if ( devseen[devnum]==0 ) { + devseen[devnum]=1; + char tmpEventName[120]; + printf("Intercept cudaSetDevice: Attaching events for device on device %d\n", devnum); + snprintf( tmpEventName, 110, "cuda:::device:%d:%s", devnum, "inst_executed" ); + retval = (PAPI_add_named_event_ptr)( EventSet, tmpEventName ); + if (retval!=PAPI_OK) printf( "Could not add event %s\n", tmpEventName ); + } + return retval_cudaSetDevice; +} + + +int gettimeofday(void *ptr1, void *ptr2) +{ + static int onetime = 0; + onetime++; + // printf("gettimeofday onetime %d\n", onetime); + // Use above print statement to determine that the N-th gettime of day works + if ( onetime==17 ) { + printf("Intercept gettimeofday: Attaching PAPI_start to the %d th call to gettimeofday (this may need to be adjusted)\n", onetime); + int retval = (PAPI_start_ptr)( EventSet ); + printf("Starting PAPI\n"); + if( retval!=PAPI_OK ) fprintf( stdout, "PAPI_start failed\n" ); + } + int (*original_function)(void *ptr1, void *ptr2); + original_function = dlsym(RTLD_NEXT, "gettimeofday"); + return 
(*original_function)(ptr1, ptr2); +} + +int cudaFreeHost(void *ptr1, void *ptr2, int n1, int n2, void *ptr3) +{ + static int onetime = 0; + long long values[10]; + int retval, devnum; + onetime++; + if ( onetime==1 ) { + printf("Intercept cudaFreeHost: Used to get PAPI results\n" ); + retval = (PAPI_stop_ptr)( EventSet, values ); + if( retval != PAPI_OK ) fprintf( stderr, "PAPI_stop failed\n" ); + for( devnum = 0; devnum < MAXDEVICES && devseen[devnum]==1 ; devnum++ ) + printf( "PAPI counterValue: cuda::device:%d:%s: %12lld \n", devnum, "inst_executed", values[devnum] ); + } + int (*original_function)(void *ptr1, void *ptr2, int n1, int n2, void *ptr3); + original_function = dlsym(RTLD_NEXT, "cudaFreeHost"); + return (*original_function)(ptr1, ptr2, n1, n2, ptr3); +} + diff --git a/src/components/cuda/tests/nvlink_bandwidth.cu b/src/components/cuda/tests/nvlink_bandwidth.cu new file mode 100755 index 0000000..8397c35 --- /dev/null +++ b/src/components/cuda/tests/nvlink_bandwidth.cu @@ -0,0 +1,594 @@ +/* + * Copyright 2015-2016 NVIDIA Corporation. All rights reserved. 
+ * + * Sample to demonstrate use of NVlink CUPTI APIs + */ + +#include +#include +#include +#include +#include + +#ifdef PAPI +#include "papi.h" +#endif + +#define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + exit(-1); \ + } \ + } while (0) + +#define DRIVER_API_CALL(apiFuncCall) \ + do { \ + CUresult _status = apiFuncCall; \ + if (_status != CUDA_SUCCESS) { \ + fprintf(stderr, "%s:%d: error: function %s failed with error %d.\n", \ + __FILE__, __LINE__, #apiFuncCall, _status); \ + exit(-1); \ + } \ + } while (0) + +#define RUNTIME_API_CALL(apiFuncCall) \ + do { \ + cudaError_t _status = apiFuncCall; \ + if (_status != cudaSuccess) { \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status)); \ + exit(-1); \ + } \ + } while (0) + +#define MEMORY_ALLOCATION_CALL(var) \ + do { \ + if (var == NULL) { \ + fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n", \ + __FILE__, __LINE__); \ + exit(-1); \ + } \ + } while (0) + +#define MAX_DEVICES (32) +#define BLOCK_SIZE (1024) +#define GRID_SIZE (512) +#define BUF_SIZE (32 * 1024) +#define ALIGN_SIZE (8) +#define SUCCESS (0) +#define NUM_METRIC (4) +#define NUM_EVENTS (2) +#define MAX_SIZE (64*1024*1024) // 64 MB +#define NUM_STREAMS (6) // gp100 has 6 physical copy engines + +CUpti_ActivityNvLink *nvlinkRec = NULL; +int cpuToGpu = 0; +int gpuToGpu = 0; +int cpuToGpuAccess = 0; +int gpuToGpuAccess = 0; + +extern "C" __global__ void test_nvlink_bandwidth(float *src, float *dst) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + dst[idx] = src[idx] * 2.0f; +} + +static void printActivity(CUpti_Activity * record) +{ + if(record->kind == CUPTI_ACTIVITY_KIND_NVLINK) { + nvlinkRec = (CUpti_ActivityNvLink *) 
record; + // printf("typeDev0 %d, typeDev1 %d, sysmem %d, peer %d, physical links %d, portdev0 %d, %d, %d, %d, portDev1 %d, %d, %d, %d, bandwidth %llu\n", nvlinkRec->typeDev0, nvlinkRec->typeDev1, ((nvlinkRec->flag & CUPTI_LINK_FLAG_SYSMEM_ACCESS) ? 1 : 0), ((nvlinkRec->flag & CUPTI_LINK_FLAG_PEER_ACCESS) ? 1 : 0), nvlinkRec->physicalNvLinkCount, nvlinkRec->portDev0[0], nvlinkRec->portDev0[1], nvlinkRec->portDev0[2], nvlinkRec->portDev0[3], nvlinkRec->portDev1[0], nvlinkRec->portDev1[1], nvlinkRec->portDev1[2], nvlinkRec->portDev1[3], (long long unsigned int) nvlinkRec->bandwidth); + cpuToGpuAccess |= (nvlinkRec->flag & CUPTI_LINK_FLAG_SYSMEM_ACCESS); + gpuToGpuAccess |= (nvlinkRec->flag & CUPTI_LINK_FLAG_PEER_ACCESS); + } else { + printf("Error : Unexpected CUPTI activity kind.\nExpected Activity kind : CUPTI_ACTIVITY_KIND_NVLINK\n"); + } +} + +static void CUPTIAPI bufferRequested(uint8_t ** buffer, size_t * size, size_t * maxNumRecords) +{ + *size = BUF_SIZE + ALIGN_SIZE; + *buffer = (uint8_t *) calloc(1, *size); + MEMORY_ALLOCATION_CALL(*buffer); + *maxNumRecords = 0; +} + +static void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t * buffer, size_t size, size_t validSize) +{ + CUptiResult status; + CUpti_Activity *record = NULL; + do { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if(status == CUPTI_SUCCESS) { + printActivity(record); + } else if(status == CUPTI_ERROR_MAX_LIMIT_REACHED) { + break; + } else { + CUPTI_CALL(status); + } + } while(1); + + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if(dropped != 0) { + printf("Dropped %u activity records\n", (unsigned int) dropped); + } +} + +#define DIM(x) (sizeof(x)/sizeof(*(x))) + +void calculateSize(char *result, uint64_t size) +{ + int i; + + const char *sizes[] = { "TB", "GB", "MB", "KB", "B" }; + uint64_t exbibytes = 1024ULL * 1024ULL * 1024ULL * 1024ULL; + + uint64_t multiplier = exbibytes; + + for(i = 0; (unsigned) i 
< DIM(sizes); i++, multiplier /= (uint64_t) 1024) { + if(size < multiplier) + continue; + sprintf(result, "%.1f %s", (float) size / multiplier, sizes[i]); + return; + } + strcpy(result, "0"); + return; +} + +void readMetricValue(CUpti_EventGroup eventGroup, uint32_t numEvents, CUdevice dev, CUpti_MetricID * metricId, uint64_t timeDuration, CUpti_MetricValue * metricValue) +{ + size_t bufferSizeBytes, numCountersRead; + uint64_t *eventValueArray = NULL; + CUpti_EventID *eventIdArray; + size_t arraySizeBytes = 0; + size_t numTotalInstancesSize = 0; + uint64_t numTotalInstances = 0; + uint64_t *aggrEventValueArray = NULL; + size_t aggrEventValueArraySize; + uint32_t i = 0, j = 0; + CUpti_EventDomainID domainId; + size_t domainSize; + + domainSize = sizeof(CUpti_EventDomainID); + + CUPTI_CALL(cuptiEventGroupGetAttribute(eventGroup, CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID, &domainSize, (void *) &domainId)); + + numTotalInstancesSize = sizeof(uint64_t); + + CUPTI_CALL(cuptiDeviceGetEventDomainAttribute(dev, domainId, CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT, &numTotalInstancesSize, (void *) &numTotalInstances)); + + arraySizeBytes = sizeof(CUpti_EventID) * numEvents; + bufferSizeBytes = sizeof(uint64_t) * numEvents * numTotalInstances; + + eventValueArray = (uint64_t *) malloc(bufferSizeBytes); + MEMORY_ALLOCATION_CALL(eventValueArray); + + eventIdArray = (CUpti_EventID *) malloc(arraySizeBytes); + MEMORY_ALLOCATION_CALL(eventIdArray); + + aggrEventValueArray = (uint64_t *) calloc(numEvents, sizeof(uint64_t)); + MEMORY_ALLOCATION_CALL(aggrEventValueArray); + + aggrEventValueArraySize = sizeof(uint64_t) * numEvents; + + CUPTI_CALL(cuptiEventGroupReadAllEvents(eventGroup, CUPTI_EVENT_READ_FLAG_NONE, &bufferSizeBytes, eventValueArray, &arraySizeBytes, eventIdArray, &numCountersRead)); + + for(i = 0; i < numEvents; i++) { + for(j = 0; j < numTotalInstances; j++) { + aggrEventValueArray[i] += eventValueArray[i + numEvents * j]; + printf("For event %d instance %d value %ul 
aggregate %d = %ul\n", i, j, eventValueArray[i + numEvents * j], i, aggrEventValueArray[i]); + } + } + + for(i = 0; i < NUM_METRIC; i++) { + CUPTI_CALL(cuptiMetricGetValue(dev, metricId[i], arraySizeBytes, eventIdArray, aggrEventValueArraySize, aggrEventValueArray, timeDuration, &metricValue[i])); + } + + free(eventValueArray); + free(eventIdArray); +} + +// Print metric value, we format based on the value kind +int printMetricValue(CUpti_MetricID metricId, CUpti_MetricValue metricValue, const char *metricName) +{ + + CUpti_MetricValueKind valueKind; + char str[64]; + size_t valueKindSize = sizeof(valueKind); + + CUPTI_CALL(cuptiMetricGetAttribute(metricId, CUPTI_METRIC_ATTR_VALUE_KIND, &valueKindSize, &valueKind)); + switch (valueKind) { + + case CUPTI_METRIC_VALUE_KIND_DOUBLE: + printf("%s = %f ", metricName, metricValue.metricValueDouble); + calculateSize(str, (uint64_t) metricValue.metricValueDouble); + printf("%s\n", str); + break; + + case CUPTI_METRIC_VALUE_KIND_UINT64: + printf("%s = %lu ", metricName, metricValue.metricValueUint64); + calculateSize(str, (uint64_t) metricValue.metricValueUint64); + printf("%s\n", str); + break; + + case CUPTI_METRIC_VALUE_KIND_INT64: + printf("%s = %ld ", metricName, metricValue.metricValueInt64); + calculateSize(str, (uint64_t) metricValue.metricValueInt64); + printf("%s\n", str); + break; + + case CUPTI_METRIC_VALUE_KIND_THROUGHPUT: + printf("%s = %f ", metricName, metricValue.metricValueThroughput); + calculateSize(str, (uint64_t) metricValue.metricValueThroughput); + printf("%s/Sec\n", str); + break; + + default: + fprintf(stderr, "error: unknown value kind\n"); + return -1; + } + return 0; +} + +void testCpuToGpu(CUpti_EventGroup * eventGroup, CUdeviceptr * pDevBuffer, float **pHostBuffer, size_t bufferSize, cudaStream_t * cudaStreams, uint64_t * timeDuration, int numEventGroup) +{ + int i; + +#ifdef CUPTI_ONLY + uint32_t value = 1; + uint64_t startTimestamp, endTimestamp; + for(i = 0; i < numEventGroup; i++) { + 
CUPTI_CALL(cuptiEventGroupEnable(eventGroup[i])); + CUPTI_CALL(cuptiEventGroupSetAttribute(eventGroup[i], CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES, sizeof(uint32_t), (void *) &value)); + } + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +#endif // CUPTI_ONLY + + // Unidirectional copy H2D + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *) pDevBuffer[i], pHostBuffer[i], bufferSize, cudaMemcpyHostToDevice, cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + // Unidirectional copy D2H + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync(pHostBuffer[i], (void *) pDevBuffer[i], bufferSize, cudaMemcpyDeviceToHost, cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + // Bidirectional copy + for(i = 0; i < NUM_STREAMS; i += 2) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *) pDevBuffer[i], pHostBuffer[i], bufferSize, cudaMemcpyHostToDevice, cudaStreams[i])); + RUNTIME_API_CALL(cudaMemcpyAsync(pHostBuffer[i + 1], (void *) pDevBuffer[i + 1], bufferSize, cudaMemcpyDeviceToHost, cudaStreams[i + 1])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + +#ifdef CUPTI_ONLY + CUPTI_CALL(cuptiGetTimestamp(&endTimestamp)); + *timeDuration = endTimestamp - startTimestamp; +#endif // CUPTI_ONLY +} + +void testGpuToGpu_part1(CUpti_EventGroup * eventGroup, CUdeviceptr * pDevBuffer0, CUdeviceptr * pDevBuffer1, float **pHostBuffer, size_t bufferSize, cudaStream_t * cudaStreams, uint64_t * timeDuration, int numEventGroup) +{ + int i; + +#ifdef CUPTI_ONLY + uint32_t value = 1; + uint64_t startTimestamp, endTimestamp; + for(i = 0; i < numEventGroup; i++) { + CUPTI_CALL(cuptiEventGroupEnable(eventGroup[i])); + CUPTI_CALL(cuptiEventGroupSetAttribute(eventGroup[i], CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES, sizeof(uint32_t), (void *) &value)); + } +#endif // CUPTI_ONLY + + RUNTIME_API_CALL(cudaSetDevice(0)); + RUNTIME_API_CALL(cudaDeviceEnablePeerAccess(1, 0)); + 
RUNTIME_API_CALL(cudaSetDevice(1)); + RUNTIME_API_CALL(cudaDeviceEnablePeerAccess(0, 0)); + + // Unidirectional copy H2D + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *) pDevBuffer0[i], pHostBuffer[i], bufferSize, cudaMemcpyHostToDevice, cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *) pDevBuffer1[i], pHostBuffer[i], bufferSize, cudaMemcpyHostToDevice, cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); +} + +void testGpuToGpu_part2(CUpti_EventGroup * eventGroup, CUdeviceptr * pDevBuffer0, CUdeviceptr * pDevBuffer1, float **pHostBuffer, size_t bufferSize, cudaStream_t * cudaStreams, uint64_t * timeDuration, int numEventGroup) +{ + int i; + +#ifdef CUPTI_ONLY + uint32_t value = 1; + uint64_t startTimestamp, endTimestamp; + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); +#endif + + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *) pDevBuffer0[i], (void *) pDevBuffer1[i], bufferSize, cudaMemcpyDeviceToDevice, cudaStreams[i])); + //printf("Copy %zu stream %d to devBuffer0 from devBuffer1 \n", bufferSize, i); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *) pDevBuffer1[i], (void *) pDevBuffer0[i], bufferSize, cudaMemcpyDeviceToDevice, cudaStreams[i])); + // printf("Copy %zu stream %d to devBuffer0 from devBuffer1 \n", bufferSize, i); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + for(i = 0; i < NUM_STREAMS; i++) { + test_nvlink_bandwidth <<< GRID_SIZE, BLOCK_SIZE >>> ((float *) pDevBuffer1[i], (float *) pDevBuffer0[i]); + // printf("test_nvlink_bandwidth stream %d \n", i); + } + +#ifdef CUPTI_ONLY + CUPTI_CALL(cuptiGetTimestamp(&endTimestamp)); + *timeDuration = endTimestamp - startTimestamp; +#endif // CUPTI_ONLY +} + + +static void printUsage() +{ + printf("usage: Demonstrate use of NVlink CUPTI APIs\n"); 
+ printf(" -help : display help message\n"); + printf(" --cpu-to-gpu : Show results for data transfer between CPU and GPU \n"); + printf(" --gpu-to-gpu : Show results for data transfer between two GPUs \n"); +} + +void parseCommandLineArgs(int argc, char *argv[]) +{ + if(argc != 2) { + printf("Invalid number of options\n"); + exit(0); + } + + if(strcmp(argv[1], "--cpu-to-gpu") == 0) { + cpuToGpu = 1; + } else if(strcmp(argv[1], "--gpu-to-gpu") == 0) { + gpuToGpu = 1; + } else if((strcmp(argv[1], "--help") == 0) || (strcmp(argv[1], "-help") == 0) || (strcmp(argv[1], "-h") == 0)) { + printUsage(); + exit(0); + } else { + cpuToGpu = 1; + } +} + +int main(int argc, char *argv[]) +{ + int deviceCount = 0, i = 0, numEventGroup = 0; + size_t bufferSize = 0, freeMemory = 0, totalMemory = 0; + CUcontext ctx; + char str[64]; + + CUdeviceptr pDevBuffer0[NUM_STREAMS]; + CUdeviceptr pDevBuffer1[NUM_STREAMS]; + float *pHostBuffer[NUM_STREAMS]; + + cudaStream_t cudaStreams[NUM_STREAMS] = { 0 }; + cudaDeviceProp prop[MAX_DEVICES]; + uint64_t timeDuration; + CUpti_EventGroup eventGroup[32]; + +#ifdef CUPTI_ONLY + CUpti_MetricID metricId[NUM_METRIC]; + uint32_t numEvents[NUM_METRIC]; + CUpti_MetricValue metricValue[NUM_METRIC]; + + // Adding nvlink Metrics. 
+ const char *metricName[NUM_METRIC] = { + "nvlink_total_data_transmitted", + "nvlink_total_data_received", + "nvlink_transmit_throughput", + "nvlink_receive_throughput" + }; +#endif // CUPTI_ONLY + + // Parse command line arguments + parseCommandLineArgs(argc, argv); + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NVLINK)); + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + DRIVER_API_CALL(cuInit(0)); + RUNTIME_API_CALL(cudaGetDeviceCount(&deviceCount)); + printf("There are %d devices.\n", deviceCount); + + if(deviceCount == 0) { + printf("There is no device supporting CUDA.\n"); + exit(-1); + } + + for(i = 0; i < deviceCount; i++) { + RUNTIME_API_CALL(cudaGetDeviceProperties(&prop[i], i)); + printf("CUDA Device %d Name: %s\n", i, prop[i].name); + } + + // Set memcpy size based on available device memory + RUNTIME_API_CALL(cudaMemGetInfo(&freeMemory, &totalMemory)); + printf("Total Device Memory available : "); + calculateSize(str, (uint64_t) totalMemory); + printf("%s\n", str); + + bufferSize = MAX_SIZE < (freeMemory / 4) ? MAX_SIZE : (freeMemory / 4); + bufferSize = bufferSize/2; + printf("Memcpy size is set to %llu B (%llu MB)\n", (unsigned long long) bufferSize, (unsigned long long) bufferSize / (1024 * 1024)); + + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaStreamCreate(&cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + // Nvlink-topology Records are generated even before cudaMemcpy API is called. + CUPTI_CALL(cuptiActivityFlushAll(0)); + + // Transfer Data between Host And Device, if Nvlink is Present + // Check condition : nvlinkRec->flag & CUPTI_LINK_FLAG_SYSMEM_ACCESS + // True : Nvlink is present between CPU & GPU + // False : Nvlink is not present. 
+ if((nvlinkRec) && (((cpuToGpu) && (cpuToGpuAccess)) || ((gpuToGpu) && (gpuToGpuAccess)))) { + +#ifdef CUPTI_ONLY + for(i = 0; i < NUM_METRIC; i++) { + CUPTI_CALL(cuptiMetricGetIdFromName(0, metricName[i], &metricId[i])); + CUPTI_CALL(cuptiMetricGetNumEvents(metricId[i], &numEvents[i])); + } +#endif // CUPTI_ONLY + + DRIVER_API_CALL(cuCtxCreate(&ctx, 0, 0)); + +#ifdef PAPI + printf("Setup PAPI counters internally (PAPI)\n"); + int EventSet = PAPI_NULL; + long long values[MAX_DEVICES * NUM_METRIC]; + char *EventName[MAX_DEVICES * NUM_METRIC]; + int eventCount; + int retval, ee; + + /* PAPI Initialization */ + retval = PAPI_library_init(PAPI_VER_CURRENT); + if(retval != PAPI_VER_CURRENT) fprintf(stderr, "PAPI_library_init failed\n"); + printf("PAPI version: %d.%d.%d\n", PAPI_VERSION_MAJOR(PAPI_VERSION), PAPI_VERSION_MINOR(PAPI_VERSION), PAPI_VERSION_REVISION(PAPI_VERSION)); + + retval = PAPI_create_eventset(&EventSet); + if(retval != PAPI_OK) fprintf(stderr, "PAPI_create_eventset failed\n"); + + const char *EventEndings[NUM_METRIC] = { + "cuda:::metric:nvlink_total_data_transmitted", + "cuda:::metric:nvlink_total_data_received", + "cuda:::metric:nvlink_transmit_throughput", + "cuda:::metric:nvlink_receive_throughput", + }; + + // Add events at a GPU specific level ... 
eg cuda:::metric:nvlink_total_data_transmitted:device=0 + // Just profile devices to match the CUPTI example + char tmpEventName[1024]; + eventCount = 0; + for(i = 0; i < 1; i++) { // only profile device 0 + printf("Set device to %d\n", i); + for(ee = 0; ee < NUM_METRIC; ee++) { + snprintf(tmpEventName, 1024, "%s:device=%d\0", EventEndings[ee], i); + printf("Trying to add event %s to GPU %d in PAPI...", tmpEventName, i); + retval = PAPI_add_named_event(EventSet, tmpEventName); + if(retval == PAPI_OK) { + printf("Added event\n"); + EventName[eventCount] = strdup(tmpEventName); + eventCount++; + } else { + printf("Could not add event\n"); + } + } + } + for(i = 0; i < eventCount; i++) + values[i] = -1; +#endif // PAPI_ONLY + + +#ifdef CUPTI_ONLY + CUpti_EventGroupSets *passes = NULL; + int j = 0; + CUPTI_CALL(cuptiMetricCreateEventGroupSets(ctx, (sizeof metricId), metricId, &passes)); + // EventGroups required to profile Nvlink metrics. + for(i = 0; i < (signed) passes->numSets; i++) { + for(j = 0; j < (signed) passes->sets[i].numEventGroups; j++) { + eventGroup[numEventGroup] = passes->sets[i].eventGroups[j]; + if(!eventGroup[numEventGroup]) { + printf("\n eventGroup initialization failed \n"); + exit(-1); + } + numEventGroup++; + } + } + CUPTI_CALL(cuptiSetEventCollectionMode(ctx, CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS)); +#endif // CUPTI_ONLY + + // ===== Allocate Memory ===================================== + + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMalloc((void **) &pDevBuffer0[i], bufferSize)); + + pHostBuffer[i] = (float *) malloc(bufferSize); + MEMORY_ALLOCATION_CALL(pHostBuffer[i]); + } + + if(cpuToGpu) { +#ifdef PAPI + retval = PAPI_start( EventSet ); + if( retval != PAPI_OK ) fprintf( stderr, "PAPI_start failed\n" ); +#endif// PAPI + testCpuToGpu(eventGroup, pDevBuffer0, pHostBuffer, bufferSize, cudaStreams, &timeDuration, numEventGroup); +#ifdef PAPI + retval = PAPI_stop(EventSet, values); + if( retval != PAPI_OK ) fprintf( stderr, 
"PAPI_stop failed\n" ); +#endif + printf("Data tranferred between CPU & Device%d : \n", (int) nvlinkRec->typeDev0); + + + } else if(gpuToGpu) { + RUNTIME_API_CALL(cudaSetDevice(1)); + for(i = 0; i < NUM_STREAMS; i++) + RUNTIME_API_CALL(cudaMalloc((void **) &pDevBuffer1[i], bufferSize)); + testGpuToGpu_part1(eventGroup, pDevBuffer0, pDevBuffer1, pHostBuffer, bufferSize, cudaStreams, &timeDuration, numEventGroup); +#ifdef PAPI + retval = PAPI_start( EventSet ); + if( retval != PAPI_OK ) fprintf( stderr, "PAPI_start failed\n" ); +#endif + testGpuToGpu_part2(eventGroup, pDevBuffer0, pDevBuffer1, pHostBuffer, bufferSize, cudaStreams, &timeDuration, numEventGroup); +#ifdef PAPI + retval = PAPI_stop(EventSet, values); + if( retval != PAPI_OK ) fprintf( stderr, "PAPI_stop failed\n" ); +#endif + printf("Data tranferred between Device 0 & Device 1 : \n"); + } + +#ifdef CUPTI_ONLY + // Collect Nvlink Metric values for the data transfer via Nvlink for all the eventGroups. + for(i = 0; i < numEventGroup; i++) { + readMetricValue(eventGroup[i], NUM_EVENTS, 0, metricId, timeDuration, metricValue); + + CUPTI_CALL(cuptiEventGroupDisable(eventGroup[i])); + CUPTI_CALL(cuptiEventGroupDestroy(eventGroup[i])); + + for(i = 0; i < NUM_METRIC; i++) { + if(printMetricValue(metricId[i], metricValue[i], metricName[i]) != 0) { + printf("\n printMetricValue failed \n"); + exit(-1); + } + } + } +#endif // CUPTI_ONLY + +#ifdef PAPI + for(i = 0; i < eventCount; i++) { + char str[64]; + calculateSize(str, (uint64_t) values[i] ); + printf("PAPI %s %s \n", EventName[i], str); + } + retval = PAPI_cleanup_eventset(EventSet); + if( retval != PAPI_OK ) + fprintf(stderr, "PAPI_cleanup_eventset failed\n"); + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) + fprintf(stderr, "PAPI_destroy_eventset failed\n"); + PAPI_shutdown(); +#endif + + } else { + printf("No Nvlink supported device found\n"); + } + + printf("Exit\n"); + return 0; +} diff --git 
a/src/components/cuda/tests/nvlink_bandwidth_cupti_only.cu b/src/components/cuda/tests/nvlink_bandwidth_cupti_only.cu new file mode 100755 index 0000000..840a54b --- /dev/null +++ b/src/components/cuda/tests/nvlink_bandwidth_cupti_only.cu @@ -0,0 +1,541 @@ + /* + * Copyright 2015-2016 NVIDIA Corporation. All rights reserved. + * + * Sample to demonstrate use of NVlink CUPTI APIs + */ + + #include + #include + #include + #include + #include + + #define CUPTI_CALL(call) \ + do { \ + CUptiResult _status = call; \ + if (_status != CUPTI_SUCCESS) { \ + const char *errstr; \ + cuptiGetResultString(_status, &errstr); \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #call, errstr); \ + exit(-1); \ + } \ + } while (0) + + #define DRIVER_API_CALL(apiFuncCall) \ + do { \ + CUresult _status = apiFuncCall; \ + if (_status != CUDA_SUCCESS) { \ + fprintf(stderr, "%s:%d: error: function %s failed with error %d.\n", \ + __FILE__, __LINE__, #apiFuncCall, _status); \ + exit(-1); \ + } \ + } while (0) + + #define RUNTIME_API_CALL(apiFuncCall) \ + do { \ + cudaError_t _status = apiFuncCall; \ + if (_status != cudaSuccess) { \ + fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ + __FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status));\ + exit(-1); \ + } \ + } while (0) + + #define MEMORY_ALLOCATION_CALL(var) \ + do { \ + if (var == NULL) { \ + fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n", \ + __FILE__, __LINE__); \ + exit(-1); \ + } \ + } while (0) + + #define MAX_DEVICES (32) + #define BLOCK_SIZE (1024) + #define GRID_SIZE (512) + #define BUF_SIZE (32 * 1024) + #define ALIGN_SIZE (8) + #define SUCCESS (0) + #define NUM_METRIC (4) + #define NUM_EVENTS (2) + #define MAX_SIZE (64*1024*1024) // 64 MB + #define NUM_STREAMS (6) // gp100 has 6 physical copy engines + + CUpti_ActivityNvLink *nvlinkRec = NULL; + int cpuToGpu = 0; + int gpuToGpu = 0; + int cpuToGpuAccess = 0; + int gpuToGpuAccess = 0; + + 
extern "C" __global__ void test_nvlink_bandwidth(float *src, float *dst) + { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + dst[idx] = src[idx] * 2.0f; + } + + static void printActivity(CUpti_Activity *record) + { + if (record->kind == CUPTI_ACTIVITY_KIND_NVLINK) { + nvlinkRec = (CUpti_ActivityNvLink *)record; + + printf("typeDev0 %d, typeDev1 %d, sysmem %d, peer %d, physical links %d, portdev0 %d, %d, %d, %d, portDev1 %d, %d, %d, %d, bandwidth %llu\n", + nvlinkRec->typeDev0, + nvlinkRec->typeDev1, + ((nvlinkRec->flag & CUPTI_LINK_FLAG_SYSMEM_ACCESS) ? 1 : 0), + ((nvlinkRec->flag & CUPTI_LINK_FLAG_PEER_ACCESS) ? 1 : 0), + nvlinkRec->physicalNvLinkCount, + nvlinkRec->portDev0[0], nvlinkRec->portDev0[1], nvlinkRec->portDev0[2], nvlinkRec->portDev0[3], + nvlinkRec->portDev1[0], nvlinkRec->portDev1[1], nvlinkRec->portDev1[2], nvlinkRec->portDev1[3], + (long long unsigned int)nvlinkRec->bandwidth); + cpuToGpuAccess |= (nvlinkRec->flag & CUPTI_LINK_FLAG_SYSMEM_ACCESS); + gpuToGpuAccess |= (nvlinkRec->flag & CUPTI_LINK_FLAG_PEER_ACCESS); + } + else { + printf("Error : Unexpected CUPTI activity kind.\nExpected Activity kind : CUPTI_ACTIVITY_KIND_NVLINK\n"); + } + } + + static void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) + + { + *size = BUF_SIZE + ALIGN_SIZE; + *buffer = (uint8_t*) calloc(1, *size); + MEMORY_ALLOCATION_CALL(*buffer); + *maxNumRecords = 0; + } + + static void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, + uint8_t *buffer, size_t size, + size_t validSize) + { + CUptiResult status; + CUpti_Activity *record = NULL; + do { + status = cuptiActivityGetNextRecord(buffer, validSize, &record); + if(status == CUPTI_SUCCESS) { + printActivity(record); + } + else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) { + break; + } + else { + CUPTI_CALL(status); + } + } while (1); + + size_t dropped; + CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + if (dropped != 0) { + printf("Dropped %u 
activity records\n", (unsigned int)dropped); + } + } + + #define DIM(x) (sizeof(x)/sizeof(*(x))) + + void calculateSize(char *result, uint64_t size) + { + int i; + + const char *sizes[] = { "TB", "GB", "MB", "KB", "B" }; + uint64_t exbibytes = 1024ULL * 1024ULL * 1024ULL * 1024ULL; + + uint64_t multiplier = exbibytes; + + for (i = 0; (unsigned)i < DIM(sizes); i++, multiplier /= (uint64_t)1024) + { + if (size < multiplier) + continue; + sprintf(result, "%.1f %s", (float) size / multiplier, sizes[i]); + return; + } + strcpy(result, "0"); + return; + } + + void readMetricValue(CUpti_EventGroup eventGroup, uint32_t numEvents, + CUdevice dev, CUpti_MetricID *metricId, + uint64_t timeDuration, + CUpti_MetricValue *metricValue) { + + size_t bufferSizeBytes, numCountersRead; + uint64_t *eventValueArray = NULL; + CUpti_EventID *eventIdArray; + size_t arraySizeBytes = 0; + size_t numTotalInstancesSize = 0; + uint64_t numTotalInstances = 0; + uint64_t *aggrEventValueArray = NULL; + size_t aggrEventValueArraySize; + uint32_t i = 0, j = 0; + CUpti_EventDomainID domainId; + size_t domainSize; + + domainSize = sizeof(CUpti_EventDomainID); + + CUPTI_CALL(cuptiEventGroupGetAttribute(eventGroup, + CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID, + &domainSize, + (void *)&domainId)); + + numTotalInstancesSize = sizeof(uint64_t); + + CUPTI_CALL(cuptiDeviceGetEventDomainAttribute(dev, + domainId, + CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT, + &numTotalInstancesSize, + (void *)&numTotalInstances)); + + arraySizeBytes = sizeof(CUpti_EventID) * numEvents; + bufferSizeBytes = sizeof(uint64_t) * numEvents * numTotalInstances; + + eventValueArray = (uint64_t *) malloc(bufferSizeBytes); + MEMORY_ALLOCATION_CALL(eventValueArray); + + eventIdArray = (CUpti_EventID *) malloc(arraySizeBytes); + MEMORY_ALLOCATION_CALL(eventIdArray); + + aggrEventValueArray = (uint64_t *) calloc(numEvents, sizeof(uint64_t)); + MEMORY_ALLOCATION_CALL(aggrEventValueArray); + + aggrEventValueArraySize = sizeof(uint64_t) * 
numEvents; + + CUPTI_CALL(cuptiEventGroupReadAllEvents(eventGroup, + CUPTI_EVENT_READ_FLAG_NONE, + &bufferSizeBytes, + eventValueArray, + &arraySizeBytes, + eventIdArray, + &numCountersRead)); + + for (i = 0; i < numEvents; i++) { + for (j = 0; j < numTotalInstances; j++) { + aggrEventValueArray[i] += eventValueArray[i + numEvents * j]; + //printf("For event %d (id %d) instance %d value %ul aggregate %d = %ul\n", i, eventIdArray[i], j, eventValueArray[i + numEvents * j], i, aggrEventValueArray[i]); + } + } + + for (i = 0; i < NUM_METRIC; i++) { + CUPTI_CALL(cuptiMetricGetValue(dev, metricId[i], arraySizeBytes, + eventIdArray, aggrEventValueArraySize, + aggrEventValueArray, timeDuration, + &metricValue[i])); + } + + free(eventValueArray); + free(eventIdArray); + } + + // Print metric value, we format based on the value kind + int printMetricValue(CUpti_MetricID metricId, CUpti_MetricValue metricValue, const char *metricName, uint64_t timeDuration) { + + CUpti_MetricValueKind valueKind; + char str[64]; + size_t valueKindSize = sizeof(valueKind); + + CUPTI_CALL(cuptiMetricGetAttribute(metricId, CUPTI_METRIC_ATTR_VALUE_KIND, + &valueKindSize, &valueKind)); + switch (valueKind) { + + case CUPTI_METRIC_VALUE_KIND_DOUBLE: + printf("%s = ", metricName); + calculateSize(str, (uint64_t)metricValue.metricValueDouble); + // printf("%s (val %lu %lu nsec)\n", str, metricValue.metricValueUint64, timeDuration); + printf("%s\n", str); + break; + + case CUPTI_METRIC_VALUE_KIND_UINT64: + printf("%s = ", metricName); + calculateSize(str, (uint64_t)metricValue.metricValueUint64); + printf("%s\n", str); + break; + + case CUPTI_METRIC_VALUE_KIND_INT64: + printf("%s = ", metricName); + calculateSize(str, (uint64_t)metricValue.metricValueInt64); + printf("%s\n", str); + break; + + case CUPTI_METRIC_VALUE_KIND_THROUGHPUT: + printf("%s = ", metricName); + calculateSize(str, (uint64_t)metricValue.metricValueThroughput); + printf("%s\n", str); + break; + + default: + fprintf(stderr, "error: 
unknown value kind\n"); + return -1; + } + return 0; + } + + void testCpuToGpu(CUpti_EventGroup *eventGroup, CUdeviceptr *pDevBuffer, + float** pHostBuffer, size_t bufferSize, + cudaStream_t *cudaStreams, + uint64_t *timeDuration, int numEventGroup) + { + int i; + uint32_t value = 1; + uint64_t startTimestamp, endTimestamp; + + for (i = 0; i < numEventGroup; i++) { + CUPTI_CALL(cuptiEventGroupEnable(eventGroup[i])); + CUPTI_CALL(cuptiEventGroupSetAttribute(eventGroup[i], + CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES, + sizeof(uint32_t), (void*)&value)); + } + + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); + + //Unidirectional copy H2D + for (i = 0; i < NUM_STREAMS; i++) + { + RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer[i], pHostBuffer[i], bufferSize, cudaMemcpyHostToDevice, cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + //Unidirectional copy D2H + for (i = 0; i < NUM_STREAMS; i++) + { + RUNTIME_API_CALL(cudaMemcpyAsync(pHostBuffer[i], (void *)pDevBuffer[i], bufferSize, cudaMemcpyDeviceToHost, cudaStreams[i]));} + + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + //Bidirectional copy + for (i = 0; i < NUM_STREAMS; i+=2) + { + RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer[i], pHostBuffer[i], bufferSize, cudaMemcpyHostToDevice, cudaStreams[i])); + RUNTIME_API_CALL(cudaMemcpyAsync(pHostBuffer[i+1], (void *)pDevBuffer[i+1], bufferSize, cudaMemcpyDeviceToHost, cudaStreams[i+1])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + CUPTI_CALL(cuptiGetTimestamp(&endTimestamp)); + *timeDuration = endTimestamp - startTimestamp; + } + + void testGpuToGpu(CUpti_EventGroup *eventGroup, CUdeviceptr *pDevBuffer0, CUdeviceptr *pDevBuffer1, + float** pHostBuffer, size_t bufferSize, + cudaStream_t *cudaStreams, + uint64_t *timeDuration, int numEventGroup) + { + int i; + uint32_t value = 1; + uint64_t startTimestamp, endTimestamp; + + + RUNTIME_API_CALL(cudaSetDevice(0)); + RUNTIME_API_CALL(cudaDeviceEnablePeerAccess(1, 0)); + 
RUNTIME_API_CALL(cudaSetDevice(1)); + RUNTIME_API_CALL(cudaDeviceEnablePeerAccess(0, 0)); + + //Unidirectional copy H2D + for (i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer0[i], pHostBuffer[i], bufferSize, cudaMemcpyHostToDevice, cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + for (i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer1[i], pHostBuffer[i], bufferSize, cudaMemcpyHostToDevice, cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + + for (i = 0; i < numEventGroup; i++) { + printf("cuptiEventGroupEnable(eventGroup[%d])\n", i); + CUPTI_CALL(cuptiEventGroupEnable(eventGroup[i])); + CUPTI_CALL(cuptiEventGroupSetAttribute(eventGroup[i], + CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES, + sizeof(uint32_t), (void*)&value)); + } + CUPTI_CALL(cuptiGetTimestamp(&startTimestamp)); + + for (i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer0[i], (void *)pDevBuffer1[i], bufferSize, cudaMemcpyDeviceToDevice, cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + for (i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMemcpyAsync((void *)pDevBuffer1[i], (void *)pDevBuffer0[i], bufferSize, cudaMemcpyDeviceToDevice, cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + for (i = 0; i < NUM_STREAMS; i++) { + test_nvlink_bandwidth<<>>((float*)pDevBuffer1[i], (float*)pDevBuffer0[i]); + } + + CUPTI_CALL(cuptiGetTimestamp(&endTimestamp)); + *timeDuration = endTimestamp - startTimestamp; + } + + static void printUsage() { + printf("usage: Demonstrate use of NVlink CUPTI APIs\n"); + printf(" -help : display help message\n"); + printf(" --cpu-to-gpu : Show results for data transfer between CPU and GPU \n"); + printf(" --gpu-to-gpu : Show results for data transfer between two GPUs \n"); + } + + void parseCommandLineArgs(int argc, char *argv[]) + { + if (argc != 2) { + printf("Invalid number of 
options\n"); + exit(0); + } + + if (strcmp(argv[1], "--cpu-to-gpu") == 0) { + cpuToGpu = 1; + } + else if (strcmp(argv[1], "--gpu-to-gpu") == 0) { + gpuToGpu = 1; + } + else if ((strcmp(argv[1], "--help") == 0) || + (strcmp(argv[1], "-help") == 0) || + (strcmp(argv[1], "-h") == 0)) { + printUsage(); + exit(0); + } + else { + cpuToGpu = 1; + } + } + + int main(int argc, char *argv[]) + { + int deviceCount = 0, i = 0, j = 0, numEventGroup = 0; + size_t bufferSize = 0, freeMemory = 0, totalMemory = 0; + CUpti_EventGroupSets *passes = NULL; + CUcontext ctx; + char str[64]; + + CUdeviceptr pDevBuffer0[NUM_STREAMS]; + CUdeviceptr pDevBuffer1[NUM_STREAMS]; + float* pHostBuffer[NUM_STREAMS]; + + cudaStream_t cudaStreams[NUM_STREAMS] = {0}; + + CUpti_EventGroup eventGroup[32]; + CUpti_MetricID metricId[NUM_METRIC]; + uint32_t numEvents[NUM_METRIC]; + CUpti_MetricValue metricValue[NUM_METRIC]; + cudaDeviceProp prop[MAX_DEVICES]; + uint64_t timeDuration; + + // Adding nvlink Metrics. + const char *metricName[NUM_METRIC] = {"nvlink_total_data_transmitted", + "nvlink_total_data_received", + "nvlink_transmit_throughput", + "nvlink_receive_throughput"}; + + // Parse command line arguments + parseCommandLineArgs(argc, argv); + + CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NVLINK)); + CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted)); + + DRIVER_API_CALL(cuInit(0)); + + RUNTIME_API_CALL(cudaGetDeviceCount(&deviceCount)); + + printf("There are %d devices.\n", deviceCount); + + if (deviceCount == 0) { + printf("There is no device supporting CUDA.\n"); + exit(-1); + } + + for (i = 0; i < deviceCount; i++) { + RUNTIME_API_CALL(cudaGetDeviceProperties(&prop[i], i)); + printf("CUDA Device %d Name: %s\n", i, prop[i].name); + } + + // Set memcpy size based on available device memory + RUNTIME_API_CALL(cudaMemGetInfo(&freeMemory, &totalMemory)); + bufferSize = MAX_SIZE < (freeMemory/4) ? 
MAX_SIZE : (freeMemory/4); + + printf("Total Device Memory available : "); + calculateSize(str, (uint64_t)totalMemory); + printf("%s\n", str); + + printf("Memcpy size is set to %llu B (%llu MB)\n", + (unsigned long long)bufferSize, (unsigned long long)bufferSize/(1024*1024)); + + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaStreamCreate(&cudaStreams[i])); + } + RUNTIME_API_CALL(cudaDeviceSynchronize()); + + // Nvlink-topology Records are generated even before cudaMemcpy API is called. + CUPTI_CALL(cuptiActivityFlushAll(0)); + + // Transfer Data between Host And Device, if Nvlink is Present + // Check condition : nvlinkRec->flag & CUPTI_LINK_FLAG_SYSMEM_ACCESS + // True : Nvlink is present between CPU & GPU + // False : Nvlink is not present. + if ((nvlinkRec) && (((cpuToGpu) && (cpuToGpuAccess)) || ((gpuToGpu) && (gpuToGpuAccess)))) { + + for (i = 0; i < NUM_METRIC; i++) { + CUPTI_CALL(cuptiMetricGetIdFromName(0, metricName[i], &metricId[i])); + CUPTI_CALL(cuptiMetricGetNumEvents(metricId[i], &numEvents[i])); + } + + DRIVER_API_CALL(cuCtxCreate(&ctx, 0, 0)); + + CUPTI_CALL(cuptiMetricCreateEventGroupSets(ctx, (sizeof metricId) ,metricId, &passes)); + + // EventGroups required to profile Nvlink metrics. 
+ for (i = 0; i < (signed)passes->numSets; i++) { + for (j = 0; j < (signed)passes->sets[i].numEventGroups; j++) { + eventGroup[numEventGroup] = passes->sets[i].eventGroups[j]; + + if (!eventGroup[numEventGroup]) { + printf("\n eventGroup initialization failed \n"); + exit(-1); + } + + numEventGroup++; + } + } + + CUPTI_CALL(cuptiSetEventCollectionMode(ctx, CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS)); + + // ===== Allocate Memory ===================================== + + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMalloc((void**)&pDevBuffer0[i], bufferSize)); + + pHostBuffer[i] = (float *)malloc(bufferSize); + MEMORY_ALLOCATION_CALL(pHostBuffer[i]); + } + + if (cpuToGpu) { + testCpuToGpu(eventGroup, pDevBuffer0, pHostBuffer, bufferSize, cudaStreams, &timeDuration, numEventGroup); + printf("Data tranferred between CPU & Device%d : \n", (int)nvlinkRec->typeDev0); + } + else if(gpuToGpu) { + RUNTIME_API_CALL(cudaSetDevice(1)); + for(i = 0; i < NUM_STREAMS; i++) { + RUNTIME_API_CALL(cudaMalloc((void**)&pDevBuffer1[i], bufferSize)); + } + testGpuToGpu(eventGroup, pDevBuffer0, pDevBuffer1,pHostBuffer, bufferSize, cudaStreams, &timeDuration, numEventGroup); + printf("Data tranferred between Device 0 & Device 1 : \n"); + } + + // Collect Nvlink Metric values for the data transfer via Nvlink for all the eventGroups. 
+ for (i = 0; i < numEventGroup; i++) { + readMetricValue(eventGroup[i], NUM_EVENTS, 0, metricId, timeDuration, metricValue); + + CUPTI_CALL(cuptiEventGroupDisable(eventGroup[i])); + CUPTI_CALL(cuptiEventGroupDestroy(eventGroup[i])); + + for (i = 0; i < NUM_METRIC; i++) { + if (printMetricValue(metricId[i], metricValue[i], metricName[i], timeDuration) != 0) { + printf("\n printMetricValue failed \n"); + exit(-1); + } + } + } + } + else { + printf("No Nvlink supported device found\n"); + } + + return 0; +} diff --git a/src/components/cuda/tests/simpleMultiGPU.cu b/src/components/cuda/tests/simpleMultiGPU.cu new file mode 100644 index 0000000..f13f754 --- /dev/null +++ b/src/components/cuda/tests/simpleMultiGPU.cu @@ -0,0 +1,374 @@ +/* PAPI Multiple GPU example. This example is taken from the NVIDIA + * documentation (Copyright 1993-2013 NVIDIA Corporation) and has been + * adapted to show the use of CUPTI and PAPI in collecting event + * counters for multiple GPU contexts. PAPI Team (2015) + */ + +/* + * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. + * + */ + +/* + * This application demonstrates how to use the CUDA API to use multiple GPUs, + * with an emphasis on simple illustration of the techniques (not on performance). + * + * Note that in order to detect multiple GPUs in your system you have to disable + * SLI in the nvidia control panel. Otherwise only one GPU is visible to the + * application. On the other side, you can still extend your desktop to screens + * attached to both GPUs. 
+ */ + +// System includes +#include +#include + +// CUDA runtime +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#if not defined PAPI +#undef PAPI +#endif + +#if not defined CUPTI_ONLY +#undef CUPTI_ONLY +#endif + +#ifndef MAX +#define MAX(a,b) (a > b ? a : b) +#endif + +#include "simpleMultiGPU.h" + +// ////////////////////////////////////////////////////////////////////////////// +// Data configuration +// ////////////////////////////////////////////////////////////////////////////// +const int MAX_GPU_COUNT = 32; +const int DATA_N = 48576 * 32; +#ifdef PAPI +const int MAX_NUM_EVENTS = 32; +#endif + +#define CHECK_CU_ERROR(err, cufunc) \ + if (err != CUDA_SUCCESS) { printf ("Error %d for CUDA Driver API function '%s'\n", err, cufunc); return -1; } + +#define CHECK_CUDA_ERROR(err) \ + if (err != cudaSuccess) { printf ("Error %d for CUDA \n", err ); return -1; } + +#define CHECK_CUPTI_ERROR(err, cuptifunc) \ + if (err != CUPTI_SUCCESS) { printf ("Error %d for CUPTI API function '%s'\n", err, cuptifunc); return -1; } + + +// ////////////////////////////////////////////////////////////////////////////// +// Simple reduction kernel. 
+// Refer to the 'reduction' CUDA SDK sample describing +// reduction optimization strategies +// ////////////////////////////////////////////////////////////////////////////// +__global__ static void reduceKernel( float *d_Result, float *d_Input, int N ) +{ + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + const int threadN = gridDim.x * blockDim.x; + float sum = 0; + + for( int pos = tid; pos < N; pos += threadN ) + sum += d_Input[pos]; + + d_Result[tid] = sum; +} + +// ////////////////////////////////////////////////////////////////////////////// +// Program main +// ////////////////////////////////////////////////////////////////////////////// +int main( int argc, char **argv ) +{ + // Solver config + TGPUplan plan[MAX_GPU_COUNT]; + // GPU reduction results + float h_SumGPU[MAX_GPU_COUNT]; + float sumGPU; + double sumCPU, diff; + int i, j, gpuBase, GPU_N; + + const int BLOCK_N = 32; + const int THREAD_N = 256; + const int ACCUM_N = BLOCK_N * THREAD_N; + + CUcontext ctx[MAX_GPU_COUNT]; + + printf( "Starting simpleMultiGPU\n" ); + + // Report on the available CUDA devices + int computeCapabilityMajor = 0, computeCapabilityMinor = 0; + int runtimeVersion = 0, driverVersion = 0; + char deviceName[64]; + CUdevice device[MAX_GPU_COUNT]; + CHECK_CUDA_ERROR( cudaGetDeviceCount( &GPU_N ) ); + if( GPU_N > MAX_GPU_COUNT ) GPU_N = MAX_GPU_COUNT; + printf( "CUDA-capable device count: %i\n", GPU_N ); + for ( i=0; i>> ( plan[i].d_Sum, plan[i].d_Data, plan[i].dataN ); + if ( cudaGetLastError() != cudaSuccess ) { printf( "reduceKernel() execution failed (GPU %d).\n", i ); exit(EXIT_FAILURE); } + // Read back GPU results + CHECK_CUDA_ERROR( cudaMemcpyAsync( plan[i].h_Sum_from_device, plan[i].d_Sum, ACCUM_N * sizeof( float ), cudaMemcpyDeviceToHost, plan[i].stream ) ); + CHECK_CU_ERROR( cuCtxPopCurrent(&(ctx[i])), "cuCtxPopCurrent" ); + } + + // Process GPU results + printf( "Process GPU results on %d GPUs...\n", GPU_N ); + for( i = 0; i < GPU_N; i++ ) { + float sum; + 
// Set device + CHECK_CUDA_ERROR( cudaSetDevice( i ) ); + CHECK_CU_ERROR(cuCtxPushCurrent(ctx[i]), "cuCtxPushCurrent"); + // Wait for all operations to finish + cudaStreamSynchronize( plan[i].stream ); + // Finalize GPU reduction for current subvector + sum = 0; + for( j = 0; j < ACCUM_N; j++ ) { + sum += plan[i].h_Sum_from_device[j]; + } + *( plan[i].h_Sum ) = ( float ) sum; + CHECK_CU_ERROR( cuCtxPopCurrent(&(ctx[i])), "cuCtxPopCurrent" ); + } + double gpuTime = GetTimer(); + + +#ifdef CUPTI_ONLY + size_t size = 1024; + size_t sizeBytes = size*sizeof(uint64_t); + uint64_t buffer[size]; + uint64_t tmp[size]; for (int jj=0; jj<1024; jj++) tmp[jj]=0; + for ( i=0; i %s \n", values[i], EventName[i] ); + + // retval = PAPI_read( EventSet, values ); + // if( retval != PAPI_OK ) fprintf( stderr, "PAPI_read failed\n" ); + // for( i = 0; i < eventCount; i++ ) + // printf( "PAPI counterValue %12lld \t\t --> %s \n", values[i], EventName[i] ); + + retval = PAPI_stop( EventSet, values ); + if( retval != PAPI_OK ) fprintf( stderr, "PAPI_stop failed\n" ); + for( i = 0; i < eventCount; i++ ) + printf( "PAPI counterValue %12lld \t\t --> %s \n", values[i], EventName[i] ); + + retval = PAPI_cleanup_eventset( EventSet ); + if( retval != PAPI_OK ) fprintf( stderr, "PAPI_cleanup_eventset failed\n" ); + retval = PAPI_destroy_eventset( &EventSet ); + if( retval != PAPI_OK ) fprintf( stderr, "PAPI_destroy_eventset failed\n" ); + PAPI_shutdown(); +#endif + + for( i = 0; i < GPU_N; i++ ) { + CHECK_CUDA_ERROR( cudaFreeHost( plan[i].h_Sum_from_device ) ); + CHECK_CUDA_ERROR( cudaFree( plan[i].d_Sum ) ); + CHECK_CUDA_ERROR( cudaFree( plan[i].d_Data ) ); + // Shut down this GPU + CHECK_CUDA_ERROR( cudaStreamDestroy( plan[i].stream ) ); + } + sumGPU = 0; + for( i = 0; i < GPU_N; i++ ) { + sumGPU += h_SumGPU[i]; + } + printf( " GPU Processing time: %f (ms)\n", gpuTime ); + + // Compute on Host CPU + printf( "Computing the same result with Host CPU...\n" ); + StartTimer(); + sumCPU = 0; + for( i = 
0; i < GPU_N; i++ ) { + for( j = 0; j < plan[i].dataN; j++ ) { + sumCPU += plan[i].h_Data[j]; + } + } + double cpuTime = GetTimer(); + printf( " CPU Processing time: %f (ms)\n", cpuTime ); + + // Compare GPU and CPU results + printf( "Comparing GPU and Host CPU results...\n" ); + diff = fabs( sumCPU - sumGPU ) / fabs( sumCPU ); + printf( " GPU sum: %f\n CPU sum: %f\n", sumGPU, sumCPU ); + printf( " Relative difference: %E \n", diff ); + + // Cleanup and shutdown + for( i = 0; i < GPU_N; i++ ) { + CHECK_CUDA_ERROR( cudaSetDevice( i ) ); + CHECK_CUDA_ERROR( cudaFreeHost( plan[i].h_Data ) ); + cudaDeviceReset(); + } + + exit( ( diff < 1e-5 ) ? EXIT_SUCCESS : EXIT_FAILURE ); +} + diff --git a/src/components/cuda/tests/simpleMultiGPU.h b/src/components/cuda/tests/simpleMultiGPU.h new file mode 100644 index 0000000..d29aa56 --- /dev/null +++ b/src/components/cuda/tests/simpleMultiGPU.h @@ -0,0 +1,47 @@ +/* + * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. + * + */ + +/* + * This application demonstrates how to use the CUDA API to use multiple GPUs. + * + * Note that in order to detect multiple GPUs in your system you have to disable + * SLI in the nvidia control panel. Otherwise only one GPU is visible to the + * application. On the other side, you can still extend your desktop to screens + * attached to both GPUs. 
+ */ + +#ifndef SIMPLEMULTIGPU_H +#define SIMPLEMULTIGPU_H + +typedef struct +{ + //Host-side input data + int dataN; + float *h_Data; + + //Partial sum for this GPU + float *h_Sum; + + //Device buffers + float *d_Data,*d_Sum; + + //Reduction copied back from GPU + float *h_Sum_from_device; + + //Stream for asynchronous command execution + cudaStream_t stream; + +} TGPUplan; + +extern "C" +void launch_reduceKernel(float *d_Result, float *d_Input, int N, int BLOCK_N, int THREAD_N, cudaStream_t &s); + +#endif diff --git a/src/components/cuda/tests/timer.h b/src/components/cuda/tests/timer.h new file mode 100644 index 0000000..d856eb1 --- /dev/null +++ b/src/components/cuda/tests/timer.h @@ -0,0 +1,64 @@ +/** + * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. 
+ * + */ + +#ifndef TIMER_H +#define TIMER_H + +#include + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +#define WIN32_LEAN_AND_MEAN +#include +#else +#include +#endif + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +double PCFreq = 0.0; +__int64 timerStart = 0; +#else +struct timeval timerStart; +#endif + +void StartTimer() +{ +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + LARGE_INTEGER li; + + if (!QueryPerformanceFrequency(&li)) + { + printf("QueryPerformanceFrequency failed!\n"); + } + + PCFreq = (double)li.QuadPart/1000.0; + QueryPerformanceCounter(&li); + timerStart = li.QuadPart; +#else + gettimeofday(&timerStart, NULL); +#endif +} + +// time elapsed in ms +double GetTimer() +{ +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) + LARGE_INTEGER li; + QueryPerformanceCounter(&li); + return (double)(li.QuadPart-timerStart)/PCFreq; +#else + struct timeval timerStop, timerElapsed; + gettimeofday(&timerStop, NULL); + timersub(&timerStop, &timerStart, &timerElapsed); + return timerElapsed.tv_sec*1000.0+timerElapsed.tv_usec/1000.0; +#endif +} +#endif // TIMER_H + diff --git a/src/components/emon/README b/src/components/emon/README new file mode 100644 index 0000000..fe6be65 --- /dev/null +++ b/src/components/emon/README @@ -0,0 +1,23 @@ +/** +* @file: README +* @author: James Ralph +* ralph@icl.utk.edu +* @defgroup papi_components Components +* @brief Component Specific Readme file: EMON +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information +A component to provide access to Environmental MONitoring power data on BG/Q systems. 
+ +emon/ + + % ./configure --prefix=< your_choice > \ + --with-OS=bgq \ + --with-bgpm_installdir=/bgsys/drivers/ppcfloor \ + CC=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gcc \ + F77=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc64-bgq-linux-gfortran \ + --with-components="bgpm/L2unit bgpm/CNKunit bgpm/IOunit bgpm/NWunit emon" + +*/ diff --git a/src/components/emon/Rules.emon b/src/components/emon/Rules.emon new file mode 100644 index 0000000..dd1b0af --- /dev/null +++ b/src/components/emon/Rules.emon @@ -0,0 +1,7 @@ +# $Id$ + +COMPSRCS += components/emon/linux-emon.c +COMPOBJS += linux-emon.o + +linux-emon.o: components/emon/linux-emon.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/emon/linux-emon.c -o linux-emon.o -I/bgsys/drivers/ppcfloor diff --git a/src/components/emon/linux-emon.c b/src/components/emon/linux-emon.c new file mode 100644 index 0000000..d600e0b --- /dev/null +++ b/src/components/emon/linux-emon.c @@ -0,0 +1,641 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-emon.c + * @author Heike Jagode + * jagode@eecs.utk.edu + * BGPM / emon component + * + * @brief + * This file has the source code for a component that enables PAPI-C to + * access hardware power data for BG/Q through the EMON interface. + */ + +#include +#include +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" +#include "extras.h" + +#define EMON_DEFINE_GLOBALS +#include +#include // the emon library header file (no linking required) + +#define EMON_MAX_COUNTERS 8 +#define EMON_TOTAL_EVENTS 8 + +#ifndef DEBUG +#define EMONDBG( fmt, args...) do {} while(0) +#else +#define EMONDBG( fmt, args... 
) do { printf("%s:%d\t"fmt, __func__, __LINE__, ##args); } while(0) +#endif + +/* Stores private information for each event */ +typedef struct EMON_register +{ + unsigned int selector; + /* Signifies which counter slot is being used */ + /* Indexed from 1 as 0 has a special meaning */ +} EMON_register_t; + +/** This structure is used to build the table of events */ +/* The contents of this structure will vary based on */ +/* your component, however having name and description */ +/* fields are probably useful. */ +typedef struct EMON_native_event_entry +{ + EMON_register_t resources; /**< Per counter resources */ + char *name; /**< Name of the counter */ + char *description; /**< Description of the counter */ + int return_type; +} EMON_native_event_entry_t; + + +/* Used when doing register allocation */ +typedef struct EMON_reg_alloc +{ + EMON_register_t ra_bits; +} EMON_reg_alloc_t; + +typedef struct EMON_overflow +{ + int threshold; + int EventIndex; +} EMON_overflow_t; + +/* Holds control flags */ +typedef struct EMON_control_state +{ + int count; + long long counters[EMON_MAX_COUNTERS]; + int being_measured[EMON_MAX_COUNTERS]; + long long last_update; +} EMON_control_state_t; + +/* Holds per-thread information */ +typedef struct EMON_context +{ + EMON_control_state_t state; +} EMON_context_t; + +/* Declare our vector in advance; it is defined with its initializer at the end of this file */ +papi_vector_t _emon_vector; + +static void _check_EMON_error( char* emon2func, int err ) +{ + ( void ) emon2func; + if ( err < 0 ) { + printf( "Error: EMON API function '%s' returned %d.\n", + emon2func, err ); + } +} + + +/** This table contains the native events + * So with the EMON interface, we get every domain at a time.
+ */ +static EMON_native_event_entry_t EMON_native_table[] = +{ + { + .name = "DOMAIN1", + .description = "Chip core", + .resources.selector = 1, + .return_type = PAPI_DATATYPE_FP64, + }, + { + .name = "DOMAIN2", + .description = "Chip Memory Interface and Dramm", + .resources.selector = 2, + .return_type = PAPI_DATATYPE_FP64, + }, + { + .name = "DOMAIN3", + .description = "Optics", + .resources.selector = 3, + .return_type = PAPI_DATATYPE_FP64, + }, + { + .name = "DOMAIN4", + .description = "Optics + PCIExpress", + .resources.selector = 4, + .return_type = PAPI_DATATYPE_FP64, + }, + { + .name = "DOMAIN6", + .description = "HSS Network and Link Chip", + .resources.selector = 5, + .return_type = PAPI_DATATYPE_FP64, + }, + { + .name = "DOMAIN8", + .description = "Link Chip Core", + .resources.selector = 6, + .return_type = PAPI_DATATYPE_FP64, + }, + { + .name = "DOMAIN7", + .description = "Chip SRAM", + .resources.selector = 7, + .return_type = PAPI_DATATYPE_FP64, + }, + { .name="EMON_DOMAIN_ALL", + .description = "Measures power on all domains.", + .resources.selector = 8, + .return_type = PAPI_DATATYPE_FP64, + }, +}; + + + + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized + */ +int +EMON_init_thread( hwd_context_t * ctx ) +{ + EMONDBG( "EMON_init_thread\n" ); + + ( void ) ctx; + return PAPI_OK; +} + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +int +EMON_init_component( int cidx ) +{ + int ret = 0; + _emon_vector.cmp_info.CmpIdx = cidx; + EMONDBG( "EMON_init_component cidx = %d\n", cidx ); + /* Setup connection with the fpga: + * NOTE: any other threads attempting to call
into the EMON API + * will be turned away. */ + ret = EMON_SetupPowerMeasurement(); + _check_EMON_error("EMON_SetupPowerMeasurement", ret ); + + _emon_vector.cmp_info.num_native_events = EMON_TOTAL_EVENTS; + + _emon_vector.cmp_info.num_cntrs = EMON_TOTAL_EVENTS; + _emon_vector.cmp_info.num_mpx_cntrs = EMON_TOTAL_EVENTS; + + + return ( PAPI_OK ); +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +int +EMON_init_control_state( hwd_control_state_t * ptr ) +{ + EMONDBG( "EMON_init_control_state\n" ); + + EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; + memset( this_state, 0, sizeof ( EMON_control_state_t ) ); + + return PAPI_OK; +} + +static int +_emon_accessor( EMON_control_state_t * this_state ) +{ + union { + long long ll; + double fp; + } return_value; + return_value.fp = -1; + + double volts[14],amps[14]; + double cpu = 0; + double dram = 0; + double link_chip = 0; + double network = 0; + double optics = 0; + double pci = 0; + double sram = 0; + unsigned k_const; + + EMONDBG( "_emon_accessor, enter this_state = %x\n", this_state); + return_value.fp = EMON_GetPower_impl( volts, amps ); + EMONDBG("_emon_accessor, after EMON_GetPower %lf \n", return_value.fp); + if ( -1 == return_value.fp ) { + PAPIERROR("EMON_GetPower() failed!\n"); + return ( PAPI_ESYS ); + } + + this_state->counters[7] = return_value.ll; + +/* We just stuff everything in counters, there is no extra overhead here */ + k_const = domain_info[0].k_const; /* Chip Core Voltage */ + cpu += volts[0] * amps[0] * k_const; + cpu += volts[1] * amps[1] * k_const; + + k_const = domain_info[1].k_const; /* Chip Core Voltage */ + dram += volts[2] * amps[2] * k_const; + dram += volts[3] * amps[3] * k_const; + + k_const = domain_info[2].k_const; /* Chip Core Voltage */ + optics += volts[4] * amps[4] * k_const; + optics += volts[5] * amps[5] * k_const; + + k_const = domain_info[3].k_const; /* Chip Core Voltage */ + pci += volts[6] * amps[6] *
k_const; + pci += volts[7] * amps[7] * k_const; + + k_const = domain_info[4].k_const; /* Chip Core Voltage */ + network += volts[8] * amps[8] * k_const; + network += volts[9] * amps[9] * k_const; + + k_const = domain_info[5].k_const; /* Chip Core Voltage */ + link_chip += volts[10] * amps[10] * k_const; + link_chip += volts[11] * amps[11] * k_const; + + k_const = domain_info[6].k_const; /* Chip Core Voltage */ + sram += volts[12] * amps[12] * k_const; + sram += volts[13] * amps[13] * k_const; + + this_state->counters[0] = *(long long*)&cpu; + this_state->counters[1] = *(long long*)&dram; + this_state->counters[2] = *(long long*)&optics; + this_state->counters[3] = *(long long*)&pci; + this_state->counters[4] = *(long long*)&link_chip; + this_state->counters[5] = *(long long*)&network; + this_state->counters[6] = *(long long*)&sram; + + EMONDBG("CPU = %lf\n", *(double*)&this_state->counters[0]); + EMONDBG("DRAM = %lf\n", *(double*)&this_state->counters[1]); + EMONDBG("Optics = %lf\n", *(double*)&this_state->counters[2]); + EMONDBG("PCI = %lf\n", *(double*)&this_state->counters[3]); + EMONDBG("Link Chip = %lf\n", *(double*)&this_state->counters[4]); + EMONDBG("Network = %lf\n", *(double*)&this_state->counters[5]); + EMONDBG("SRAM = %lf\n", *(double*)&this_state->counters[6]); + EMONDBG("TOTAL = %lf\n", *(double*)&this_state->counters[7] ); + + return ( PAPI_OK ); +} + +/* + * + */ +int +EMON_start( hwd_context_t * ctx, hwd_control_state_t * ptr ) +{ + EMONDBG( "EMON_start\n" ); + ( void ) ctx; + ( void ) ptr; + /*EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr;*/ + + return ( PAPI_OK ); +} + + +/* + * + */ +int +EMON_stop( hwd_context_t * ctx, hwd_control_state_t * ptr ) +{ + EMONDBG( "EMON_stop\n" ); + ( void ) ctx; + EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; + + return _emon_accessor( this_state ); +} + + +/* + * + */ +int +EMON_read( hwd_context_t * ctx, hwd_control_state_t * ptr, + long long ** events, int flags ) +{ + 
EMONDBG( "EMON_read\n" ); + ( void ) ctx; + ( void ) flags; + int ret; + EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; + + ret = _emon_accessor( this_state ); + *events = this_state->counters; + return ret; +} + + +/* + * + */ +int +EMON_shutdown_thread( hwd_context_t * ctx ) +{ + EMONDBG( "EMON_shutdown_thread\n" ); + + ( void ) ctx; + return ( PAPI_OK ); +} + +int +EMON_shutdown_component( void ) +{ + EMONDBG( "EMON_shutdown_component\n" ); + + return ( PAPI_OK ); +} + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +int +EMON_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ + EMONDBG( "EMON_ctl\n" ); + + ( void ) ctx; + ( void ) code; + ( void ) option; + return ( PAPI_OK ); +} + + +/* + * PAPI Cleanup Eventset + */ +int +EMON_cleanup_eventset( hwd_control_state_t * ctrl ) +{ + EMONDBG( "EMON_cleanup_eventset\n" ); + + EMON_control_state_t * this_state = ( EMON_control_state_t * ) ctrl; + ( void ) this_state; + + return ( PAPI_OK ); +} + + +/* + * + */ +int +EMON_update_control_state( hwd_control_state_t * ptr, + NativeInfo_t * native, int count, + hwd_context_t * ctx ) +{ + EMONDBG( "EMON_update_control_state: count = %d\n", count ); + + ( void ) ctx; + int index, i; + EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; + ( void ) ptr; + + + + // otherwise, add the events to the eventset + for ( i = 0; i < count; i++ ) { + index = ( native[i].ni_event ) ; + + native[i].ni_position = i; + + EMONDBG("EMON_update_control_state: ADD event: i = %d, index = %d\n", i, index ); + } + + // store how many events we added to an EventSet + this_state->count = count; + + return ( PAPI_OK ); +} + + +/* + * As a system wide count, PAPI_DOM_ALL is all we support + */ +int +EMON_set_domain( hwd_control_state_t * cntrl, int domain ) +{ + EMONDBG( "EMON_set_domain\n" ); + ( void 
) cntrl; + + if ( PAPI_DOM_ALL != domain ) + return ( PAPI_EINVAL ); + + return ( PAPI_OK ); +} + + +/* + * + */ +int +EMON_reset( hwd_context_t * ctx, hwd_control_state_t * ptr ) +{ + EMONDBG( "EMON_reset\n" ); + ( void ) ctx; + int retval; + EMON_control_state_t * this_state = ( EMON_control_state_t * ) ptr; + ( void ) this_state; + ( void ) retval; + + memset( this_state->counters, 0x0, sizeof(long long) * EMON_MAX_COUNTERS); + + return ( PAPI_OK ); +} + + +/* + * Native Event functions + */ +int +EMON_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + EMONDBG( "EMON_ntv_enum_events, EventCode = %#x\n", *EventCode ); + + switch ( modifier ) { + case PAPI_ENUM_FIRST: + *EventCode = 0; + + return ( PAPI_OK ); + break; + + case PAPI_ENUM_EVENTS: + { + int index = ( *EventCode ); + + if ( index < EMON_TOTAL_EVENTS - 1 ) { /* advance only while a next table entry exists */ + *EventCode = *EventCode + 1; + return ( PAPI_OK ); + } else { + return ( PAPI_ENOEVNT ); + } + + break; + } + default: + return ( PAPI_EINVAL ); + } + return ( PAPI_EINVAL ); +} + +/* + * + */ +int +EMON_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + EMONDBG( "EMON_ntv_code_to_name\n" ); + int index; + ( void ) name; + ( void ) len; + + index = ( EventCode ); + + if ( index >= EMON_TOTAL_EVENTS || index < 0 ) { + return PAPI_ENOEVNT; + } + + strncpy( name, EMON_native_table[index].name, len ); + return ( PAPI_OK ); +} + +/* + * Look up a native event by name; *code is written only when a table entry matches + */ +int +EMON_ntv_name_to_code( const char *name, unsigned int *code ) +{ + int index; + for ( index = 0; index < EMON_TOTAL_EVENTS; index++ ) { + if ( 0 == strcmp( name, EMON_native_table[index].name ) ) { + *code = index; + return ( PAPI_OK ); + } + } + return ( PAPI_ENOEVNT ); /* no table entry matched name */ +} + +int +EMON_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + EMONDBG( "EMON_ntv_code_to_descr\n" ); + int index; + ( void ) name; + ( void ) len; + + index = ( EventCode ) ; + + if ( index >= EMON_TOTAL_EVENTS || index < 0 ) { + return PAPI_ENOEVNT; + } + strncpy( name, EMON_native_table[index].description, len ); +
+ return ( PAPI_OK ); +} + + +/* + * + */ +int +EMON_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) +{ + EMONDBG( "EMON_ntv_code_to_bits\n" ); + ( void ) EventCode; + ( void ) bits; + return ( PAPI_OK ); +} + +int +EMON_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info) +{ + + int index = EventCode; + + if ( ( index < 0) || (index >= EMON_TOTAL_EVENTS )) return PAPI_ENOEVNT; + + strncpy( info->symbol, EMON_native_table[index].name, + sizeof(info->symbol)); + + strncpy( info->long_descr, EMON_native_table[index].description, + sizeof(info->long_descr)); + + //strncpy( info->units, rapl_native_events[index].units, + //sizeof(info->units)); + + info->data_type = EMON_native_table[index].return_type; + + return PAPI_OK; +} + +/* + * + */ +papi_vector_t _emon_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "EMON", + .short_name = "EMON", + .description = "Blue Gene/Q EMON component", + .num_native_events = EMON_MAX_COUNTERS, + .num_cntrs = EMON_MAX_COUNTERS, + .num_mpx_cntrs = EMON_MAX_COUNTERS, + .default_domain = PAPI_DOM_ALL, + .available_domains = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + + .hardware_intr_sig = PAPI_INT_SIGNAL, + .hardware_intr = 1, + + .kernel_multiplex = 0, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + } + , + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( EMON_context_t ), + .control_state = sizeof ( EMON_control_state_t ), + .reg_value = sizeof ( EMON_register_t ), + .reg_alloc = sizeof ( EMON_reg_alloc_t ), + } + , + /* function pointers in this component */ + .init_thread = EMON_init_thread, + .init_component = EMON_init_component, + .init_control_state = EMON_init_control_state, + .start = EMON_start, + .stop = EMON_stop, + .read =
EMON_read, + .shutdown_thread = EMON_shutdown_thread, + .shutdown_component = EMON_shutdown_component, + .cleanup_eventset = EMON_cleanup_eventset, + .ctl = EMON_ctl, + + .update_control_state = EMON_update_control_state, + .set_domain = EMON_set_domain, + .reset = EMON_reset, + + .ntv_enum_events = EMON_ntv_enum_events, + .ntv_code_to_name = EMON_ntv_code_to_name, + .ntv_code_to_descr = EMON_ntv_code_to_descr, + .ntv_code_to_bits = EMON_ntv_code_to_bits, + .ntv_code_to_info = EMON_ntv_code_to_info, +}; diff --git a/src/components/example/Rules.example b/src/components/example/Rules.example new file mode 100644 index 0000000..bacfdf6 --- /dev/null +++ b/src/components/example/Rules.example @@ -0,0 +1,5 @@ +COMPSRCS += components/example/example.c +COMPOBJS += example.o + +example.o: components/example/example.c components/example/example.h $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/example/example.c -o example.o diff --git a/src/components/example/example.c b/src/components/example/example.c new file mode 100644 index 0000000..cfa2a68 --- /dev/null +++ b/src/components/example/example.c @@ -0,0 +1,685 @@ +/** + * @file example.c + * @author Joachim Protze + * joachim.protze@zih.tu-dresden.de + * @author Vince Weaver + * vweaver1@eecs.utk.edu + * + * @ingroup papi_components + * + * @brief + * This is an example component, it demos the component interface + * and implements three example counters. + */ + +#include +#include +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" /* defines papi_malloc(), etc. 
*/ + +/** This driver supports three counters counting at once */ +/* This is artificially low to allow testing of multiplexing */ +#define EXAMPLE_MAX_SIMULTANEOUS_COUNTERS 3 +#define EXAMPLE_MAX_MULTIPLEX_COUNTERS 4 + +/* Declare our vector in advance */ +/* This allows us to modify the component info */ +papi_vector_t _example_vector; + +/** Structure that stores private information for each event */ +typedef struct example_register +{ + unsigned int selector; + /**< Signifies which counter slot is being used */ + /**< Indexed from 1 as 0 has a special meaning */ +} example_register_t; + +/** This structure is used to build the table of events */ +/* The contents of this structure will vary based on */ +/* your component, however having name and description */ +/* fields are probably useful. */ +typedef struct example_native_event_entry +{ + example_register_t resources; /**< Per counter resources */ + char name[PAPI_MAX_STR_LEN]; /**< Name of the counter */ + char description[PAPI_MAX_STR_LEN]; /**< Description of the counter */ + int writable; /**< Whether counter is writable */ + /* any other counter parameters go here */ +} example_native_event_entry_t; + +/** This structure is used when doing register allocation + it possibly is not necessary when there are no + register constraints */ +typedef struct example_reg_alloc +{ + example_register_t ra_bits; +} example_reg_alloc_t; + +/** Holds control flags. + * There's one of these per event-set. 
+ * Use this to hold data specific to the EventSet, either hardware + * counter settings or things like counter start values + */ +typedef struct example_control_state +{ + int num_events; + int domain; + int multiplexed; + int overflow; + int inherit; + int which_counter[EXAMPLE_MAX_SIMULTANEOUS_COUNTERS]; + long long counter[EXAMPLE_MAX_MULTIPLEX_COUNTERS]; /**< Copy of counts, holds results when stopped */ +} example_control_state_t; + +/** Holds per-thread information */ +typedef struct example_context +{ + long long autoinc_value; +} example_context_t; + +/** This table contains the native events */ +static example_native_event_entry_t *example_native_table; + +/** number of events in the table*/ +static int num_events = 0; + + +/*************************************************************************/ +/* Below is the actual "hardware implementation" of our example counters */ +/*************************************************************************/ + +#define EXAMPLE_ZERO_REG 0 +#define EXAMPLE_CONSTANT_REG 1 +#define EXAMPLE_AUTOINC_REG 2 +#define EXAMPLE_GLOBAL_AUTOINC_REG 3 + +#define EXAMPLE_TOTAL_EVENTS 4 + +static long long example_global_autoinc_value = 0; + +/** Code that resets the hardware. */ +static void +example_hardware_reset( example_context_t *ctx ) +{ + /* reset per-thread count */ + ctx->autoinc_value=0; + /* reset global count */ + example_global_autoinc_value = 0; + +} + +/** Code that reads event values. */ +/* You might replace this with code that accesses */ +/* hardware or reads values from the operatings system. 
*/ +static long long +example_hardware_read( int which_one, example_context_t *ctx ) +{ + long long old_value; + + switch ( which_one ) { + case EXAMPLE_ZERO_REG: + return 0; + case EXAMPLE_CONSTANT_REG: + return 42; + case EXAMPLE_AUTOINC_REG: + old_value = ctx->autoinc_value; + ctx->autoinc_value++; + return old_value; + case EXAMPLE_GLOBAL_AUTOINC_REG: + old_value = example_global_autoinc_value; + example_global_autoinc_value++; + return old_value; + default: + fprintf(stderr,"Invalid counter read %#x\n",which_one ); + return -1; + } + + return 0; +} + +/** Code that writes event values. */ +static int +example_hardware_write( int which_one, + example_context_t *ctx, + long long value) +{ + + switch ( which_one ) { + case EXAMPLE_ZERO_REG: + case EXAMPLE_CONSTANT_REG: + return PAPI_OK; /* can't be written */ + case EXAMPLE_AUTOINC_REG: + ctx->autoinc_value=value; + return PAPI_OK; + case EXAMPLE_GLOBAL_AUTOINC_REG: + example_global_autoinc_value=value; + return PAPI_OK; + default: + perror( "Invalid counter write" ); + return -1; + } + + return 0; +} + +static int +detect_example(void) { + + return PAPI_OK; +} + +/********************************************************************/ +/* Below are the functions required by the PAPI component interface */ +/********************************************************************/ + + +/** Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +static int +_example_init_component( int cidx ) +{ + + SUBDBG( "_example_init_component..." 
); + + + /* First, detect that our hardware is available */ + if (detect_example()!=PAPI_OK) { + return PAPI_ECMP; + } + + /* we know in advance how many events we want */ + /* for actual hardware this might have to be determined dynamically */ + num_events = EXAMPLE_TOTAL_EVENTS; + + /* Allocate memory for the our native event table */ + example_native_table = + ( example_native_event_entry_t * ) + papi_calloc( num_events, sizeof(example_native_event_entry_t) ); + if ( example_native_table == NULL ) { + PAPIERROR( "malloc():Could not get memory for events table" ); + return PAPI_ENOMEM; + } + + /* fill in the event table parameters */ + /* for complicated components this will be done dynamically */ + /* or by using an external library */ + + strcpy( example_native_table[0].name, "EXAMPLE_ZERO" ); + strcpy( example_native_table[0].description, + "This is an example counter, that always returns 0" ); + example_native_table[0].writable = 0; + + strcpy( example_native_table[1].name, "EXAMPLE_CONSTANT" ); + strcpy( example_native_table[1].description, + "This is an example counter, that always returns a constant value of 42" ); + example_native_table[1].writable = 0; + + strcpy( example_native_table[2].name, "EXAMPLE_AUTOINC" ); + strcpy( example_native_table[2].description, + "This is an example counter, that reports a per-thread auto-incrementing value" ); + example_native_table[2].writable = 1; + + strcpy( example_native_table[3].name, "EXAMPLE_GLOBAL_AUTOINC" ); + strcpy( example_native_table[3].description, + "This is an example counter, that reports a global auto-incrementing value" ); + example_native_table[3].writable = 1; + + /* Export the total number of events available */ + _example_vector.cmp_info.num_native_events = num_events; + + /* Export the component id */ + _example_vector.cmp_info.CmpIdx = cidx; + + + + return PAPI_OK; +} + +/** This is called whenever a thread is initialized */ +static int +_example_init_thread( hwd_context_t *ctx ) +{ + + 
example_context_t *example_context = (example_context_t *)ctx; + + example_context->autoinc_value=0; + + SUBDBG( "_example_init_thread %p...", ctx ); + + return PAPI_OK; +} + + + +/** Setup a counter control state. + * In general a control state holds the hardware info for an + * EventSet. + */ + +static int +_example_init_control_state( hwd_control_state_t * ctl ) +{ + SUBDBG( "example_init_control_state... %p\n", ctl ); + + example_control_state_t *example_ctl = ( example_control_state_t * ) ctl; + memset( example_ctl, 0, sizeof ( example_control_state_t ) ); + + return PAPI_OK; +} + + +/** Triggered by eventset operations like add or remove */ +static int +_example_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, + hwd_context_t *ctx ) +{ + + (void) ctx; + int i, index; + + example_control_state_t *example_ctl = ( example_control_state_t * ) ctl; + + SUBDBG( "_example_update_control_state %p %p...", ctl, ctx ); + + /* if no events, return */ + if (count==0) return PAPI_OK; + + for( i = 0; i < count; i++ ) { + index = native[i].ni_event; + + /* Map counter #i to Measure Event "index" */ + example_ctl->which_counter[i]=index; + + /* We have no constraints on event position, so any event */ + /* can be in any slot. */ + native[i].ni_position = i; + } + + example_ctl->num_events=count; + + return PAPI_OK; +} + +/** Triggered by PAPI_start() */ +static int +_example_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + + (void) ctx; + (void) ctl; + + SUBDBG( "example_start %p %p...", ctx, ctl ); + + /* anything that would need to be set at counter start time */ + + /* reset counters? 
*/ + /* For hardware that cannot reset counters, store initial */ + /* counter state to the ctl and subtract it off at read time */ + + /* start the counting ?*/ + + return PAPI_OK; +} + + +/** Triggered by PAPI_stop() */ +static int +_example_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + + (void) ctx; + (void) ctl; + + SUBDBG( "example_stop %p %p...", ctx, ctl ); + + /* anything that would need to be done at counter stop time */ + + + + return PAPI_OK; +} + + +/** Triggered by PAPI_read() */ +/* flags field is never set? */ +static int +_example_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + + (void) flags; + + example_context_t *example_ctx = (example_context_t *) ctx; + example_control_state_t *example_ctl = ( example_control_state_t *) ctl; + + SUBDBG( "example_read... %p %d", ctx, flags ); + + int i; + + /* Read counters into expected slot */ + for(i=0;i<example_ctl->num_events;i++) { + example_ctl->counter[i] = + example_hardware_read( example_ctl->which_counter[i], + example_ctx ); + } + + /* return pointer to the values we read */ + *events = example_ctl->counter; + + return PAPI_OK; +} + +/** Triggered by PAPI_write(), but only if the counters are running */ +/* otherwise, the updated state is written to ESI->hw_start */ +static int +_example_write( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long *events ) +{ + + example_context_t *example_ctx = (example_context_t *) ctx; + example_control_state_t *example_ctl = ( example_control_state_t *) ctl; + + int i; + + SUBDBG( "example_write... %p %p", ctx, ctl ); + + /* Write counters into expected slot */ + for(i=0;i<example_ctl->num_events;i++) { + example_hardware_write( example_ctl->which_counter[i], + example_ctx, + events[i] ); + } + + return PAPI_OK; +} + + +/** Triggered by PAPI_reset() but only if the EventSet is currently running */ +/* If the eventset is not currently running, then the saved value in the */ +/* EventSet is set to zero without calling this routine.
*/ +static int +_example_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + example_context_t *event_ctx = (example_context_t *)ctx; + (void) ctl; + + SUBDBG( "example_reset ctx=%p ctrl=%p...", ctx, ctl ); + + /* Reset the hardware */ + example_hardware_reset( event_ctx ); + + return PAPI_OK; +} + +/** Triggered by PAPI_shutdown() */ +static int +_example_shutdown_component(void) +{ + + SUBDBG( "example_shutdown_component..." ); + + /* Free anything we allocated */ + + papi_free(example_native_table); + + return PAPI_OK; +} + +/** Called at thread shutdown */ +static int +_example_shutdown_thread( hwd_context_t *ctx ) +{ + + (void) ctx; + + SUBDBG( "example_shutdown_thread... %p", ctx ); + + /* Last chance to clean up thread */ + + return PAPI_OK; +} + + + +/** This function sets various options in the component + @param[in] ctx -- hardware context + @param[in] code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, + PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT + @param[in] option -- options to be set + */ +static int +_example_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + + (void) ctx; + (void) code; + (void) option; + + SUBDBG( "example_ctl..." ); + + return PAPI_OK; +} + +/** This function has to set the bits needed to count different domains + In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + By default return PAPI_EINVAL if none of those are specified + and PAPI_OK with success + PAPI_DOM_USER is only user context is counted + PAPI_DOM_KERNEL is only the Kernel/OS context is counted + PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + PAPI_DOM_ALL is all of the domains + */ +static int +_example_set_domain( hwd_control_state_t * cntrl, int domain ) +{ + (void) cntrl; + + int found = 0; + SUBDBG( "example_set_domain..." 
); + + if ( PAPI_DOM_USER & domain ) { + SUBDBG( " PAPI_DOM_USER " ); + found = 1; + } + if ( PAPI_DOM_KERNEL & domain ) { + SUBDBG( " PAPI_DOM_KERNEL " ); + found = 1; + } + if ( PAPI_DOM_OTHER & domain ) { + SUBDBG( " PAPI_DOM_OTHER " ); + found = 1; + } + if ( PAPI_DOM_ALL & domain ) { + SUBDBG( " PAPI_DOM_ALL " ); + found = 1; + } + if ( !found ) + return ( PAPI_EINVAL ); + + return PAPI_OK; +} + + +/**************************************************************/ +/* Naming functions, used to translate event numbers to names */ +/**************************************************************/ + + +/** Enumerate Native Events + * @param EventCode is the event of interest + * @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS + * If your component has attribute masks then these need to + * be handled here as well. + */ +static int +_example_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + int index; + + + switch ( modifier ) { + + /* return EventCode of first event */ + case PAPI_ENUM_FIRST: + /* return the first event that we support */ + + *EventCode = 0; + return PAPI_OK; + + /* return EventCode of next available event */ + case PAPI_ENUM_EVENTS: + index = *EventCode; + + /* Make sure we have at least 1 more event after us */ + if ( index < num_events - 1 ) { + + /* This assumes a non-sparse mapping of the events */ + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + + default: + return PAPI_EINVAL; + } + + return PAPI_EINVAL; +} + +/** Takes a native event code and passes back the name + * @param EventCode is the native event code + * @param name is a pointer for the name to be copied to + * @param len is the size of the name string + */ +static int +_example_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + int index; + + index = EventCode; + + /* Make sure we are in range */ + if (index >= 0 && index < num_events) { + strncpy( name, example_native_table[index].name, len 
); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + +/** Takes a native event code and passes back the event description + * @param EventCode is the native event code + * @param descr is a pointer for the description to be copied to + * @param len is the size of the descr string + */ +static int +_example_ntv_code_to_descr( unsigned int EventCode, char *descr, int len ) +{ + int index; + index = EventCode; + + /* make sure event is in range */ + if (index >= 0 && index < num_events) { + strncpy( descr, example_native_table[index].description, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + +/** Vector that points to entry points for our component */ +papi_vector_t _example_vector = { + .cmp_info = { + /* default component information */ + /* (unspecified values are initialized to 0) */ + /* we explicitly set them to zero in this example */ + /* to show what settings are available */ + + .name = "example", + .short_name = "example", + .description = "A simple example component", + .version = "1.15", + .support_version = "n/a", + .kernel_version = "n/a", + .num_cntrs = EXAMPLE_MAX_SIMULTANEOUS_COUNTERS, + .num_mpx_cntrs = EXAMPLE_MAX_SIMULTANEOUS_COUNTERS, + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + /* once per thread */ + .context = sizeof ( example_context_t ), + /* once per eventset */ + .control_state = sizeof ( example_control_state_t ), + /* ?? */ + .reg_value = sizeof ( example_register_t ), + /* ?? 
*/ + .reg_alloc = sizeof ( example_reg_alloc_t ), + }, + + /* function pointers */ + /* by default they are set to NULL */ + + /* Used for general PAPI interactions */ + .start = _example_start, + .stop = _example_stop, + .read = _example_read, + .reset = _example_reset, + .write = _example_write, + .init_component = _example_init_component, + .init_thread = _example_init_thread, + .init_control_state = _example_init_control_state, + .update_control_state = _example_update_control_state, + .ctl = _example_ctl, + .shutdown_thread = _example_shutdown_thread, + .shutdown_component = _example_shutdown_component, + .set_domain = _example_set_domain, + /* .cleanup_eventset = NULL, */ + /* called in add_native_events() */ + /* .allocate_registers = NULL, */ + + /* Used for overflow/profiling */ + /* .dispatch_timer = NULL, */ + /* .get_overflow_address = NULL, */ + /* .stop_profiling = NULL, */ + /* .set_overflow = NULL, */ + /* .set_profile = NULL, */ + + /* ??? */ + /* .user = NULL, */ + + /* Name Mapping Functions */ + .ntv_enum_events = _example_ntv_enum_events, + .ntv_code_to_name = _example_ntv_code_to_name, + .ntv_code_to_descr = _example_ntv_code_to_descr, + /* if .ntv_name_to_code not available, PAPI emulates */ + /* it by enumerating all events and looking manually */ + .ntv_name_to_code = NULL, + + + /* These are only used by _papi_hwi_get_native_event_info() */ + /* Which currently only uses the info for printing native */ + /* event info, not for any sort of internal use. 
*/ + /* .ntv_code_to_bits = NULL, */ + +}; + diff --git a/src/components/example/example.h b/src/components/example/example.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/components/example/example.h diff --git a/src/components/example/tests/Makefile b/src/components/example/tests/Makefile new file mode 100644 index 0000000..b2f3062 --- /dev/null +++ b/src/components/example/tests/Makefile @@ -0,0 +1,18 @@ +NAME=example +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = example_basic example_multiple_components + +example_tests: $(TESTS) + +example_basic: example_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o example_basic example_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +example_multiple_components: example_multiple_components.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o example_multiple_components example_multiple_components.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o diff --git a/src/components/example/tests/example_basic.c b/src/components/example/tests/example_basic.c new file mode 100644 index 0000000..3ebc9db --- /dev/null +++ b/src/components/example/tests/example_basic.c @@ -0,0 +1,576 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file example_basic.c + * @author Vince Weaver + * vweaver1@eecs.utk.edu + * test case for Example component + * + * + * @brief + * This file is a very simple example test and Makefile that acts + * as a guideline on how to add tests to components. + * The papi configure and papi Makefile will take care of the compilation + * of the component tests (if all tests are added to a directory named + * 'tests' in the specific component dir). + * See components/README for more details. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 3 + +int main (int argc, char **argv) +{ + + int retval,i; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + const PAPI_component_info_t *cmpinfo = NULL; + int numcmp,cid,example_cid=-1; + int code,maximum_code=0; + char event_name[PAPI_MAX_STR_LEN]; + PAPI_event_info_t event_info; + int quiet=0; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!quiet) { + printf( "Testing example component with PAPI %d.%d.%d\n", + PAPI_VERSION_MAJOR( PAPI_VERSION ), + PAPI_VERSION_MINOR( PAPI_VERSION ), + PAPI_VERSION_REVISION( PAPI_VERSION ) ); + } + + /* Find our component */ + + numcmp = PAPI_num_components(); + for( cid=0; cid<numcmp; cid++) { + if ( (cmpinfo = PAPI_get_component_info(cid)) == NULL) { + test_fail(__FILE__, __LINE__, + "PAPI_get_component_info failed\n", 0); + } + if (!quiet) { + printf("\tComponent %d - %d events - %s\n", cid, + cmpinfo->num_native_events, + cmpinfo->name); + } + if (strstr(cmpinfo->name,"example")) { + /* FOUND! 
*/ + example_cid=cid; + } + } + + + if (example_cid<0) { + test_skip(__FILE__, __LINE__, + "Example component not found\n", 0); + } + + if (!quiet) { + printf("\nFound Example Component at id %d\n",example_cid); + printf("\nListing all events in this component:\n"); + } + + /**************************************************/ + /* Listing all available events in this component */ + /* Along with descriptions */ + /**************************************************/ + code = PAPI_NATIVE_MASK; + + retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, example_cid ); + + while ( retval == PAPI_OK ) { + if (PAPI_event_code_to_name( code, event_name )!=PAPI_OK) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + if (PAPI_get_event_info( code, &event_info)!=PAPI_OK) { + printf("Error getting info for event %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_get_event_info()", retval ); + } + + if (!quiet) { + printf("\tEvent %#x: %s -- %s\n", + code,event_name,event_info.long_descr); + } + + maximum_code=code; + + retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, example_cid ); + + } + if (!quiet) printf("\n"); + + /**********************************/ + /* Try accessing an invalid event */ + /**********************************/ + + retval=PAPI_event_code_to_name( maximum_code+10, event_name ); + if (retval!=PAPI_ENOEVNT) { + test_fail( __FILE__, __LINE__, + "Failed to return PAPI_ENOEVNT on invalid event", retval ); + } + + /***********************************/ + /* Test the EXAMPLE_ZERO event */ + /***********************************/ + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_create_eventset() failed\n", retval ); + } + + retval = PAPI_event_name_to_code("EXAMPLE_ZERO", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "EXAMPLE_ZERO not found\n",retval ); + } + + retval = PAPI_add_event( 
EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + if (!quiet) printf("Testing EXAMPLE_ZERO: %lld\n",values[0]); + + if (values[0]!=0) { + test_fail( __FILE__, __LINE__, "Result should be 0!\n", 0); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); + } + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); + } + + EventSet=PAPI_NULL; + + + /***********************************/ + /* Test the EXAMPLE_CONSTANT event */ + /***********************************/ + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_create_eventset() failed\n", retval ); + } + + retval = PAPI_event_name_to_code("EXAMPLE_CONSTANT", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "EXAMPLE_CONSTANT not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + if (!quiet) printf("Testing EXAMPLE_CONSTANT: %lld\n",values[0]); + + if (values[0]!=42) { + test_fail( __FILE__, __LINE__, "Result should be 42!\n", 0); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) { 
+ test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); + } + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); + } + + EventSet=PAPI_NULL; + + + + /***********************************/ + /* Test the EXAMPLE_AUTOINC event */ + /***********************************/ + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_create_eventset() failed\n", retval ); + } + + retval = PAPI_event_name_to_code("EXAMPLE_AUTOINC", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "EXAMPLE_AUTOINC not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + if (!quiet) printf("Testing EXAMPLE_AUTOINC: "); + + for(i=0;i<10;i++) { + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + if (!quiet) printf("%lld ",values[0]); + + if (values[0]!=i) { + test_fail( __FILE__, __LINE__, "Result wrong!\n", 0); + } + } + + if (!quiet) printf("\n"); + + + /***********************************/ + /* Test multiple reads */ + /***********************************/ + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + for(i=0;i<10;i++) { + + retval=PAPI_read( EventSet, values); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read failed\n", retval); + } + if (!quiet) printf("%lld ",values[0]); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + if (!quiet) 
printf("%lld\n",values[0]); + + // if (values[0]!=i) { + // test_fail( __FILE__, __LINE__, "Result wrong!\n", 0); + //} + + /***********************************/ + /* Test PAPI_reset() */ + /***********************************/ + + retval = PAPI_reset( EventSet); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_reset() failed\n",retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_reset( EventSet); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_reset() failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + + if (!quiet) printf("Testing EXAMPLE_AUTOINC after PAPI_reset(): %lld\n", + values[0]); + + if (values[0]!=0) { + test_fail( __FILE__, __LINE__, "Result not zero!\n", 0); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); + } + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); + } + + EventSet=PAPI_NULL; + + + /***********************************/ + /* Test multiple events */ + /***********************************/ + + if (!quiet) printf("Testing Multiple Events: "); + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_create_eventset() failed\n", retval ); + } + + retval = PAPI_event_name_to_code("EXAMPLE_CONSTANT", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "EXAMPLE_CONSTANT not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_event_name_to_code("EXAMPLE_GLOBAL_AUTOINC", 
&code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "EXAMPLE_GLOBAL_AUTOINC not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_event_name_to_code("EXAMPLE_ZERO", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "EXAMPLE_ZERO not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + if (!quiet) { + for(i=0;i<3;i++) { + printf("%lld ",values[i]); + } + printf("\n"); + } + + if (values[0]!=42) { + test_fail( __FILE__, __LINE__, "Result should be 42!\n", 0); + } + + if (values[2]!=0) { + test_fail( __FILE__, __LINE__, "Result should be 0!\n", 0); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval); + } + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval); + } + + EventSet=PAPI_NULL; + + /***********************************/ + /* Test writing to an event */ + /***********************************/ + + if (!quiet) printf("Testing Write\n"); + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_create_eventset() failed\n", retval ); + } + + retval = PAPI_event_name_to_code("EXAMPLE_CONSTANT", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "EXAMPLE_CONSTANT not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, 
code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_event_name_to_code("EXAMPLE_GLOBAL_AUTOINC", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "EXAMPLE_GLOBAL_AUTOINC not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + retval = PAPI_event_name_to_code("EXAMPLE_ZERO", &code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "EXAMPLE_ZERO not found\n",retval ); + } + + retval = PAPI_add_event( EventSet, code); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_events failed\n", retval ); + } + + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_start failed\n",retval ); + } + + retval = PAPI_read ( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_read failed\n",retval ); + } + + if (!quiet) { + printf("Before values: "); + for(i=0;i<3;i++) { + printf("%lld ",values[i]); + } + printf("\n"); + } + + values[0]=100; + values[1]=200; + values[2]=300; + + retval = PAPI_write ( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_write failed\n",retval ); + } + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval); + } + + if (!quiet) { + printf("After values: "); + for(i=0;i<3;i++) { + printf("%lld ",values[i]); + } + printf("\n"); + } + + + if (values[0]!=42) { + test_fail( __FILE__, __LINE__, "Result should be 42!\n", 0); + } + + if (values[1]!=200) { + test_fail( __FILE__, __LINE__, "Result should be 200!\n", 0); + } + + if (values[2]!=0) { + test_fail( __FILE__, __LINE__, "Result should be 0!\n", 0); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) { + test_fail( 
__FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval);
+	}
+
+	retval = PAPI_destroy_eventset(&EventSet);
+	if (retval != PAPI_OK) {
+		test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval);
+	}
+
+	EventSet=PAPI_NULL;
+
+
+	/************/
+	/* All Done */
+	/************/
+
+	if (!quiet) printf("\n");
+
+	test_pass( __FILE__ );
+
+	return 0;
+}
+
diff --git a/src/components/example/tests/example_multiple_components.c b/src/components/example/tests/example_multiple_components.c
new file mode 100644
index 0000000..26467fc
--- /dev/null
+++ b/src/components/example/tests/example_multiple_components.c
@@ -0,0 +1,197 @@
+/****************************/
+/* THIS IS OPEN SOURCE CODE */
+/****************************/
+
+/**
+ * @file example_multiple_components.c
+ * @author Vince Weaver
+ *         vweaver1@eecs.utk.edu
+ * test if multiple components can be used at once
+ *
+ *
+ * @brief
+ *   This tests to see if the CPU component and Example component
+ *   can be used simultaneously.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#define NUM_EVENTS 1
+
+int main (int argc, char **argv)
+{
+
+	int retval;
+	int EventSet1 = PAPI_NULL, EventSet2 = PAPI_NULL;
+	long long values1[NUM_EVENTS];
+	long long values2[NUM_EVENTS];
+	const PAPI_component_info_t *cmpinfo = NULL;
+	int numcmp,cid,example_cid=-1;
+	int code;
+	int quiet=0;
+
+	/* Set TESTS_QUIET variable */
+	quiet=tests_quiet( argc, argv );
+
+	/* PAPI Initialization */
+	retval = PAPI_library_init( PAPI_VER_CURRENT );
+	if ( retval != PAPI_VER_CURRENT ) {
+		test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval);
+	}
+
+	if (!quiet) {
+		printf( "Testing simultaneous component use with PAPI %d.%d.%d\n",
+			PAPI_VERSION_MAJOR( PAPI_VERSION ),
+			PAPI_VERSION_MINOR( PAPI_VERSION ),
+			PAPI_VERSION_REVISION( PAPI_VERSION ) );
+	}
+
+	/* Find our component */
+
+	numcmp = PAPI_num_components();
+	for( cid=0; cid<numcmp; cid++) {
+		if ( (cmpinfo = PAPI_get_component_info(cid)) == NULL) {
+			test_fail(__FILE__, __LINE__,
+				"PAPI_get_component_info failed\n", 0);
+		}
+		if (!quiet) {
+			printf("\tComponent %d - %d events - %s\n", cid,
+				cmpinfo->num_native_events,
+				cmpinfo->name);
+		}
+		if (strstr(cmpinfo->name,"example")) {
+			/* FOUND! */
+			example_cid=cid;
+		}
+	}
+
+
+	if (example_cid<0) {
+		test_skip(__FILE__, __LINE__,
+			"Example component not found\n", 0);
+	}
+
+	if (!quiet) {
+		printf("\nFound Example Component at id %d\n",example_cid);
+	}
+
+
+	/* Create an eventset for the Example component */
+
+	retval = PAPI_create_eventset( &EventSet1 );
+	if ( retval != PAPI_OK ) {
+		test_fail( __FILE__, __LINE__,
+			"PAPI_create_eventset() failed\n", retval );
+	}
+
+	retval = PAPI_event_name_to_code("EXAMPLE_CONSTANT", &code);
+	if ( retval != PAPI_OK ) {
+		test_fail( __FILE__, __LINE__,
+			"EXAMPLE_CONSTANT not found\n",retval );
+	}
+
+	retval = PAPI_add_event( EventSet1, code);
+	if ( retval != PAPI_OK ) {
+		test_fail( __FILE__, __LINE__,
+			"PAPI_add_events failed\n", retval );
+	}
+
+
+	/* Create an eventset for the CPU component */
+
+	retval = PAPI_create_eventset( &EventSet2 );
+	if ( retval != PAPI_OK ) {
+		test_fail( __FILE__, __LINE__,
+			"PAPI_create_eventset() failed\n", retval );
+	}
+
+	retval = PAPI_event_name_to_code("PAPI_TOT_CYC", &code);
+	if ( retval != PAPI_OK ) {
+		test_skip( __FILE__, __LINE__,
+			"PAPI_TOT_CYC not available\n",retval );
+	}
+
+	retval = PAPI_add_event( EventSet2, code);
+	if ( retval != PAPI_OK ) {
+		test_skip( __FILE__, __LINE__,
+			"NO CPU component found\n", retval );
+	}
+
+	if (!quiet) printf("\nStarting EXAMPLE_CONSTANT and PAPI_TOT_CYC at the same time\n");
+
+	/* Start CPU component event */
+	retval = PAPI_start( EventSet2 );
+	if ( retval != PAPI_OK ) {
+		test_fail( __FILE__, __LINE__,
+			"PAPI_start failed\n",retval );
+	}
+
+	/* Start example component */
+	retval = PAPI_start( EventSet1 );
+	if ( retval != PAPI_OK ) {
+		test_fail( __FILE__, __LINE__,
+			"PAPI_start failed\n",retval );
+	}
+
+
+	/* No workload runs here: both eventsets are simply stopped again */
+
+
+	/* Stop example component */
+	retval = PAPI_stop( EventSet1, values1 );
+	if ( retval != PAPI_OK ) {
+		test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval);
+	}
+
+	/* Stop CPU component */
+	retval = PAPI_stop( EventSet2, values2 );
+	if ( retval != PAPI_OK ) {
+		test_fail( __FILE__, __LINE__, "PAPI_stop failed\n", retval);
+	}
+
+	if (!quiet) printf("Stopping EXAMPLE_CONSTANT and PAPI_TOT_CYC\n\n");
+
+
+	if (!quiet) printf("Results from EXAMPLE_CONSTANT: %lld\n",values1[0]);
+
+	if (values1[0]!=42) {
+		test_fail( __FILE__, __LINE__, "Result should be 42!\n", 0);
+	}
+
+	if (!quiet) printf("Results from PAPI_TOT_CYC: %lld\n\n",values2[0]);
+
+	if (values2[0]<1) {
+		test_fail( __FILE__, __LINE__, "Result should be greater than 0\n", 0);
+	}
+
+	/* Cleanup EventSets */
+	retval = PAPI_cleanup_eventset(EventSet1);
+	if (retval != PAPI_OK) {
+		test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval);
+	}
+
+	retval = PAPI_cleanup_eventset(EventSet2);
+	if (retval != PAPI_OK) {
+		test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset!\n", retval);
+	}
+
+	/* Destroy EventSets (both of them, so that neither is left allocated) */
+	retval = PAPI_destroy_eventset(&EventSet1);
+	if (retval != PAPI_OK) {
+		test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval);
+	}
+
+	retval = PAPI_destroy_eventset(&EventSet2);
+	if (retval != PAPI_OK) {
+		test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset!\n", retval);
+	}
+
+	test_pass( __FILE__ );
+
+	return 0;
+}
+
diff --git a/src/components/host_micpower/Makefile.host_micpower.in b/src/components/host_micpower/Makefile.host_micpower.in
new file mode 100644
index 0000000..d392b1c
--- /dev/null
+++ b/src/components/host_micpower/Makefile.host_micpower.in
@@ -0,0 +1,2 @@
+SYSMGMT_CFLAGS = @SYSMGMT_CFLAGS@
+SYSMGMT_LIBS = @SYSMGMT_LIBS@
diff --git a/src/components/host_micpower/README b/src/components/host_micpower/README
new file mode 100644
index 0000000..56c5f06
--- /dev/null
+++ b/src/components/host_micpower/README
@@ -0,0 +1,31 @@
+This is a component that exports power information for Intel Xeon Phi cards (MIC).
+The component makes use of the MicAccessAPI distributed with the Intel Manycore Platform Software Stack.
+(http://software.intel.com/en-us/articles/intel-manycore-platform-software-stack-mpss)
+Specifically in the intel-mic-sysmgmt package.
+ +A configure script allows for non-default locations for the sysmgmt sdk. See ./configure --help + +About the data, PAPI retrieves the data via the MicGetPowerUsage call. +Per the SDK documentation: +MicGetPowerUsage - Retrieve power usage values of Intel® Xeon Phi™ Coprocessor and components. + Data Fields + MicPwrPws total0 + Total power utilization by Intel® Xeon Phi™ product codenamed “Knights Corner” device, Averaged over Time Window 0 (uWatts). + MicPwrPws total1 + Total power utilization by Intel® Xeon Phi™ product codenamed “Knights Corner” device, Averaged over Time Window 1 (uWatts). + MicPwrPws inst + Instantaneous power (uWatts). + MicPwrPws imax + Max instantaneous power (uWatts). + MicPwrPws pcie + PCI-E connector power (uWatts). + MicPwrPws c2x3 + 2x3 connector power (uWatts). + MicPwrPws c2x4 + 2x4 connector power (uWatts). + MicPwrVrr vccp + Core rail (uVolts). + MicPwrVrr vddg + Uncore rail (uVolts). + MicPwrVrr vddq + Memory subsystem rail (uVolts). 
diff --git a/src/components/host_micpower/Rules.host_micpower b/src/components/host_micpower/Rules.host_micpower new file mode 100644 index 0000000..7bcbd80 --- /dev/null +++ b/src/components/host_micpower/Rules.host_micpower @@ -0,0 +1,20 @@ +include components/host_micpower/Makefile.host_micpower + +COMPSRCS += components/host_micpower/linux-host_micpower.c +COMPOBJS += linux-host_micpower.o + +CFLAGS += -D MICACCESSAPI -D LINUX + +# default install location +MPSSROOT ?= /opt/intel/mic +SYSMGT = $(MPSSROOT)/sysmgmt/sdk +LIBPATH = -L$(SYSMGT)/lib/Linux + +#SCIF_LIBPATH=/usr/lib64 + +#LDFLAGS += $(LIBPATH) $(SCIF_LIBPATH) -lpthread -ldl +LDFLAGS += -pthread $(LDL) $(SYSMGMT_LIBS) +CFLAGS += $(SYSMGMT_CFLAGS) + +linux-host_micpower.o: components/host_micpower/linux-host_micpower.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/host_micpower/linux-host_micpower.c -o linux-host_micpower.o diff --git a/src/components/host_micpower/configure b/src/components/host_micpower/configure new file mode 100755 index 0000000..202a12d --- /dev/null +++ b/src/components/host_micpower/configure @@ -0,0 +1,4637 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.63 for host_micpower version-0.1. +# +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +# 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + + + +# PATH needs CR +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. 
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. 
+$as_unset CDPATH + + +if test "x$CONFIG_SHELL" = x; then + if (eval ":") 2>/dev/null; then + as_have_required=yes +else + as_have_required=no +fi + + if test $as_have_required = yes && (eval ": +(as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test \$exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=\$LINENO + as_lineno_2=\$LINENO + test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && + test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } +") 2> /dev/null; then + : +else + as_candidate_shells= + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + case $as_dir in + /*) + for as_base in sh bash ksh sh5; do + as_candidate_shells="$as_candidate_shells $as_dir/$as_base" + done;; + esac +done +IFS=$as_save_IFS + + + for as_shell in $as_candidate_shells $SHELL; do + # Try only shells that exist, to save several forks. + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { ("$as_shell") 2> /dev/null <<\_ASEOF +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + +: +_ASEOF +}; then + CONFIG_SHELL=$as_shell + as_have_required=yes + if { "$as_shell" 2> /dev/null <<\_ASEOF +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + +: +(as_func_return () { + (exit $1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = "$1" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test $exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } + +_ASEOF +}; then + break +fi + +fi + + done + + if test "x$CONFIG_SHELL" != x; then + for as_var in BASH_ENV ENV + do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + done + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} +fi + + + if test $as_have_required = no; then + echo This script requires a shell more modern than all the + echo shells that I found on your system. 
Please install a + echo modern shell, or manually run the script under such a + echo shell if you do have one. + { (exit 1); exit 1; } +fi + + +fi + +fi + + + +(eval "as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test \$exitcode = 0") || { + echo No shell found that supports shell functions. + echo Please tell bug-autoconf@gnu.org about your system, + echo including any error possibly output before this message. + echo This can help us improve future autoconf versions. + echo Configuration will now proceed without shell functions. +} + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -p' + fi +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + + +exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Identity of this package. +PACKAGE_NAME='host_micpower' +PACKAGE_TARNAME='host_micpower' +PACKAGE_VERSION='version-0.1' +PACKAGE_STRING='host_micpower version-0.1' +PACKAGE_BUGREPORT='' + +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='LTLIBOBJS +LIBOBJS +EGREP +GREP +CPP +SYSMGMT_LIBS +SYSMGMT_CFLAGS +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +with_sysmgmt_include_path +with_sysmgmt_lib_path +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. 
+# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2 + { (exit 1); exit 1; }; } + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2 + { (exit 1); exit 1; }; } + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2 + { (exit 1); exit 1; }; } + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2 + { (exit 1); exit 1; }; } + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { $as_echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { $as_echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) { $as_echo "$as_me: error: unrecognized options: $ac_unrecognized_opts" >&2 + { (exit 1); exit 1; }; } ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + { $as_echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; } +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + { $as_echo "$as_me: error: working directory cannot be determined" >&2 + { (exit 1); exit 1; }; } +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + { $as_echo "$as_me: error: pwd does not report name of working directory" >&2 + { (exit 1); exit 1; }; } + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. 
+ ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + { $as_echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || { $as_echo "$as_me: error: $ac_msg" >&2 + { (exit 1); exit 1; }; } + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures host_micpower version-0.1 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. 
See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/host_micpower] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of host_micpower version-0.1:";; + esac + cat <<\_ACEOF + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-sysmgmt-include-path + location of the MPSS sysmgmt api headers, defaults + to /opt/intel/mic/sysmgmt/sdk/include + --with-sysmgmt-lib-path location of the MPSS sysmgmt libraries, feed to the + runtime linker; defaults to + /opt/intel/mic/sysmgmt/sdk/lib/Linux + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS C/C++/Objective C preprocessor flags, e.g. 
-I if + you have headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? 
+ cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +host_micpower configure version-0.1 +generated by GNU Autoconf 2.63 + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by host_micpower $as_me version-0.1, which was +generated by GNU Autoconf 2.63. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ $as_echo "PATH: $as_dir" +done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; + 2) + ac_configure_args1="$ac_configure_args1 '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + ac_configure_args="$ac_configure_args '$ac_arg'" + ;; + esac + done +done +$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } +$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. 
We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + cat <<\_ASBOX +## ---------------- ## +## Cache variables. ## +## ---------------- ## +_ASBOX + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + cat <<\_ASBOX +## ----------------- ## +## Output variables. ## +## ----------------- ## +_ASBOX + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + cat <<\_ASBOX +## ------------------- ## +## File substitutions. 
## +## ------------------- ## +_ASBOX + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + cat <<\_ASBOX +## ----------- ## +## confdefs.h. ## +## ----------- ## +_ASBOX + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. 
+# Decide which site scripts to consult.  An explicit $CONFIG_SITE is used on
+# its own; otherwise look for share/config.site and etc/config.site beneath
+# $prefix, or beneath $ac_default_prefix when $prefix is still NONE.
+ac_site_file1=NONE
+ac_site_file2=NONE
+if test -z "$CONFIG_SITE"; then
+  if test "x$prefix" = xNONE; then
+    ac_site_file1=$ac_default_prefix/share/config.site
+    ac_site_file2=$ac_default_prefix/etc/config.site
+  else
+    ac_site_file1=$prefix/share/config.site
+    ac_site_file2=$prefix/etc/config.site
+  fi
+else
+  ac_site_file1=$CONFIG_SITE
+fi
+# Source every candidate that is readable, copying its text (prefixed with
+# "| ") to descriptor 5 first.  The NONE placeholder is passed over.
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+  if test "x$ac_site_file" != xNONE && test -r "$ac_site_file"; then
+    { $as_echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5
+    . "$ac_site_file"
+  fi
+done
+
+# Cache handling: an unreadable cache file is (re)created empty; a readable
+# one is sourced, but only when it is a regular file.
+if test ! -r "$cache_file"; then
+  { $as_echo "$as_me:$LINENO: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+  >$cache_file
+elif test -f "$cache_file"; then
+  # Readable special files such as /dev/null are deliberately not sourced,
+  # because some versions of bash fail on them.
+  { $as_echo "$as_me:$LINENO: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+  # Names that are not absolute are sourced with an explicit ./ prefix.
+  case $cache_file in
+    [\\/]* | ?:[\\/]* ) . "$cache_file";;
+    *) . "./$cache_file";;
+  esac
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:$LINENO: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:$LINENO: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:$LINENO: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { $as_echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +$as_echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + + + + + + + + + + + + + + + + + + + + + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; }; } + +# Provide some information about the compiler. +$as_echo "$as_me:$LINENO: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +{ (ac_try="$ac_compiler --version >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compiler --version >&5") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -v >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compiler -v >&5") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -V >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compiler -V >&5") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... 
" >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { (ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. 
+ break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi + +{ $as_echo "$as_me:$LINENO: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +if test -z "$ac_file"; then + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: C compiler cannot create executables +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: C compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; }; } +fi + +ac_exeext=$ac_cv_exeext + +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; }; } + fi + fi +fi +{ $as_echo "$as_me:$LINENO: result: yes" >&5 +$as_echo "yes" >&6; } + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... " >&6; } +{ $as_echo "$as_me:$LINENO: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +{ $as_echo "$as_me:$LINENO: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... " >&6; } +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; }; } +fi + +rm -f conftest$ac_cv_exeext +{ $as_echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +{ $as_echo "$as_me:$LINENO: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if test "${ac_cv_objext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if test "${ac_cv_c_compiler_gnu+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_compiler_gnu=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_compiler_gnu=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if test "${ac_cv_prog_cc_g+set}" = set; then + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + CFLAGS="" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + : +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if test "${ac_cv_prog_cc_c89+set}" = set; then + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_c89=$ac_arg +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:$LINENO: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:$LINENO: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +# Check whether --with-sysmgmt-include-path was given. +if test "${with_sysmgmt_include_path+set}" = set; then + withval=$with_sysmgmt_include_path; SYSMGMT_CFLAGS="-I$withval" +else + SYSMGMT_CFLAGS="-I/opt/intel/mic/sysmgmt/sdk/include" +fi + + + + +# Check whether --with-sysmgmt-lib-path was given. 
+if test "${with_sysmgmt_lib_path+set}" = set; then + withval=$with_sysmgmt_lib_path; SYSMGMT_LIBS="-Wl,-rpath,$withval" +else + SYSMGMT_LIBS="-Wl,-rpath,/opt/intel/mic/sysmgmt/sdk/lib/Linux" +fi + + + +#AC_ARG_WITH([scif-lib-path], +# [AS_HELP_STRING([--with-scif-lib-path],[location of the SCIF library, needed by libMicAccessApi.so]), +# [], +# []) + +OLD_CPPFLAGS=$CPPFLAGS +CPPFLAGS="-DMICACCESSAPI -DLINUX $SYSMGMT_CFLAGS" + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + : +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + # Broken: success on invalid input. +continue +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. 
+rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:$LINENO: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + : +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + # Broken: success on invalid input. +continue +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + : +else + { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... 
" >&6; } +if test "${ac_cv_path_GREP+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + ac_count=`expr $ac_count + 1` + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done +done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + { { $as_echo "$as_me:$LINENO: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 +$as_echo "$as_me: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} + { (exit 1); exit 1; }; } + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 +$as_echo 
"$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:$LINENO: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if test "${ac_cv_path_EGREP+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + ac_count=`expr $ac_count + 1` + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done +done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + { { $as_echo "$as_me:$LINENO: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 +$as_echo "$as_me: error: 
no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} + { (exit 1); exit 1; }; } + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:$LINENO: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if test "${ac_cv_header_stdc+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_header_stdc=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_header_stdc=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then + : +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + $as_echo "$as_me: program exited with status $ac_status" >&5 +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_header_stdc=no +fi +rm -rf conftest.dSYM +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +cat >>confdefs.h <<\_ACEOF +#define STDC_HEADERS 1 +_ACEOF + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. + + + + + + + + + +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do +as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +{ $as_echo "$as_me:$LINENO: checking for $ac_header" >&5 +$as_echo_n "checking for $ac_header... " >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + eval "$as_ac_Header=yes" +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_Header=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +ac_res=`eval 'as_val=${'$as_ac_Header'} + $as_echo "$as_val"'` + { $as_echo "$as_me:$LINENO: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +as_val=`eval 'as_val=${'$as_ac_Header'} + $as_echo "$as_val"'` + if test "x$as_val" = x""yes; then + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + +for ac_header in MicAccessApi.h +do +as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { $as_echo "$as_me:$LINENO: checking for $ac_header" >&5 +$as_echo_n "checking for $ac_header... " >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + $as_echo_n "(cached) " >&6 +fi +ac_res=`eval 'as_val=${'$as_ac_Header'} + $as_echo "$as_val"'` + { $as_echo "$as_me:$LINENO: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:$LINENO: checking $ac_header usability" >&5 +$as_echo_n "checking $ac_header usability... " >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_header_compiler=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:$LINENO: checking $ac_header presence" >&5 +$as_echo_n "checking $ac_header presence... " >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! 
-s conftest.err + }; then + ac_header_preproc=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ $as_echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { $as_echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { $as_echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { $as_echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +$as_echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +$as_echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + + ;; +esac +{ $as_echo "$as_me:$LINENO: checking for $ac_header" >&5 +$as_echo_n "checking for $ac_header... " >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + $as_echo_n "(cached) " >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval 'as_val=${'$as_ac_Header'} + $as_echo "$as_val"'` + { $as_echo "$as_me:$LINENO: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + +fi +as_val=`eval 'as_val=${'$as_ac_Header'} + $as_echo "$as_val"'` + if test "x$as_val" = x""yes; then + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +else + { { $as_echo "$as_me:$LINENO: error: Couldn't find MicAccessApi.h...try installing MPSS from \ +http://software.intel.com/en-us/articles/intel-manycore-platform-software-stack-mpss" >&5 +$as_echo "$as_me: error: Couldn't find MicAccessApi.h...try installing MPSS from \ +http://software.intel.com/en-us/articles/intel-manycore-platform-software-stack-mpss" >&2;} + { (exit 1); exit 1; }; } +fi + +done + +CPPFLAGS=$OLD_CPPFLAGS + +ac_config_files="$ac_config_files Makefile.host_micpower" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be 
shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. 
+ sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + test "x$cache_file" != "x/dev/null" && + { $as_echo "$as_me:$LINENO: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + cat confcache >$cache_file + else + { $as_echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Transform confdefs.h into DEFS. +# Protect against shell expansion while executing Makefile rules. +# Protect against Makefile macro expansion. +# +# If the first sed substitution is executed (which looks for macros that +# take arguments), then branch to the quote section. Otherwise, +# look for a macro that doesn't take arguments. +ac_script=' +:mline +/\\$/{ + N + s,\\\n,, + b mline +} +t clear +:clear +s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g +t quote +s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g +t quote +b any +:quote +s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g +s/\[/\\&/g +s/\]/\\&/g +s/\$/$$/g +H +:any +${ + g + s/^\n// + s/\n/ /g + p +} +' +DEFS=`sed -n "$ac_script" confdefs.h` + + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. 
+ ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: ${CONFIG_STATUS=./config.status} +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + + + +# PATH needs CR +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. 
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. +$as_unset CDPATH + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. 
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || + as_ln_s='cp -p' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -p' + fi +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 + +# Save the log message, to keep $[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by host_micpower $as_me version-0.1, which was +generated by GNU Autoconf 2.63. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTION]... [FILE]... 
+ + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + +Configuration files: +$config_files + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_version="\\ +host_micpower config.status version-0.1 +configured by $0, generated by GNU Autoconf 2.63, + with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2008 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. 
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + CONFIG_FILES="$CONFIG_FILES '$ac_optarg'" + ac_need_defaults=false;; + --he | --h | --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { $as_echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. 
+for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "Makefile.host_micpower") CONFIG_FILES="$CONFIG_FILES Makefile.host_micpower" ;; + + *) { { $as_echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +$as_echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= + trap 'exit_status=$? + { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status +' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || +{ + $as_echo "$as_me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. 
+if test -n "$CONFIG_FILES"; then + + +ac_cr=' ' +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } +ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\).*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\).*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \ + || { { $as_echo "$as_me:$LINENO: error: could not setup config files machinery" >&5 +$as_echo "$as_me: error: could not setup config files machinery" >&2;} + { (exit 1); exit 1; }; } +_ACEOF + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/ +s/:*\${srcdir}:*/:/ +s/:*@srcdir@:*/:/ +s/^\([^=]*=[ ]*\):*/\1/ +s/:*$// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + + +eval set X " :F $CONFIG_FILES " +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) { { $as_echo "$as_me:$LINENO: error: invalid tag $ac_tag" >&5 +$as_echo "$as_me: error: invalid tag $ac_tag" >&2;} + { (exit 1); exit 1; }; };; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + { { $as_echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 +$as_echo "$as_me: error: cannot find input file: $ac_f" >&2;} + { (exit 1); exit 1; }; };; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + ac_file_inputs="$ac_file_inputs '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. 
$configure_input" + { $as_echo "$as_me:$LINENO: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$tmp/stdin" \ + || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 +$as_echo "$as_me: error: could not create $ac_file" >&2;} + { (exit 1); exit 1; }; } ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { as_dir="$ac_dir" + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || { { $as_echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 +$as_echo "$as_me: error: cannot create directory $as_dir" >&2;} + { (exit 1); exit 1; }; }; } + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= + +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p +' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \ + || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 +$as_echo "$as_me: error: could not create $ac_file" >&2;} + { (exit 1); exit 1; }; } + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z 
"$ac_out"; } && + { $as_echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&2;} + + rm -f "$tmp/stdin" + case $ac_file in + -) cat "$tmp/out" && rm -f "$tmp/out";; + *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";; + esac \ + || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 +$as_echo "$as_me: error: could not create $ac_file" >&2;} + { (exit 1); exit 1; }; } + ;; + + + + esac + +done # for ac_tag + + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + { { $as_echo "$as_me:$LINENO: error: write failure creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: error: write failure creating $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. 
+  $ac_cs_success || { (exit 1); exit 1; }
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+  { $as_echo "$as_me:$LINENO: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
diff --git a/src/components/host_micpower/configure.ac b/src/components/host_micpower/configure.ac
new file mode 100644
index 0000000..8794710
--- /dev/null
+++ b/src/components/host_micpower/configure.ac
@@ -0,0 +1,40 @@
+dnl Configure input for the PAPI host_micpower component.
+dnl Substitutes SYSMGMT_CFLAGS and SYSMGMT_LIBS into Makefile.host_micpower
+dnl and aborts if the MPSS header MicAccessApi.h cannot be found.
+AC_INIT([host_micpower], [version-0.1])
+
+AC_PROG_CC
+
+dnl Include directory of the MPSS sysmgmt SDK headers.
+AC_ARG_WITH([sysmgmt-include-path],
+  [AS_HELP_STRING([--with-sysmgmt-include-path],
+    [location of the MPSS sysmgmt api headers, defaults to /opt/intel/mic/sysmgmt/sdk/include])],
+  [SYSMGMT_CFLAGS="-I$withval"],
+  [SYSMGMT_CFLAGS="-I/opt/intel/mic/sysmgmt/sdk/include"] )
+AC_SUBST([SYSMGMT_CFLAGS])
+
+dnl The library directory is emitted only as an rpath option, so it affects
+dnl where the runtime linker looks; no -L flag is generated here.
+AC_ARG_WITH([sysmgmt-lib-path],
+  [AS_HELP_STRING([--with-sysmgmt-lib-path],
+    [location of the MPSS sysmgmt libraries, feed to the runtime linker; \
+defaults to /opt/intel/mic/sysmgmt/sdk/lib/Linux])],
+  [SYSMGMT_LIBS="-Wl,-rpath,$withval"],
+  [SYSMGMT_LIBS="-Wl,-rpath,/opt/intel/mic/sysmgmt/sdk/lib/Linux"])
+AC_SUBST([SYSMGMT_LIBS])
+
+#AC_ARG_WITH([scif-lib-path],
+#  [AS_HELP_STRING([--with-scif-lib-path],[location of the SCIF library, needed by libMicAccessApi.so]),
+#  [],
+#  [])
+
+dnl Probe for MicAccessApi.h using the SDK defines and include path, then
+dnl put the original CPPFLAGS back. The failure action is quoted so that
+dnl AC_MSG_ERROR is expanded only inside AC_CHECK_HEADERS.
+OLD_CPPFLAGS=$CPPFLAGS
+CPPFLAGS=["-DMICACCESSAPI -DLINUX $SYSMGMT_CFLAGS"]
+AC_CHECK_HEADERS([MicAccessApi.h], [], [AC_MSG_ERROR([Couldn't find MicAccessApi.h...try installing MPSS from \
+http://software.intel.com/en-us/articles/intel-manycore-platform-software-stack-mpss])] )
+CPPFLAGS=$OLD_CPPFLAGS
+
+AC_CONFIG_FILES([Makefile.host_micpower])
+AC_OUTPUT
diff --git a/src/components/host_micpower/linux-host_micpower.c b/src/components/host_micpower/linux-host_micpower.c
new file mode 100644
index 0000000..6aef22e
--- /dev/null
+++ b/src/components/host_micpower/linux-host_micpower.c
@@ -0,0 +1,617 @@
+/**
linux-host_micpower.c
+ * @author James Ralph
+ * ralph@icl.utk.edu
+ *
+ * @ingroup papi_components
+ *
+ * @brief
+ * This component wraps the MicAccessAPI to provide hostside
+ * power information for attached Intel Xeon Phi (MIC) cards.
+*/
+
+/* From intel examples, see $(mic_dir)/sysmgt/sdk/Examples/Usage */
+#define MAX_DEVICES (32)
+#define EVENTS_PER_DEVICE 10
+/* NOTE(review): the four system header names were missing from these
+ * directives; they are restored from the calls this file makes
+ * (snprintf/fprintf, strncpy/memset, dlopen/dlsym/dlerror). Confirm the
+ * exact set against upstream. */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+
+#include "MicAccessTypes.h"
+#include "MicBasicTypes.h"
+#include "MicAccessErrorTypes.h"
+#include "MicAccessApi.h"
+#include "MicPowerManagerAPI.h"
+
+#include "papi.h"
+#include "papi_internal.h"
+#include "papi_vector.h"
+#include "papi_memory.h"
+
+/* Weak reference: loadFunctionPtrs() treats a non-NULL address here as a
+ * hint that libc was linked statically. */
+void (*_dl_non_dynamic_init)(void) __attribute__((weak));
+
+/* This is a guess, refine this later */
+#define UPDATEFREQ 500000
+
+/* Component descriptor; cmp_info.disabled / disabled_reason are filled in
+ * when initialization fails. */
+papi_vector_t _host_micpower_vector;
+
+typedef struct host_micpower_register {
+    /** Corresponds to counter slot, indexed from 1, 0 has a special meaning */
+    unsigned int selector;
+} host_micpower_register_t;
+
+/** Register-allocation wrapper around a single host_micpower_register_t. */
+typedef struct host_micpower_reg_alloc {
+    host_micpower_register_t ra_bits;
+} host_micpower_reg_alloc_t;
+
+/** Internal structure used to build the table of events */
+typedef struct host_micpower_native_event_entry {
+    host_micpower_register_t resources;
+    char name[PAPI_MAX_STR_LEN];
+    char description[PAPI_MAX_STR_LEN];
+    /* Sized to PAPI_MIN_STR_LEN because the init code passes that bound to
+     * snprintf() when writing the unit label ("uW"); the previous 3-byte
+     * array was smaller than the bound handed to snprintf(). */
+    char units[PAPI_MIN_STR_LEN];
+} host_micpower_native_event_entry_t;
+
+/** Per-eventset structure used to hold control flags.
*/
+typedef struct host_micpower_control_state {
+    int num_events;
+    int resident[MAX_DEVICES*EVENTS_PER_DEVICE];
+    long long counts[MAX_DEVICES*EVENTS_PER_DEVICE];
+    long long lastupdate[MAX_DEVICES];
+} host_micpower_control_state_t;
+
+/** Per-thread data */
+typedef struct host_micpower_context {
+    host_micpower_control_state_t state;
+} host_micpower_context_t;
+
+/* Global state info */
+static MicDeviceOnSystem adapters[MAX_DEVICES];
+static HANDLE handles[MAX_DEVICES];
+static long long lastupdate[MAX_DEVICES];
+static HANDLE accessHandle = NULL;
+static U32 nAdapters = MAX_DEVICES;
+
+/* dlopen() handles for libMicAccessSDK.so and libscif.so; NULL when not loaded. */
+static void* mic_access = NULL;
+static void* scif_access = NULL;
+
+/* Redeclare the SDK entry points as weak symbols. The component itself calls
+ * through the *Ptr variables below, which loadFunctionPtrs() resolves at run
+ * time with dlsym(). */
+#undef MICACCESS_API
+#define MICACCESS_API __attribute__((weak))
+const char *MicGetErrorString(U32);
+U32 MICACCESS_API MicCloseAdapter(HANDLE);
+U32 MICACCESS_API MicInitAPI(HANDLE *, ETarget, MicDeviceOnSystem *, U32 *);
+U32 MICACCESS_API MicCloseAPI(HANDLE *);
+U32 MICACCESS_API MicInitAdapter(HANDLE *, MicDeviceOnSystem *);
+U32 MICACCESS_API MicGetPowerUsage(HANDLE, MicPwrUsage *);
+
+const char *(*MicGetErrorStringPtr)(U32);
+U32 (*MicCloseAdapterPtr)(HANDLE);
+U32 (*MicInitAPIPtr)(HANDLE *, ETarget, MicDeviceOnSystem *, U32 *);
+U32 (*MicCloseAPIPtr)(HANDLE *);
+U32 (*MicInitAdapterPtr)(HANDLE *, MicDeviceOnSystem *);
+U32 (*MicGetPowerUsagePtr)(HANDLE, MicPwrUsage *);
+static host_micpower_native_event_entry_t *native_events_table = NULL;
+
+struct powers {
+    int total0;
+    int total1;
+    int inst;
+    int imax;
+    int pcie;
+    int c2x3;
+    int c2x4;
+    int vccp;
+    int vddg;
+    int vddq;
+};
+
+/* The ten named readings of struct powers overlaid with an int array of
+ * EVENTS_PER_DEVICE entries, so a reading can be addressed by name or index. */
+typedef union {
+    struct powers power;
+    int array[EVENTS_PER_DEVICE];
+} power_t;
+
+static power_t cached_values[MAX_DEVICES];
+
+/**
+ * Resolve one entry point from the already-opened MicAccessSDK library.
+ *
+ * @param name  symbol name to look up in mic_access
+ * @return the symbol address, or NULL after writing
+ *         "MicAccessSDK function <name> not found." to
+ *         _host_micpower_vector.cmp_info.disabled_reason
+ */
+static void *
+loadMicSymbol( const char *name )
+{
+    void *sym;
+
+    /* Clear any pending error first so the check after dlsym() reports on
+     * this lookup only, as the dlsym() documentation recommends. */
+    (void) dlerror();
+    sym = dlsym( mic_access, name );
+    if ( dlerror() != NULL || sym == NULL ) {
+        snprintf( _host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN,
+                  "MicAccessSDK function %s not found.", name );
+        return NULL;
+    }
+    return sym;
+}
+
+/**
+ * Load libscif.so and libMicAccessSDK.so and fill in the Mic*Ptr function
+ * pointers used by the rest of the component.
+ *
+ * On any failure the reason is stored in cmp_info.disabled_reason,
+ * cmp_info.disabled is set, every function pointer is reset to NULL and any
+ * library opened so far is closed again.
+ *
+ * @return 0 on success, PAPI_ENOSUPP on failure
+ */
+static int
+loadFunctionPtrs( void )
+{
+    /* Attempt to guess if we were statically linked to libc, if so bail */
+    if ( _dl_non_dynamic_init != NULL ) {
+        snprintf( _host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN,
+                  "The host_micpower component does not support statically linking of libc." );
+        goto fail;
+    }
+
+    /* Load the SCIF library first, then the MicAccess SDK; if either is not
+     * found the component is disabled. Both are opened RTLD_GLOBAL
+     * (presumably so the SDK can resolve SCIF symbols; confirm). */
+    scif_access = dlopen( "libscif.so", RTLD_NOW | RTLD_GLOBAL );
+    if ( NULL == scif_access ) {
+        snprintf( _host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN,
+                  "Problem loading the SCIF library: %s\n", dlerror() );
+        goto fail;
+    }
+
+    mic_access = dlopen( "libMicAccessSDK.so", RTLD_NOW | RTLD_GLOBAL );
+    if ( NULL == mic_access ) {
+        snprintf( _host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN,
+                  "Problem loading libMicAccessSDK.so: %s\n", dlerror() );
+        goto fail;
+    }
+
+    /* Short-circuit evaluation stops at the first missing symbol, so
+     * disabled_reason names that symbol. */
+    if ( ( MicGetErrorStringPtr = loadMicSymbol( "MicGetErrorString" ) ) == NULL ||
+         ( MicCloseAdapterPtr   = loadMicSymbol( "MicCloseAdapter" ) )   == NULL ||
+         ( MicInitAPIPtr        = loadMicSymbol( "MicInitAPI" ) )        == NULL ||
+         ( MicCloseAPIPtr       = loadMicSymbol( "MicCloseAPI" ) )       == NULL ||
+         ( MicInitAdapterPtr    = loadMicSymbol( "MicInitAdapter" ) )    == NULL ||
+         ( MicGetPowerUsagePtr  = loadMicSymbol( "MicGetPowerUsage" ) )  == NULL ) {
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    /* Drop pointers into the SDK before unloading it so nothing dangles. */
+    MicGetErrorStringPtr = NULL;
+    MicCloseAdapterPtr = NULL;
+    MicInitAPIPtr = NULL;
+    MicCloseAPIPtr = NULL;
+    MicInitAdapterPtr = NULL;
+    MicGetPowerUsagePtr = NULL;
+    if ( mic_access != NULL ) {
+        dlclose( mic_access );
+        mic_access = NULL;
+    }
+    if ( scif_access != NULL ) {
+        dlclose( scif_access );
+        scif_access = NULL;
+    }
+    _host_micpower_vector.cmp_info.disabled = 1;
+    return PAPI_ENOSUPP;
+}
+
+
+/* ###############################################
+ * Component Interface code
+ * ############################################### */
+
+
+int
+_host_micpower_init_component( int cidx )
+{
+    U32 ret = MIC_ACCESS_API_ERROR_UNKNOWN;
+    U32 adapterNum = 0;
+    U32 throwaway = 1;
+
+    _host_micpower_vector.cmp_info.CmpIdx = cidx;
+
+    if ( loadFunctionPtrs() ) {
+        goto disable_me;
+    }
+
+    memset( lastupdate, 0x0, sizeof(lastupdate));
+    memset( cached_values, 0x0, sizeof(struct powers)*MAX_DEVICES );
+    ret = MicInitAPIPtr( &accessHandle, eTARGET_SCIF_DRIVER, adapters, &nAdapters );
+    if ( MIC_ACCESS_API_SUCCESS != ret ) {
+        snprintf( _host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Failed to init: %s", MicGetErrorStringPtr(ret));
+        MicCloseAPIPtr(&accessHandle);
+        goto disable_me;
+    }
+    /* Sanity check on array size */
+    if ( nAdapters >= MAX_DEVICES ) {
+        snprintf(_host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "Too many MIC cards [%d] found, bailing.", nAdapters);
+        MicCloseAPIPtr(&accessHandle);
+        goto disable_me;
+    }
+
+/* XXX: This code initializes a token for each adapter, in testing this appeared to be required/
+ * One has to call MicInitAdapter() before calling into that adapter's entries */
+    for (adapterNum=0; adapterNum < nAdapters; adapterNum++) {
+        ret = MicInitAPIPtr( &handles[adapterNum], eTARGET_SCIF_DRIVER, adapters, &throwaway );
+        throwaway = 1;
+        if (MIC_ACCESS_API_SUCCESS != ret) {
+            fprintf(stderr,
"%d:MicInitAPI carps: %s\n", __LINE__, MicGetErrorStringPtr(ret)); + nAdapters = adapterNum; + for (adapterNum=0; adapterNum < nAdapters; adapterNum++) + MicCloseAdapterPtr( handles[adapterNum] ); + MicCloseAPIPtr( &accessHandle ); + snprintf(_host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, + "Failed to initialize card %d's interface.", nAdapters); + goto disable_me; + } + ret = MicInitAdapterPtr(&handles[adapterNum], &adapters[adapterNum]); + if (MIC_ACCESS_API_SUCCESS != ret) { + fprintf(stderr, "%d:MicInitAdapter carps: %s\n", __LINE__, MicGetErrorStringPtr(ret)); + nAdapters = adapterNum; + for (adapterNum=0; adapterNum < nAdapters; adapterNum++) + MicCloseAdapterPtr( handles[adapterNum] ); + MicCloseAPIPtr( &accessHandle ); + snprintf(_host_micpower_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, + "Failed to initialize card %d's interface.", nAdapters); + goto disable_me; + } + } + + native_events_table = ( host_micpower_native_event_entry_t*)papi_malloc( nAdapters * EVENTS_PER_DEVICE * sizeof(host_micpower_native_event_entry_t)); + if ( NULL == native_events_table ) { + return PAPI_ENOMEM; + } + for (adapterNum=0; adapterNum < nAdapters; adapterNum++) { + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE].name, PAPI_MAX_STR_LEN, "mic%d:tot0", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE].description, PAPI_MAX_STR_LEN, "Total power utilization, Averaged over Time Window 0 (uWatts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE].resources.selector = adapterNum*EVENTS_PER_DEVICE + 1; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE].units, PAPI_MIN_STR_LEN, "uW"); + + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 1].name, PAPI_MAX_STR_LEN, "mic%d:tot1", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 1].description, PAPI_MAX_STR_LEN, "Total power utilization, Averaged over Time Window 1 (uWatts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE 
+ 1].resources.selector = adapterNum*EVENTS_PER_DEVICE + 2; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 1].units, PAPI_MIN_STR_LEN, "uW"); + + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 2].name, PAPI_MAX_STR_LEN, "mic%d:pcie", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 2].description, PAPI_MAX_STR_LEN, "PCI-E connector power (uWatts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE + 2].resources.selector = adapterNum*EVENTS_PER_DEVICE + 3; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 2].units, PAPI_MIN_STR_LEN, "uW"); + + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 3].name, PAPI_MAX_STR_LEN, "mic%d:inst", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 3].description, PAPI_MAX_STR_LEN, "Instantaneous power (uWatts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE + 3].resources.selector = adapterNum*EVENTS_PER_DEVICE + 4; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 3].units, PAPI_MIN_STR_LEN, "uW"); + + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 4].name, PAPI_MAX_STR_LEN, "mic%d:imax", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 4].description, PAPI_MAX_STR_LEN, "Max instantaneous power (uWatts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE + 4].resources.selector = adapterNum*EVENTS_PER_DEVICE + 5; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 4].units, PAPI_MIN_STR_LEN, "uW"); + + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 5].name, PAPI_MAX_STR_LEN, "mic%d:c2x3", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 5].description, PAPI_MAX_STR_LEN, "2x3 connector power (uWatts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE + 5].resources.selector = adapterNum*EVENTS_PER_DEVICE + 6; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 5].units, PAPI_MIN_STR_LEN, "uW"); + + 
snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 6].name, PAPI_MAX_STR_LEN, "mic%d:c2x4", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 6].description, PAPI_MAX_STR_LEN, "2x4 connector power (uWatts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE + 6].resources.selector = adapterNum*EVENTS_PER_DEVICE + 7; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 6].units, PAPI_MIN_STR_LEN, "uW"); + + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 7].name, PAPI_MAX_STR_LEN, "mic%d:vccp", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 7].description, PAPI_MAX_STR_LEN, "Core rail (uVolts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE + 7].resources.selector = adapterNum*EVENTS_PER_DEVICE + 8; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 7].units, PAPI_MIN_STR_LEN, "uV"); + + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 8].name, PAPI_MAX_STR_LEN, "mic%d:vddg", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 8].description, PAPI_MAX_STR_LEN, "Uncore rail (uVolts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE + 8].resources.selector = adapterNum*EVENTS_PER_DEVICE + 9; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 8].units, PAPI_MIN_STR_LEN, "uV"); + + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 9].name, PAPI_MAX_STR_LEN, "mic%d:vddq", adapterNum); + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 9].description, PAPI_MAX_STR_LEN, "Memory subsystem rail (uVolts)"); + native_events_table[adapterNum*EVENTS_PER_DEVICE + 9].resources.selector = adapterNum*EVENTS_PER_DEVICE + 10; + snprintf(native_events_table[adapterNum*EVENTS_PER_DEVICE + 9].units, PAPI_MIN_STR_LEN, "uV"); + } + + _host_micpower_vector.cmp_info.num_cntrs = EVENTS_PER_DEVICE*nAdapters; + _host_micpower_vector.cmp_info.num_mpx_cntrs = EVENTS_PER_DEVICE*nAdapters; + + 
_host_micpower_vector.cmp_info.num_native_events = EVENTS_PER_DEVICE*nAdapters; + + return PAPI_OK; + +disable_me: + _host_micpower_vector.cmp_info.num_cntrs = 0; + _host_micpower_vector.cmp_info.num_mpx_cntrs = 0; + _host_micpower_vector.cmp_info.num_native_events = 0; + _host_micpower_vector.cmp_info.disabled = 1; + + nAdapters = 0; + return PAPI_ENOSUPP; +} + +int _host_micpower_init_thread( hwd_context_t *ctx) { + (void)ctx; + return PAPI_OK; +} + +int +_host_micpower_shutdown_component( void ) { + U32 i = 0; + for( i=0; iresident[i] = 0; + + for (i=0; i < count; i++) { + index = info[i].ni_event&PAPI_NATIVE_AND_MASK; + info[i].ni_position=native_events_table[index].resources.selector-1; + state->resident[index] = 1; + } + state->num_events = count; + + return PAPI_OK; +} + +int +_host_micpower_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + (void) ctl; + return PAPI_OK; +} + +/* + * Query adapter which_one through MicGetPowerUsage and copy each reading + * into *pwr. + * Returns PAPI_ENOEVNT when which_one is outside 0 .. nAdapters-1, + * PAPI_ECMP when the SDK call fails, PAPI_OK otherwise. + */ +static int +read_power( struct powers *pwr, int which_one ) +{ + MicPwrUsage power; + U32 ret = MIC_ACCESS_API_ERROR_UNKNOWN; + + /* only handles[0 .. nAdapters-1] were initialised, so nAdapters itself is out of range */ + if ( which_one < 0 || which_one >= (int)nAdapters ) + return PAPI_ENOEVNT; + + + ret = MicGetPowerUsagePtr(handles[which_one], &power); + if (MIC_ACCESS_API_SUCCESS != ret) { + fprintf(stderr,"Oops MicGetPowerUsage failed: %s\n", + MicGetErrorStringPtr(ret)); + return PAPI_ECMP; + } + + pwr->total0 = power.total0.prr; + pwr->total1 = power.total1.prr; + pwr->inst = power.inst.prr; + pwr->imax = power.imax.prr; + pwr->pcie = power.pcie.prr; + pwr->c2x3 = power.c2x3.prr; + pwr->c2x4 = power.c2x4.prr; + pwr->vccp = power.vccp.pwr; + pwr->vddg = power.vddg.pwr; + pwr->vddq = power.vddq.pwr; + + return PAPI_OK; +} + +int +_host_micpower_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags) +{ + (void)flags; + (void)events; + (void)ctx; + unsigned int i,j; + int needs_update = 0; + host_micpower_control_state_t* control = (host_micpower_control_state_t*)ctl; + long long now = PAPI_get_real_usec(); + + for( i=0;
iresident[EVENTS_PER_DEVICE*i+j]) { + needs_update = 1; + break; + } + } + + if ( needs_update ) { + /* Do the global update */ + if ( now >= lastupdate[i] + UPDATEFREQ) { + read_power( &cached_values[i].power, i ); + lastupdate[i] = now; + } + /* update from cached values */ + if ( control->lastupdate[i] < lastupdate[i]) { + control->lastupdate[i] = lastupdate[i]; + } + for (j=0; jresident[EVENTS_PER_DEVICE*i+j] ) { + control->counts[EVENTS_PER_DEVICE*i+j] = (long long)cached_values[i].array[j]; + } + } + } + } + + *events = control->counts; + return PAPI_OK; +} + +int +_host_micpower_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void)ctx; + int needs_update = 0; + unsigned int i,j; + host_micpower_control_state_t* control = (host_micpower_control_state_t*)ctl; + long long now = PAPI_get_real_usec(); + + for( i=0; iresident[EVENTS_PER_DEVICE*i+j]) { + needs_update = 1; + break; + } + } + + if ( needs_update ) { + /* Do the global update */ + if ( now >= lastupdate[i] + UPDATEFREQ) { + read_power( &cached_values[i].power, i ); + lastupdate[i] = now; + } + /* update from cached values */ + if ( control->lastupdate[i] < lastupdate[i]) { + control->lastupdate[i] = lastupdate[i]; + } + for (j=0; jresident[EVENTS_PER_DEVICE*i+j] ) { + control->counts[EVENTS_PER_DEVICE*i+j] = (long long)cached_values[i].array[j]; + } + } + } + } + return PAPI_OK; + +} + +int _host_micpower_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + int index; + switch (modifier) { + case PAPI_ENUM_FIRST: + if (0 == _host_micpower_vector.cmp_info.num_cntrs) + return PAPI_ENOEVNT; + *EventCode = 0; + return PAPI_OK; + case PAPI_ENUM_EVENTS: + index = *EventCode; + if ( index < _host_micpower_vector.cmp_info.num_cntrs - 1) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + default: + return PAPI_EINVAL; + } + return PAPI_EINVAL; +} + +int +_host_micpower_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + 
unsigned int code = EventCode & PAPI_NATIVE_AND_MASK; + if ( code < _host_micpower_vector.cmp_info.num_cntrs ) { + strncpy( name, native_events_table[code].name, len); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + +int +_host_micpower_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + unsigned int code = EventCode & PAPI_NATIVE_AND_MASK; + if ( code < _host_micpower_vector.cmp_info.num_cntrs ) { + strncpy( name, native_events_table[code].description, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + +int +_host_micpower_ntv_code_to_info( unsigned int EventCode, PAPI_event_info_t *info) +{ + unsigned int code = EventCode & PAPI_NATIVE_AND_MASK; + if ( code >= _host_micpower_vector.cmp_info.num_cntrs) + return PAPI_ENOEVNT; + strncpy( info->symbol, native_events_table[code].name, sizeof(info->symbol) ); + strncpy( info->long_descr, native_events_table[code].description, sizeof(info->long_descr) ); + strncpy( info->units, native_events_table[code].units, sizeof(info->units) ); + return PAPI_OK; +} + +int +_host_micpower_ctl( hwd_context_t* ctx, int code, _papi_int_option_t *option) +{ + (void)ctx; + (void)code; + (void)option; + return PAPI_OK; +} + +int +_host_micpower_set_domain( hwd_control_state_t* ctl, int domain) +{ + (void)ctl; + if ( PAPI_DOM_ALL != domain ) + return PAPI_EINVAL; + return PAPI_OK; +} + +papi_vector_t _host_micpower_vector = { + .cmp_info = { + .name = "host_micpower", + .short_name = "host_micpower", + .description = "A host-side component to read power usage on MIC guest cards.", + .version = "0.1", + .support_version = "n/a", + .kernel_version = "n/a", + .num_cntrs = 0, + .num_mpx_cntrs = 0, + .default_domain = PAPI_DOM_ALL, + .available_domains = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + }, + + .size = { + .context = sizeof(host_micpower_context_t), + .control_state = sizeof(host_micpower_control_state_t), + 
.reg_value = sizeof(host_micpower_register_t), + .reg_alloc = sizeof(host_micpower_reg_alloc_t), + }, + + .start = _host_micpower_start, + .stop = _host_micpower_stop, /* dispatch to the stop handler rather than a second copy of start */ + .read = _host_micpower_read, + .reset = NULL, + .write = NULL, + .init_component = _host_micpower_init_component, + .init_thread = _host_micpower_init_thread, + .init_control_state = _host_micpower_init_control_state, + .update_control_state = _host_micpower_update_control_state, + .ctl = _host_micpower_ctl, + .shutdown_thread = _host_micpower_shutdown_thread, + .shutdown_component = _host_micpower_shutdown_component, + .set_domain = _host_micpower_set_domain, + + .ntv_enum_events = _host_micpower_ntv_enum_events, + .ntv_code_to_name = _host_micpower_ntv_code_to_name, + .ntv_code_to_descr = _host_micpower_ntv_code_to_descr, + .ntv_code_to_info = _host_micpower_ntv_code_to_info, + +}; diff --git a/src/components/host_micpower/tests/Makefile b/src/components/host_micpower/tests/Makefile new file mode 100644 index 0000000..c532b80 --- /dev/null +++ b/src/components/host_micpower/tests/Makefile @@ -0,0 +1,21 @@ +NAME=host_micpower +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = host_micpower_basic +host_micpower_tests : $(TESTS) + +micpower_tests: $(TESTS) + +host_micpower_basic: host_micpower_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o host_micpower_basic host_micpower_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o + + + + + diff --git a/src/components/host_micpower/tests/host_micpower_basic.c b/src/components/host_micpower/tests/host_micpower_basic.c new file mode 100644 index 0000000..71dfd1d --- /dev/null +++ b/src/components/host_micpower/tests/host_micpower_basic.c @@ -0,0 +1,127 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Vince Weaver + * + * test case for micpower component + * Based on coretemp test code by
Vince Weaver + * + * + * @brief + * Tests basic component functionality + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 + +int main (int argc, char **argv) +{ + + int retval,cid,numcmp; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int total_events=0; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname); + } + + if ( 0 != strncmp(cmpinfo->name,"host_micpower",13)) { + continue; + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + if (!TESTS_QUIET) printf("%#x %s ",code,event_name); + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()",retval); + } + + if (!TESTS_QUIET) printf(" value: %lld\n",values[0]); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_cleanup_eventset()",retval); + } + + retval = 
PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_destroy_eventset()",retval); + } + + total_events++; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + } + + if (total_events==0) { + + test_skip(__FILE__,__LINE__,"No events from host_micpower found",0); + } + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/components/host_micpower/utils/Makefile b/src/components/host_micpower/utils/Makefile new file mode 100644 index 0000000..96cc386 --- /dev/null +++ b/src/components/host_micpower/utils/Makefile @@ -0,0 +1,16 @@ +CC = gcc +CFLAGS = -O2 -Wall +LFLAGS = +PAPI_INCLUDE = ../../.. +PAPI_LIBRARY = ../../../libpapi.a + +all: host_micpower_plot + +host_micpower_plot: host_micpower_plot.o + $(CC) $(LFLAGS) -o host_micpower_plot host_micpower_plot.o $(PAPI_LIBRARY) -ldl -lpthread + +host_micpower_plot.o: host_micpower_plot.c + $(CC) $(CFLAGS) -I$(PAPI_INCLUDE) -c host_micpower_plot.c + +clean: + rm -f *~ *.o host_micpower_plot results.* diff --git a/src/components/host_micpower/utils/README b/src/components/host_micpower/utils/README new file mode 100644 index 0000000..289a328 --- /dev/null +++ b/src/components/host_micpower/utils/README @@ -0,0 +1,22 @@ +This tool can be used to gather Power (and Voltage) measurements on +Intel Xeon Phi (aka Intel MIC) chips using the MicAccessAPI. + +Be sure to configure the PAPI host_micpower component: +$ cd "/src/components/host_micpower" +$ ./configure +as well as PAPI with --with-components: +$ cd "/src" +$ ./configure --with-components=host_micpower + +It works by using PAPI to poll the MIC power stats every 100ms. +It will dump each statistic to different files, which then +can be plotted. + +The measurements (in uW and uV) are dumped every 100ms. +You can adjust the frequency by changing the source code. + +You can then take those files and put them into your favorite plotting +program. 
You might want to edit the source to remove the extra +commentary from the data, the plotting program I use ignores things +surrounded by (* brackets. + diff --git a/src/components/host_micpower/utils/host_micpower_plot.c b/src/components/host_micpower/utils/host_micpower_plot.c new file mode 100644 index 0000000..6bf6f1a --- /dev/null +++ b/src/components/host_micpower/utils/host_micpower_plot.c @@ -0,0 +1,185 @@ +/** + * @author Vince Weaver, Heike McCraw + */ + +#include +#include +#include +#include + +#include "papi.h" + +#define MAX_DEVICES (32) +#define EVENTS_PER_DEVICE 10 + +#define MAX_EVENTS (MAX_DEVICES*EVENTS_PER_DEVICE) + +char events[MAX_EVENTS][BUFSIZ]; +char filenames[MAX_EVENTS][BUFSIZ]; + +FILE *fff[MAX_EVENTS]; + +static int num_events=0; + +int main (int argc, char **argv) +{ + + int retval,cid,host_micpower_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long values[MAX_EVENTS]; + int i,code,enum_retval; + const PAPI_component_info_t *cmpinfo = NULL; + long long start_time,before_time,after_time; + double elapsed_time,total_time; + double energy = 0.0; + char event_name[BUFSIZ]; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + fprintf(stderr,"PAPI_library_init failed\n"); + exit(1); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname,"host_micpower")) { + host_micpower_cid=cid; + printf("Found host_micpower component at cid %d\n", host_micpower_cid); + + if (cmpinfo->disabled) { + fprintf(stderr,"No host_micpower events found: %s\n", + cmpinfo->disabled_reason); + exit(1); + } + break; + } + } + + /* Component not found */ + if (cid==numcmp) { + fprintf(stderr,"No host_micpower component found\n"); + exit(1); + } + + /* Find Events */ + code = PAPI_NATIVE_MASK; + + enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + + while ( enum_retval == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + 
printf("Error translating %#x\n",code); + exit(1); + } + + printf("Found: %s\n",event_name); + strncpy(events[num_events],event_name,BUFSIZ); + sprintf(filenames[num_events],"results.%s",event_name); + num_events++; + + if (num_events==MAX_EVENTS) { + printf("Too many events! %d\n",num_events); + exit(1); + } + + enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + + } + + if (num_events==0) { + printf("Error! No host_micpower events found!\n"); + exit(1); + } + + /* Open output files */ + for(i=0;i +#include +#include +#include +#include +#include +#include +#include +#include +#include "pscanf.h" + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +/************************* DEFINES SECTION *********************************** + *******************************************************************************/ +/* this number assumes that there will never be more events than indicated */ +#define INFINIBAND_MAX_COUNTERS 128 + +/** Structure that stores private information of each event */ +typedef struct infiniband_register +{ + /* This is used by the framework.It likes it to be !=0 to do somehting */ + unsigned int selector; +} infiniband_register_t; + +/* + * The following structures mimic the ones used by other components. It is more + * convenient to use them like that as programming with PAPI makes specific + * assumptions for them. 
+ */ + +typedef struct _ib_device_type +{ + char* dev_name; + int dev_port; + struct _ib_device_type *next; +} ib_device_t; + +typedef struct _ib_counter_type +{ + char* ev_name; + char* ev_file_name; + ib_device_t* ev_device; + int extended; // if this is an extended (64-bit) counter + struct _ib_counter_type *next; +} ib_counter_t; + +static const char *ib_dir_path = "/sys/class/infiniband"; + +/** This structure is used to build the table of events */ +typedef struct _infiniband_native_event_entry +{ + infiniband_register_t resources; + char *name; + char *description; + char* file_name; + ib_device_t* device; + int extended; /* if this is an extended (64-bit) counter */ +} infiniband_native_event_entry_t; + + +typedef struct _infiniband_control_state +{ + long long counts[INFINIBAND_MAX_COUNTERS]; + int being_measured[INFINIBAND_MAX_COUNTERS]; + /* all IB counters need difference, but use a flag for generality */ + int need_difference[INFINIBAND_MAX_COUNTERS]; + long long lastupdate; +} infiniband_control_state_t; + + +typedef struct _infiniband_context +{ + infiniband_control_state_t state; + long long start_value[INFINIBAND_MAX_COUNTERS]; +} infiniband_context_t; + + + +/************************* GLOBALS SECTION *********************************** + *******************************************************************************/ +/* This table contains the component native events */ +static infiniband_native_event_entry_t *infiniband_native_events = 0; +/* number of events in the table*/ +static int num_events = 0; + + +papi_vector_t _infiniband_vector; + +/****************************************************************************** + ******** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT ******** + *****************************************************************************/ + +static ib_device_t *root_device = 0; +static ib_counter_t *root_counter = 0; + +static char* +make_ib_event_description(const char* input_str, int extended) +{ + int 
i, len; + char *desc = 0; + if (! input_str) + return (0); + + desc = (char*) papi_calloc(PAPI_MAX_STR_LEN, 1); + if (desc == 0) { + PAPIERROR("cannot allocate memory for event description"); + return (0); + } + len = strlen(input_str); + + snprintf(desc, PAPI_MAX_STR_LEN, "%s (%s).", + input_str, (extended ? "free-running 64bit counter" : + "overflowing, auto-resetting counter")); + desc[0] = toupper(desc[0]); + for (i=0 ; idev_name = strdup(name); + new_dev->dev_port = port; + if (new_dev->dev_name==0) + { + PAPIERROR("cannot allocate memory for device internal fields"); + papi_free(new_dev); + return (0); + } + + // prepend the new device to the device list + new_dev->next = root_device; + root_device = new_dev; + + return (new_dev); +} + +/* + * Allocate a counter record holding private copies of name and file_name, + * tag it with extended and device, and prepend it to the root_counter list. + * Returns the new record, or 0 when any allocation fails (nothing is kept). + */ +static ib_counter_t* +add_ib_counter(const char* name, const char* file_name, int extended, ib_device_t *device) +{ + ib_counter_t *new_cnt = (ib_counter_t*) papi_calloc(sizeof(ib_counter_t), 1); + if (new_cnt == 0) { + PAPIERROR("cannot allocate memory for new IB counter structure"); + return (0); + } + + new_cnt->ev_name = strdup(name); + new_cnt->ev_file_name = strdup(file_name); + new_cnt->extended = extended; + new_cnt->ev_device = device; + if (new_cnt->ev_name==0 || new_cnt->ev_file_name==0) + { + PAPIERROR("cannot allocate memory for counter internal fields"); + /* one of the two copies may have succeeded; free(NULL) is a no-op */ + free(new_cnt->ev_name); + free(new_cnt->ev_file_name); + papi_free(new_cnt); + return (0); + } + + // prepend the new counter to the counter list + new_cnt->next = root_counter; + root_counter = new_cnt; + + return (new_cnt); +} + + +static int +find_ib_device_events(ib_device_t *dev, int extended) +{ + int nevents = 0; + DIR *cnt_dir = NULL; + char counters_path[128]; + + if ( extended ) { + /* mofed driver version <4.0 */ + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters%s", + ib_dir_path, dev->dev_name, dev->dev_port, (extended?"_ext":"")); + + cnt_dir = opendir(counters_path); + if (cnt_dir == NULL) { + /* directory counters_ext in sysfs fs has changed to hw_counters */ + /* in 4.0 version
of mofed driver */ + SUBDBG("cannot open counters directory `%s'\n", counters_path); + + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/%scounters", + ib_dir_path, dev->dev_name, dev->dev_port, "hw_"); + + cnt_dir = opendir(counters_path); + } + } + else { + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters", + ib_dir_path, dev->dev_name, dev->dev_port); + cnt_dir = opendir(counters_path); + } + + if (cnt_dir == NULL) { + SUBDBG("cannot open counters directory `%s'\n", counters_path); + goto out; + } + + struct dirent *ev_ent; + /* iterate over all the events */ + while ((ev_ent = readdir(cnt_dir)) != NULL) { + char *ev_name = ev_ent->d_name; + long long value = -1; + char event_path[160]; + char counter_name[80]; + + if (ev_name[0] == '.') + continue; + + /* Check that we can read an integer from the counter file */ + snprintf(event_path, sizeof(event_path), "%s/%s", counters_path, ev_name); + if (pscanf(event_path, "%lld", &value) != 1) { + SUBDBG("cannot read value for event '%s'\n", ev_name); + continue; + } + + /* Create new counter */ + snprintf(counter_name, sizeof(counter_name), "%s_%d%s:%s", + dev->dev_name, dev->dev_port, (extended?"_ext":""), ev_name); + if (add_ib_counter(counter_name, ev_name, extended, dev)) + { + SUBDBG("Added new counter `%s'\n", counter_name); + nevents += 1; + } + } + + out: + if (cnt_dir != NULL) + closedir(cnt_dir); + + return (nevents); +} + +static int +find_ib_devices() +{ + DIR *ib_dir = NULL; + int result = PAPI_OK; + num_events = 0; + + ib_dir = opendir(ib_dir_path); + if (ib_dir == NULL) { + SUBDBG("cannot open `%s'\n", ib_dir_path); + strncpy(_infiniband_vector.cmp_info.disabled_reason, + "Infiniband sysfs interface not found", PAPI_MAX_STR_LEN); + result = PAPI_ENOSUPP; + goto out; + } + + struct dirent *hca_ent; + while ((hca_ent = readdir(ib_dir)) != NULL) { + char *hca = hca_ent->d_name; + char ports_path[80]; + DIR *ports_dir = NULL; + + if (hca[0] == '.') + goto next_hca; + + 
snprintf(ports_path, sizeof(ports_path), "%s/%s/ports", ib_dir_path, hca); + ports_dir = opendir(ports_path); + if (ports_dir == NULL) { + SUBDBG("cannot open `%s'\n", ports_path); + goto next_hca; + } + + struct dirent *port_ent; + while ((port_ent = readdir(ports_dir)) != NULL) { + int port = atoi(port_ent->d_name); + if (port <= 0) + continue; + + /* Check that port is active. .../HCA/ports/PORT/state should read "4: ACTIVE." */ + int state = -1; + char state_path[80]; + snprintf(state_path, sizeof(state_path), "%s/%s/ports/%d/state", ib_dir_path, hca, port); + if (pscanf(state_path, "%d", &state) != 1) { + SUBDBG("cannot read state of IB HCA `%s' port %d\n", hca, port); + continue; + } + + if (state != 4) { + SUBDBG("skipping inactive IB HCA `%s', port %d, state %d\n", hca, port, state); + continue; + } + + /* Create dev name (HCA/PORT) and get stats for dev. */ + SUBDBG("Found IB device `%s', port %d\n", hca, port); + ib_device_t *dev = add_ib_device(hca, port); + if (!dev) + continue; + // do we want to check for short counters only if no extended counters found? + num_events += find_ib_device_events(dev, 1); // check if we have extended (64bit) counters + num_events += find_ib_device_events(dev, 0); // check also for short counters + } + + next_hca: + if (ports_dir != NULL) + closedir(ports_dir); + } + + if (root_device == 0) // no active devices found + { + strncpy(_infiniband_vector.cmp_info.disabled_reason, + "No active Infiniband ports found", PAPI_MAX_STR_LEN); + result = PAPI_ENOIMPL; + } else if (num_events == 0) + { + strncpy(_infiniband_vector.cmp_info.disabled_reason, + "No supported Infiniband events found", PAPI_MAX_STR_LEN); + result = PAPI_ENOIMPL; + } else + { + // Events are stored in a linked list, in reverse order than how I found them + // Revert them again, so that they are in finding order, not that it matters. 
+ int i = num_events - 1; + // now allocate memory to store the counters into the native table + infiniband_native_events = (infiniband_native_event_entry_t*) + papi_calloc(num_events, sizeof(infiniband_native_event_entry_t)); + ib_counter_t *iter = root_counter; + while (iter != 0) + { + infiniband_native_events[i].name = iter->ev_name; + infiniband_native_events[i].file_name = iter->ev_file_name; + infiniband_native_events[i].device = iter->ev_device; + infiniband_native_events[i].extended = iter->extended; + infiniband_native_events[i].resources.selector = i + 1; + infiniband_native_events[i].description = + make_ib_event_description(iter->ev_file_name, iter->extended); + + ib_counter_t *tmp = iter; + iter = iter->next; + papi_free(tmp); + -- i; + } + root_counter = 0; + } + + out: + if (ib_dir != NULL) + closedir(ib_dir); + + return (result); +} + +static long long +read_ib_counter_value(int index) +{ + char ev_file[128]; + char counters_path[128]; + DIR *cnt_dir = NULL; + long long value = 0ll; + infiniband_native_event_entry_t *iter = &infiniband_native_events[index]; + + if ( iter->extended ) { + /* mofed driver version <4.0 */ + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters%s", + ib_dir_path, iter->device->dev_name, iter->device->dev_port, "_ext"); + + cnt_dir = opendir(counters_path); + if (cnt_dir == NULL) { + /* directory counters_ext in sysfs fs has changed to hw_counters */ + /* in 4.0 version of mofed driver */ + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/%scounters", + ib_dir_path, iter->device->dev_name, iter->device->dev_port, "hw_"); + + cnt_dir = opendir(counters_path); + } + } + else { + snprintf(counters_path, sizeof(counters_path), "%s/%s/ports/%d/counters", + ib_dir_path, iter->device->dev_name, iter->device->dev_port ); + cnt_dir = opendir(counters_path); + } + + + if (cnt_dir != NULL) + closedir(cnt_dir); + + + snprintf(ev_file, sizeof(ev_file), "%s/%s", + counters_path, iter->file_name); + + if 
(pscanf(ev_file, "%lld", &value) != 1) { + PAPIERROR("cannot read value for counter '%s'\n", iter->name); + } else + { + SUBDBG("Counter '%s': %lld\n", iter->name, value); + } + return (value); +} + +static void +deallocate_infiniband_resources() +{ + int i; + + if (infiniband_native_events) + { + for (i=0 ; idev_name) + free(iter->dev_name); + + ib_device_t *tmp = iter; + iter = iter->next; + papi_free(tmp); + } + root_device = 0; +} + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized + */ +static int +_infiniband_init_thread( hwd_context_t *ctx ) +{ + (void) ctx; + return PAPI_OK; +} + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +static int +_infiniband_init_component( int cidx ) +{ + /* discover Infiniband devices and available events */ + int result = find_ib_devices(); + + if (result != PAPI_OK) // we couldn't initialize the component + { + // deallocate any eventually allocated memory + deallocate_infiniband_resources(); + } + + _infiniband_vector.cmp_info.num_native_events = num_events; + + _infiniband_vector.cmp_info.num_cntrs = num_events; + _infiniband_vector.cmp_info.num_mpx_cntrs = num_events; + + + /* Export the component id */ + _infiniband_vector.cmp_info.CmpIdx = cidx; + + return (result); +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +static int +_infiniband_init_control_state( hwd_control_state_t *ctl ) +{ + infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; + int i; + + for (i=0 ; ibeing_measured[i] = 0; + } + + return PAPI_OK; +} + +/* + * + */ +static int +_infiniband_start( 
hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + infiniband_context_t* context = (infiniband_context_t*) ctx; + infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; + long long now = PAPI_get_real_usec(); + int i; + + for (i=0 ; ibeing_measured[i] && control->need_difference[i]) { + context->start_value[i] = read_ib_counter_value(i); + } + } + control->lastupdate = now; + + return PAPI_OK; +} + + +/* + * + */ +static int +_infiniband_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + infiniband_context_t* context = (infiniband_context_t*) ctx; + infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; + long long now = PAPI_get_real_usec(); + int i; + long long temp; + + for (i=0 ; ibeing_measured[i]) + { + temp = read_ib_counter_value(i); + if (context->start_value[i] && control->need_difference[i]) { + /* Must subtract values, but check for wraparound. + * We cannot even detect all wraparound cases. Using the short, + * auto-resetting IB counters is error prone. + */ + if (temp < context->start_value[i]) { + SUBDBG("Wraparound!\nstart:\t%#016x\ttemp:\t%#016x", + (unsigned)context->start_value[i], (unsigned)temp); + /* The counters auto-reset. I cannot even adjust them to + * account for a simple wraparound. + * Just use the current reading of the counter, which is useless. 
+ */ + } else + temp -= context->start_value[i]; + } + control->counts[i] = temp; + } + } + control->lastupdate = now; + + return PAPI_OK; +} + + +/* + * + */ +static int +_infiniband_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long_long ** events, int flags ) +{ + ( void ) flags; + + _infiniband_stop(ctx, ctl); /* we cannot actually stop the counters */ + /* Pass back a pointer to our results */ + *events = ((infiniband_control_state_t*) ctl)->counts; + + return PAPI_OK; +} + + +static int +_infiniband_shutdown_component( void ) +{ + /* Cleanup resources used by this component before leaving */ + deallocate_infiniband_resources(); + + return PAPI_OK; +} + +static int +_infiniband_shutdown_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + + return PAPI_OK; +} + + + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +static int +_infiniband_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + ( void ) ctx; + ( void ) code; + ( void ) option; + return PAPI_OK; +} + + +static int +_infiniband_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t * native, + int count, + hwd_context_t *ctx ) +{ + int i, index; + ( void ) ctx; + + infiniband_control_state_t* control = (infiniband_control_state_t*) ctl; + + for (i=0 ; ibeing_measured[i] = 0; + } + + for (i=0 ; ibeing_measured[index] = 1; + control->need_difference[index] = 1; + } + return PAPI_OK; +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the 
domains + */ +static int +_infiniband_set_domain( hwd_control_state_t *ctl, int domain ) +{ + int found = 0; + (void) ctl; + + if (PAPI_DOM_USER & domain) + found = 1; + + if (PAPI_DOM_KERNEL & domain) + found = 1; + + if (PAPI_DOM_OTHER & domain) + found = 1; + + if (!found) + return (PAPI_EINVAL); + + return (PAPI_OK); +} + + +/* + * Cannot reset the counters using the sysfs interface. + */ +static int +_infiniband_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + (void) ctl; + return PAPI_OK; +} + + +/* + * Native Event functions + */ +static int +_infiniband_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + switch (modifier) { + case PAPI_ENUM_FIRST: + if (num_events == 0) + return (PAPI_ENOEVNT); + + *EventCode = 0; + return PAPI_OK; + + case PAPI_ENUM_EVENTS: + { + int index = *EventCode & PAPI_NATIVE_AND_MASK; + + if (index < num_events - 1) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else + return PAPI_ENOEVNT; + + break; + } + default: + return PAPI_EINVAL; + } + return PAPI_EINVAL; +} + +/* + * + */ +static int +_infiniband_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode; + + if (index>=0 && index=0 && index= num_events )) return PAPI_ENOEVNT; + + if (infiniband_native_events[index].name) + { + unsigned int len = strlen(infiniband_native_events[index].name); + if (len > sizeof(info->symbol)-1) len = sizeof(info->symbol)-1; + strncpy(info->symbol, infiniband_native_events[index].name, len); + info->symbol[len] = '\0'; + } + if (infiniband_native_events[index].description) + { + unsigned int len = strlen(infiniband_native_events[index].description); + if (len > sizeof(info->long_descr)-1) len = sizeof(info->long_descr)-1; + strncpy(info->long_descr, infiniband_native_events[index].description, len); + info->long_descr[len] = '\0'; + } + + strncpy(info->units, "\0", 1); + /* infiniband_native_events[index].units, sizeof(info->units)); */ + +/* info->data_type = 
infiniband_native_events[index].return_type; + */ + return PAPI_OK; +} + + +/* + * + */ +papi_vector_t _infiniband_vector = { + .cmp_info = { + /* component information (unspecified values are initialized to 0) */ + .name = "infiniband", + .short_name = "infiniband", + .version = "5.3.0", + .description = "Linux Infiniband statistics using the sysfs interface", + .num_mpx_cntrs = INFINIBAND_MAX_COUNTERS, + .num_cntrs = INFINIBAND_MAX_COUNTERS, + .default_domain = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof (infiniband_context_t), + .control_state = sizeof (infiniband_control_state_t), + .reg_value = sizeof (infiniband_register_t), + /* .reg_alloc = sizeof (infiniband_reg_alloc_t), */ + }, + /* function pointers in this component */ + .init_thread = _infiniband_init_thread, + .init_component = _infiniband_init_component, + .init_control_state = _infiniband_init_control_state, + .start = _infiniband_start, + .stop = _infiniband_stop, + .read = _infiniband_read, + .shutdown_thread = _infiniband_shutdown_thread, + .shutdown_component = _infiniband_shutdown_component, + .ctl = _infiniband_ctl, + .update_control_state = _infiniband_update_control_state, + .set_domain = _infiniband_set_domain, + .reset = _infiniband_reset, + + .ntv_enum_events = _infiniband_ntv_enum_events, + .ntv_code_to_name = _infiniband_ntv_code_to_name, + .ntv_code_to_descr = _infiniband_ntv_code_to_descr, + .ntv_code_to_info = _infiniband_ntv_code_to_info, +}; diff --git a/src/components/infiniband/pscanf.h b/src/components/infiniband/pscanf.h new file mode 100644 index 0000000..5ba8ffe --- 
/dev/null +++ b/src/components/infiniband/pscanf.h @@ -0,0 +1,32 @@ +/* This file was taken from the tacc_stats utility, which is distributed + * under a GPL license. + */ +#ifndef _PSCANF_H_ +#define _PSCANF_H_ +#include +#include + +__attribute__((format(scanf, 2, 3))) + static inline int pscanf(const char *path, const char *fmt, ...) +{ + int rc = -1; + FILE *file = NULL; + char file_buf[4096]; + va_list arg_list; + va_start(arg_list, fmt); + + file = fopen(path, "r"); + if (file == NULL) + goto out; + setvbuf(file, file_buf, _IOFBF, sizeof(file_buf)); + + rc = vfscanf(file, fmt, arg_list); + + out: + if (file != NULL) + fclose(file); + va_end(arg_list); + return rc; +} + +#endif diff --git a/src/components/infiniband/tests/Makefile b/src/components/infiniband/tests/Makefile new file mode 100644 index 0000000..ee0dfc0 --- /dev/null +++ b/src/components/infiniband/tests/Makefile @@ -0,0 +1,20 @@ +NAME=infiniband +include ../../Makefile_comp_tests.target + +TESTS = infiniband_list_events infiniband_values_by_code + +infiniband_tests: $(TESTS) + + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +infiniband_list_events: infiniband_list_events.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) + +infiniband_values_by_code: infiniband_values_by_code.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o + diff --git a/src/components/infiniband/tests/infiniband_list_events.c b/src/components/infiniband/tests/infiniband_list_events.c new file mode 100644 index 0000000..c70d582 --- /dev/null +++ b/src/components/infiniband/tests/infiniband_list_events.c @@ -0,0 +1,96 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Jose Pedro Oliveira + * + * test case for the linux-infiniband component + * Adapted from its counterpart in the net component. 
+ * + * @brief + * List all net events codes and names + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int main (int argc, char **argv) +{ + int retval,cid,numcmp; + int total_events=0; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Listing all infiniband events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname, "infiniband") == NULL) { + continue; + } + + if (!TESTS_QUIET) { + printf("Component %d (%d) - %d events - %s\n", + cid, cmpinfo->CmpIdx, + cmpinfo->num_native_events, cmpinfo->name); + } + if (cmpinfo->disabled) { + test_skip(__FILE__,__LINE__,"Component infiniband is disabled", 0); + continue; + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + if (!TESTS_QUIET) { + printf("%#x %s\n", code, event_name); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No infiniband events found", 0); + } + + test_pass( __FILE__ ); + + return 0; +} + +// vim:set ai ts=4 sw=4 sts=4 et: diff --git a/src/components/infiniband/tests/infiniband_values_by_code.c b/src/components/infiniband/tests/infiniband_values_by_code.c new file mode 100644 index 0000000..5589509 --- /dev/null +++ b/src/components/infiniband/tests/infiniband_values_by_code.c @@ -0,0 +1,149 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ 
+/****************************/ + +/** + * @author Jose Pedro Oliveira + * + * test case for the linux-infiniband component + * Adapted from its counterpart in the net component. + * + * @brief + * Prints the value of every native event (by code) + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int main (int argc, char **argv) +{ + int retval,cid,numcmp; + int EventSet = PAPI_NULL; + long long *values = 0; + int *codes = 0; + char *names = 0; + int code, i; + int total_events=0; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Trying all infiniband events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidnum_native_events, cmpinfo->name); + } + + if ( strstr(cmpinfo->name, "infiniband") == NULL) { + continue; + } + if (cmpinfo->disabled) { + test_skip(__FILE__,__LINE__,"Component infiniband is disabled", 0); + continue; + } + + values = (long long*) malloc(sizeof(long long) * cmpinfo->num_native_events); + codes = (int*) malloc(sizeof(int) * cmpinfo->num_native_events); + names = (char*) malloc(PAPI_MAX_STR_LEN * cmpinfo->num_native_events); + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset()", retval); + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + i = 0; + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, &names[i*PAPI_MAX_STR_LEN] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + codes[i] = code; + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + 
test_fail(__FILE__, __LINE__, "PAPI_add_event()", retval); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + i += 1; + } + + retval = PAPI_start( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()", retval); + } + + /* XXX figure out a general method to generate some traffic + * for infiniband + * the operation should take more than one second in order + * to guarantee that the network counters are updated */ + /* For now, just sleep for 10 seconds */ + sleep(10); + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()", retval); + } + + if (!TESTS_QUIET) { + for (i=0 ; inum_native_events ; ++i) + printf("%#x %-24s = %lld\n", codes[i], names+i*PAPI_MAX_STR_LEN, values[i]); + } + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset()", retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset()", retval); + } + + free(names); + free(codes); + free(values); + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No infiniband events found", 0); + } + + test_pass( __FILE__ ); + + return 0; +} + +// vim:set ai ts=4 sw=4 sts=4 et: diff --git a/src/components/infiniband_umad/Makefile.infiniband_umad.in b/src/components/infiniband_umad/Makefile.infiniband_umad.in new file mode 100644 index 0000000..153ca31 --- /dev/null +++ b/src/components/infiniband_umad/Makefile.infiniband_umad.in @@ -0,0 +1,2 @@ +INFINIBAND_IBMAD_DIR = @infiniband_ibmad_dir@ +INFINIBAND_IBUMAD_DIR = @infiniband_ibumad_dir@ \ No newline at end of file diff --git a/src/components/infiniband_umad/README b/src/components/infiniband_umad/README new file mode 100644 index 0000000..e448035 --- /dev/null +++ b/src/components/infiniband_umad/README @@ -0,0 +1,17 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Dan 
Terpstra +* terpstra@icl.utk.edu +* @defgroup papi_components Components +* @brief Component Specific Readme file: Infiniband +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +infiniband_umad/ +These files have the source code for a component that enables PAPI-C to access hardware monitoring counters for InfiniBand devices through the OFED library. Since a new interface was introduced with OFED version 1.4 (released Dec 2008), the current InfiniBand component does not support OFED versions < 1.4. + +*/ diff --git a/src/components/infiniband_umad/Rules.infiniband_umad b/src/components/infiniband_umad/Rules.infiniband_umad new file mode 100644 index 0000000..d8798aa --- /dev/null +++ b/src/components/infiniband_umad/Rules.infiniband_umad @@ -0,0 +1,11 @@ +# $Id$ + +include components/infiniband_umad/Makefile.infiniband_umad + +COMPSRCS += components/infiniband_umad/linux-infiniband_umad.c +COMPOBJS += linux-infiniband_umad.o +CFLAGS += -I$(INFINIBAND_IBMAD_DIR)/include -I$(INFINIBAND_IBUMAD_DIR)/include +LDFLAGS += $(LDL) + +linux-infiniband_umad.o: components/infiniband_umad/linux-infiniband_umad.c components/infiniband_umad/linux-infiniband_umad.h $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/infiniband_umad/linux-infiniband_umad.c -o linux-infiniband_umad.o diff --git a/src/components/infiniband_umad/configure b/src/components/infiniband_umad/configure new file mode 100755 index 0000000..4368a24 --- /dev/null +++ b/src/components/infiniband_umad/configure @@ -0,0 +1,3430 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.59. +# +# Copyright (C) 2003 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## --------------------- ## +## M4sh Initialization. 
## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! 
/bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2 + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. 
The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. + exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +exec 6>&1 + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_config_libobj_dir=. +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Maximum number of lines to put in a shell here document. +# This variable seems obsolete. It should probably be removed, and +# only ac_max_sed_lines should be used. +: ${ac_max_here_lines=38} + +# Identity of this package. +PACKAGE_NAME= +PACKAGE_TARNAME= +PACKAGE_VERSION= +PACKAGE_STRING= +PACKAGE_BUGREPORT= + +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT infiniband_ibmad_dir infiniband_ibumad_dir LIBOBJS LTLIBOBJS' +ac_subst_files='' + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. 
+cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datadir='${prefix}/share' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' +includedir='${prefix}/include' +oldincludedir='/usr/include' +infodir='${prefix}/info' +mandir='${prefix}/man' + +ac_prev= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval "$ac_prev=\$ac_option" + ac_prev= + continue + fi + + ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_option in + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) + datadir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + eval "enable_$ac_feature=no" ;; + + -enable-* | --enable-*) + ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "enable_$ac_feature='$ac_optarg'" ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | 
--libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst \ + | --locals | --local | --loca | --loc | --lo) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + 
-program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + 
site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package| sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "with_$ac_package='$ac_optarg'" ;; + + -without-* | --without-*) + ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/-/_/g'` + eval "with_$ac_package=no" ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` + eval "$ac_envvar='$ac_optarg'" + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +# Be sure to have absolute paths. 
+for ac_var in exec_prefix prefix +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* | NONE | '' ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# Be sure to have absolute paths. +for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \ + localstatedir libdir includedir oldincludedir infodir mandir +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then its parent. + ac_confdir=`(dirname "$0") 2>/dev/null || +$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$0" : 'X\(//\)[^/]' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$0" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r $srcdir/$ac_unique_file; then + srcdir=.. 
+ fi +else + ac_srcdir_defaulted=no +fi +if test ! -r $srcdir/$ac_unique_file; then + if test "$ac_srcdir_defaulted" = yes; then + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2 + { (exit 1); exit 1; }; } + else + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } + fi +fi +(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null || + { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2 + { (exit 1); exit 1; }; } +srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'` +ac_env_build_alias_set=${build_alias+set} +ac_env_build_alias_value=$build_alias +ac_cv_env_build_alias_set=${build_alias+set} +ac_cv_env_build_alias_value=$build_alias +ac_env_host_alias_set=${host_alias+set} +ac_env_host_alias_value=$host_alias +ac_cv_env_host_alias_set=${host_alias+set} +ac_cv_env_host_alias_value=$host_alias +ac_env_target_alias_set=${target_alias+set} +ac_env_target_alias_value=$target_alias +ac_cv_env_target_alias_set=${target_alias+set} +ac_cv_env_target_alias_value=$target_alias +ac_env_CC_set=${CC+set} +ac_env_CC_value=$CC +ac_cv_env_CC_set=${CC+set} +ac_cv_env_CC_value=$CC +ac_env_CFLAGS_set=${CFLAGS+set} +ac_env_CFLAGS_value=$CFLAGS +ac_cv_env_CFLAGS_set=${CFLAGS+set} +ac_cv_env_CFLAGS_value=$CFLAGS +ac_env_LDFLAGS_set=${LDFLAGS+set} +ac_env_LDFLAGS_value=$LDFLAGS +ac_cv_env_LDFLAGS_set=${LDFLAGS+set} +ac_cv_env_LDFLAGS_value=$LDFLAGS +ac_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_env_CPPFLAGS_value=$CPPFLAGS +ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_cv_env_CPPFLAGS_value=$CPPFLAGS + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures this package to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... 
+ +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +_ACEOF + + cat <<_ACEOF +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --datadir=DIR read-only architecture-independent data [PREFIX/share] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --infodir=DIR info documentation [PREFIX/info] + --mandir=DIR man documentation [PREFIX/man] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + + cat <<\_ACEOF + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-infiniband_ibumad_dir= Specify path to InfiniBand ibumad root directory + --with-infiniband_ibmad_dir= Specify path to InfiniBand ibmad root directory + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + CPPFLAGS C/C++ preprocessor flags, e.g. -I if you have + headers in a nonstandard directory + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +_ACEOF +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + ac_popdir=`pwd` + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d $ac_dir || continue + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. 
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + cd $ac_dir + # Check for guested configure; otherwise get Cygnus style configure. 
+ if test -f $ac_srcdir/configure.gnu; then + echo + $SHELL $ac_srcdir/configure.gnu --help=recursive + elif test -f $ac_srcdir/configure; then + echo + $SHELL $ac_srcdir/configure --help=recursive + elif test -f $ac_srcdir/configure.ac || + test -f $ac_srcdir/configure.in; then + echo + $ac_configure --help + else + echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi + cd $ac_popdir + done +fi + +test -n "$ac_init_help" && exit 0 +if $ac_init_version; then + cat <<\_ACEOF + +Copyright (C) 2003 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit 0 +fi +exec 5>config.log +cat >&5 <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by $as_me, which was +generated by GNU Autoconf 2.59. Invocation command line was + + $ $0 $@ + +_ACEOF +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +hostinfo = `(hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + echo "PATH: $as_dir" +done + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+# Build $ac_configure_args, the quoted command line recorded for later
+# re-runs.  Pass 1 collects every kept argument into ac_configure_args0;
+# pass 2 rebuilds the list in ac_configure_args1 and drops an option when
+# the same quoted word appears again further along in ac_configure_args0.
+# NOTE(review): the two `*" "*' patterns below look identical here; upstream
+# Autoconf matches a space and a tab -- verify the tab survived.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_sep=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+  for ac_arg
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+      ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    case $ac_pass in
+    1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
+    2)
+      ac_configure_args1="$ac_configure_args1 '$ac_arg'"
+      # An option that takes a separate value sets ac_must_keep_next, so
+      # the word that follows it is always kept.
+      if test $ac_must_keep_next = true; then
+        ac_must_keep_next=false # Got value, back to normal.
+      else
+        case $ac_arg in
+          *=* | --config-cache | -C | -disable-* | --disable-* \
+          | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+          | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+          | -with-* | --with-* | -without-* | --without-* | --x)
+            case "$ac_configure_args0 " in
+              "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+            esac
+            ;;
+          -* ) ac_must_keep_next=true ;;
+        esac
+      fi
+      ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
+      # Get rid of the leading space.
+      ac_sep=" "
+      ;;
+    esac
+  done
+done
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Be sure not to use single quotes in there, as some shells,
+# such as our DU 5.0 friend, will then `close' the trap.
+# The exit trap appends the cache variables, output variables, output
+# files and confdefs.h to fd 5, then removes core files, conftest*,
+# confdefs*, conf$$* and $ac_clean_files before exiting with the saved
+# status.
+trap 'exit_status=$?
+  # Save into config.log some information that might help in debugging.
+  {
+    echo
+
+    cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+    echo
+    # The following way of writing the cache mishandles newlines in values,
+{
+  (set) 2>&1 |
+    case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
+    *ac_space=\ *)
+      sed -n \
+ "s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
+      ;;
+    *)
+      sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+      ;;
+    esac;
+}
+    echo
+
+    cat <<\_ASBOX
+## ----------------- ##
+## Output variables. ##
+## ----------------- ##
+_ASBOX
+    echo
+    for ac_var in $ac_subst_vars
+    do
+      eval ac_val=$`echo $ac_var`
+      echo "$ac_var='"'"'$ac_val'"'"'"
+    done | sort
+    echo
+
+    if test -n "$ac_subst_files"; then
+      cat <<\_ASBOX
+## ------------- ##
+## Output files. ##
+## ------------- ##
+_ASBOX
+      echo
+      for ac_var in $ac_subst_files
+      do
+        eval ac_val=$`echo $ac_var`
+        echo "$ac_var='"'"'$ac_val'"'"'"
+      done | sort
+      echo
+    fi
+
+    if test -s confdefs.h; then
+      cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+      echo
+      sed "/^$/d" confdefs.h | sort
+      echo
+    fi
+    test "$ac_signal" != 0 &&
+      echo "$as_me: caught signal $ac_signal"
+    echo "$as_me: exit $exit_status"
+  } >&5
+  rm -f core *.core &&
+  rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
+    exit $exit_status
+     ' 0
+# Signals 1, 2, 13 and 15 record their number in ac_signal and exit 1,
+# which in turn runs the exit trap above.
+for ac_signal in 1 2 13 15; do
+  trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -rf conftest* confdefs.h
+# AIX cpp loses on an empty file, so make sure it contains at least a newline.
+echo >confdefs.h
+
+# Predefined preprocessor variables.
+
+# Seed confdefs.h with the package identification macros.
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer explicitly selected file to automatically selected ones.
+if test -z "$CONFIG_SITE"; then
+  if test "x$prefix" != xNONE; then
+    CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+  else
+    CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+  fi
+fi
+for ac_site_file in $CONFIG_SITE; do
+  if test -r "$ac_site_file"; then
+    { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+echo "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5
+    # `.' looks a slash-less name up in PATH, which could source a file
+    # other than the one tested and logged above; force a path lookup.
+    case $ac_site_file in
+      */*) . "$ac_site_file" ;;
+      *)   . "./$ac_site_file" ;;
+    esac
+  fi
+done
+
+# Load an existing cache, or create an empty one.  $cache_file is quoted
+# so a name containing whitespace or glob characters stays a single word.
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special
+  # files actually), so we avoid doing that.
+  if test -f "$cache_file"; then
+    { echo "$as_me:$LINENO: loading cache $cache_file" >&5
+echo "$as_me: loading cache $cache_file" >&6;}
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *)                  . "./$cache_file";;
+    esac
+  fi
+else
+  { echo "$as_me:$LINENO: creating cache $cache_file" >&5
+echo "$as_me: creating cache $cache_file" >&6;}
+  >"$cache_file"
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false +for ac_var in `(set) 2>&1 | + sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val="\$ac_cv_env_${ac_var}_value" + eval ac_new_val="\$ac_env_${ac_var}_value" + case $ac_old_set,$ac_new_set in + set,) + { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 +echo "$as_me: former value: $ac_old_val" >&2;} + { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 +echo "$as_me: current value: $ac_new_val" >&2;} + ac_cache_corrupted=: + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + + + + + + + + + + + + + + + +# looking for infiniband ibumad packages header file and library + +# Check whether --with-infiniband_ibumad_dir or --without-infiniband_ibumad_dir was given. +if test "${with_infiniband_ibumad_dir+set}" = set; then + withval="$with_infiniband_ibumad_dir" + case "$with_infiniband_ibumad_dir" in + yes|''|no) { { echo "$as_me:$LINENO: error: --with-infiniband_ibumad_dir requires a path" >&5 +echo "$as_me: error: --with-infiniband_ibumad_dir requires a path" >&2;} + { (exit 1); exit 1; }; } ;; + *) infiniband_ibumad_dir=$with_infiniband_ibumad_dir ;; + esac +else + infiniband_ibumad_dir="/usr/include/infiniband" +fi; + +CFLAGS="$CFLAGS -I$infiniband_ibumad_dir/include" +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$ac_ct_CC" && break +done + + CC=$ac_ct_CC +fi + +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. +echo "$as_me:$LINENO:" \ + "checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version &5\"") >&5 + (eval $ac_compiler --version &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v &5\"") >&5 + (eval $ac_compiler -v &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V &5\"") >&5 + (eval $ac_compiler -V &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. 
+echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6 +ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5 + (eval $ac_link_default) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Find the output, starting from the most likely. This scheme is +# not robust to junk in `.', hence go to wildcards (a.*) only as a last +# resort. + +# Be careful to initialize this variable, since it used to be cached. +# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile. +ac_cv_exeext= +# b.out is created by i960 compilers. +for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) + ;; + conftest.$ac_ext ) + # This is the source file. + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + # FIXME: I believe we export ac_cv_exeext for Libtool, + # but it would be cool to find out if it's true. Does anybody + # maintain Libtool? --akim. + export ac_cv_exeext + break;; + * ) + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: C compiler cannot create executables +See \`config.log' for more details." >&5 +echo "$as_me: error: C compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } +fi + +ac_exeext=$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6 + +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. 
+echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6 +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + fi + fi +fi +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +rm -f a.out a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6 +echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6 + +echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. 
+for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + export ac_cv_exeext + break;; + * ) break;; + esac +done +else + { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6 + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6 +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6 +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6 +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_compiler_gnu=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6 +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +CFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... 
$ECHO_C" >&6 +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_prog_cc_g=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6 +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5 +echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_stdc=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std1 is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std1. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +# Don't try gcc -ansi; that turns off useful extensions and +# breaks some systems' header files. +# AIX -qlanglvl=ansi +# Ultrix and OSF/1 -std1 +# HP-UX 10.20 and later -Ae +# HP-UX older versions -Aa -D_HPUX_SOURCE +# SVR4 -Xc -D__EXTENSIONS__ +for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_stdc=$ac_arg +break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext +done +rm -f conftest.$ac_ext conftest.$ac_objext +CC=$ac_save_CC + +fi + +case "x$ac_cv_prog_cc_stdc" in + x|xno) + echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6 ;; + *) + echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6 + CC="$CC $ac_cv_prog_cc_stdc" ;; +esac + +# Some people use a C++ compiler to compile C. Since we use `exit', +# in C++ we need to declare it. In case someone uses the same compiler +# for both compiling C and C++ we need to have the C++ compiler decide +# the declaration of exit, since it's the most demanding environment. +cat >conftest.$ac_ext <<_ACEOF +#ifndef __cplusplus + choke me +#endif +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + for ac_declaration in \ + '' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +#include +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +continue +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +for ac_header in infiniband/umad.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +cat >>confdefs.h <<\_ACEOF +#define HAVE_INFINIBAND_H 1 +_ACEOF + +else + { { echo "$as_me:$LINENO: error: umad.h not found. See --with-infiniband_ibumad_dir " >&5 +echo "$as_me: error: umad.h not found. See --with-infiniband_ibumad_dir " >&2;} + { (exit 1); exit 1; }; } +fi + +done + + +LDFLAGS="$LDFLAGS -L$infiniband_ibumad_dir/lib64 -libumad" + +echo "$as_me:$LINENO: checking for umad_init in -libumad" >&5 +echo $ECHO_N "checking for umad_init in -libumad... $ECHO_C" >&6 +if test "${ac_cv_lib_ibumad_umad_init+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-libumad $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. 
*/ +char umad_init (); +int +main () +{ +umad_init (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_ibumad_umad_init=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_ibumad_umad_init=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_ibumad_umad_init" >&5 +echo "${ECHO_T}$ac_cv_lib_ibumad_umad_init" >&6 +if test $ac_cv_lib_ibumad_umad_init = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBIBUMAD 1 +_ACEOF + + LIBS="-libumad $LIBS" + +else + { { echo "$as_me:$LINENO: error: libibumad.a is needed for the PAPI infiniband component" >&5 +echo "$as_me: error: libibumad.a is needed for the PAPI infiniband component" >&2;} + { (exit 1); exit 1; }; } +fi + + + +# looking for infiniband ibmad packages header file and library + +# Check whether --with-infiniband_ibmad_dir or --without-infiniband_ibmad_dir was given. 
+if test "${with_infiniband_ibmad_dir+set}" = set; then + withval="$with_infiniband_ibmad_dir" + case "$with_infiniband_ibmad_dir" in + yes|''|no) { { echo "$as_me:$LINENO: error: --with-infiniband_ibmad_dir requires a path" >&5 +echo "$as_me: error: --with-infiniband_ibmad_dir requires a path" >&2;} + { (exit 1); exit 1; }; } ;; + *) infiniband_ibmad_dir=$with_infiniband_ibmad_dir ;; + esac +else + infiniband_ibmad_dir="/usr/include/infiniband" +fi; + +CFLAGS="$CFLAGS -I$infiniband_ibmad_dir/include" + +for ac_header in infiniband/mad.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +cat >>confdefs.h <<\_ACEOF +#define HAVE_INFINIBAND_H 1 +_ACEOF + +else + { { echo "$as_me:$LINENO: error: mad.h not found. See --with-infiniband_ibmad_dir " >&5 +echo "$as_me: error: mad.h not found. See --with-infiniband_ibmad_dir " >&2;} + { (exit 1); exit 1; }; } +fi + +done + + +LDFLAGS="$LDFLAGS -L$infiniband_ibmad_dir/lib64 -libmad -L$infiniband_ibumad_dir/lib64 -libumad" + +echo "$as_me:$LINENO: checking for madrpc_init in -libmad" >&5 +echo $ECHO_N "checking for madrpc_init in -libmad... $ECHO_C" >&6 +if test "${ac_cv_lib_ibmad_madrpc_init+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-libmad $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char madrpc_init (); +int +main () +{ +madrpc_init (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_ibmad_madrpc_init=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_ibmad_madrpc_init=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_ibmad_madrpc_init" >&5 +echo "${ECHO_T}$ac_cv_lib_ibmad_madrpc_init" >&6 +if test $ac_cv_lib_ibmad_madrpc_init = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBIBMAD 1 +_ACEOF + + LIBS="-libmad $LIBS" + +else + { { echo "$as_me:$LINENO: error: libibmad.a is needed for the PAPI infiniband component" >&5 +echo "$as_me: error: libibmad.a is needed for the PAPI infiniband component" >&2;} + { (exit 1); exit 1; }; } +fi + + + + + +## AC_SUBST(infiniband_libdir) + ac_config_files="$ac_config_files Makefile.infiniband_umad" + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. 
+ +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +{ + (set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} | + sed ' + t clear + : clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + : end' >>confcache +if diff $cache_file confcache >/dev/null 2>&1; then :; else + if test -w $cache_file; then + test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file" + cat confcache >$cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/; +s/:*\${srcdir}:*/:/; +s/:*@srcdir@:*/:/; +s/^\([^=]*=[ ]*\):*/\1/; +s/:*$//; +s/^[^=]*=[ ]*$//; +}' +fi + +# Transform confdefs.h into DEFS. 
+# Protect against shell expansion while executing Makefile rules. +# Protect against Makefile macro expansion. +# +# If the first sed substitution is executed (which looks for macros that +# take arguments), then we branch to the quote section. Otherwise, +# look for a macro that doesn't take arguments. +cat >confdef2opt.sed <<\_ACEOF +t clear +: clear +s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\),-D\1=\2,g +t quote +s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\),-D\1=\2,g +t quote +d +: quote +s,[ `~#$^&*(){}\\|;'"<>?],\\&,g +s,\[,\\&,g +s,\],\\&,g +s,\$,$$,g +p +_ACEOF +# We use echo to avoid assuming a particular line-breaking character. +# The extra dot is to prevent the shell from consuming trailing +# line-breaks from the sub-command output. A line-break within +# single-quotes doesn't work because, if this script is created in a +# platform that uses two characters for line-breaks (e.g., DOS), tr +# would break. +ac_LF_and_DOT=`echo; echo .` +DEFS=`sed -n -f confdef2opt.sed confdefs.h | tr "$ac_LF_and_DOT" ' .'` +rm -f confdef2opt.sed + + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_i=`echo "$ac_i" | + sed 's/\$U\././;s/\.o$//;s/\.obj$//'` + # 2. Add them. + ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: ${CONFIG_STATUS=./config.status} +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. 
+ +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 +echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 +echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. 
+ exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + +exec 6>&1 + +# Open the log real soon, to keep \$[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. Logging --version etc. is OK. +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX +} >&5 +cat >&5 <<_CSEOF + +This file was extended by $as_me, which was +generated by GNU Autoconf 2.59. 
Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +_CSEOF +echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 +echo >&5 +_ACEOF + +# Files that config.status was made for. +if test -n "$ac_config_files"; then + echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_headers"; then + echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_links"; then + echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_commands"; then + echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS +fi + +cat >>$CONFIG_STATUS <<\_ACEOF + +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + +Configuration files: +$config_files + +Report bugs to ." +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.59, + with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2003 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." +srcdir=$srcdir +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If no file are specified by the user, then we need to provide default +# value. By we need to know if files were specified by the user. 
+ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "x$1" : 'x\([^=]*\)='` + ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` + ac_shift=: + ;; + -*) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + *) # This is not an option, so the user has probably given explicit + # arguments. + ac_option=$1 + ac_need_defaults=false;; + esac + + case $ac_option in + # Handling of the options. +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --vers* | -V ) + echo "$ac_cs_version"; exit 0 ;; + --he | --h) + # Conflict between --help and --header + { { echo "$as_me:$LINENO: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + CONFIG_FILES="$CONFIG_FILES $ac_optarg" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" + ac_need_defaults=false;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." 
>&2;} + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +if \$ac_cs_recheck; then + echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 + exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion +fi + +_ACEOF + + + + + +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_config_target in $ac_config_targets +do + case "$ac_config_target" in + # Handling of arguments. + "Makefile.infiniband_umad" ) CONFIG_FILES="$CONFIG_FILES Makefile.infiniband_umad" ;; + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason to put it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Create a temporary directory, and hook for its removal unless debugging. +$debug || +{ + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} + +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./confstat$$-$RANDOM + (umask 077 && mkdir $tmp) +} || +{ + echo "$me: cannot create a temporary directory in ." 
>&2 + { (exit 1); exit 1; } +} + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF + +# +# CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. +# This happens for instance when ./config.status config.h +if test -n "\$CONFIG_FILES"; then + # Protect against being on the right side of a sed subst in config.status. + sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g; + s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF +s,@SHELL@,$SHELL,;t t +s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t +s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t +s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t +s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t +s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t +s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t +s,@exec_prefix@,$exec_prefix,;t t +s,@prefix@,$prefix,;t t +s,@program_transform_name@,$program_transform_name,;t t +s,@bindir@,$bindir,;t t +s,@sbindir@,$sbindir,;t t +s,@libexecdir@,$libexecdir,;t t +s,@datadir@,$datadir,;t t +s,@sysconfdir@,$sysconfdir,;t t +s,@sharedstatedir@,$sharedstatedir,;t t +s,@localstatedir@,$localstatedir,;t t +s,@libdir@,$libdir,;t t +s,@includedir@,$includedir,;t t +s,@oldincludedir@,$oldincludedir,;t t +s,@infodir@,$infodir,;t t +s,@mandir@,$mandir,;t t +s,@build_alias@,$build_alias,;t t +s,@host_alias@,$host_alias,;t t +s,@target_alias@,$target_alias,;t t +s,@DEFS@,$DEFS,;t t +s,@ECHO_C@,$ECHO_C,;t t +s,@ECHO_N@,$ECHO_N,;t t +s,@ECHO_T@,$ECHO_T,;t t +s,@LIBS@,$LIBS,;t t +s,@CC@,$CC,;t t +s,@CFLAGS@,$CFLAGS,;t t +s,@LDFLAGS@,$LDFLAGS,;t t +s,@CPPFLAGS@,$CPPFLAGS,;t t +s,@ac_ct_CC@,$ac_ct_CC,;t t +s,@EXEEXT@,$EXEEXT,;t t +s,@OBJEXT@,$OBJEXT,;t t +s,@infiniband_ibmad_dir@,$infiniband_ibmad_dir,;t t +s,@infiniband_ibumad_dir@,$infiniband_ibumad_dir,;t t +s,@LIBOBJS@,$LIBOBJS,;t t +s,@LTLIBOBJS@,$LTLIBOBJS,;t t +CEOF + +_ACEOF + + cat >>$CONFIG_STATUS <<\_ACEOF + # Split the substitutions into bite-sized pieces for seds with + # small command number limits, like on Digital OSF/1 and HP-UX. 
+ ac_max_sed_lines=48 + ac_sed_frag=1 # Number of current file. + ac_beg=1 # First line for current file. + ac_end=$ac_max_sed_lines # Line after last line for current file. + ac_more_lines=: + ac_sed_cmds= + while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + else + sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + fi + if test ! -s $tmp/subs.frag; then + ac_more_lines=false + else + # The purpose of the label and of the branching condition is to + # speed up the sed processing (if there are no `@' at all, there + # is no need to browse any of the substitutions). + # These are the two extra sed commands mentioned above. + (echo ':t + /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" + else + ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" + fi + ac_sed_frag=`expr $ac_sed_frag + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_lines` + fi + done + if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat + fi +fi # test -n "$CONFIG_FILES" + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . 
: '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! -n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) 
ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + + + if test x"$ac_file" != x-; then + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + rm -f "$ac_file" + fi + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + configure_input= + else + configure_input="$ac_file. " + fi + configure_input=$configure_input"Generated from `echo $ac_file_in | + sed 's,.*/,,'` by configure." + + # First look for the input files in the build tree, otherwise in the + # src tree. 
+ ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + sed "$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s,@configure_input@,$configure_input,;t t +s,@srcdir@,$ac_srcdir,;t t +s,@abs_srcdir@,$ac_abs_srcdir,;t t +s,@top_srcdir@,$ac_top_srcdir,;t t +s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t +s,@builddir@,$ac_builddir,;t t +s,@abs_builddir@,$ac_abs_builddir,;t t +s,@top_builddir@,$ac_top_builddir,;t t +s,@abs_top_builddir@,$ac_abs_top_builddir,;t t +" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out + rm -f $tmp/stdin + if test x"$ac_file" != x-; then + mv $tmp/out $ac_file + else + cat $tmp/out + rm -f $tmp/out + fi + +done +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. 
When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || { (exit 1); exit 1; } +fi + diff --git a/src/components/infiniband_umad/configure.in b/src/components/infiniband_umad/configure.in new file mode 100644 index 0000000..9d4dd7c --- /dev/null +++ b/src/components/infiniband_umad/configure.in @@ -0,0 +1,57 @@ +# Process this file with autoconf to produce a configure script. +# File: components/infiniband/configure.in +# CVS: $Id$ + +AC_INIT + + +# looking for infiniband ibumad packages header file and library +AC_ARG_WITH(infiniband_ibumad_dir, + [ --with-infiniband_ibumad_dir= Specify path to InfiniBand ibumad root directory ], + [case "$with_infiniband_ibumad_dir" in + yes|''|no) AC_MSG_ERROR([--with-infiniband_ibumad_dir requires a path]) ;; + *) infiniband_ibumad_dir=$with_infiniband_ibumad_dir ;; + esac], + [infiniband_ibumad_dir="/usr/include/infiniband"]) + +CFLAGS="$CFLAGS -I$infiniband_ibumad_dir/include" +AC_CHECK_HEADERS([infiniband/umad.h], + [AC_DEFINE([HAVE_INFINIBAND_H], [1], [infiniband header] )], + [AC_MSG_ERROR([umad.h not found. 
See --with-infiniband_ibumad_dir ])], + [#include ]) + +LDFLAGS="$LDFLAGS -L$infiniband_ibumad_dir/lib64 -libumad" +AC_CHECK_LIB([ibumad], + [umad_init], + [], + [AC_MSG_ERROR([libibumad.a is needed for the PAPI infiniband component])]) + + +# looking for infiniband ibmad packages header file and library +AC_ARG_WITH(infiniband_ibmad_dir, + [ --with-infiniband_ibmad_dir= Specify path to InfiniBand ibmad root directory ], + [case "$with_infiniband_ibmad_dir" in + yes|''|no) AC_MSG_ERROR([--with-infiniband_ibmad_dir requires a path]) ;; + *) infiniband_ibmad_dir=$with_infiniband_ibmad_dir ;; + esac], + [infiniband_ibmad_dir="/usr/include/infiniband"]) + +CFLAGS="$CFLAGS -I$infiniband_ibmad_dir/include" +AC_CHECK_HEADERS([infiniband/mad.h], + [AC_DEFINE([HAVE_INFINIBAND_H], [1], [infiniband header] )], + [AC_MSG_ERROR([mad.h not found. See --with-infiniband_ibmad_dir ])], + [#include ]) + +LDFLAGS="$LDFLAGS -L$infiniband_ibmad_dir/lib64 -libmad -L$infiniband_ibumad_dir/lib64 -libumad" +AC_CHECK_LIB([ibmad], + [madrpc_init], + [], + [AC_MSG_ERROR([libibmad.a is needed for the PAPI infiniband component])]) + + +AC_SUBST(infiniband_ibmad_dir) +AC_SUBST(infiniband_ibumad_dir) +## AC_SUBST(infiniband_libdir) +AC_CONFIG_FILES([Makefile.infiniband_umad]) + +AC_OUTPUT diff --git a/src/components/infiniband_umad/linux-infiniband_umad.c b/src/components/infiniband_umad/linux-infiniband_umad.c new file mode 100644 index 0000000..2c3ca00 --- /dev/null +++ b/src/components/infiniband_umad/linux-infiniband_umad.c @@ -0,0 +1,960 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-infiniband_umad.c + * @author Heike Jagode (in collaboration with Michael Kluge, TU Dresden) + * jagode@eecs.utk.edu + * + * @ingroup papi_components + * + * InfiniBand component + * + * Tested version of OFED: 1.4 + * + * @brief + * This file has the source code for a component that enables PAPI-C to + * access hardware monitoring 
counters for InfiniBand devices through the + * OFED library. Since a new interface was introduced with OFED version 1.4 + * (released Dec 2008), the current InfiniBand component does not support + * OFED versions < 1.4. + */ +#include + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#include "linux-infiniband_umad.h" + +void (*_dl_non_dynamic_init)(void) __attribute__((weak)); + +/******** CHANGE PROTOTYPES TO DECLARE Infiniband LIBRARY SYMBOLS AS WEAK ********** + * This is done so that a version of PAPI built with the infiniband component can * + * be installed on a system which does not have the infiniband libraries installed. * + * * + * If this is done without these prototypes, then all papi services on the system * + * without the infiniband libraries installed will fail. The PAPI libraries * + * contain references to the infiniband libraries which are not installed. The * + * load of PAPI commands fails because the infiniband library references can not * + * be resolved. * + * * + * This also defines pointers to the infiniband library functions that we call. * + * These function pointers will be resolved with dlopen/dlsym calls at component * + * initialization time. The component then calls the infiniband library functions * + * through these function pointers. 
* + *************************************************************************************/ +int __attribute__((weak)) umad_init ( void ); +int __attribute__((weak)) umad_get_cas_names ( char [][UMAD_CA_NAME_LEN], int ); +int __attribute__((weak)) umad_get_ca ( char *, umad_ca_t * ); +void __attribute__((weak)) mad_decode_field ( unsigned char *, enum MAD_FIELDS, void *); +struct ibmad_port * __attribute__((weak)) mad_rpc_open_port ( char *, int, int *, int ); +int __attribute__((weak)) ib_resolve_self_via ( ib_portid_t *, int *, ibmad_gid_t *, const struct ibmad_port * ); +uint8_t * __attribute__((weak)) performance_reset_via ( void *, ib_portid_t *, int, unsigned, unsigned, unsigned, const struct ibmad_port * ); +uint8_t * __attribute__((weak)) pma_query_via ( void *, ib_portid_t *, int, unsigned, unsigned, const struct ibmad_port * ); + +int (*umad_initPtr) ( void ); +int (*umad_get_cas_namesPtr) ( char [][UMAD_CA_NAME_LEN], int ); +int (*umad_get_caPtr) ( char *, umad_ca_t * ); +void (*mad_decode_fieldPtr) ( unsigned char *, enum MAD_FIELDS, void * ); +struct ibmad_port * (*mad_rpc_open_portPtr) ( char *, int, int *, int ); +int (*ib_resolve_self_viaPtr) (ib_portid_t *, int *, ibmad_gid_t *, const struct ibmad_port * ); +uint8_t * (*performance_reset_viaPtr) (void *, ib_portid_t *, int, unsigned, unsigned, unsigned, const struct ibmad_port * ); +uint8_t * (*pma_query_viaPtr) (void *, ib_portid_t *, int, unsigned, unsigned, const struct ibmad_port * ); + +// file handles used to access Infiniband libraries with dlopen +static void* dl1 = NULL; +static void* dl2 = NULL; + +static int linkInfinibandLibraries (); + +papi_vector_t _infiniband_umad_vector; + + + +struct ibmad_port *srcport; +static ib_portid_t portid; +static int ib_timeout = 0; +static int ibportnum = 0; + +static counter_info *subscriptions[INFINIBAND_MAX_COUNTERS]; +static int is_initialized = 0; +static int num_counters = 0; +static int is_finalized = 0; + +/* counters are kept in a list */ 
+static counter_info *root_counter = NULL;
+/* IB ports found are kept in a list */
+static ib_port *root_ib_port = NULL;
+static ib_port *active_ib_port = NULL;
+
+#define infiniband_native_table subscriptions
+/* macro to initialize entire structs to 0 */
+#define InitStruct(var, type) type var; memset(&var, 0, sizeof(type))
+
+long long _papi_hwd_infiniband_register_start[INFINIBAND_MAX_COUNTERS];
+long long _papi_hwd_infiniband_register[INFINIBAND_MAX_COUNTERS];
+
+
+/*******************************************************************************
+ ******** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *********
+ ******************************************************************************/
+
+/**
+ * Use libumad to discover the IB channel adapters (CAs) of this host and
+ * register every port of every usable adapter through addIBPort().
+ *
+ * The library is reached only through the umad_*Ptr function pointers.
+ * A failure to list the adapters, or to read one of them, is fatal: a
+ * message is written to stderr and the process exits with status 1.
+ */
+static void
+init_ib_counter( )
+{
+	char names[20][UMAD_CA_NAME_LEN];
+	/* capacity of names[]: how many adapter names it can hold */
+	const int max_names = ( int ) ( sizeof ( names ) / sizeof ( names[0] ) );
+	umad_ca_t ca;
+	int n, i;
+	int portnum;
+
+//	if ( umad_init( ) < 0 ) {
+//		fprintf( stderr, "can't init UMAD library\n" );
+//		exit( 1 );
+//	}
+
+	/* The second argument is the number of entries the array can take
+	   (see umad_get_cas_names(3)), not the length of a single name. */
+	n = ( *umad_get_cas_namesPtr ) ( names, max_names );
+	if ( n < 0 ) {
+		fprintf( stderr, "can't list IB device names\n" );
+		exit( 1 );
+	}
+
+	for ( i = 0; i < n; i++ ) {
+		if ( ( *umad_get_caPtr ) ( names[i], &ca ) < 0 ) {
+			fprintf( stderr, "can't read ca from IB device\n" );
+			exit( 1 );
+		}
+
+		/* adapters that report no node type are skipped */
+		if ( !ca.node_type )
+			continue;
+
+		/* port numbers are '1' based in OFED */
+		for ( portnum = 1; portnum <= ca.numports; portnum++ ) {
+			/* addIBPort() dereferences the port unconditionally, so a
+			   slot without a port description must not be passed on */
+			if ( ca.ports[portnum] == NULL )
+				continue;
+			addIBPort( ca.ca_name, ca.ports[portnum] );
+		}
+	}
+}
+
+
+/**
+ * add a counter to the list of available counters
+ *
+ * The three strings are duplicated, so the caller keeps ownership of its
+ * arguments. The new entry is appended at the tail of the root_counter
+ * list. Running out of memory is fatal (message on stderr, exit status 1).
+ *
+ * @param name the short name of the counter
+ * @param desc a longer description
+ * @param unit the unit for this counter
+ * @return the newly created list entry
+ */
+static counter_info *
+addCounter( const char *name, const char *desc, const char *unit )
+{
+	counter_info *cntr, *last;
+
+	/* calloc so that fields not assigned below do not hold garbage */
+	cntr = calloc( 1, sizeof ( *cntr ) );
+	if ( cntr == NULL ) {
+		fprintf( stderr, "can not allocate memory for new counter\n" );
+		exit( 1 );
+	}
+	cntr->name = strdup( name );
+	cntr->description = strdup( desc );
+	cntr->unit = strdup( unit );
+	/* strdup allocates as well; a NULL name would crash later in strcmp */
+	if ( cntr->name == NULL || cntr->description == NULL ||
+		 cntr->unit == NULL ) {
+		fprintf( stderr, "can not allocate memory for new counter\n" );
+		exit( 1 );
+	}
+	cntr->value = 0;
+	cntr->next = NULL;
+
+	if ( root_counter == NULL ) {
+		root_counter = cntr;
+	} else {
+		last = root_counter;
+		while ( last->next != NULL )
+			last = last->next;
+		last->next = cntr;
+	}
+
+	return cntr;
+}
+
+
+/**
+ * add one IB port to the list of available ports and add the
+ * counters related to this port to the global counter list
+ *
+ * The port is named "<ca_name>_<portnum>"; its two counters are named
+ * "<ca_name>_<portnum>_recv" and "<ca_name>_<portnum>_send". num_counters
+ * grows by two. Running out of memory is fatal (exit status 1).
+ *
+ * @param ca_name name of the channel adapter the port belongs to
+ * @param port    libumad description of the port; must not be NULL
+ */
+static void
+addIBPort( const char *ca_name, umad_port_t * port )
+{
+	ib_port *nwif, *last;
+	char counter_name[512];
+
+	nwif = calloc( 1, sizeof ( *nwif ) );
+	if ( nwif == NULL ) {
+		fprintf( stderr, "can not allocate memory for IB port description\n" );
+		exit( 1 );
+	}
+
+	/* snprintf keeps the generated names inside counter_name[] */
+	snprintf( counter_name, sizeof ( counter_name ), "%s_%d", ca_name,
+			  port->portnum );
+	nwif->name = strdup( counter_name );
+	if ( nwif->name == NULL ) {
+		fprintf( stderr, "can not allocate memory for IB port description\n" );
+		exit( 1 );
+	}
+
+	snprintf( counter_name, sizeof ( counter_name ), "%s_%d_recv", ca_name,
+			  port->portnum );
+	nwif->recv_cntr =
+		addCounter( counter_name, "bytes received on this IB port", "bytes" );
+
+	snprintf( counter_name, sizeof ( counter_name ), "%s_%d_send", ca_name,
+			  port->portnum );
+	nwif->send_cntr =
+		addCounter( counter_name, "bytes written to this IB port", "bytes" );
+
+	nwif->port_rate = port->rate;
+	nwif->is_initialized = 0;
+	nwif->port_number = port->portnum;
+	nwif->next = NULL;
+
+	num_counters += 2;
+
+	if ( root_ib_port == NULL ) {
+		root_ib_port = nwif;
+	} else {
+		last = root_ib_port;
+		while ( last->next != NULL )
+			last = last->next;
+		last->next = nwif;
+	}
+}
+
+
+/**
+ * initialize one IB port so that we are able to read values from it
+ *
+ * Opens the port through libibmad (stored in the global srcport), resolves
+ * the local port id (globals portid / ibportnum), queries the performance
+ * management class info, resets the port counters and records the starting
+ * values in *portdata. Every libibmad failure is fatal (exit status 1).
+ *
+ * @param portdata the port to initialize; marked is_initialized on success
+ * @return always 0
+ */
+static int
+init_ib_port( ib_port * portdata )
+{
+	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
+		IB_PERFORMANCE_CLASS
+	};
+	/* no device name is passed to libibmad */
+	char *ca = 0;
+	static uint8_t pc[1024];
+	int mask = 0xFFFF;
+
+	srcport = ( *mad_rpc_open_portPtr ) ( ca, portdata->port_number,
+										  mgmt_classes, 4 );
+	if ( !srcport ) {
+		/* ca is a null pointer here; handing it to %s is undefined */
+		fprintf( stderr, "Failed to open '%s' port '%d'\n",
+				 ca ? ca : "(default)", portdata->port_number );
+		exit( 1 );
+	}
+
+	if ( ( *ib_resolve_self_viaPtr ) ( &portid, &ibportnum, 0, srcport ) < 0 ) {
+		fprintf( stderr, "can't resolve self port\n" );
+		exit( 1 );
+	}
+
+	/* PerfMgt ClassPortInfo is a required attribute */
+	/* might be redundant, could be left out for fast implementation */
+	if ( !( *pma_query_viaPtr ) ( pc, &portid, ibportnum, ib_timeout,
+								  CLASS_PORT_INFO, srcport ) ) {
+		fprintf( stderr, "classportinfo query\n" );
+		exit( 1 );
+	}
+
+	if ( !( *performance_reset_viaPtr ) ( pc, &portid, ibportnum, mask,
+										  ib_timeout, IB_GSI_PORT_COUNTERS,
+										  srcport ) ) {
+		fprintf( stderr, "perf reset\n" );
+		exit( 1 );
+	}
+
+	/* read the initial values */
+	( *mad_decode_fieldPtr ) ( pc, IB_PC_XMT_BYTES_F,
+							   &portdata->last_send_val );
+	portdata->sum_send_val = 0;
+	( *mad_decode_fieldPtr ) ( pc, IB_PC_RCV_BYTES_F,
+							   &portdata->last_recv_val );
+	portdata->sum_recv_val = 0;
+
+	portdata->is_initialized = 1;
+
+	return 0;
+}
+
+
+/**
+ * read and reset IB counters (reset on demand)
+ *
+ * Queries the port counters of the active port, adds the traffic seen since
+ * the previous call to the running sums and publishes the sums through the
+ * port's send/recv counter_info entries. Once either hardware counter
+ * exceeds 70% of the 32 bit range the hardware counters are reset.
+ * Does nothing when no port is active. libibmad failures are fatal.
+ *
+ * @return always 0
+ */
+static int
+read_ib_counter( )
+{
+	uint32_t send_val;
+	uint32_t recv_val;
+	uint8_t pc[1024];
+	/* 32 bit counter FFFFFFFF */
+	uint32_t max_val = 4294967295;
+	/* if it is bigger than this -> reset */
+	uint32_t reset_limit = max_val * 0.7;
+	int mask = 0xFFFF;
+
+	if ( active_ib_port == NULL )
+		return 0;
+
+	/* reading cost ~70 micro secs */
+	if ( !( *pma_query_viaPtr ) ( pc, &portid, ibportnum, ib_timeout,
+								  IB_GSI_PORT_COUNTERS, srcport ) ) {
+		fprintf( stderr, "perfquery\n" );
+		exit( 1 );
+	}
+
+	( *mad_decode_fieldPtr ) ( pc, IB_PC_XMT_BYTES_F, &send_val );
+	( *mad_decode_fieldPtr ) ( pc, IB_PC_RCV_BYTES_F, &recv_val );
+
+	/* multiply the numbers read by 4 as the IB port counters are not
+	   counting bytes. they always count 32dwords. see man page of
+	   perfquery for details
+	   internally a uint64_t is used to sum up the values.
+	   The delta is widened to 64 bits BEFORE the multiplication so that
+	   the product is not truncated when the delta is a 32 bit value. */
+	active_ib_port->sum_send_val +=
+		( uint64_t ) ( send_val - active_ib_port->last_send_val ) * 4;
+	active_ib_port->sum_recv_val +=
+		( uint64_t ) ( recv_val - active_ib_port->last_recv_val ) * 4;
+
+	active_ib_port->send_cntr->value = active_ib_port->sum_send_val;
+	active_ib_port->recv_cntr->value = active_ib_port->sum_recv_val;
+
+	if ( send_val > reset_limit || recv_val > reset_limit ) {
+		/* reset cost ~70 micro secs */
+		if ( !( *performance_reset_viaPtr ) ( pc, &portid, ibportnum, mask,
+											  ib_timeout,
+											  IB_GSI_PORT_COUNTERS,
+											  srcport ) ) {
+			fprintf( stderr, "perf reset\n" );
+			exit( 1 );
+		}
+
+		( *mad_decode_fieldPtr ) ( pc, IB_PC_XMT_BYTES_F,
+								   &active_ib_port->last_send_val );
+		( *mad_decode_fieldPtr ) ( pc, IB_PC_RCV_BYTES_F,
+								   &active_ib_port->last_recv_val );
+	} else {
+		active_ib_port->last_send_val = send_val;
+		active_ib_port->last_recv_val = recv_val;
+	}
+
+	return 0;
+}
+
+
+/**
+ * Refresh the IB counters and copy the current value of every subscribed
+ * counter into data[], in subscription order.
+ *
+ * Copying stops at the first empty subscription slot, so data[] must have
+ * room for one entry per subscription (at most INFINIBAND_MAX_COUNTERS).
+ *
+ * @param data output array receiving one value per subscribed counter
+ */
+void
+host_read_values( long long *data )
+{
+	int loop;
+
+	read_ib_counter( );
+
+	for ( loop = 0; loop < INFINIBAND_MAX_COUNTERS; loop++ ) {
+		if ( subscriptions[loop] == NULL )
+			break;
+
+		data[loop] = subscriptions[loop]->value;
+	}
+}
+
+
+/**
+ * find the pointer for a counter_info structure based on the counter name
+ *
+ * An unknown name is fatal: the requested name, the host name and the list
+ * of known counters are written to stderr and the process exits with
+ * status 1.
+ *
+ * @param cntr name of the counter to look up
+ * @return the matching entry of the root_counter list
+ */
+static counter_info *
+counterFromName( const char *cntr )
+{
+	char tmp[512];
+	counter_info *local_cntr;
+
+	for ( local_cntr = root_counter; local_cntr != NULL;
+		  local_cntr = local_cntr->next ) {
+		if ( strcmp( cntr, local_cntr->name ) == 0 )
+			return local_cntr;
+	}
+
+	/* gethostname may fail, and need not terminate a truncated name */
+	if ( gethostname( tmp, sizeof ( tmp ) ) != 0 )
+		tmp[0] = '\0';
+	tmp[sizeof ( tmp ) - 1] = '\0';
+
+	fprintf( stderr, "can not find host counter: %s on %s\n", cntr, tmp );
+	fprintf( stderr, "we only have: " );
+
+	for ( local_cntr = root_counter; local_cntr != NULL;
+		  local_cntr = local_cntr->next )
+		fprintf( stderr, "'%s' ", local_cntr->name );
+
+	fprintf( stderr, "\n" );
+	exit( 1 );
+	/* never reached */
+	return 0;
+}
+
+
+/**
+ * allow external code
to subscribe to a counter based on the counter name + */ +static uint64_t +host_subscribe( const char *cntr ) +{ + int loop; + int len; + char tmp_name[512]; + ib_port *aktp; + + counter_info *counter = counterFromName( cntr ); + + for ( loop = 0; loop < INFINIBAND_MAX_COUNTERS; loop++ ) { + if ( subscriptions[loop] == NULL ) { + subscriptions[loop] = counter; + counter->idx = loop; + + /* we have an IB counter if the name ends with _send or _recv and + the prefix before that is in the ib_port list */ + if ( ( len = strlen( cntr ) ) > 5 ) { + if ( strcmp( &cntr[len - 5], "_recv" ) == 0 || + strcmp( &cntr[len - 5], "_send" ) == 0 ) { + /* look through all IB_counters */ + strncpy( tmp_name, cntr, len - 5 ); + tmp_name[len - 5] = 0; + aktp = root_ib_port; + // printf("looking for IB port '%s'\n", tmp_name); + while ( aktp != NULL ) { + if ( strcmp( aktp->name, tmp_name ) == 0 ) { + if ( !aktp->is_initialized ) { + init_ib_port( aktp ); + active_ib_port = aktp; + } + return loop + 1; + } + /* name does not match, if this counter is + initialized, we can't have two active IB ports */ + if ( aktp->is_initialized ) { +#if 0 /* not necessary with OFED version >= 1.4 */ + fprintf( stderr, + "unable to activate IB port monitoring for more than one port\n" ); + exit( 1 ); +#endif + } + aktp = aktp->next; + } + } + } + return loop + 1; + } + } + fprintf( stderr, "please subscribe only once to each counter\n" ); + exit( 1 ); + /* never reached */ + return 0; +} + + +/** + * return a newly allocated list of strings containing all counter names + */ +static string_list * +host_listCounter( int num_counters1 ) +{ + string_list *list; + counter_info *cntr = root_counter; + + list = malloc( sizeof ( string_list ) ); + if ( list == NULL ) { + fprintf( stderr, "unable to allocate memory for new string_list" ); + exit( 1 ); + } + list->count = 0; + list->data = ( char ** ) malloc( num_counters1 * sizeof ( char * ) ); + + if ( list->data == NULL ) { + fprintf( stderr, + "unable to 
allocate memory for %d pointers in a new string_list\n", + num_counters1 ); + exit( 1 ); + } + + while ( cntr != NULL ) { + list->data[list->count++] = strdup( cntr->name ); + cntr = cntr->next; + } + + return list; +} + + +/** + * finalizes the library + */ +static void +host_finalize( ) +{ + counter_info *cntr, *next; + + if ( is_finalized ) + return; + + cntr = root_counter; + + while ( cntr != NULL ) { + next = cntr->next; + free( cntr->name ); + free( cntr->description ); + free( cntr->unit ); + free( cntr ); + cntr = next; + } + + root_counter = NULL; + + is_finalized = 1; +} + + +/** + * delete a list of strings + */ +static void +host_deleteStringList( string_list * to_delete ) +{ + int loop; + + if ( to_delete->data != NULL ) { + for ( loop = 0; loop < to_delete->count; loop++ ) + free( to_delete->data[loop] ); + + free( to_delete->data ); + } + + free( to_delete ); +} + + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized + */ +int +INFINIBAND_init_thread( hwd_context_t * ctx ) +{ + string_list *counter_list = NULL; + int i; + int loop; + + /* initialize portid struct of type ib_portid_t to 0 */ + InitStruct( portid, ib_portid_t ); + + if ( is_initialized ) + return PAPI_OK; + + is_initialized = 1; + + init_ib_counter( ); + + for ( loop = 0; loop < INFINIBAND_MAX_COUNTERS; loop++ ) + subscriptions[loop] = NULL; + + counter_list = host_listCounter( num_counters ); + + for ( i = 0; i < counter_list->count; i++ ) + host_subscribe( counter_list->data[i] ); + + ( ( INFINIBAND_context_t * ) ctx )->state.ncounter = counter_list->count; + + host_deleteStringList( counter_list ); + + return PAPI_OK; +} + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is 
called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +int +INFINIBAND_init_component( int cidx ) +{ + SUBDBG ("Entry: cidx: %d\n", cidx); + int i; + + /* link in all the infiniband libraries and resolve the symbols we need to use */ + if (linkInfinibandLibraries() != PAPI_OK) { + SUBDBG ("Dynamic link of Infiniband libraries failed, component will be disabled.\n"); + SUBDBG ("See disable reason in papi_component_avail output for more details.\n"); + return (PAPI_ENOSUPP); + } + + /* make sure that the infiniband library finds the kernel module loaded. */ + if ( (*umad_initPtr)( ) < 0 ) { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Call to initialize umad library failed.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + + for ( i = 0; i < INFINIBAND_MAX_COUNTERS; i++ ) { + _papi_hwd_infiniband_register_start[i] = -1; + _papi_hwd_infiniband_register[i] = -1; + } + + /* Export the component id */ + _infiniband_umad_vector.cmp_info.CmpIdx = cidx; + + return ( PAPI_OK ); +} + + +/* + * Link the necessary Infiniband libraries to use the Infiniband component. If any of them can not be found, then + * the Infiniband component will just be disabled. This is done at runtime so that a version of PAPI built + * with the Infiniband component can be installed and used on systems which have the Infiniband libraries installed + * and on systems where these libraries are not installed. 
+ */ +static int +linkInfinibandLibraries () +{ + /* Attempt to guess if we were statically linked to libc, if so bail */ + if ( _dl_non_dynamic_init != NULL ) { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "The Infiniband component does not support statically linking of libc.", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + + /* Need to link in the Infiniband libraries, if not found disable the component */ + dl1 = dlopen("libibumad.so", RTLD_NOW | RTLD_GLOBAL); + if (!dl1) + { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband library libibumad.so not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + umad_initPtr = dlsym(dl1, "umad_init"); + if (dlerror() != NULL) + { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function umad_init not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + umad_get_cas_namesPtr = dlsym(dl1, "umad_get_cas_names"); + if (dlerror() != NULL) + { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function umad_get_cas_names not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + umad_get_caPtr = dlsym(dl1, "umad_get_ca"); + if (dlerror() != NULL) + { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function umad_get_ca not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + + /* Need to link in the Infiniband libraries, if not found disable the component */ + dl2 = dlopen("libibmad.so", RTLD_NOW | RTLD_GLOBAL); + if (!dl2) + { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband library libibmad.so not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + mad_decode_fieldPtr = dlsym(dl2, "mad_decode_field"); + if (dlerror() != NULL) + { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function mad_decode_field not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + mad_rpc_open_portPtr = dlsym(dl2, "mad_rpc_open_port"); + if (dlerror() != NULL) 
+ { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function mad_rpc_open_port not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + ib_resolve_self_viaPtr = dlsym(dl2, "ib_resolve_self_via"); + if (dlerror() != NULL) + { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function ib_resolve_self_via not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + performance_reset_viaPtr = dlsym(dl2, "performance_reset_via"); + if (dlerror() != NULL) + { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function performance_reset_via not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + pma_query_viaPtr = dlsym(dl2, "pma_query_via"); + if (dlerror() != NULL) + { + strncpy(_infiniband_umad_vector.cmp_info.disabled_reason, "Infiniband function pma_query_via not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + + return ( PAPI_OK ); +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +int +INFINIBAND_init_control_state( hwd_control_state_t * ctrl ) +{ + ( void ) ctrl; + return PAPI_OK; +} + + +/* + * + */ +int +INFINIBAND_start( hwd_context_t * ctx, hwd_control_state_t * ctrl ) +{ + ( void ) ctx; + ( void ) ctrl; + + host_read_values( _papi_hwd_infiniband_register_start ); + + memcpy( _papi_hwd_infiniband_register, _papi_hwd_infiniband_register_start, + INFINIBAND_MAX_COUNTERS * sizeof ( long long ) ); + + return ( PAPI_OK ); +} + + +/* + * + */ +int +INFINIBAND_stop( hwd_context_t * ctx, hwd_control_state_t * ctrl ) +{ + int i; + ( void ) ctx; + + host_read_values( _papi_hwd_infiniband_register ); + + for ( i = 0; i < ( ( INFINIBAND_context_t * ) ctx )->state.ncounter; i++ ) { + ( ( INFINIBAND_control_state_t * ) ctrl )->counts[i] = + _papi_hwd_infiniband_register[i] - + _papi_hwd_infiniband_register_start[i]; + } + + return ( PAPI_OK ); +} + + +/* + * + */ +int +INFINIBAND_read( hwd_context_t * ctx, hwd_control_state_t * ctrl, 
+ long_long ** events, int flags ) +{ + int i; + ( void ) flags; + + host_read_values( _papi_hwd_infiniband_register ); + + for ( i = 0; i < ( ( INFINIBAND_context_t * ) ctx )->state.ncounter; i++ ) { + ( ( INFINIBAND_control_state_t * ) ctrl )->counts[i] = + _papi_hwd_infiniband_register[i] - + _papi_hwd_infiniband_register_start[i]; + } + + *events = ( ( INFINIBAND_control_state_t * ) ctrl )->counts; + return ( PAPI_OK ); +} + + +/* + * + */ +int +INFINIBAND_shutdown_thread( hwd_context_t * ctx ) +{ + ( void ) ctx; + host_finalize( ); + return ( PAPI_OK ); +} + + +/* + * + */ +int +INFINIBAND_shutdown_component( void ) +{ + // close the dynamic libraries needed by this component (opened in the init substrate call) + dlclose(dl1); + dlclose(dl2); + + return ( PAPI_OK ); +} + + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +int +INFINIBAND_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ + ( void ) ctx; + ( void ) code; + ( void ) option; + return ( PAPI_OK ); +} + + +//int INFINIBAND_ntv_code_to_bits ( unsigned int EventCode, hwd_register_t * bits ); + + +/* + * + */ +int +INFINIBAND_update_control_state( hwd_control_state_t * ptr, + NativeInfo_t * native, int count, + hwd_context_t * ctx ) +{ + ( void ) ptr; + ( void ) ctx; + int i, index; + + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event; + native[i].ni_position = index; + } + + return ( PAPI_OK ); +} + + +/* + * Infiniband counts are system wide, so this is the only domain we will respond to + */ +int +INFINIBAND_set_domain( hwd_control_state_t * cntrl, int domain ) +{ + (void) cntrl; + if ( PAPI_DOM_ALL != domain ) + return ( PAPI_EINVAL ); + + return ( PAPI_OK ); +} + + +/* + * + */ +int +INFINIBAND_reset( hwd_context_t * ctx, hwd_control_state_t * ctrl ) +{ + INFINIBAND_start( ctx, ctrl ); + return ( PAPI_OK ); +} + + +/* + * 
Native Event functions + */ +int +INFINIBAND_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + if ( modifier == PAPI_ENUM_FIRST ) { + *EventCode = 0; + return PAPI_OK; + } + + if ( modifier == PAPI_ENUM_EVENTS ) { + int index = *EventCode; + + if ( infiniband_native_table[index + 1] ) { + *EventCode = *EventCode + 1; + return ( PAPI_OK ); + } else + return ( PAPI_ENOEVNT ); + } else + return ( PAPI_EINVAL ); +} + + +/* + * + */ +int +INFINIBAND_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + strncpy( name, infiniband_native_table[EventCode]->name, len ); + + return PAPI_OK; +} + + +/* + * + */ +int +INFINIBAND_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + strncpy( name, infiniband_native_table[EventCode]->description, len ); + + return PAPI_OK; +} + + +/* + * + */ +int +INFINIBAND_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) +{ + memcpy( ( INFINIBAND_register_t * ) bits, + infiniband_native_table[EventCode], + sizeof ( INFINIBAND_register_t ) ); + + return PAPI_OK; +} + + +/* + * + */ +papi_vector_t _infiniband_umad_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name ="infiniband", + .short_name="infiniband", + .version = "4.2.1", + .description = "Infiniband statistics", + .num_mpx_cntrs = INFINIBAND_MAX_COUNTERS, + .num_cntrs = INFINIBAND_MAX_COUNTERS, + .default_domain = PAPI_DOM_ALL, + .available_domains = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + } + , + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( INFINIBAND_context_t ), + .control_state = sizeof ( INFINIBAND_control_state_t ), + .reg_value = sizeof ( INFINIBAND_register_t ), + 
.reg_alloc = sizeof ( INFINIBAND_reg_alloc_t ), + } + , + /* function pointers in this component */ + .init_thread = INFINIBAND_init_thread, + .init_component = INFINIBAND_init_component, + .init_control_state = INFINIBAND_init_control_state, + .start = INFINIBAND_start, + .stop = INFINIBAND_stop, + .read = INFINIBAND_read, + .shutdown_component = INFINIBAND_shutdown_component, + .shutdown_thread = INFINIBAND_shutdown_thread, + .ctl = INFINIBAND_ctl, + + .update_control_state = INFINIBAND_update_control_state, + .set_domain = INFINIBAND_set_domain, + .reset = INFINIBAND_reset, + + .ntv_enum_events = INFINIBAND_ntv_enum_events, + .ntv_code_to_name = INFINIBAND_ntv_code_to_name, + .ntv_code_to_descr = INFINIBAND_ntv_code_to_descr, + .ntv_code_to_bits = INFINIBAND_ntv_code_to_bits, +}; diff --git a/src/components/infiniband_umad/linux-infiniband_umad.h b/src/components/infiniband_umad/linux-infiniband_umad.h new file mode 100644 index 0000000..e133ca5 --- /dev/null +++ b/src/components/infiniband_umad/linux-infiniband_umad.h @@ -0,0 +1,95 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-infiniband_umad.h + * @author Heike Jagode (in collaboration with Michael Kluge, TU Dresden) + * jagode@eecs.utk.edu + * + * @ingroup papi_components + * + * InfiniBand component + * + * Tested version of OFED: 1.4 + * + * @brief + * This file has the source code for a component that enables PAPI-C to + * access hardware monitoring counters for InfiniBand devices through the + * OFED library. Since a new interface was introduced with OFED version 1.4 + * (released Dec 2008), the current InfiniBand component does not support + * OFED versions < 1.4. 
+ */ + +#ifndef _PAPI_INFINIBAND_H +#define _PAPI_INFINIBAND_H + +#define __BUILD_VERSION_TAG__ 1.2 + +#include +#include + +/* describes a single counter with its properties */ +typedef struct counter_info_struct +{ + int idx; + char *name; + char *description; + char *unit; + uint64_t value; + struct counter_info_struct *next; +} counter_info; + +typedef struct +{ + int count; + char **data; +} string_list; + +/* infos collected of a single IB port */ +typedef struct ib_port_struct +{ + char *name; + counter_info *send_cntr; + counter_info *recv_cntr; + int port_rate; + int port_number; + int is_initialized; + uint64_t sum_send_val; + uint64_t sum_recv_val; + uint32_t last_send_val; + uint32_t last_recv_val; + struct ib_port_struct *next; +} ib_port; + + +static void init_ib_counter( ); +static int read_ib_counter( ); +static int init_ib_port( ib_port * portdata ); +static void addIBPort( const char *ca_name, umad_port_t * port ); + + +/************************* DEFINES SECTION ******************************* + ***************************************************************************/ +/* this number assumes that there will never be more events than indicated */ +#define INFINIBAND_MAX_COUNTERS 100 +#define INFINIBAND_MAX_COUNTER_TERMS INFINIBAND_MAX_COUNTERS + +typedef counter_info INFINIBAND_register_t; +typedef counter_info INFINIBAND_native_event_entry_t; +typedef counter_info INFINIBAND_reg_alloc_t; + + +typedef struct INFINIBAND_control_state +{ + long long counts[INFINIBAND_MAX_COUNTERS]; + int ncounter; +} INFINIBAND_control_state_t; + + +typedef struct INFINIBAND_context +{ + INFINIBAND_control_state_t state; +} INFINIBAND_context_t; + +#endif /* _PAPI_INFINIBAND_H */ diff --git a/src/components/infiniband_umad/tests/Makefile b/src/components/infiniband_umad/tests/Makefile new file mode 100644 index 0000000..6040b6d --- /dev/null +++ b/src/components/infiniband_umad/tests/Makefile @@ -0,0 +1,23 @@ +NAME=infiniband_umad +include 
../../Makefile_comp_tests.target +include ../Makefile.infiniband_umad + +INFINIBANDLIBS = -L$(INFINIBAND_IBMAD_DIR)/lib64 -L$(INFINIBAND_IBUMAD_DIR)/lib64 -libumad -libmad + +TESTS = infiniband_umad_list_events infiniband_umad_values_by_code + +infiniband_umad_tests: $(TESTS) + + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +infiniband_umad_list_events: infiniband_umad_list_events.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) $(INFINIBANDLIBS) + +infiniband_umad_values_by_code: infiniband_umad_values_by_code.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ $^ $(LDFLAGS) $(INFINIBANDLIBS) + +clean: + rm -f $(TESTS) *.o + diff --git a/src/components/infiniband_umad/tests/infiniband_umad_list_events.c b/src/components/infiniband_umad/tests/infiniband_umad_list_events.c new file mode 100644 index 0000000..d065b43 --- /dev/null +++ b/src/components/infiniband_umad/tests/infiniband_umad_list_events.c @@ -0,0 +1,91 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Jose Pedro Oliveira + * + * test case for the linux-infiniband component + * Adapted from its counterpart in the net component. 
+ * + * @brief + * List all net events codes and names + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int main (int argc, char **argv) +{ + int retval,cid,numcmp; + int total_events=0; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Listing all net events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname, "infiniband") == NULL) { + continue; + } + + if (!TESTS_QUIET) { + printf("Component %d (%d) - %d events - %s\n", + cid, cmpinfo->CmpIdx, + cmpinfo->num_native_events, cmpinfo->name); + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + if (!TESTS_QUIET) { + printf("%#x %s\n", code, event_name); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No net events found", 0); + } + + test_pass( __FILE__ ); + + return 0; +} + +// vim:set ai ts=4 sw=4 sts=4 et: diff --git a/src/components/infiniband_umad/tests/infiniband_umad_values_by_code.c b/src/components/infiniband_umad/tests/infiniband_umad_values_by_code.c new file mode 100644 index 0000000..b9b4a65 --- /dev/null +++ b/src/components/infiniband_umad/tests/infiniband_umad_values_by_code.c @@ -0,0 +1,140 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Jose Pedro Oliveira + * + * test case for the linux-infiniband component + * Adapted 
from its counterpart in the net component. + * + * @brief + * Prints the value of every net event (by code) + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define PINGADDR "127.0.0.1" + +int main (int argc, char **argv) +{ + int retval,cid,numcmp; + int EventSet = PAPI_NULL; + long long value; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int total_events=0; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Trying all net events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidnum_native_events, cmpinfo->name); + } + + if ( strstr(cmpinfo->name, "infiniband") == NULL) { + continue; + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + if (!TESTS_QUIET) { + printf("%#x %-24s = ", code, event_name); + } + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset()", retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_add_event()", retval); + } + + retval = PAPI_start( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()", retval); + } + + if (strcmp(event_name, "_recv") == 0) { + /* XXX figure out a general method to generate some traffic + * for infiniband + * the operation should take more than one second in order + * to guarantee that the network counters are updated */ + retval = system("ping -c 
4 " PINGADDR " > /dev/null"); + if (retval < 0) { + test_fail(__FILE__, __LINE__, "Unable to start ping", retval); + } + } + + retval = PAPI_stop( EventSet, &value ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()", retval); + } + + if (!TESTS_QUIET) printf("%lld\n", value); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset()", retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset()", retval); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No net events found", 0); + } + + test_pass( __FILE__ ); + + return 0; +} + +// vim:set ai ts=4 sw=4 sts=4 et: diff --git a/src/components/libmsr/Makefile.libmsr.in b/src/components/libmsr/Makefile.libmsr.in new file mode 100644 index 0000000..03fad07 --- /dev/null +++ b/src/components/libmsr/Makefile.libmsr.in @@ -0,0 +1,4 @@ +LIBMSR_INCDIR = @LIBMSR_INCDIR@ +LIBMSR_INCFLAG = -I@LIBMSR_INCDIR@ +LIBMSR_LIBDIR = @LIBMSR_LIBDIR@ +LIBMSR_LIBFLAG = -L@LIBMSR_LIBDIR@ diff --git a/src/components/libmsr/README b/src/components/libmsr/README new file mode 100644 index 0000000..f28bd7b --- /dev/null +++ b/src/components/libmsr/README @@ -0,0 +1,133 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Asim YarKhan +* yarkhan@icl.utk.edu +* @defgroup papi_components Components +* @brief Component Specific Readme file: libmsr +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +libmsr/ + +NOTE: This libmsr component is an initial version, and has been tested +with libmsr (v0.1.17 11/2015) and the msr_safe kernel module (19/2015 +version). 
+ https://github.com/scalability-llnl/libmsr + https://github.com/scalability-llnl/msr-safe + + +The PAPI libmsr component supports measuring and capping power usage +on recent Intel architectures using the RAPL interface exposed through +MSRs (model-specific registers). + +Lawrence Livermore National Laboratory has released a library (libmsr) +designed to provide a simple, safe, consistent interface to several of +the model-specific registers (MSRs) in Intel processors. The problem +is that permitting open access to the MSRs on a machine can be a +safety hazard, so access to MSRs is usually limited. In order to +encourage system administrators to give wider access to the MSRs on a +machine, LLNL has released a Linux kernel module (msr_safe) which +provides safer, white-listed access to the MSRs. + +PAPI has created a libmsr component that can provide read and write +access to the information and controls exposed via the libmsr library. + +This PAPI component introduces a new ability for PAPI; it is the first +case where PAPI is writing information to a counter as well as reading +the data from the counter. + + +-------------------------------------------------- +ENABLE ACCESS TO THE MSRS (MODEL SPECIFIC REGISTERS) + +https://github.com/scalability-llnl/msr-safe + +To use this component, the system will need to provide access to Model +Specific Registers (MSRs) from user space. The actions described +below will generally require superuser ability. Note, these actions +may have security and performance consequences, so please make sure +you know what you are doing. + + OPTION 1: EITHER: Enable MSR access using msr-safe + Install the msr-safe module from LLNL, + lsmod | grep msr (should show msr_safe) + Use chmod to set site-appropriate access permissions (e.g. 766) for + /dev/cpu/*/msr_safe /dev/cpu/msr_batch /dev/cpu/msr_whitelist + Load a whitelist appropriate for your machine + e.g. 
for SandyBridge: + cat msr-safe/whitelists/wl_062D > /dev/cpu/msr_whitelist + + OPTION 2: OR: Enable MSR access via the filesystem and elevated permissions. + Or, enable access to the standard MSRs filesystem + + For Linux kernel version < 3.7, using only file system checks + chmod 666 /dev/cpu/*/msr + + For Linux kernel version >= 3.7, using capabilities + chmod 666 /dev/cpu/*/msr + The final executable needs CAP_SYS_RAWIO to open MSR device files [1] + setcap cap_sys_rawio=ep <executable> + The final executable cannot be on a shared network partition. + + The dynamic linker on most operating systems will remove variables + that control dynamic linking from the environment of executables + with extended rights, such as setuid executables or executables + with raised capabilities. One such variable is + LD_LIBRARY_PATH. Therefore, executables that have the RAWIO + capability can only load shared libraries from default system + directories. + + One can work around this restriction by either installing the + shared libraries in system directories, linking statically against + those libraries, or using the -rpath linker option to specify the + full path to the shared libraries during the linking step. + + +-------------------------------------------------- +COMPILE THE LIBMSR LIBRARY TO ACCESS THE MSRS + +https://github.com/scalability-llnl/libmsr + +Get the library and follow the instructions to build using CMake. +This library contains a subdirectory, test, which will exercise the +functionality. + +-------------------------------------------------- +CONFIGURING THE PAPI LIBMSR COMPONENT + +Set libmsr library and header files by configuring within the component. + % cd <papi_dir>/src/components/libmsr + % ./configure --with-libmsr-incdir=<path> --with-libmsr-libdir=<path> +Then, at the higher src directory, configure with this component + % cd <papi_dir>/src + % ./configure --with-components="libmsr" + or if you want to specify the compilers and enable debug. 
+ % ./configure CC=gcc F77=gfortran --with-debug --with-components="libmsr" +Finally, follow the standard PAPI build (make) instructions + % make +To use the module, make sure that the libraries are accessible. + % export LD_LIBRARY_PATH=${PAPIDIR}/src:${PAPIDIR}/src/libpfm4/lib:${LIBMSRDIR}/lib:${LD_LIBRARY_PATH} +To check the installation, the following should show some available counters + % ./utils/papi_native_avail | grep libmsr +To check the installation, the following should show some counter values + % ./utils/papi_native_avail -e "libmsr:::PKG_WATTS:PACKAGE0" + % ./utils/papi_command_line "libmsr:::PKG_WATTS:PACKAGE0" + % ./utils/papi_command_line "libmsr:::PKG_DELTA_ENERGY:PACKAGE0" + + +-------------------------------------------------- +USE THE PAPI LIBMSR COMPONENT + +See the components/libmsr/utils/README file for instructions. This +test demonstrates how to write power constraints, and gives an +estimate of the overheads for reading and writing information to the +RAPL MSRs. + + +[1] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c903f0456bc69176912dee6dd25c6a66ee1aed00 + +*/ diff --git a/src/components/libmsr/Rules.libmsr b/src/components/libmsr/Rules.libmsr new file mode 100644 index 0000000..34f7620 --- /dev/null +++ b/src/components/libmsr/Rules.libmsr @@ -0,0 +1,9 @@ +include components/libmsr/Makefile.libmsr + +COMPSRCS += components/libmsr/linux-libmsr.c +COMPOBJS += linux-libmsr.o +CFLAGS += $(LIBMSR_INCFLAG) +LDFLAGS += $(LDL) + +linux-libmsr.o: components/libmsr/linux-libmsr.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/libmsr/linux-libmsr.c -o linux-libmsr.o diff --git a/src/components/libmsr/configure b/src/components/libmsr/configure new file mode 100755 index 0000000..71ce473 --- /dev/null +++ b/src/components/libmsr/configure @@ -0,0 +1,4687 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.63. 
+# +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +# 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + + + +# PATH needs CR +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. 
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. +$as_unset CDPATH + + +if test "x$CONFIG_SHELL" = x; then + if (eval ":") 2>/dev/null; then + as_have_required=yes +else + as_have_required=no +fi + + if test $as_have_required = yes && (eval ": +(as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. 
+fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test \$exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=\$LINENO + as_lineno_2=\$LINENO + test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && + test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } +") 2> /dev/null; then + : +else + as_candidate_shells= + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + case $as_dir in + /*) + for as_base in sh bash ksh sh5; do + as_candidate_shells="$as_candidate_shells $as_dir/$as_base" + done;; + esac +done +IFS=$as_save_IFS + + + for as_shell in $as_candidate_shells $SHELL; do + # Try only shells that exist, to save several forks. + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { ("$as_shell") 2> /dev/null <<\_ASEOF +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + +: +_ASEOF +}; then + CONFIG_SHELL=$as_shell + as_have_required=yes + if { "$as_shell" 2> /dev/null <<\_ASEOF +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + +: +(as_func_return () { + (exit $1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = "$1" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test $exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } + +_ASEOF +}; then + break +fi + +fi + + done + + if test "x$CONFIG_SHELL" != x; then + for as_var in BASH_ENV ENV + do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + done + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} +fi + + + if test $as_have_required = no; then + echo This script requires a shell more modern than all the + echo shells that I found on your system. Please install a + echo modern shell, or manually run the script under such a + echo shell if you do have one. + { (exit 1); exit 1; } +fi + + +fi + +fi + + + +(eval "as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. 
+fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test \$exitcode = 0") || { + echo No shell found that supports shell functions. + echo Please tell bug-autoconf@gnu.org about your system, + echo including any error possibly output before this message. + echo This can help us improve future autoconf versions. + echo Configuration will now proceed without shell functions. +} + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -p' + fi +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + + +exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Identity of this package. +PACKAGE_NAME= +PACKAGE_TARNAME= +PACKAGE_VERSION= +PACKAGE_STRING= +PACKAGE_BUGREPORT= + +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='LTLIBOBJS +LIBOBJS +CUDA_DIR +LIBMSR_LIBDIR +LIBMSR_INCDIR +EGREP +GREP +CPP +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +with_libmsr_incdir +with_libmsr_libdir +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. 
+# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2 + { (exit 1); exit 1; }; } + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2 + { (exit 1); exit 1; }; } + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2 + { (exit 1); exit 1; }; } + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2 + { (exit 1); exit 1; }; } + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { $as_echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { $as_echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { $as_echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) { $as_echo "$as_me: error: unrecognized options: $ac_unrecognized_opts" >&2 + { (exit 1); exit 1; }; } ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + { $as_echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; } +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + { $as_echo "$as_me: error: working directory cannot be determined" >&2 + { (exit 1); exit 1; }; } +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + { $as_echo "$as_me: error: pwd does not report name of working directory" >&2 + { (exit 1); exit 1; }; } + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. 
+ ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + { $as_echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || { $as_echo "$as_me: error: $ac_msg" >&2 + { (exit 1); exit 1; }; } + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures this package to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. 
See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + + cat <<\_ACEOF + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-libmsr-incdir= Specify directory of libmsr header files (libmsr.h) in a specific location + --with-libmsr-libdir= Specify directory of libmsr library (libmsr.a or libmsr.so) in a specific location + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS C/C++/Objective C preprocessor flags, e.g. 
-I if + you have headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? 
+ cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +configure +generated by GNU Autoconf 2.63 + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by $as_me, which was +generated by GNU Autoconf 2.63. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" +done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. 
## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; + 2) + ac_configure_args1="$ac_configure_args1 '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + ac_configure_args="$ac_configure_args '$ac_arg'" + ;; + esac + done +done +$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } +$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. 
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + cat <<\_ASBOX +## ---------------- ## +## Cache variables. ## +## ---------------- ## +_ASBOX + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + cat <<\_ASBOX +## ----------------- ## +## Output variables. ## +## ----------------- ## +_ASBOX + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + cat <<\_ASBOX +## ------------------- ## +## File substitutions. ## +## ------------------- ## +_ASBOX + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + cat <<\_ASBOX +## ----------- ## +## confdefs.h. 
## +## ----------- ## +_ASBOX + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + ac_site_file1=$CONFIG_SITE +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test -r "$ac_site_file"; then + { $as_echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special + # files actually), so we avoid doing that. 
+ if test -f "$cache_file"; then + { $as_echo "$as_me:$LINENO: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:$LINENO: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. +ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. 
+ ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:$LINENO: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:$LINENO: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:$LINENO: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { $as_echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +$as_echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + + + + + + + + + + + + + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + +# Check whether --with-libmsr_incdir was given. +if test "${with_libmsr_incdir+set}" = set; then + withval=$with_libmsr_incdir; libmsr_incdir=$withval +fi + + +# Check whether --with-libmsr_libdir was given. +if test "${with_libmsr_libdir+set}" = set; then + withval=$with_libmsr_libdir; libmsr_libdir=$withval +fi + + +if test "x$libmsr_libdir" != "x"; then + LIBS="-L$libmsr_libdir" + libmsr_dotest=1 +fi +if test "x$libmsr_incdir" != "x"; then + CPPFLAGS="-I$libmsr_incdir" + libmsr_dotest=1 +fi + +CFLAGS="$CFLAGS -I$libmsr_incdir" +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:$LINENO: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:$LINENO: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; }; } + +# Provide some information about the compiler. +$as_echo "$as_me:$LINENO: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +{ (ac_try="$ac_compiler --version >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compiler --version >&5") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -v >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compiler -v >&5") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -V >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compiler -V >&5") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... 
" >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { (ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. 
+ break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi + +{ $as_echo "$as_me:$LINENO: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +if test -z "$ac_file"; then + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: C compiler cannot create executables +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: C compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; }; } +fi + +ac_exeext=$ac_cv_exeext + +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; }; } + fi + fi +fi +{ $as_echo "$as_me:$LINENO: result: yes" >&5 +$as_echo "yes" >&6; } + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... " >&6; } +{ $as_echo "$as_me:$LINENO: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +{ $as_echo "$as_me:$LINENO: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... " >&6; } +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; }; } +fi + +rm -f conftest$ac_cv_exeext +{ $as_echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +{ $as_echo "$as_me:$LINENO: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if test "${ac_cv_objext+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if test "${ac_cv_c_compiler_gnu+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_compiler_gnu=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_compiler_gnu=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if test "${ac_cv_prog_cc_g+set}" = set; then + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + CFLAGS="" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then + : +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_g=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if test "${ac_cv_prog_cc_c89+set}" = set; then + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_prog_cc_c89=$ac_arg +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:$LINENO: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:$LINENO: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. 
+ # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + : +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! 
-s conftest.err + }; then + # Broken: success on invalid input. +continue +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:$LINENO: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + : +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. 
Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + # Broken: success on invalid input. +continue +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + : +else + { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { $as_echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&5 +$as_echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... 
" >&6; } +if test "${ac_cv_path_GREP+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + ac_count=`expr $ac_count + 1` + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done +done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + { { $as_echo "$as_me:$LINENO: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 +$as_echo "$as_me: error: no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} + { (exit 1); exit 1; }; } + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 +$as_echo 
"$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:$LINENO: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if test "${ac_cv_path_EGREP+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + ac_count=`expr $ac_count + 1` + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done +done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + { { $as_echo "$as_me:$LINENO: error: no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 +$as_echo "$as_me: error: 
no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} + { (exit 1); exit 1; }; } + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:$LINENO: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if test "${ac_cv_header_stdc+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_cv_header_stdc=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_header_stdc=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then + : +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + $as_echo "$as_me: program exited with status $ac_status" >&5 +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_header_stdc=no +fi +rm -rf conftest.dSYM +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +cat >>confdefs.h <<\_ACEOF +#define STDC_HEADERS 1 +_ACEOF + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. + + + + + + + + + +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do +as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +{ $as_echo "$as_me:$LINENO: checking for $ac_header" >&5 +$as_echo_n "checking for $ac_header... " >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + eval "$as_ac_Header=yes" +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_Header=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +ac_res=`eval 'as_val=${'$as_ac_Header'} + $as_echo "$as_val"'` + { $as_echo "$as_me:$LINENO: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +as_val=`eval 'as_val=${'$as_ac_Header'} + $as_echo "$as_val"'` + if test "x$as_val" = x""yes; then + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +if test "${ac_cv_header_msr_msr_rapl_h+set}" = set; then + { $as_echo "$as_me:$LINENO: checking for msr/msr_rapl.h" >&5 +$as_echo_n "checking for msr/msr_rapl.h... " >&6; } +if test "${ac_cv_header_msr_msr_rapl_h+set}" = set; then + $as_echo_n "(cached) " >&6 +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_msr_msr_rapl_h" >&5 +$as_echo "$ac_cv_header_msr_msr_rapl_h" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:$LINENO: checking msr/msr_rapl.h usability" >&5 +$as_echo_n "checking msr/msr_rapl.h usability... " >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then + ac_header_compiler=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:$LINENO: checking msr/msr_rapl.h presence" >&5 +$as_echo_n "checking msr/msr_rapl.h presence... " >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then + ac_header_preproc=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ $as_echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: msr/msr_rapl.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: msr/msr_rapl.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: msr/msr_rapl.h: present but cannot be compiled" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: msr/msr_rapl.h: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: msr/msr_rapl.h: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: msr/msr_rapl.h: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: proceeding with the preprocessor's result" >&5 +$as_echo "$as_me: WARNING: msr/msr_rapl.h: proceeding with the preprocessor's result" >&2;} + { $as_echo "$as_me:$LINENO: WARNING: msr/msr_rapl.h: in the future, the compiler will take precedence" >&5 +$as_echo "$as_me: WARNING: msr/msr_rapl.h: in the future, the compiler will take precedence" >&2;} + + ;; +esac +{ $as_echo "$as_me:$LINENO: checking for msr/msr_rapl.h" >&5 +$as_echo_n "checking for msr/msr_rapl.h... 
" >&6; } +if test "${ac_cv_header_msr_msr_rapl_h+set}" = set; then + $as_echo_n "(cached) " >&6 +else + ac_cv_header_msr_msr_rapl_h=$ac_header_preproc +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_header_msr_msr_rapl_h" >&5 +$as_echo "$ac_cv_header_msr_msr_rapl_h" >&6; } + +fi +if test "x$ac_cv_header_msr_msr_rapl_h" = x""yes; then + : +else + { { $as_echo "$as_me:$LINENO: error: libmsr component: msr/msr_rapl.h not found: use configure flags to set the path " >&5 +$as_echo "$as_me: error: libmsr component: msr/msr_rapl.h not found: use configure flags to set the path " >&2;} + { (exit 1); exit 1; }; } +fi + + +LDFLAGS="$LDFLAGS -L$libmsr_libdir -Wl,-rpath,$libmsr_libdir" + +{ $as_echo "$as_me:$LINENO: checking for init_msr in -lmsr" >&5 +$as_echo_n "checking for init_msr in -lmsr... " >&6; } +if test "${ac_cv_lib_msr_init_msr+set}" = set; then + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmsr $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char init_msr (); +int +main () +{ +return init_msr (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + $as_test_x conftest$ac_exeext + }; then + ac_cv_lib_msr_init_msr=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_msr_init_msr=no +fi + +rm -rf conftest.dSYM +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_msr_init_msr" >&5 +$as_echo "$ac_cv_lib_msr_init_msr" >&6; } +if test "x$ac_cv_lib_msr_init_msr" = x""yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBMSR 1 +_ACEOF + + LIBS="-lmsr $LIBS" + +else + { { $as_echo "$as_me:$LINENO: error: libmsr component: libmsr.so not found: use configure flags to set the path" >&5 +$as_echo "$as_me: error: libmsr component: libmsr.so not found: use configure flags to set the path" >&2;} + { (exit 1); exit 1; }; } +fi + +LIBMSR_INCDIR=$libmsr_incdir +LIBMSR_LIBDIR=$libmsr_libdir + +ac_config_files="$ac_config_files Makefile.libmsr" + + + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. 
+# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + test "x$cache_file" != "x/dev/null" && + { $as_echo "$as_me:$LINENO: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + cat confcache >$cache_file + else + { $as_echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Transform confdefs.h into DEFS. +# Protect against shell expansion while executing Makefile rules. 
+# Protect against Makefile macro expansion. +# +# If the first sed substitution is executed (which looks for macros that +# take arguments), then branch to the quote section. Otherwise, +# look for a macro that doesn't take arguments. +ac_script=' +:mline +/\\$/{ + N + s,\\\n,, + b mline +} +t clear +:clear +s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g +t quote +s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g +t quote +b any +:quote +s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g +s/\[/\\&/g +s/\]/\\&/g +s/\$/$$/g +H +:any +${ + g + s/^\n// + s/\n/ /g + p +} +' +DEFS=`sed -n "$ac_script" confdefs.h` + + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: ${CONFIG_STATUS=./config.status} +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +## --------------------- ## +## M4sh Initialization. 
## +## --------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in + *posix*) set -o posix ;; +esac + +fi + + + + +# PATH needs CR +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + +# Support unset when possible. 
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. 
+$as_unset CDPATH + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
+ *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -p' + fi +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 + +# Save the log message, to keep $[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. 
+ac_log=" +This file was extended by $as_me, which was +generated by GNU Autoconf 2.63. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTION]... [FILE]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + +Configuration files: +$config_files + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.63, + with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2008 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. 
+ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + CONFIG_FILES="$CONFIG_FILES '$ac_optarg'" + ac_need_defaults=false;; + --he | --h | --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { $as_echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. 
## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "Makefile.libmsr") CONFIG_FILES="$CONFIG_FILES Makefile.libmsr" ;; + + *) { { $as_echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +$as_echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= + trap 'exit_status=$? + { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status +' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || +{ + $as_echo "$as_me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. 
+if test -n "$CONFIG_FILES"; then + + +ac_cr=' ' +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } +ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\).*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\).*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \ + || { { $as_echo "$as_me:$LINENO: error: could not setup config files machinery" >&5 +$as_echo "$as_me: error: could not setup config files machinery" >&2;} + { (exit 1); exit 1; }; } +_ACEOF + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/ +s/:*\${srcdir}:*/:/ +s/:*@srcdir@:*/:/ +s/^\([^=]*=[ ]*\):*/\1/ +s/:*$// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + + +eval set X " :F $CONFIG_FILES " +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) { { $as_echo "$as_me:$LINENO: error: invalid tag $ac_tag" >&5 +$as_echo "$as_me: error: invalid tag $ac_tag" >&2;} + { (exit 1); exit 1; }; };; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + { { $as_echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 +$as_echo "$as_me: error: cannot find input file: $ac_f" >&2;} + { (exit 1); exit 1; }; };; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + ac_file_inputs="$ac_file_inputs '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. 
$configure_input" + { $as_echo "$as_me:$LINENO: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$tmp/stdin" \ + || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 +$as_echo "$as_me: error: could not create $ac_file" >&2;} + { (exit 1); exit 1; }; } ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { as_dir="$ac_dir" + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || { { $as_echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 +$as_echo "$as_me: error: cannot create directory $as_dir" >&2;} + { (exit 1); exit 1; }; }; } + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= + +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p +' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \ + || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 +$as_echo "$as_me: error: could not create $ac_file" >&2;} + { (exit 1); exit 1; }; } + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z 
"$ac_out"; } && + { $as_echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&2;} + + rm -f "$tmp/stdin" + case $ac_file in + -) cat "$tmp/out" && rm -f "$tmp/out";; + *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";; + esac \ + || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5 +$as_echo "$as_me: error: could not create $ac_file" >&2;} + { (exit 1); exit 1; }; } + ;; + + + + esac + +done # for ac_tag + + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + { { $as_echo "$as_me:$LINENO: error: write failure creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: error: write failure creating $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. 
+ $ac_cs_success || { (exit 1); exit 1; } +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:$LINENO: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/src/components/libmsr/configure.in b/src/components/libmsr/configure.in new file mode 100644 index 0000000..f37b711 --- /dev/null +++ b/src/components/libmsr/configure.in @@ -0,0 +1,33 @@ +# Process this file with autoconf to produce a configure script. +# File: components/libmsr/configure.in + +AC_INIT + +AC_ARG_WITH(libmsr_incdir, + [ --with-libmsr-incdir= Specify directory of libmsr header files (libmsr.h) in a specific location ], + [libmsr_incdir=$withval]) +AC_ARG_WITH(libmsr_libdir, + [ --with-libmsr-libdir= Specify directory of libmsr library (libmsr.a or libmsr.so) in a specific location ], + [libmsr_libdir=$withval]) + +if test "x$libmsr_libdir" != "x"; then + LIBS="-L$libmsr_libdir" + libmsr_dotest=1 +fi +if test "x$libmsr_incdir" != "x"; then + CPPFLAGS="-I$libmsr_incdir" + libmsr_dotest=1 +fi + +CFLAGS="$CFLAGS -I$libmsr_incdir" +AC_CHECK_HEADER( [msr/msr_rapl.h], [], [AC_MSG_ERROR([libmsr component: msr/msr_rapl.h not found: use configure flags to set the path ])], ) +LDFLAGS="$LDFLAGS -L$libmsr_libdir -Wl,-rpath,$libmsr_libdir" +AC_CHECK_LIB( [msr], [init_msr], [], [AC_MSG_ERROR([libmsr component: libmsr.so not found: use configure flags to set the path])] ) +LIBMSR_INCDIR=$libmsr_incdir +LIBMSR_LIBDIR=$libmsr_libdir + +AC_CONFIG_FILES([Makefile.libmsr]) +AC_SUBST(LIBMSR_INCDIR) +AC_SUBST(LIBMSR_LIBDIR) +AC_SUBST(CUDA_DIR) +AC_OUTPUT diff --git a/src/components/libmsr/linux-libmsr.c b/src/components/libmsr/linux-libmsr.c new file mode 100644 index 0000000..44b3722 --- /dev/null +++ b/src/components/libmsr/linux-libmsr.c @@ -0,0 +1,838 @@ +/** + * @file linux-libmsr.c + * @author Asim YarKhan + * + * @ingroup papi_components + * + * @brief 
libmsr component + * + * This PAPI component provides access to libmsr from LLNL + * (https://github.com/scalability-llnl/libmsr), specifically the RAPL + * (Running Average Power Level) access in libmsr, which provides + * energy measurements on modern Intel CPUs. + * + * To work, either msr_safe kernel module from LLNL + * (https://github.com/scalability-llnl/msr-safe), or the x86 generic + * MSR driver must be installed (CONFIG_X86_MSR) and the + * /dev/cpu/?/ files must have read permissions + * + * If /dev/cpu/?/{msr_safe,msr} have appropriate write permissions, + * you can write to the events PACKAGE_POWER_LIMIT_{1,2} to change the + * average power (in watts) consumed by the packages/sockets over a + * certain time window specified by events + * PKG_TIME_WINDOW_POWER_LIMIT_{1,2} respectively. + */ +/* Based on the rapl component by Vince Weaver */ + +#include +#include +#include +#include +#include +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#include +#include +#include + +typedef enum { + PKG_ENERGY=0, + PKG_ELAPSED, + PKG_DELTA_ENERGY, + PKG_WATTS, + PKG_POWER_LIMIT_1, + PKG_TIME_WINDOW_POWER_LIMIT_1, + PKG_POWER_LIMIT_2, + PKG_TIME_WINDOW_POWER_LIMIT_2, + NUM_OF_EVENTTYPES +} eventtype_enum; + +typedef struct _libmsr_register { + unsigned int selector; +} _libmsr_register_t; + +typedef struct _libmsr_native_event_entry { + char name[PAPI_MAX_STR_LEN]; + char units[PAPI_MIN_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + int package_num; /* which package/socket for this event */ + eventtype_enum eventtype; + int return_type; + _libmsr_register_t resources; +} _libmsr_native_event_entry_t; + +typedef struct _libmsr_reg_alloc { + _libmsr_register_t ra_bits; +} _libmsr_reg_alloc_t; + +/* actually 32? But setting this to be safe? 
*/ +#define LIBMSR_MAX_COUNTERS 64 +#define LIBMSR_MAX_PACKAGES 64 + +typedef struct _libmsr_control_state { + /* The following are one per event being measured */ + int num_events_measured; + /* int domain; */ + /* int multiplexed; */ + /* int overflow; */ + /* int inherit; */ + int being_measured[LIBMSR_MAX_COUNTERS]; + int which_counter[LIBMSR_MAX_COUNTERS]; + long long count[LIBMSR_MAX_COUNTERS]; + /* The following is boolean: Is package NN active in for event */ + int package_being_measured[LIBMSR_MAX_PACKAGES]; +} _libmsr_control_state_t; + +typedef struct _libmsr_context { + _libmsr_control_state_t state; +} _libmsr_context_t; + +papi_vector_t _libmsr_vector; + +static _libmsr_native_event_entry_t *libmsr_native_events = NULL; +static int num_events_global = 0; +static int already_called_libmsr_rapl_initialized_global = 0; + +/***************************************************************************/ + +/* For dynamic linking to libmsr */ +/* Using weak symbols allows PAPI to be built with the component, but + * installed in a system without the required library */ +#include +static void* dllib1 = NULL; +void (*_dl_non_dynamic_init)(void) __attribute__((weak)); + +/* Functions pointers */ +static int (*init_msr_ptr)(); +static int (*finalize_msr_ptr)(); +static int (*rapl_init_ptr)(struct rapl_data ** rapl, uint64_t ** rapl_flags); +static int (*poll_rapl_data_ptr) ( ); +static void (*set_pkg_rapl_limit_ptr) ( const int socket, struct rapl_limit* limit1, struct rapl_limit* limit2 ); +static void (*get_pkg_rapl_limit_ptr) ( const int socket, struct rapl_limit* limit1, struct rapl_limit* limit2 ); +static int (*core_config_ptr) (uint64_t * coresPerSocket, uint64_t * threadsPerCore, uint64_t * sysSockets, int * HTenabled); +static int (*rapl_storage_ptr) (struct rapl_data ** data, uint64_t ** flags); +static int (*get_rapl_power_info_ptr) ( const unsigned socket, struct rapl_power_info *info); + +/* Local wrappers for function pointers */ +static int 
libmsr_init_msr () { return ((*init_msr_ptr)()); } +static int libmsr_finalize_msr () { return ((*finalize_msr_ptr)()); } +static int libmsr_rapl_init (struct rapl_data ** rapl_data, uint64_t ** rapl_flags) { return (*rapl_init_ptr)( rapl_data, rapl_flags ); } +static int libmsr_poll_rapl_data ( ) { return (*poll_rapl_data_ptr) (); } +static void libmsr_set_pkg_rapl_limit ( const int socket, struct rapl_limit* limit1, struct rapl_limit* limit2 ) { return (*set_pkg_rapl_limit_ptr) ( socket, limit1, limit2 ); } +static void libmsr_get_pkg_rapl_limit ( const int socket, struct rapl_limit* limit1, struct rapl_limit* limit2 ) { return (*get_pkg_rapl_limit_ptr) ( socket, limit1, limit2 ); } +static int libmsr_core_config(uint64_t * coresPerSocket, uint64_t * threadsPerCore, uint64_t * sysSockets, int * HTenabled) { return (*core_config_ptr) ( coresPerSocket, threadsPerCore, sysSockets, HTenabled ); } +static int libmsr_rapl_storage(struct rapl_data ** data, uint64_t ** flags) { return (*rapl_storage_ptr) (data, flags); } +static int libmsr_get_rapl_power_info( const unsigned socket, struct rapl_power_info *info) { return (*get_rapl_power_info_ptr) ( socket, info); } + + +#define CHECK_DL_STATUS( err, str ) if( err ) { strncpy( _libmsr_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); } +static int _local_linkDynamicLibraries() +{ + if ( _dl_non_dynamic_init != NULL ) { + strncpy( _libmsr_vector.cmp_info.disabled_reason, "The libmsr component REQUIRES dynamic linking capabilities.", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + dllib1 = dlopen("libmsr.so", RTLD_NOW | RTLD_GLOBAL); + CHECK_DL_STATUS( !dllib1 , "Component library libmsr.so not found." ); + init_msr_ptr = dlsym( dllib1, "init_msr" ); + CHECK_DL_STATUS( dlerror()!=NULL , "libmsr function init_msr not found." ); + finalize_msr_ptr = dlsym( dllib1, "finalize_msr" ); + CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function finalize_msr not found." 
); + rapl_init_ptr = dlsym( dllib1, "rapl_init" ); + CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function rapl_init not found." ); + poll_rapl_data_ptr = dlsym( dllib1, "poll_rapl_data" ); + CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function poll_rapl_data not found." ); + set_pkg_rapl_limit_ptr = dlsym( dllib1, "set_pkg_rapl_limit" ); + CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function set_pkg_rapl_limit not found." ); + get_pkg_rapl_limit_ptr = dlsym( dllib1, "get_pkg_rapl_limit" ); + CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function get_pkg_rapl_limit not found." ); + core_config_ptr = dlsym( dllib1, "core_config" ); + CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function core_config not found." ); + rapl_storage_ptr = dlsym( dllib1, "rapl_storage" ); + CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function rapl_storage not found." ); + get_rapl_power_info_ptr = dlsym( dllib1, "get_rapl_power_info" ); + CHECK_DL_STATUS( dlerror()!=NULL, "libmsr function get_rapl_power_info not found." ); + return( PAPI_OK); +} + +/***************************************************************************/ +/****** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *******/ +/***************************************************************************/ + +/* Null terminated version of strncpy */ +static char * _local_strlcpy( char *dst, const char *src, size_t size ) +{ + char *retval = strncpy( dst, src, size ); + if ( size>0 ) dst[size-1] = '\0'; + return( retval ); +} + + +void _local_set_to_defaults() +{ + uint64_t socket, numSockets; + struct rapl_power_info raplinfo; + struct rapl_limit socketlim, socketlim2; + + SUBDBG("Enter: Resetting the sockets to defaults\n"); + libmsr_core_config( NULL, NULL, &numSockets, NULL); + for (socket = 0; socket < numSockets; socket++) { + libmsr_get_rapl_power_info(socket, &raplinfo); + socketlim.bits = 0; + socketlim.watts = raplinfo.pkg_therm_power; + socketlim.seconds = 1; + socketlim2.bits = 0; + socketlim2.watts = 
raplinfo.pkg_therm_power * 1.2; + socketlim2.seconds = 3; + SUBDBG("Resetting socket %ld to defaults (%f,%f) (%f,%f)\n", socket, socketlim.watts, socketlim.seconds, socketlim2.watts, socketlim2.seconds); + libmsr_set_pkg_rapl_limit(socket, &socketlim, &socketlim2); + } +} + + +/************************* PAPI Functions **********************************/ + +/* + * This is called whenever a thread is initialized + */ +int _libmsr_init_thread( hwd_context_t * ctx ) +{ + ( void ) ctx; + return PAPI_OK; +} + + +/* + * Called when PAPI process is initialized (i.e. PAPI_library_init) + */ +int _libmsr_init_component( int cidx ) +{ + SUBDBG( "Enter: cidx: %d\n", cidx ); + int i, j; + /* int package; */ + /* FILE *fff; */ + /* char filename[BUFSIZ]; */ + int num_packages; + /* int num_cpus; */ + const PAPI_hw_info_t *hw_info; + int retval; + struct rapl_data * libmsr_rapl_data; + uint64_t * libmsr_rapl_flags; + uint64_t coresPerSocket, threadsPerCore, numSockets; + int HTenabled; + + /* check if Intel processor */ + hw_info = &( _papi_hwi_system_info.hw_info ); + /* Can't use PAPI_get_hardware_info() if PAPI library not done initializing yet */ + if( hw_info->vendor != PAPI_VENDOR_INTEL ) { + strncpy( _libmsr_vector.cmp_info.disabled_reason, "Not an Intel processor", PAPI_MAX_STR_LEN ); + return PAPI_ENOSUPP; + } + + /* Dynamically load libmsr API and libraries */ + retval = _local_linkDynamicLibraries(); + if ( retval!=PAPI_OK ) { + SUBDBG ("Dynamic link of libmsr.so libraries failed, component will be disabled.\n"); + SUBDBG ("See disable reason in papi_component_avail output for more details.\n"); + return (PAPI_ENOSUPP); + } + + /* initialize libmsr */ + if ( libmsr_init_msr() != 0 ) { + strncpy( _libmsr_vector.cmp_info.disabled_reason, "Library libmsr could not initialize (libmsr/init_msr failed)", PAPI_MAX_STR_LEN ); + SUBDBG( "init_msr (libmsr) returned error. 
Possible problems accessing /dev/cpu//msr_safe or /dev/cpu//msr"); + return PAPI_ENOSUPP; + } + + /* Initialize libmsr RAPL */ + if ( already_called_libmsr_rapl_initialized_global==0 ) { + if ( libmsr_rapl_init( &libmsr_rapl_data, &libmsr_rapl_flags ) < 0 ) { + strncpy( _libmsr_vector.cmp_info.disabled_reason, "Library libmsr could not initialize RAPL (libmsr/rapl_init failed)", PAPI_MAX_STR_LEN ); + SUBDBG( "Library libmsr could not initialize RAPL (libmsr/rapl_init failed)"); + return PAPI_ENOSUPP; + } + already_called_libmsr_rapl_initialized_global = 1; + } + + /* Get the numbers of cores, threads, sockets, ht */ + libmsr_core_config(&coresPerSocket, &threadsPerCore, &numSockets, &HTenabled); + + /* Fill packages and cpus with sentinel values */ + /* int packages[numSockets]; */ + /* for( i = 0; i < numSockets; ++i ) packages[i] = -1; */ + /* num_cpus = numSockets*coresPerSocket; */ + num_packages = numSockets; + + /* /\* Detect how many packages and count num_cpus *\/ */ + /* num_cpus = 0; */ + /* while( 1 ) { */ + /* int num_read; */ + /* sprintf( filename, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", num_cpus ); */ + /* fff = fopen( filename, "r" ); */ + /* if( fff == NULL ) break; */ + /* num_read = fscanf( fff, "%d", &package ); */ + /* fclose( fff ); */ + /* if( num_read != 1 ) { */ + /* strcpy( _libmsr_vector.cmp_info.disabled_reason, "Error reading file: " ); */ + /* strncat( _libmsr_vector.cmp_info.disabled_reason, filename, PAPI_MAX_STR_LEN - strlen( _libmsr_vector.cmp_info.disabled_reason ) - 1 ); */ + /* _libmsr_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = '\0'; */ + /* return PAPI_ESYS; */ + /* } */ + /* /\* Check if a new package *\/ */ + /* if( ( package >= 0 ) && ( package < nr_cpus ) ) { */ + /* if( packages[package] == -1 ) { */ + /* SUBDBG( "Found package %d out of total %d\n", package, num_packages ); */ + /* packages[package] = package; */ + /* num_packages++; */ + /* } */ + /* } else { */ + /* SUBDBG( "Package 
outside of allowed range\n" ); */ + /* strncpy( _libmsr_vector.cmp_info.disabled_reason, "Package outside of allowed range", PAPI_MAX_STR_LEN ); */ + /* return PAPI_ESYS; */ + /* } */ + /* num_cpus++; */ + /* } */ + + /* /\* Error if no accessible packages *\/ */ + /* if( num_packages == 0 ) { */ + /* SUBDBG( "Can't access any physical packages\n" ); */ + /* strncpy( _libmsr_vector.cmp_info.disabled_reason, "Can't access /sys/devices/system/cpu/cpu/topology/physical_package_id", PAPI_MAX_STR_LEN ); */ + /* return PAPI_ESYS; */ + /* } */ + /* SUBDBG( "Found %d packages with %d cpus\n", num_packages, num_cpus ); */ + + int max_num_events = ( NUM_OF_EVENTTYPES * num_packages ); + /* Allocate space for events */ + libmsr_native_events = ( _libmsr_native_event_entry_t * ) calloc( sizeof( _libmsr_native_event_entry_t ), max_num_events ); + if ( !libmsr_native_events ) SUBDBG("Could not allocate memory\n" ); + + /* Create events for package power info */ + num_events_global = 0; + i = 0; + for( j = 0; j < num_packages; j++ ) { + + sprintf( libmsr_native_events[i].name, "PKG_ENERGY:PACKAGE%d", j ); + strncpy( libmsr_native_events[i].units, "J", PAPI_MIN_STR_LEN ); + sprintf(libmsr_native_events[i].description,"Number of Joules consumed by all cores and last level cache on package. Unit is Joules (double precision)."); + libmsr_native_events[i].package_num = j; + libmsr_native_events[i].resources.selector = i + 1; + libmsr_native_events[i].eventtype = PKG_ENERGY; + libmsr_native_events[i].return_type = PAPI_DATATYPE_FP64; + i++; + + sprintf( libmsr_native_events[i].name, "PKG_WATTS:PACKAGE%d", j ); + strncpy( libmsr_native_events[i].units, "W", PAPI_MIN_STR_LEN ); + sprintf( libmsr_native_events[i].description, "Watts consumed by package. 
Unit is Watts (double precision)."); + libmsr_native_events[i].package_num = j; + libmsr_native_events[i].resources.selector = i + 1; + libmsr_native_events[i].eventtype = PKG_WATTS; + libmsr_native_events[i].return_type = PAPI_DATATYPE_FP64; + i++; + + sprintf( libmsr_native_events[i].name, "PKG_ELAPSED:PACKAGE%d", j ); + strncpy( libmsr_native_events[i].units, "S", PAPI_MIN_STR_LEN ); + sprintf( libmsr_native_events[i].description, "Time elapsed since last LIBMSR data reading from package. Unit is seconds (double precision)."); + libmsr_native_events[i].package_num = j; + libmsr_native_events[i].resources.selector = i + 1; + libmsr_native_events[i].eventtype = PKG_ELAPSED; + libmsr_native_events[i].return_type = PAPI_DATATYPE_FP64; + i++; + + sprintf( libmsr_native_events[i].name, "PKG_DELTA_ENERGY:PACKAGE%d", j ); + strncpy( libmsr_native_events[i].units, "J", PAPI_MIN_STR_LEN ); + sprintf( libmsr_native_events[i].description, "Number of Joules consumed by package since last LIBMSR data reading. Unit is Joules (double precision)."); + libmsr_native_events[i].package_num = j; + libmsr_native_events[i].resources.selector = i + 1; + libmsr_native_events[i].eventtype = PKG_DELTA_ENERGY; + libmsr_native_events[i].return_type = PAPI_DATATYPE_FP64; + i++; + + sprintf( libmsr_native_events[i].name, "PKG_POWER_LIMIT_1:PACKAGE%d", j ); + strncpy( libmsr_native_events[i].units, "W", PAPI_MIN_STR_LEN ); + sprintf( libmsr_native_events[i].description, "Average power limit over PKG_TIME_WINDOW_POWER_LIMIT_1 for package. Read/Write. 
Unit is Watts (double precision)."); + libmsr_native_events[i].package_num = j; + libmsr_native_events[i].resources.selector = i + 1; + libmsr_native_events[i].eventtype = PKG_POWER_LIMIT_1; + libmsr_native_events[i].return_type = PAPI_DATATYPE_FP64; + i++; + + sprintf( libmsr_native_events[i].name, "PKG_TIME_WINDOW_POWER_LIMIT_1:PACKAGE%d", j ); + strncpy( libmsr_native_events[i].units, "S", PAPI_MIN_STR_LEN ); + sprintf( libmsr_native_events[i].description, "Time window used for averaging PACKAGE_POWER_LIMIT_1 for package. Read/Write. Unit is seconds (double precision)."); + libmsr_native_events[i].package_num = j; + libmsr_native_events[i].resources.selector = i + 1; + libmsr_native_events[i].eventtype = PKG_TIME_WINDOW_POWER_LIMIT_1; + libmsr_native_events[i].return_type = PAPI_DATATYPE_FP64; + i++; + + sprintf( libmsr_native_events[i].name, "PKG_POWER_LIMIT_2:PACKAGE%d", j ); + strncpy( libmsr_native_events[i].units, "W", PAPI_MIN_STR_LEN ); + sprintf( libmsr_native_events[i].description, "Average power limit over PKG_TIME_WINDOW_POWER_LIMIT_2 for package. Read/Write. Unit is Watts (double precision)."); + libmsr_native_events[i].package_num = j; + libmsr_native_events[i].resources.selector = i + 1; + libmsr_native_events[i].eventtype = PKG_POWER_LIMIT_2; + libmsr_native_events[i].return_type = PAPI_DATATYPE_FP64; + i++; + + sprintf( libmsr_native_events[i].name, "PKG_TIME_WINDOW_POWER_LIMIT_2:PACKAGE%d", j ); + strncpy( libmsr_native_events[i].units, "S", PAPI_MIN_STR_LEN ); + sprintf( libmsr_native_events[i].description, "Time window used for averaging PACKAGE_POWER_LIMIT_2 for package. Read/Write. 
Unit is seconds (double precision)."); + libmsr_native_events[i].package_num = j; + libmsr_native_events[i].resources.selector = i + 1; + libmsr_native_events[i].eventtype = PKG_TIME_WINDOW_POWER_LIMIT_2; + libmsr_native_events[i].return_type = PAPI_DATATYPE_FP64; + i++; + + // TODO Add DRAM values + // DRAM_ENERGY + // DRAM_DELTA_ENERGY + // DRAM_WATTS + // TODO Add PP0, PP1 events + } + num_events_global = i; + + /* Export the total number of events available */ + _libmsr_vector.cmp_info.num_native_events = num_events_global; + _libmsr_vector.cmp_info.num_cntrs = _libmsr_vector.cmp_info.num_native_events; + _libmsr_vector.cmp_info.num_mpx_cntrs = _libmsr_vector.cmp_info.num_native_events; + + /* Export the component id */ + _libmsr_vector.cmp_info.CmpIdx = cidx; + + return PAPI_OK; +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + */ +int _libmsr_init_control_state( hwd_control_state_t * ctl ) +{ + SUBDBG( "Enter: ctl: %p\n", ctl ); + _libmsr_control_state_t *control = ( _libmsr_control_state_t * ) ctl; + int i; + + for( i = 0; i < LIBMSR_MAX_COUNTERS; i++ ) + control->which_counter[i] = 0; + for( i = 0; i < LIBMSR_MAX_PACKAGES; i++ ) + control->package_being_measured[i] = 0; + control->num_events_measured = 0; + + return PAPI_OK; +} + + +int _libmsr_update_control_state( hwd_control_state_t * ctl, NativeInfo_t * native, int count, hwd_context_t * ctx ) +{ + SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx ); + int nn, index; + ( void ) ctx; + _libmsr_control_state_t *control = ( _libmsr_control_state_t * ) ctl; + + control->num_events_measured = 0; + /* Track which events need to be measured */ + for( nn = 0; nn < count; nn++ ) { + index = native[nn].ni_event & PAPI_NATIVE_AND_MASK; + native[nn].ni_position = nn; + control->which_counter[nn] = index; + control->count[nn] = 0; + /* Track (on/off vector) which packages/sockets need to be measured for these events */ + 
control->package_being_measured[libmsr_native_events[index].package_num] = 1; + control->num_events_measured++; + } + return PAPI_OK; +} + + +int _libmsr_start( hwd_context_t * ctx, hwd_control_state_t * ctl ) +{ + SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx ); + ( void ) ctx; + ( void ) ctl; + + /* Read once to get initial data */ + if ( libmsr_poll_rapl_data() < 0 ) { + strncpy( _libmsr_vector.cmp_info.disabled_reason, "Function libmsr.so:poll_rapl_data failed. ", PAPI_MAX_STR_LEN ); + return PAPI_ESYS; + } + return PAPI_OK; +} + + +int _libmsr_read( hwd_context_t * ctx, hwd_control_state_t * ctl, long long **events, int flags ) +{ + SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx ); + ( void ) flags; + ( void ) ctx; + _libmsr_control_state_t *control = ( _libmsr_control_state_t * ) ctl; + int nn, pp, ee; /* native, package, event indices */ + union { long long ll; double dbl; } event_value_union; + struct rapl_limit limit1, limit2; + eventtype_enum eventtype; + struct rapl_data * libmsr_rapl_data; + uint64_t * libmsr_rapl_flags; + + /* Get a pointer to the rapl_data data storage */ + if ( libmsr_rapl_storage( &libmsr_rapl_data, &libmsr_rapl_flags)!=0 ) { + strncpy( _libmsr_vector.cmp_info.disabled_reason, "Function libmsr.so:rapl_storage failed. ", PAPI_MAX_STR_LEN ); + return PAPI_ESYS; + } + + /* If any socket/package needs to be read, call the poll once to read all packages */ + for ( pp = 0; pp < LIBMSR_MAX_PACKAGES; pp++ ) { + if ( control->package_being_measured[pp] ) { + SUBDBG("Calling poll_rapl_data to read state from all sockets\n"); + if ( libmsr_poll_rapl_data()!= 0 ) { + strncpy( _libmsr_vector.cmp_info.disabled_reason, "Function libmsr.so:poll_rapl_data failed. 
", PAPI_MAX_STR_LEN ); + return PAPI_ESYS; + } + break; + } + } + + /* Go thru events, assign package data to events as needed */ + SUBDBG("Go thru events, assign package data to events as needed\n"); + for( nn = 0; nn < control->num_events_measured; nn++ ) { + ee = control->which_counter[nn]; + pp = libmsr_native_events[ee].package_num; + event_value_union.ll = 0LL; + eventtype = libmsr_native_events[ee].eventtype; + SUBDBG("nn %d ee %d pp %d eventtype %d\n", nn, ee, pp, eventtype); + switch (eventtype) { + case PKG_ENERGY: + event_value_union.dbl = libmsr_rapl_data->pkg_joules[pp]; + break; + case PKG_ELAPSED: + event_value_union.dbl = libmsr_rapl_data->elapsed; + break; + case PKG_DELTA_ENERGY: + event_value_union.dbl = libmsr_rapl_data->pkg_delta_joules[pp]; + break; + case PKG_WATTS: + event_value_union.dbl = libmsr_rapl_data->pkg_watts[pp]; + break; + case PKG_POWER_LIMIT_1: + limit1.bits = 0; limit1.watts = 0; limit1.seconds = 0; + libmsr_get_pkg_rapl_limit( pp, &limit1, NULL ); + event_value_union.dbl = limit1.watts; + break; + case PKG_TIME_WINDOW_POWER_LIMIT_1: + limit1.bits = 0; limit1.watts = 0; limit1.seconds = 0; + libmsr_get_pkg_rapl_limit( pp, &limit1, NULL ); + event_value_union.dbl = limit1.seconds; + break; + case PKG_POWER_LIMIT_2: + limit2.bits = 0; limit2.watts = 0; limit2.seconds = 0; + libmsr_get_pkg_rapl_limit( pp, NULL, &limit2 ); + event_value_union.dbl = limit2.watts; + break; + case PKG_TIME_WINDOW_POWER_LIMIT_2: + limit2.bits = 0; limit2.watts = 0; limit2.seconds = 0; + libmsr_get_pkg_rapl_limit( pp, NULL, &limit2 ); + event_value_union.dbl = limit2.seconds; + break; + default: + SUBDBG("This LIBMSR event is unknown\n"); + /* error here */ + } + control->count[nn] = event_value_union.ll; + } + /* Pass back a pointer to our results */ + if ( events!=NULL ) *events = ( ( _libmsr_control_state_t * ) ctl )->count; + return PAPI_OK; +} + + +static long long _local_get_eventval_from_values( _libmsr_control_state_t *control, long long 
*invalues, int package_num, eventtype_enum eventtype, long long defaultval ) +{ + int nn, pp, ee; /* native, package, event indices */ + /* Loop thru all the events, if package and repltype match, return the value */ + for( nn = 0; nn < control->num_events_measured; nn++ ) { + ee = control->which_counter[nn]; + pp = libmsr_native_events[ee].package_num; + if ( pp == package_num && libmsr_native_events[ee].eventtype == eventtype ) + return invalues[ee]; + } + return defaultval; +} + + +int _libmsr_write( hwd_context_t * ctx, hwd_control_state_t * ctl, long long *values ) +{ + SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx ); + /* write values */ + ( void ) ctx; + _libmsr_control_state_t *control = ( _libmsr_control_state_t * ) ctl; + //long long now = PAPI_get_real_usec(); + int nn, pp, ee; /* native, package, event indices */ + union { long long ll; double dbl; } event_value_union; + union { long long ll; double dbl; } timewin_union; + struct rapl_limit limit1, limit2; + eventtype_enum eventtype; + + /* Go thru events, assign package data to events as needed */ + for( nn = 0; nn < control->num_events_measured; nn++ ) { + ee = control->which_counter[nn]; + pp = libmsr_native_events[ee].package_num; + /* grab value and put into the union structure */ + event_value_union.ll = values[nn]; + /* If this is a NULL value, it means that the user does not want to write this value */ + if ( event_value_union.ll == PAPI_NULL ) continue; + eventtype = libmsr_native_events[ee].eventtype; + SUBDBG("nn %d ee %d pp %d eventtype %d\n", nn, ee, pp, eventtype); + switch (eventtype) { + case PKG_ENERGY: + case PKG_ELAPSED: + case PKG_WATTS: + case PKG_DELTA_ENERGY: + /* Read only so do nothing */ + break; + case PKG_POWER_LIMIT_1: + timewin_union.ll = _local_get_eventval_from_values( control, values, pp, PKG_TIME_WINDOW_POWER_LIMIT_1, -1 ); + if ( timewin_union.ll > 0 ) { + limit1.watts = event_value_union.dbl; + limit1.seconds = timewin_union.dbl; + limit1.bits = 0; + 
//printf("set_libmsr_limit package %d limit1 %lf %lf\n", pp, limit1.watts, limit1.seconds); + libmsr_set_pkg_rapl_limit( pp, &limit1, NULL ); + } else { + // Note error - power limit1 is not updated + SUBDBG("PACKAGE_POWER_LIMIT_1 needs PKG_TIME_WINDOW_POWER_LIMIT_1: Power cap not updated. "); + } + break; + case PKG_POWER_LIMIT_2: + timewin_union.ll = _local_get_eventval_from_values( control, values, pp, PKG_TIME_WINDOW_POWER_LIMIT_2, -1 ); + if ( timewin_union.ll > 0 ) { + limit2.watts = event_value_union.dbl; + limit2.seconds = timewin_union.dbl; + limit2.bits = 0; + //printf("set_libmsr_limit package %d limit2 %lf %lf \n", pp, limit2.watts, limit2.seconds); + libmsr_set_pkg_rapl_limit( pp, NULL, &limit2 ); + } else { + // Write error + PAPIERROR("PACKAGE_POWER_LIMIT_1 needs PKG_TIME_WINDOW_POWER_LIMIT_1: Powercap not updated."); + } + break; + case PKG_TIME_WINDOW_POWER_LIMIT_1: + case PKG_TIME_WINDOW_POWER_LIMIT_2: + /* These are only meaningful (and looked up) if the power limits are set */ + break; + default: + SUBDBG("This LIBMSR information type is unknown\n"); + /* error here */ + } + } + return PAPI_OK; +} + + +int _libmsr_stop( hwd_context_t * ctx, hwd_control_state_t * ctl ) +{ + SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx ); + ( void ) ctx; + ( void ) ctl; + _local_set_to_defaults(); + return PAPI_OK; +} + + +/* Shutdown a thread */ +int _libmsr_shutdown_thread( hwd_context_t * ctx ) +{ + SUBDBG( "Enter: ctl: %p\n", ctx ); + ( void ) ctx; + return PAPI_OK; +} + + +/* + * Clean up what was setup in libmsr_init_component(). + */ +int _libmsr_shutdown_component( void ) +{ + SUBDBG( "Enter\n" ); + + _local_set_to_defaults(); + + if ( libmsr_finalize_msr()!=0 ) { + strncpy( _libmsr_vector.cmp_info.disabled_reason, "Function libmsr.so:finalize_msr failed. 
", PAPI_MAX_STR_LEN ); + return PAPI_ESYS; + } + if( libmsr_native_events ) { + free( libmsr_native_events ); + libmsr_native_events = NULL; + } + dlclose( dllib1 ); + return PAPI_OK; +} + + +/* This function sets various options in the component The valid codes + * being passed in are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, + * PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT */ +int _libmsr_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ + SUBDBG( "Enter: ctx: %p\n", ctx ); + ( void ) ctx; + ( void ) code; + ( void ) option; + + return PAPI_OK; +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +int _libmsr_set_domain( hwd_control_state_t * ctl, int domain ) +{ + SUBDBG( "Enter: ctl: %p\n", ctl ); + ( void ) ctl; + /* In theory we only support system-wide mode */ + /* How to best handle that? 
*/ + if( domain != PAPI_DOM_ALL ) + return PAPI_EINVAL; + return PAPI_OK; +} + + +int _libmsr_reset( hwd_context_t * ctx, hwd_control_state_t * ctl ) +{ + SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx ); + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + + +/* + * Native Event functions + */ +int _libmsr_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + SUBDBG( "Enter: EventCode: %d\n", *EventCode ); + int index; + if ( num_events_global == 0 ) + return PAPI_ENOEVNT; + + switch ( modifier ) { + case PAPI_ENUM_FIRST: + *EventCode = 0; + return PAPI_OK; + break; + case PAPI_ENUM_EVENTS: + index = *EventCode & PAPI_NATIVE_AND_MASK; + if ( index < num_events_global - 1 ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + // case PAPI_NTV_ENUM_UMASKS: + default: + return PAPI_EINVAL; + } + + return PAPI_EINVAL; +} + + +/* + * + */ +int _libmsr_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + SUBDBG( "Enter: EventCode: %d\n", EventCode ); + int index = EventCode & PAPI_NATIVE_AND_MASK; + + if( index >= 0 && index < num_events_global ) { + _local_strlcpy( name, libmsr_native_events[index].name, len ); + return PAPI_OK; + } + return PAPI_ENOEVNT; +} + + +int _libmsr_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + SUBDBG( "Enter: EventCode: %d\n", EventCode ); + int index = EventCode; + + if( ( index < 0 ) || ( index >= num_events_global ) ) + return PAPI_ENOEVNT; + + _local_strlcpy( name, libmsr_native_events[index].description, len ); + return PAPI_OK; +} + + +int _libmsr_ntv_code_to_info( unsigned int EventCode, PAPI_event_info_t * info ) +{ + SUBDBG( "Enter: EventCode: %d\n", EventCode ); + int index = EventCode; + + if( ( index < 0 ) || ( index >= num_events_global ) ) + return PAPI_ENOEVNT; + + _local_strlcpy( info->symbol, libmsr_native_events[index].name, sizeof( info->symbol ) ); + _local_strlcpy( info->long_descr, libmsr_native_events[index].description, 
sizeof( info->long_descr ) ); + _local_strlcpy( info->units, libmsr_native_events[index].units, sizeof( info->units ) ); + info->data_type = libmsr_native_events[index].return_type; + return PAPI_OK; +} + + +papi_vector_t _libmsr_vector = { + .cmp_info = { /* (unspecified values are initialized to 0) */ + .name = "libmsr", + .short_name = "libmsr", + .description = "PAPI component for libmsr from LANL for power (RAPL) read/write", + .version = "5.3.0", + .default_domain = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + .available_domains = PAPI_DOM_ALL, + }, + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof( _libmsr_context_t ), + .control_state = sizeof( _libmsr_control_state_t ), + .reg_value = sizeof( _libmsr_register_t ), + .reg_alloc = sizeof( _libmsr_reg_alloc_t ), + }, + /* function pointers in this component */ + .start = _libmsr_start, + .stop = _libmsr_stop, + .read = _libmsr_read, + .reset = _libmsr_reset, + .write = _libmsr_write, + .init_component = _libmsr_init_component, + .init_thread = _libmsr_init_thread, + .init_control_state = _libmsr_init_control_state, + .update_control_state = _libmsr_update_control_state, + .ctl = _libmsr_ctl, + .set_domain = _libmsr_set_domain, + .ntv_enum_events = _libmsr_ntv_enum_events, + .ntv_code_to_name = _libmsr_ntv_code_to_name, + .ntv_code_to_descr = _libmsr_ntv_code_to_descr, + .ntv_code_to_info = _libmsr_ntv_code_to_info, + .shutdown_thread = _libmsr_shutdown_thread, + .shutdown_component = _libmsr_shutdown_component, +}; diff --git a/src/components/libmsr/tests/libmsr_basic.c b/src/components/libmsr/tests/libmsr_basic.c new file mode 100644 index 0000000..68a86e9 --- /dev/null +++ b/src/components/libmsr/tests/libmsr_basic.c @@ -0,0 +1,206 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Asim YarKhan + * @author 
Vince Weaver (original rapl version) + * + * Test case for libmsr component + * + * @brief + * Tests basic functionality of libmsr component + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define MAX_LIBMSR_EVENTS 64 + +typedef union { long long ll; double dbl; } ll_dbl_union_t; + +#ifdef SLEEP_TEST +void run_test( int quiet ) +{ + if ( !quiet ) + printf( "Sleeping 2 second...\n" ); + sleep( 2 ); +} + +#else /* e.g. nothing defined or BUSY_TEST / WRAP_TEST */ + +#define MATRIX_SIZE 1024 + +static double a[MATRIX_SIZE][MATRIX_SIZE]; +static double b[MATRIX_SIZE][MATRIX_SIZE]; +static double c[MATRIX_SIZE][MATRIX_SIZE]; + +/* Naive matrix multiply */ +void run_test( int quiet ) +{ + double s; + int i,j,k; + if ( !quiet ) + printf( "Doing a naive %dx%d MMM...\n",MATRIX_SIZE,MATRIX_SIZE ); + for( i=0; i 1 && strstr( argv[1], "-w" ) ) + do_wrap = 1; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__,"PAPI_library_init failed\n",retval ); + + if ( !TESTS_QUIET ) + printf( "Trying all LIBMSR events\n" ); + + numcmp = PAPI_num_components(); + + for( cid=0; cidname,"libmsr" ) ) { + libmsr_cid=cid; + if ( !TESTS_QUIET ) printf( "Found libmsr component at cid %d\n",libmsr_cid ); + if ( cmpinfo->disabled ) { + if ( !TESTS_QUIET ) printf( "libmsr component disabled: %s\n",cmpinfo->disabled_reason ); + test_skip( __FILE__,__LINE__, "libmsr component disabled", 0 ); + } + break; + } + } + + /* Component not found */ + if ( cid==numcmp ) + test_skip( __FILE__,__LINE__,"No libmsr component found\n",0 ); + + /* Create EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset()",retval ); + + /* Add all events */ + num_events = 0; + code = PAPI_NATIVE_MASK; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, libmsr_cid ); + while ( r == PAPI_OK ) { + retval = 
PAPI_event_code_to_name( code, event_names[num_events] ); + if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + retval = PAPI_get_event_info( code,&evinfo ); + if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__,"Error getting event info\n",retval ); + strncpy( units[num_events], evinfo.units, sizeof( units[0] )-1 ); + // buffer must be null terminated to safely use strstr operation on it below + units[num_events][sizeof( units[0] )-1] = '\0'; + data_type[num_events] = evinfo.data_type; + + retval = PAPI_add_event( EventSet, code ); + if ( retval != PAPI_OK ) + break; /* We've hit an event limit */ + num_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, libmsr_cid ); + } + + values=calloc( num_events,sizeof( long long ) ); + if ( values==NULL ) + test_fail( __FILE__, __LINE__,"No memory - calloc failed",retval ); + + if ( !TESTS_QUIET ) printf( "Starting measurements...\n" ); + + /* Start Counting */ + before_time=PAPI_get_real_nsec(); + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_start()",retval ); + + for ( repeat=0; repeat<3; repeat++ ) { + + /* Run test */ + run_test( TESTS_QUIET ); + + /* Stop Counting */ + after_time=PAPI_get_real_nsec(); + retval = PAPI_read( EventSet, values ); + if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_read()",retval ); + + elapsed_time=( ( double )( after_time-before_time ) )/1.0e9; + + if ( !TESTS_QUIET ) { + printf( "Stopping measurements, took %.3fs, gathering results...\n", elapsed_time ); + for( i=0; i +#include +#include +#include + +#include "papi.h" +#include "msr/msr_core.h" +#include "msr/msr_rapl.h" + +#define MAX_EVENTS 128 + +char events[MAX_EVENTS][BUFSIZ]; +char filenames[MAX_EVENTS][BUFSIZ]; + +int ompcpuloadprimes( int limit ) +{ + int num, primes=0; +#pragma omp parallel for schedule(dynamic) reduction(+ : primes) + for (num = 1; num <= limit; num++) { + int i = 2; + while(i <= num) { + 
if(num % i == 0) + break; + i++; + } + if(i == num) + primes++; + } + return primes; +} + + +int main (int argc, char **argv) +{ + int retval,cid,rapl_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long values[MAX_EVENTS]; + int i,code,enum_retval; + const PAPI_component_info_t *cmpinfo = NULL; + long long start_time,write_start_time,write_end_time,read_start_time,read_end_time; + char event_name[BUFSIZ]; + union { long long ll; double dbl; } event_value_union; + static int num_events=0; + FILE *fileout; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + fprintf(stderr,"PAPI_library_init failed\n"); + exit(1); + } + + /* Find the libmsr component */ + numcmp = PAPI_num_components(); + for(cid=0; cidname,"libmsr")) { + rapl_cid=cid; + printf("Found libmsr component at cid %d\n", rapl_cid); + if (cmpinfo->disabled) { + fprintf(stderr,"No libmsr events found: %s\n", cmpinfo->disabled_reason); + exit(1); + } + break; + } + } + + /* Component not found */ + if (cid==numcmp) { + fprintf(stderr,"No libmsr component found\n"); + exit(1); + } + + /* Find events in the component */ + code = PAPI_NATIVE_MASK; + enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + while ( enum_retval == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + exit(1); + } + printf("Found: %s\n",event_name); + strncpy(events[num_events],event_name,BUFSIZ); + sprintf(filenames[num_events],"results.%s",event_name); + num_events++; + if (num_events==MAX_EVENTS) { + printf("Too many events! %d\n",num_events); + exit(1); + } + enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + if (num_events==0) { + printf("Error! 
No libmsr events found!\n"); + exit(1); + } + + /* Open output file */ + char fileoutname[]="libmsr_write_test_output.txt"; + fileout=fopen( fileoutname ,"w" ); + if ( fileout==NULL) { fprintf( stderr,"Could not open %s\n",fileoutname ); exit(1); } + + /* Create EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + fprintf(stderr,"Error creating eventset!\n"); + } + + for(i=0;i libmsr_write_test_output_tmp_set.dat +cat libmsr_write_test_output.txt | grep READ > libmsr_write_test_output_tmp_read.dat + +graph='plot-libmsr' +gnuplot << EOF +set title "Using PAPI libmsr component to read and set power caps\n 2x8 cores Xeon E5-2690 SandyBridge-EP at 2.9GHz" #[scooter] +${gpconfig} +set xlabel "Elapsed time (seconds)" +set ylabel "Watts" +set yrange [0:] +set y2range [0:] +set y2label "Unit Work Time (seconds) +set xrange [0:] +set key bottom right +plot \ + 'libmsr_write_test_output_tmp_read.dat' index 0 using (\$2):(\$6) title "Power Consumpution (watts)" smooth unique with linespoints ls ${ls_2}, \ + 'libmsr_write_test_output_tmp_read.dat' index 0 using (\$2):(\$3) axes x1y2 title "Time for Unit Work (seconds on y2 axis)" smooth unique with linespoints ls ${ls_3},\ + 'libmsr_write_test_output_tmp_set.dat' index 0 using (\$2):(\$9) title "Set Avg Power Cap (watts in 1 sec)" smooth unique with points ls ${ls_1} + + + +${gp_pause} +set terminal jpeg; set output "${graph}.jpg"; replot; +#${set_term_postscript}; set output "${graph}.eps"; replot; +print 'Saving files ${graph}' +EOF + +#( epstopdf ${graph}.eps; rm ${graph}.eps ) + diff --git a/src/components/lmsensors/Makefile.lmsensors.in b/src/components/lmsensors/Makefile.lmsensors.in new file mode 100644 index 0000000..5716fc3 --- /dev/null +++ b/src/components/lmsensors/Makefile.lmsensors.in @@ -0,0 +1 @@ +SENSORS_INCDIR = @SENSORS_INCDIR@ diff --git a/src/components/lmsensors/README b/src/components/lmsensors/README new file mode 100644 index 0000000..ad2a4d0 --- /dev/null +++ 
b/src/components/lmsensors/README @@ -0,0 +1,17 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Dan Terpstra +* terpstra@icl.utk.edu +* @defgroup papi_components Components +* @brief Component Specific Readme file: lmsensors +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +lmsensors/ +The PAPI lmsensors component requires lmsensors version >= 3.0.0. + +*/ diff --git a/src/components/lmsensors/Rules.lmsensors b/src/components/lmsensors/Rules.lmsensors new file mode 100644 index 0000000..5b8d2df --- /dev/null +++ b/src/components/lmsensors/Rules.lmsensors @@ -0,0 +1,11 @@ +# $Id$ + +include components/lmsensors/Makefile.lmsensors + +COMPSRCS += components/lmsensors/linux-lmsensors.c +COMPOBJS += linux-lmsensors.o +CFLAGS += -I$(SENSORS_INCDIR) +LDFLAGS += -L$(SENSORS_LIBDIR) -lsensors + +linux-lmsensors.o: components/lmsensors/linux-lmsensors.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/lmsensors/linux-lmsensors.c -o linux-lmsensors.o diff --git a/src/components/lmsensors/configure b/src/components/lmsensors/configure new file mode 100755 index 0000000..c5f2bb8 --- /dev/null +++ b/src/components/lmsensors/configure @@ -0,0 +1,3728 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69. +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. 
Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. 
+BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME= +PACKAGE_TARNAME= +PACKAGE_VERSION= +PACKAGE_STRING= +PACKAGE_BUGREPORT= +PACKAGE_URL= + +ac_subst_vars='LTLIBOBJS +LIBOBJS +SENSORS_INCDIR +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +with_sensors_incdir +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. 
+cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures this package to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + + cat <<\_ACEOF + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) +--with-sensors_incdir=<path> Specify path to sensors includes + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a + nonstandard directory <lib dir> + LIBS libraries to pass to the linker, e.g. -l<library> + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if + you have headers in a nonstandard directory <include dir> + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to the package provider. +_ACEOF +ac_status=$? 
+fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +configure +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. 
+This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by $as_me, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. 
+# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +# Pass 1 appends every kept argument to ac_configure_args0. Pass 2 appends +# each argument to ac_configure_args1 and then to the final ac_configure_args, +# but skips that second append for an argument of one of the listed +# self-contained forms when the same quoted word occurs again later in +# ac_configure_args0, so only its last occurrence survives. Any other word +# starting with a dash sets ac_must_keep_next, which exempts the following +# word from that duplicate check. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + # Rewrite each embedded apostrophe as '\'' so the word can be wrapped in + # apostrophes when it is appended below. + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +# For signals 1, 2, 13 and 15, record the signal number in ac_signal and call +# as_fn_exit 1. ac_signal is reset to 0 after the loop, and the exit-trap code +# above prints the caught signal only when ac_signal is not 0. +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. +# Each PACKAGE_* shell variable below is appended to confdefs.h as a +# string-valued #define; the unquoted _ACEOF delimiter lets the shell expand it. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +# Each candidate that is not NONE, is not /dev/null and is readable is copied +# to descriptor 5 with a "| " prefix and then sourced; a failing source is +# reported through as_fn_error. +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +# Source the cache file when it is readable, a regular file and not /dev/null +# (names that are not absolute are sourced with a leading ./); when it is not +# readable, create it empty. +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +# Check whether --with-sensors_incdir was given. +if test "${with_sensors_incdir+set}" = set; then : + withval=$with_sensors_incdir; SENSORS_INCDIR=$withval + CFLAGS="$CFLAGS -I$withval" + ac_fn_c_check_header_compile "$LINENO" "sensors.h" "ac_cv_header_sensors_h" "#include +" +if test "x$ac_cv_header_sensors_h" = xyes; then : + +else + as_fn_error $? "sensors.h not found" "$LINENO" 5 +fi + + +else + as_fn_error $? 
"Component requires path to sensors includes" "$LINENO" 5 +fi + + + +ac_config_files="$ac_config_files Makefile.lmsensors" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. 
+ sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Transform confdefs.h into DEFS. +# Protect against shell expansion while executing Makefile rules. +# Protect against Makefile macro expansion. +# +# If the first sed substitution is executed (which looks for macros that +# take arguments), then branch to the quote section. Otherwise, +# look for a macro that doesn't take arguments. 
+ac_script=' +:mline +/\\$/{ + N + s,\\\n,, + b mline +} +t clear +:clear +s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g +t quote +s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g +t quote +b any +:quote +s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g +s/\[/\\&/g +s/\]/\\&/g +s/\$/$$/g +H +:any +${ + g + s/^\n// + s/\n/ /g + p +} +' +DEFS=`sed -n "$ac_script" confdefs.h` + + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. 
Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) 
+IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. 
+as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. 
+as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. 
+ac_log=" +This file was extended by $as_me, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + +Configuration files: +$config_files + +Report bugs to the package provider." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. 
+ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h | --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." 
;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "Makefile.lmsensors") CONFIG_FILES="$CONFIG_FILES Makefile.lmsensors" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! 
-d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + + +eval set X " :F $CONFIG_FILES " +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. 
+ case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. 
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. +ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? 
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + + + + esac + +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. 
+# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/src/components/lmsensors/configure.in b/src/components/lmsensors/configure.in new file mode 100644 index 0000000..e46e326 --- /dev/null +++ b/src/components/lmsensors/configure.in @@ -0,0 +1,18 @@ +# Process this file with autoconf to produce a configure script. 
+# File: components/lmsensors/configure.in +# CVS: $Id$ + +AC_INIT +AC_ARG_WITH(sensors_incdir, + [--with-sensors_incdir= Specify path to sensors includes], + [SENSORS_INCDIR=$withval + CFLAGS="$CFLAGS -I$withval" + AC_CHECK_HEADER([sensors.h], + [], + [AC_MSG_ERROR([sensors.h not found])], + [#include ])], + [AC_MSG_ERROR([Component requires path to sensors includes])]) + +AC_SUBST(SENSORS_INCDIR) +AC_CONFIG_FILES([Makefile.lmsensors]) +AC_OUTPUT diff --git a/src/components/lmsensors/linux-lmsensors.c b/src/components/lmsensors/linux-lmsensors.c new file mode 100644 index 0000000..8ab5654 --- /dev/null +++ b/src/components/lmsensors/linux-lmsensors.c @@ -0,0 +1,679 @@ +/** + * @file linux-lmsensors.c + * @author Daniel Lucio + * @author Joachim Protze + * @author Heike Jagode + * jagode@eecs.utk.edu + * + * @ingroup papi_components + * + * + * LM_SENSORS component + * + * Tested version of lm_sensors: 3.1.1 + * + * @brief + * This file has the source code for a component that enables PAPI-C to access + * hardware monitoring sensors through the libsensors library. This code will + * dynamically create a native events table for all the sensors that can be + * accessed by the libsensors library. + * In order to learn more about libsensors, visit: (http://www.lm-sensors.org) + * + * Notes: + * - I used the ACPI and MX components to write this component. A lot of the + * code in this file mimics what other components already do. + * - The return values are scaled by 1000 because PAPI cannot return decimals. + * - A call of PAPI_read can take up to 2 seconds while using lm_sensors! + * - Please remember that libsensors uses the GPL license. 
+ */ + + +/* Headers required by libsensors */ +#include +#include +#include +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +/************************* DEFINES SECTION *********************************** + *******************************************************************************/ +// time in usecs +#define LM_SENSORS_REFRESHTIME 200000 + +/** Structure that stores private information of each event */ +typedef struct _lmsensors_register +{ + /* Used by the framework, which expects it to be != 0; createNativeEvents stores table index + 1 here */ + unsigned int selector; + /* This is the only information needed to locate a libsensors event */ + const sensors_chip_name *name; + int subfeat_nr; +} _lmsensors_register_t; + +/* + * The following structures mimic the ones used by other components. It is more + * convenient to use them like that as programming with PAPI makes specific + * assumptions for them. + */ + +/** This structure is used to build the table of events */ +typedef struct _lmsensors_native_event_entry +{ + _lmsensors_register_t resources; + /* "chip.featurelabel.subfeature", as formatted by createNativeEvents */ + char name[PAPI_MAX_STR_LEN]; + /* createNativeEvents stores a copy of name here */ + char description[PAPI_MAX_STR_LEN]; + /* NOTE(review): never assigned in the visible code (the assignments in createNativeEvents are commented out) */ + unsigned int count; +} _lmsensors_native_event_entry_t; + + +/** Wrapper around the per-event register information */ +typedef struct _lmsensors_reg_alloc +{ + _lmsensors_register_t ra_bits; +} _lmsensors_reg_alloc_t; + + +/** Control state: only remembers when the reading cache was last refreshed */ +typedef struct _lmsensors_control_state +{ + /* PAPI_get_real_usec() timestamp taken when cached_counts was last refilled */ + long_long lastupdate; +} _lmsensors_control_state_t; + + +/** Component context; holds nothing but a control state */ +typedef struct _lmsensors_context +{ + _lmsensors_control_state_t state; +} _lmsensors_context_t; + + + +/************************* GLOBALS SECTION *********************************** + *******************************************************************************/ +/* This table contains the LM_SENSORS native events */ +static _lmsensors_native_event_entry_t *lm_sensors_native_table; +/* number of events in the table */ +static int num_events = 0; +static long_long *cached_counts = NULL; // used for 
caching readings + + +/* libsensors entry points; link_lmsensors_libraries() resolves them with dlsym() */ +static int (*sensors_initPtr)(FILE *input); +static void (*sensors_cleanupPtr)(void); +static int (*sensors_snprintf_chip_namePtr)(char *str, size_t size, + const sensors_chip_name *chip); +static char *(*sensors_get_labelPtr)(const sensors_chip_name *name, const sensors_feature *feature); +static int (*sensors_get_valuePtr)(const sensors_chip_name *name, int subfeat_nr, + double *value); +static const sensors_chip_name *(*sensors_get_detected_chipsPtr)(const sensors_chip_name + *match, int *nr); +static const sensors_feature *(*sensors_get_featuresPtr)(const sensors_chip_name *name, int *nr); +static const sensors_subfeature *(*sensors_get_all_subfeaturesPtr)(const sensors_chip_name *name, + const sensors_feature *feature, int *nr); + +// handle returned by dlopen() for libsensors.so +static void* dl1 = NULL; + +static int link_lmsensors_libraries (); + +papi_vector_t _lmsensors_vector; + +/****************************************************************************** + ******** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT ******** + *****************************************************************************/ +/* + * Counts the events available on this system: one per subfeature of every + * feature of every chip that libsensors enumerates. + * _lmsensors_init_component uses the result to size the event tables. + */ +static unsigned +detectSensors( void ) +{ + unsigned id = 0; + int chip_nr = 0; + const sensors_chip_name *chip_name; + + /* Loop through all the chips, features, subfeatures found */ + while ( ( chip_name = + sensors_get_detected_chipsPtr( NULL, &chip_nr ) ) != NULL ) { + int a = 0, b; + const sensors_feature *feature; + + while ( ( feature = sensors_get_featuresPtr( chip_name, &a ) ) ) { + b = 0; + while ( ( sensors_get_all_subfeaturesPtr( chip_name, feature, + &b ) ) ) { + id++; + } + } + } + + return id; +} + + +/* + * Fills lm_sensors_native_table with one entry per subfeature and returns the + * number of subfeatures processed. Features whose label cannot be obtained are + * skipped, so the result can be smaller than what detectSensors reported. + */ +static unsigned +createNativeEvents( void ) +{ + unsigned id = 0; + unsigned int count; + + int chip_nr = 0; + const sensors_chip_name *chip_name; + + /* component 
name and description */ + strcpy( _lmsensors_vector.cmp_info.short_name, "lm_sensors" ); + strcpy( _lmsensors_vector.cmp_info.description, + "lm-sensors provides tools for monitoring the hardware health" ); + + + /* Loop through all the chips found */ + while ( ( chip_name = + sensors_get_detected_chipsPtr( NULL, &chip_nr ) ) != NULL ) { + int a, b; + const sensors_feature *feature; + const sensors_subfeature *sub; + char chipnamestring[PAPI_MIN_STR_LEN]; + + // lm_sensors_native_table[id].count = 0; + + /* get chip name from its internal representation */ + sensors_snprintf_chip_namePtr( chipnamestring, + PAPI_MIN_STR_LEN, chip_name ); + + a = 0; + + /* Loop through all the features found */ + while ( ( feature = sensors_get_featuresPtr( chip_name, &a ) ) ) { + char *featurelabel; + + if ( !( featurelabel = sensors_get_labelPtr( chip_name, feature ))) { + fprintf( stderr, "ERROR: Can't get label of feature %s!\n", + feature->name ); + continue; + } + + b = 0; + + /* Loop through all the subfeatures found */ + while ((sub=sensors_get_all_subfeaturesPtr(chip_name,feature,&b))) { + + count = 0; + + /* Save native event data */ + sprintf( lm_sensors_native_table[id].name, "%s.%s.%s", + chipnamestring, featurelabel, sub->name ); + + strncpy( lm_sensors_native_table[id].description, + lm_sensors_native_table[id].name, PAPI_MAX_STR_LEN ); + lm_sensors_native_table[id].description[PAPI_MAX_STR_LEN-1] = '\0'; + + /* The selector has to be !=0 . 
Starts with 1 */ + lm_sensors_native_table[id].resources.selector = id + 1; + + /* Save the actual references to this event */ + lm_sensors_native_table[id].resources.name = chip_name; + lm_sensors_native_table[id].resources.subfeat_nr = sub->number; + + count = sub->number; + + /* increment the table index counter */ + id++; + } + + // lm_sensors_native_table[id].count = count + 1; + /* release the label string obtained from sensors_get_labelPtr */ + free( featurelabel ); + } + } + + /* Return the number of events created */ + return id; +} + +/* + * Reads the current value of entry 'event_id' of lm_sensors_native_table + * through sensors_get_valuePtr. The reading is multiplied by 1000 and converted + * to long_long, because PAPI cannot return decimal numbers. + * Returns -1, after printing a message to stderr, when libsensors reports a + * negative status. + * NOTE(review): event_id is not range-checked here; callers are expected to + * pass an index below num_events. + */ + +static long_long +getEventValue( unsigned event_id ) +{ + double value; + int res; + + res = sensors_get_valuePtr( lm_sensors_native_table[event_id].resources.name, + lm_sensors_native_table[event_id].resources. + subfeat_nr, &value ); + + if ( res < 0 ) { + fprintf( stderr, "libsensors(): Could not read event #%d!\n", + event_id ); + return -1; + } + + return ( ( long_long ) ( value * 1000 ) ); +} + + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized. + * Nothing is set up here; it always returns PAPI_OK. + */ +static int +_lmsensors_init_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + return PAPI_OK; +} + + +/* Component initialization: links libsensors at run time, initializes it, + * counts the sensors and builds the native event table. + * Returns PAPI_OK on success. On failure a message is stored in + * cmp_info.disabled_reason and a non-zero code is returned. + * This routine is called when the PAPI process is initialized + * (IE PAPI_library_init) + */ +static int +_lmsensors_init_component( int cidx ) +{ + int res; + (void) cidx; + + /* link in all the lmsensor libraries and resolve the symbols we need to use */ + if (link_lmsensors_libraries() != PAPI_OK) { + SUBDBG ("Dynamic link of lmsensors libraries failed, component will be disabled.\n"); + SUBDBG ("See disable reason in 
papi_component_avail output for more details.\n"); + return (PAPI_ENOSUPP); + } + + /* Initialize libsensors library */ + if ( ( res = sensors_initPtr( NULL ) ) != 0 ) { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "Cannot enable libsensors",PAPI_MAX_STR_LEN); + return res; + } + + /* Create dynamic events table */ + num_events = detectSensors( ); + SUBDBG("Found %d sensors\n",num_events); + + _lmsensors_vector.cmp_info.num_mpx_cntrs = num_events; + _lmsensors_vector.cmp_info.num_cntrs = num_events; + + if ( ( lm_sensors_native_table = + calloc( num_events, sizeof ( _lmsensors_native_event_entry_t ))) + == NULL ) { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "Could not malloc room",PAPI_MAX_STR_LEN); + return PAPI_ENOMEM; + } + + cached_counts = (long long*) calloc(num_events, sizeof(long long)); + + if (cached_counts == NULL) { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "Could not malloc room",PAPI_MAX_STR_LEN); + /* do not leak the event table allocated just above */ + free( lm_sensors_native_table ); + lm_sensors_native_table = NULL; + return PAPI_ENOMEM; + } + + if ( ( unsigned ) num_events != createNativeEvents( ) ) { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "LM_SENSOR number mismatch",PAPI_MAX_STR_LEN); + /* release both tables; the pointers are reset so that _lmsensors_shutdown_component cannot free cached_counts a second time */ + free( cached_counts ); + cached_counts = NULL; + free( lm_sensors_native_table ); + lm_sensors_native_table = NULL; + return PAPI_ECMP; + } + + _lmsensors_vector.cmp_info.num_native_events=num_events; + _lmsensors_vector.cmp_info.num_cntrs=num_events; + + return PAPI_OK; +} + +/* + * Link the necessary lmsensors libraries to use the lmsensors + * component. If any of them cannot be found, then the lmsensors + * component will just be disabled. This is done at runtime so that a + * version of PAPI built with the lmsensors component can be + * installed and used on systems which have the lmsensors libraries + * installed and on systems where these libraries are not installed. 
+ */ +static int +link_lmsensors_libraries () +{ + /* Need to link in the lmsensors libraries, if not found disable the component */ + dl1 = dlopen("libsensors.so", RTLD_NOW | RTLD_GLOBAL); + if (!dl1) + { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "lmsensors library libsensors.so not found.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + sensors_initPtr = dlsym(dl1, "sensors_init"); + if (dlerror() != NULL) + { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "lmsensor function sensors_init.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + sensors_cleanupPtr = dlsym(dl1, "sensors_cleanup"); + if (dlerror() != NULL) + { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "lmsensor function sensors_cleanup.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + sensors_snprintf_chip_namePtr = dlsym(dl1, "sensors_snprintf_chip_name"); + if (dlerror() != NULL) + { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "lmsensor function sensors_snprintf_chip_name.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + sensors_get_labelPtr = dlsym(dl1, "sensors_get_label"); + if (dlerror() != NULL) + { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "lmsensor function sensors_get_label.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + sensors_get_valuePtr = dlsym(dl1, "sensors_get_value"); + if (dlerror() != NULL) + { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "lmsensor function sensors_get_value.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + sensors_get_detected_chipsPtr = dlsym(dl1, "sensors_get_detected_chips"); + if (dlerror() != NULL) + { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "lmsensor function sensors_get_detected_chips.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + sensors_get_featuresPtr = dlsym(dl1, "sensors_get_features"); + if (dlerror() != NULL) + { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "lmsensor function sensors_get_features.",PAPI_MAX_STR_LEN); + 
return ( PAPI_ENOSUPP ); + } + sensors_get_all_subfeaturesPtr = dlsym(dl1, "sensors_get_all_subfeatures"); + if (dlerror() != NULL) + { + strncpy(_lmsensors_vector.cmp_info.disabled_reason, + "lmsensor function sensors_get_all_subfeatures.",PAPI_MAX_STR_LEN); + return ( PAPI_ENOSUPP ); + } + + return ( PAPI_OK ); +} + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +static int +_lmsensors_init_control_state( hwd_control_state_t *ctl ) +{ + int i; + + for ( i = 0; i < num_events; i++ ) + cached_counts[i] = getEventValue( i ); + + ( ( _lmsensors_control_state_t * ) ctl )->lastupdate = + PAPI_get_real_usec( ); + return PAPI_OK; +} + + +/* + * + */ +static int +_lmsensors_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + + +/* + * + */ +static int +_lmsensors_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + + +/* + * + */ +static int +_lmsensors_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long_long ** events, int flags ) +{ + ( void ) ctx; + ( void ) flags; + long long start = PAPI_get_real_usec( ); + int i; + + _lmsensors_control_state_t *control=(_lmsensors_control_state_t *)ctl; + + if ( start - control->lastupdate > 200000 ) { // cache refresh + + for ( i = 0; i < num_events; i++ ) { + cached_counts[i] = getEventValue( i ); + } + control->lastupdate = PAPI_get_real_usec( ); + } + + *events = cached_counts; + return PAPI_OK; +} + + +static int +_lmsensors_shutdown_component( void ) +{ + if (cached_counts) + free(cached_counts); + + /* Call the libsensors cleaning function before leaving */ + sensors_cleanupPtr( ); + + return PAPI_OK; +} + +static int +_lmsensors_shutdown_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + + return PAPI_OK; +} + + + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, 
PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +static int +_lmsensors_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + ( void ) ctx; + ( void ) code; + ( void ) option; + return PAPI_OK; +} + + +static int +_lmsensors_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t * native, + int count, + hwd_context_t *ctx ) +{ + int i, index; + ( void ) ctx; + ( void ) ctl; + + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event; + native[i].ni_position = + lm_sensors_native_table[index].resources.selector - 1; + } + return PAPI_OK; +} + + +/* + * As I understand it, all data reported by these interfaces will be system wide + */ +static int +_lmsensors_set_domain( hwd_control_state_t *ctl, int domain ) +{ + (void) ctl; + if ( PAPI_DOM_ALL != domain ) + return ( PAPI_EINVAL ); + + return ( PAPI_OK ); +} + + +/* + * + */ +static int +_lmsensors_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + ( void ) ctl; + return PAPI_OK; +} + + +/* + * Native Event functions + */ +static int +_lmsensors_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + + switch ( modifier ) { + case PAPI_ENUM_FIRST: + *EventCode = 0; + + return PAPI_OK; + break; + + case PAPI_ENUM_EVENTS: + { + int index = *EventCode; + + if ( index < num_events - 1 ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else + return PAPI_ENOEVNT; + + break; + } + default: + return PAPI_EINVAL; + } + return PAPI_EINVAL; +} + +/* + * + */ +static int +_lmsensors_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode; + + if (index>=0 && index=0 && index +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +/** describes a single counter with its properties */ +typedef struct counter_info_struct +{ + int idx; + char *name; + char *description; + char *unit; + unsigned long long value; +} 
counter_info; + +typedef struct +{ + int count; + char **data; +} string_list; + + +/** describes the infos collected from a mounted Lustre filesystem */ +typedef struct lustre_fs_struct +{ + char *proc_file; + char *proc_file_readahead; + counter_info *write_cntr; + counter_info *read_cntr; + counter_info *readahead_cntr; + struct lustre_fs_struct *next; +} lustre_fs; + +#define LUSTRE_MAX_COUNTERS 100 +#define LUSTRE_MAX_COUNTER_TERMS LUSTRE_MAX_COUNTERS + +typedef counter_info LUSTRE_register_t; +typedef counter_info LUSTRE_native_event_entry_t; +typedef counter_info LUSTRE_reg_alloc_t; + + +typedef struct LUSTRE_control_state +{ + long long start_count[LUSTRE_MAX_COUNTERS]; + long long current_count[LUSTRE_MAX_COUNTERS]; + long long difference[LUSTRE_MAX_COUNTERS]; + int which_counter[LUSTRE_MAX_COUNTERS]; + int num_events; +} LUSTRE_control_state_t; + + +typedef struct LUSTRE_context +{ + LUSTRE_control_state_t state; +} LUSTRE_context_t; + +/* Default path to lustre stats */ +#ifdef FAKE_LUSTRE +const char proc_base_path[] = "./components/lustre/fake_proc/fs/lustre/"; +#else +const char proc_base_path[] = "/proc/fs/lustre/"; +#endif + +static counter_info **lustre_native_table = NULL; +static int num_events = 0; +static int table_size = 32; + +/* mount Lustre fs are kept in a list */ +static lustre_fs *root_lustre_fs = NULL; + +papi_vector_t _lustre_vector; + +/****************************************************************************** + ******** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT ******** + *****************************************************************************/ +static int resize_native_table() { + SUBDBG("ENTER:\n"); + counter_info** new_table; + int new_size = table_size*2; + new_table = (counter_info**)papi_calloc(new_size, sizeof(counter_info*)); + if (NULL==new_table) { + SUBDBG("EXIT: PAPI_ENOMEM\n"); + return PAPI_ENOMEM; + } + if ( lustre_native_table) { + memcpy(new_table, lustre_native_table, sizeof(counter_info*) 
* table_size ); + papi_free(lustre_native_table); + } + lustre_native_table = new_table; + table_size*=2; + SUBDBG("EXIT: PAPI_OK\n"); + return PAPI_OK; +} + +/** + * add a counter to the list of available counters + * @param name the short name of the counter + * @param desc a longer description + * @param unit the unit for this counter + */ +static counter_info * +addCounter( const char *name, const char *desc, const char *unit ) +{ + SUBDBG("ENTER: name: %s, desc: %s, unit: %s\n", name, desc, unit); + + counter_info *cntr; + + if ( num_events >= table_size ) + if (PAPI_OK != resize_native_table()) { + SUBDBG("EXIT: can not resize native table\n" ); + return NULL; + } + + cntr = malloc( sizeof ( counter_info ) ); + + if ( cntr == NULL ) { + SUBDBG("EXIT: can not allocate memory for new counter\n" ); + return NULL; + } + + cntr->idx=num_events; + cntr->name = strdup( name ); + cntr->description = strdup( desc ); + cntr->unit = strdup( unit ); + cntr->value = 0; + + lustre_native_table[num_events]=cntr; + + num_events++; + +SUBDBG("EXIT: cntr: %p\n", cntr); + return cntr; +} + +/** + * adds a Lustre fs to the fs list and creates the counters for it + * @param name fs name + * @param procpath_general path to the 'stats' file in /proc/fs/lustre/... for this fs + * @param procpath_readahead path to the 'readahead' file in /proc/fs/lustre/... 
for this fs + */ +static int +addLustreFS( const char *name, + const char *procpath_general, + const char *procpath_readahead ) +{ + lustre_fs *fs, *last; + char counter_name[512]; + FILE *fff; + + SUBDBG("Adding lustre fs\n"); + + fs = malloc( sizeof ( lustre_fs ) ); + if ( fs == NULL ) { + SUBDBG("can not allocate memory for new Lustre FS description\n" ); + return PAPI_ENOMEM; + } + + fs->proc_file=strdup(procpath_general); + fff = fopen( procpath_general, "r" ); + if ( fff == NULL ) { + SUBDBG("can not open '%s'\n", procpath_general ); + free(fs); + return PAPI_ESYS; + } + fclose(fff); + + fs->proc_file_readahead = strdup(procpath_readahead); + fff = fopen( procpath_readahead, "r" ); + if ( fff == NULL ) { + SUBDBG("can not open '%s'\n", procpath_readahead ); + free(fs); + return PAPI_ESYS; + } + fclose(fff); + + sprintf( counter_name, "%s_llread", name ); + if (NULL == (fs->read_cntr = addCounter( counter_name, + "bytes read on this lustre client", + "bytes" ))) { + free(fs); + return PAPI_ENOMEM; + } + + sprintf( counter_name, "%s_llwrite", name ); + if ( NULL == (fs->write_cntr = addCounter( counter_name, + "bytes written on this lustre client", + "bytes" ))) { + free(fs->read_cntr); + free(fs); + return PAPI_ENOMEM; + } + + sprintf( counter_name, "%s_wrong_readahead", name ); + if ( NULL == (fs->readahead_cntr = addCounter( counter_name, + "bytes read but discarded due to readahead", + "bytes" ))) { + free(fs->read_cntr); + free(fs->write_cntr); + free(fs); + return PAPI_ENOMEM; + } + + fs->next = NULL; + + /* Insert into the linked list */ + /* Does this need locking? 
*/ + if ( root_lustre_fs == NULL ) { + root_lustre_fs = fs; + } else { + last = root_lustre_fs; + + while ( last->next != NULL ) + last = last->next; + + last->next = fs; + } + return PAPI_OK; +} + + +/** + * goes through proc and tries to discover all mounted Lustre fs + */ +static int +init_lustre_counters( void ) +{ + SUBDBG("ENTER:\n"); + char lustre_dir[PATH_MAX]; + char path[PATH_MAX]; + char path_readahead[PATH_MAX],path_stats[PATH_MAX]; + char *ptr; + char fs_name[100]; + int found_luster_fs = 0; + int idx = 0; + int tmp_fd; + DIR *proc_dir; + struct dirent *entry; + + sprintf(lustre_dir,"%s/llite",proc_base_path); + + proc_dir = opendir( lustre_dir ); + if ( proc_dir == NULL ) { + SUBDBG("EXIT: PAPI_ESYS (Cannot open %s)\n",lustre_dir); + return PAPI_ESYS; + } + + while ( (entry = readdir( proc_dir )) != NULL ) { + memset( path, 0, PATH_MAX ); + snprintf( path, PATH_MAX - 1, "%s/%s/stats", lustre_dir, + entry->d_name ); + SUBDBG("checking for file %s\n", path); + + if ( ( tmp_fd = open( path, O_RDONLY ) ) == -1 ) { + SUBDBG("Path: %s, can not be opened.\n", path); + continue; + } + + close( tmp_fd ); + + /* erase \r and \n at the end of path */ + /* why is this necessary? 
*/ + + idx = strlen( path ); + idx--; + + while ( path[idx] == '\r' || path[idx] == '\n' ) + path[idx--] = 0; + + /* Lustre paths are of type server-UUID */ + + idx = 0; + + ptr = strstr(path,"llite/") + 6; + if (ptr == NULL) { + SUBDBG("Path: %s, missing llite directory, performance event not created.\n", path); + continue; + } + + strncpy(fs_name, ptr, sizeof(fs_name)-1); + fs_name[sizeof(fs_name)-1] = '\0'; + + SUBDBG("found Lustre FS: %s\n", fs_name); + + snprintf( path_stats, PATH_MAX - 1, + "%s/%s/stats", + lustre_dir, + entry->d_name ); + SUBDBG("Found file %s\n", path_stats); + + snprintf( path_readahead, PATH_MAX - 1, + "%s/%s/read_ahead_stats", + lustre_dir, + entry->d_name ); + SUBDBG("Now checking for file %s\n", path_readahead); + + strcpy( ptr, "read_ahead_stats" ); + addLustreFS( fs_name, path_stats, path_readahead ); + found_luster_fs++; + } + closedir( proc_dir ); + + if (found_luster_fs == 0) { + SUBDBG("EXIT: PAPI_ESYS (No luster file systems found)\n"); + return PAPI_ESYS; + } + + SUBDBG("EXIT: PAPI_OK\n"); + return PAPI_OK; +} + +/** + * updates all Lustre related counters + */ +static void +read_lustre_counter( ) +{ + lustre_fs *fs = root_lustre_fs; + FILE *fff; + char buffer[BUFSIZ]; + + while ( fs != NULL ) { + + /* read values from stats file */ + fff=fopen(fs->proc_file,"r" ); + if (fff != NULL) { + while(1) { + if (fgets(buffer,BUFSIZ,fff)==NULL) break; + + if (strstr( buffer, "write_bytes" )) { + sscanf(buffer,"%*s %*d %*s %*s %*d %*d %llu",&fs->write_cntr->value); + SUBDBG("Read %llu write_bytes\n",fs->write_cntr->value); + } + + if (strstr( buffer, "read_bytes" )) { + sscanf(buffer,"%*s %*d %*s %*s %*d %*d %llu",&fs->read_cntr->value); + SUBDBG("Read %llu read_bytes\n",fs->read_cntr->value); + } + } + fclose(fff); + } + + fff=fopen(fs->proc_file_readahead,"r"); + if (fff != NULL) { + while(1) { + if (fgets(buffer,BUFSIZ,fff)==NULL) break; + + if (strstr( buffer, "read but discarded")) { + sscanf(buffer,"%*s %*s %*s 
%llu",&fs->readahead_cntr->value); + SUBDBG("Read %llu discared\n",fs->readahead_cntr->value); + break; + } + } + fclose(fff); + } + fs = fs->next; + } +} + + +/** + * frees all allocated resources + */ +static void +host_finalize( void ) +{ + int i; + lustre_fs *fs, *next_fs; + counter_info *cntr; + + for(i=0;iname ); + free( cntr->description ); + free( cntr->unit ); + free( cntr ); + } + lustre_native_table[i]=NULL; + } + + fs = root_lustre_fs; + + while ( fs != NULL ) { + next_fs = fs->next; + free(fs->proc_file); + free(fs->proc_file_readahead); + free( fs ); + fs = next_fs; + } + + root_lustre_fs = NULL; +} + + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * Component setup and shutdown + */ + +static int +_lustre_init_component( int cidx ) +{ + SUBDBG("ENTER:\n"); + int ret = PAPI_OK; + + resize_native_table(); + ret=init_lustre_counters(); + if (ret!=PAPI_OK) { + strncpy(_lustre_vector.cmp_info.disabled_reason, + "No lustre filesystems found",PAPI_MAX_STR_LEN); + SUBDBG("EXIT: ret: %d\n", ret); + return ret; + } + + _lustre_vector.cmp_info.num_native_events=num_events; + _lustre_vector.cmp_info.CmpIdx = cidx; + + SUBDBG("EXIT: ret: %d\n", ret); + return ret; +} + + + + + +/* + * This is called whenever a thread is initialized + */ +static int +_lustre_init_thread( hwd_context_t * ctx ) +{ + (void) ctx; + + return PAPI_OK; +} + + +/* + * + */ +static int +_lustre_shutdown_component( void ) +{ + SUBDBG("ENTER:\n"); + host_finalize( ); + papi_free( lustre_native_table ); + lustre_native_table = NULL; + num_events = 0; + table_size = 32; + SUBDBG("EXIT:\n"); + return PAPI_OK; +} + +/* + * + */ +static int +_lustre_shutdown_thread( hwd_context_t * ctx ) +{ + ( void ) ctx; + + return PAPI_OK; +} + + + +/* + * Control of counters 
(Reading/Writing/Starting/Stopping/Setup) functions + */ +static int +_lustre_init_control_state( hwd_control_state_t *ctl ) +{ + LUSTRE_control_state_t *lustre_ctl = (LUSTRE_control_state_t *)ctl; + + memset(lustre_ctl->start_count,0,sizeof(long long)*LUSTRE_MAX_COUNTERS); + memset(lustre_ctl->current_count,0,sizeof(long long)*LUSTRE_MAX_COUNTERS); + + return PAPI_OK; +} + + +/* + * + */ +static int +_lustre_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, + hwd_context_t *ctx ) +{ + SUBDBG("ENTER: ctl: %p, native: %p, count: %d, ctx: %p\n", ctl, native, count, ctx); + LUSTRE_control_state_t *lustre_ctl = (LUSTRE_control_state_t *)ctl; + ( void ) ctx; + int i, index; + + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event; + lustre_ctl->which_counter[i]=index; + native[i].ni_position = i; + } + + lustre_ctl->num_events=count; + SUBDBG("EXIT: PAPI_OK\n"); + return PAPI_OK; +} + + +/* + * + */ +static int +_lustre_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + + LUSTRE_control_state_t *lustre_ctl = (LUSTRE_control_state_t *)ctl; + int i; + + read_lustre_counter( ); + + for(i=0;inum_events;i++) { + lustre_ctl->current_count[i]= + lustre_native_table[lustre_ctl->which_counter[i]]->value; + } + + memcpy( lustre_ctl->start_count, + lustre_ctl->current_count, + LUSTRE_MAX_COUNTERS * sizeof ( long long ) ); + + return PAPI_OK; +} + + +/* + * + */ +static int +_lustre_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + + (void) ctx; + LUSTRE_control_state_t *lustre_ctl = (LUSTRE_control_state_t *)ctl; + int i; + + read_lustre_counter( ); + + for(i=0;inum_events;i++) { + lustre_ctl->current_count[i]= + lustre_native_table[lustre_ctl->which_counter[i]]->value; + } + + return PAPI_OK; + +} + + + +/* + * + */ +static int +_lustre_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + (void) ctx; + ( void ) flags; + + LUSTRE_control_state_t *lustre_ctl = 
(LUSTRE_control_state_t *)ctl; + int i; + + read_lustre_counter( ); + + for(i=0;inum_events;i++) { + lustre_ctl->current_count[i]= + lustre_native_table[lustre_ctl->which_counter[i]]->value; + lustre_ctl->difference[i]=lustre_ctl->current_count[i]- + lustre_ctl->start_count[i]; + } + + *events = lustre_ctl->difference; + + return PAPI_OK; + +} + + + + +/* + * + */ +static int +_lustre_reset( hwd_context_t * ctx, hwd_control_state_t * ctrl ) +{ + + /* re-initializes counter_start values to current */ + + _lustre_start(ctx,ctrl); + + return PAPI_OK; +} + + +/* + * Unused lustre write function + */ +/* static int */ +/* _lustre_write( hwd_context_t * ctx, hwd_control_state_t * ctrl, long long *from ) */ +/* { */ +/* ( void ) ctx; */ +/* ( void ) ctrl; */ +/* ( void ) from; */ + +/* return PAPI_OK; */ +/* } */ + + +/* + * Functions for setting up various options + */ + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +static int +_lustre_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ + ( void ) ctx; + ( void ) code; + ( void ) option; + + return PAPI_OK; +} + + +/* + * This function can be used to set the event set level domains + * where the events should be counted. In particular: PAPI_DOM_USER, + * PAPI_DOM_KERNEL PAPI_DOM_OTHER. But the lustre component does not + * provide a field in its control_state (LUSTRE_control_state_t) to + * save this information. It would also need some way to control when + * the counts get updated in order to support domain filters for + * event counting. + * + * So we just ignore this call. 
+ */ +static int +_lustre_set_domain( hwd_control_state_t * cntrl, int domain ) +{ + ( void ) cntrl; + ( void ) domain; + SUBDBG("ENTER: \n"); + + // this component does not allow limiting which domains will increment event counts + + SUBDBG("EXIT: PAPI_OK\n"); + return PAPI_OK; +} + + +/* + * + */ +static int +_lustre_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + SUBDBG("ENTER: EventCode: %#x, name: %p, len: %d\n", EventCode, name, len); + int event=EventCode; + + if (event >=0 && event < num_events) { + strncpy( name, lustre_native_table[event]->name, len-1 ); + name[len-1] = '\0'; + SUBDBG("EXIT: event name: %s\n", name); + return PAPI_OK; + } + SUBDBG("EXIT: PAPI_ENOEVNT\n"); + return PAPI_ENOEVNT; +} + + +/* + * + */ +static int +_lustre_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + SUBDBG("ENTER: EventCode: %#x, name: %p, len: %d\n", EventCode, name, len); + int event=EventCode; + + if (event >=0 && event < num_events) { + strncpy( name, lustre_native_table[event]->description, len-1 ); + name[len-1] = '\0'; + SUBDBG("EXIT: description: %s\n", name); + return PAPI_OK; + } + SUBDBG("EXIT: PAPI_ENOEVNT\n"); + return PAPI_ENOEVNT; +} + + +/* + * + */ +static int +_lustre_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + SUBDBG("ENTER: EventCode: %p, modifier: %d\n", EventCode, modifier); + + if ( modifier == PAPI_ENUM_FIRST ) { + if (num_events==0) return PAPI_ENOEVNT; + *EventCode = 0; + SUBDBG("EXIT: *EventCode: %#x\n", *EventCode); + return PAPI_OK; + } + + if ( modifier == PAPI_ENUM_EVENTS ) { + int index = *EventCode; + + if ((index+1 < num_events) && lustre_native_table[index + 1]) { + *EventCode = *EventCode + 1; + SUBDBG("EXIT: *EventCode: %#x\n", *EventCode); + return PAPI_OK; + } else { + SUBDBG("EXIT: PAPI_ENOEVNT\n"); + return PAPI_ENOEVNT; + } + } + + + SUBDBG("EXIT: PAPI_EINVAL\n"); + return PAPI_EINVAL; +} + + +/* + * + */ +papi_vector_t _lustre_vector = { + .cmp_info = { + /* component 
information (unspecified values initialized to 0) */ + .name = "lustre", + .short_name = "lustre", + .version = "1.9", + .description = "Lustre filesystem statistics", + .num_mpx_cntrs = LUSTRE_MAX_COUNTERS, + .num_cntrs = LUSTRE_MAX_COUNTERS, + .default_domain = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + .available_domains = PAPI_DOM_ALL, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( LUSTRE_context_t ), + .control_state = sizeof ( LUSTRE_control_state_t ), + .reg_value = sizeof ( LUSTRE_register_t ), + .reg_alloc = sizeof ( LUSTRE_reg_alloc_t ), + }, + + /* function pointers in this component */ + .init_thread = _lustre_init_thread, + .init_component = _lustre_init_component, + .init_control_state = _lustre_init_control_state, + .start = _lustre_start, + .stop = _lustre_stop, + .read = _lustre_read, + .shutdown_thread = _lustre_shutdown_thread, + .shutdown_component = _lustre_shutdown_component, + .ctl = _lustre_ctl, + .update_control_state = _lustre_update_control_state, + .set_domain = _lustre_set_domain, + .reset = _lustre_reset, + + .ntv_enum_events = _lustre_ntv_enum_events, + .ntv_code_to_name = _lustre_ntv_code_to_name, + .ntv_code_to_descr = _lustre_ntv_code_to_descr, + +}; + + + + diff --git a/src/components/lustre/tests/Makefile b/src/components/lustre/tests/Makefile new file mode 100644 index 0000000..197505f --- /dev/null +++ b/src/components/lustre/tests/Makefile @@ -0,0 +1,20 @@ +NAME=lustre +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = lustre_basic + +lustre_tests: $(TESTS) + +lustre_basic: lustre_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o lustre_basic 
lustre_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o + + + + + diff --git a/src/components/lustre/tests/lustre_basic.c b/src/components/lustre/tests/lustre_basic.c new file mode 100644 index 0000000..a214ab0 --- /dev/null +++ b/src/components/lustre/tests/lustre_basic.c @@ -0,0 +1,131 @@ +/** + * @author Vince Weaver + * + * test case for lustre component + * + * + * @brief + * Tests basic lustre functionality + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 + +int main (int argc, char **argv) +{ + + int retval,cid,numcmp; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int total_events=0; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + int quiet=0; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!quiet) { + printf("Trying all lustre events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname,"lustre")) { + if (!quiet) printf("\tFound lustre component %d - %s\n", cid, cmpinfo->name); + } + else { + continue; + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + if (!quiet) printf(" %s ",event_name); + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + 
retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + if (!quiet) printf(" value: %lld\n",values[0]); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_cleanup_eventset()",retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_destroy_eventset()",retval); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No lustre events found",0); + } + + if (!quiet) { + printf("Note: for this test the values are expected to all be 0 as no I/O happens during the test.\n"); + } + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/components/micpower/README b/src/components/micpower/README new file mode 100644 index 0000000..ba48059 --- /dev/null +++ b/src/components/micpower/README @@ -0,0 +1,44 @@ +This is a component for accessing power readings reported on Intel MIC cards. +The values are reported in /sys/class/micras/power + +# cat /sys/class/micras/power +115000000 +113000000 +113000000 +129000000 +38000000 +29000000 +46000000 +0 0 1033000 +0 0 1501000 +0 0 1000000 + +This corresponds to the reading portions of the following MrRspPower structure. 
+ +typedef struct mr_rsp_pws { /* Power status */ + uint32_t prr; /* Current reading, in uW */ + uint8_t p_val; /* Valid bits, power */ +} MrRspPws; + +typedef struct mr_rsp_vrr { /* Voltage regulator status */ + uint32_t pwr; /* Power reading, in uW */ + uint32_t cur; /* Current, in uA */ + uint32_t volt; /* Voltage, in uV */ + uint8_t p_val; /* Valid bits, power */ + uint8_t c_val; /* Valid bits, current */ + uint8_t v_val; /* Valid bits, voltage */ +} MrRspVrr; + +typedef struct mr_rsp_power { + MrRspPws tot0; /* Total power, win 0 */ + MrRspPws tot1; /* Total power, win 1 */ + MrRspPws pcie; /* PCI-E connector power */ + MrRspPws inst; /* Instantaneous power */ + MrRspPws imax; /* Max Instantaneous power */ + MrRspPws c2x3; /* 2x3 connector power */ + MrRspPws c2x4; /* 2x4 connector power */ + MrRspVrr vccp; /* Core rail */ + MrRspVrr vddg; /* Uncore rail */ + MrRspVrr vddq; /* Memory subsystem rail */ +} MrRspPower; + diff --git a/src/components/micpower/Rules.micpower b/src/components/micpower/Rules.micpower new file mode 100644 index 0000000..3c0e851 --- /dev/null +++ b/src/components/micpower/Rules.micpower @@ -0,0 +1,7 @@ +# $Id$ + +COMPSRCS += components/micpower/linux-micpower.c +COMPOBJS += linux-micpower.o + +linux-micpower.o: components/micpower/linux-micpower.c components/micpower/linux-micpower.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/micpower/linux-micpower.c -o linux-micpower.o diff --git a/src/components/micpower/linux-micpower.c b/src/components/micpower/linux-micpower.c new file mode 100644 index 0000000..83cb472 --- /dev/null +++ b/src/components/micpower/linux-micpower.c @@ -0,0 +1,516 @@ +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#include "linux-micpower.h" + +/* Intel says +---- +The power measurements can be obtained from the host as well as the MIC card +over a 50msec interval. 
The SMC is designed to sample power consumption only +every 50mSecs. +---- +**/ +#define REFRESH_LAT 50000 + +#define INVALID_RESULT -1000000L +#define MICPOWER_NUMBER_OF_NATIVE_EVENTS 16 + +papi_vector_t _micpower_vector; + +static MICPOWER_native_event_entry_t _micpower_native_events[] = { + { .name = "tot0", + .units = "uW", + .description = "Total power, win 0", + .resources.selector = 1 + }, + { .name = "tot1", + .units = "uW", + .description = "Total power, win 1", + .resources.selector = 2 + }, + { .name = "pcie", + .units = "uW", + .description = "PCI-E connector power", + .resources.selector = 3 + }, + { .name = "inst", + .units = "uW", + .description = "Instantaneous power", + .resources.selector = 4 + }, + { .name = "imax", + .units = "uW", + .description = "Max Instantaneous power", + .resources.selector = 5 + }, + { .name = "c2x3", + .units = "uW", + .description = "2x3 connector power", + .resources.selector = 6 + }, + { .name = "c2x4", + .units = "uW", + .description = "2x4 connector power", + .resources.selector = 7 + }, + { .name = "vccp:pwr", + .units = "uW", + .description = "Core rail; Power reading", + .resources.selector = 8 + }, + { .name = "vccp:cur", + .units = "uA", + .description = "Core rail; Current", + .resources.selector = 9 + }, + { .name = "vccp:volt", + .units = "uV", + .description = "Core rail; Voltage", + .resources.selector = 10 + }, + { .name = "vddg:pwr", + .units = "uW", + .description = "Uncore rail; Power reading", + .resources.selector = 11 + }, + { .name = "vddg:cur", + .units = "uA", + .description = "Uncore rail; Current", + .resources.selector = 12 + }, + { .name = "vddg:volt", + .units = "uV", + .description = "Uncore rail; Voltage", + .resources.selector = 13 + }, + { .name = "vddq:pwr", + .units = "uW", + .description = "Memory subsystem rail; Power reading", + .resources.selector = 14 + }, + { .name = "vddq:cur", + .units = "uA", + .description = "Memory subsystem rail; Current", + .resources.selector = 15 + }, + 
{ .name = "vddq:volt", + .units = "uV", + .description = "Memory subsystem rail; Voltage", + .resources.selector = 16 + } +}; + +static int num_events = 0; +static int is_initialized = 0; + +/***************************************************************************/ +/****** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *******/ +/***************************************************************************/ + +#if 0 +From Intel docs, power readings are exported via sysfs at +/sys/class/micras/power + +typedeftruct mr_rsp_pws { /* Power status */ + uint32_t prr; /* Current reading, in uW */ + uint8_t p_val; /* Valid bits, power */ +} MrRspPws; + +typedef struct mr_rsp_vrr { /* Voltage regulator status */ + uint32_t pwr; /* Power reading, in uW */ + uint32_t cur; /* Current, in uA */ + uint32_t volt; /* Voltage, in uV */ + uint8_t p_val; /* Valid bits, power */ + uint8_t c_val; /* Valid bits, current */ + uint8_t v_val; /* Valid bits, voltage */ +} MrRspVrr; + + +I am assuming for the purposes of this component that only +the readings are exported. 
+typedef struct mr_rsp_power { + MrRspPws tot0; /* Total power, win 0 */ + MrRspPws tot1; /* Total power, win 1 */ + MrRspPws pcie; /* PCI-E connector power */ + MrRspPws inst; /* Instantaneous power */ + MrRspPws imax; /* Max Instantaneous power */ + MrRspPws c2x3; /* 2x3 connector power */ + MrRspPws c2x4; /* 2x4 connector power */ + MrRspVrr vccp; /* Core rail */ + MrRspVrr vddg; /* Uncore rail */ + MrRspVrr vddq; /* Memory subsystem rail */ +} MrRspPower; + +#endif +static int +read_sysfs_file( long long* counts) +{ + FILE* fp = NULL; + int i; + int retval = 1; + fp = fopen( "/sys/class/micras/power", "r" ); + if (!fp) + return 0; + + for (i=0; i < MICPOWER_MAX_COUNTERS-9; i++) { + retval&= fscanf(fp, "%lld", &counts[i]); + } + for (i=MICPOWER_MAX_COUNTERS-9; i < MICPOWER_MAX_COUNTERS; i+=3) { + retval&= fscanf(fp, "%lld %lld %lld", &counts[i], &counts[i+1], &counts[i+2] ); + } + + fclose(fp); + return retval; +} + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * This is called whenever a thread is initialized + */ +static int +_micpower_init_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + return PAPI_OK; +} + + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +static int +_micpower_init_component( int cidx ) +{ + if ( is_initialized ) + return (PAPI_OK ); + + is_initialized = 1; + + /* Check that /sys/class/micras/power is readable */ + if ( 0 != access( "/sys/class/micras/power", R_OK ) ) { + strncpy(_micpower_vector.cmp_info.disabled_reason, + "Cannot read /sys/class/micras/power",PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + + + /* Export the total number of events available */ + num_events = + 
_micpower_vector.cmp_info.num_native_events = MICPOWER_NUMBER_OF_NATIVE_EVENTS; + + /* Export the component id */ + _micpower_vector.cmp_info.CmpIdx = cidx; + + return PAPI_OK; +} + + + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +static int +_micpower_init_control_state( hwd_control_state_t * ctl) +{ + int retval = 0; + MICPOWER_control_state_t *micpower_ctl = (MICPOWER_control_state_t *) ctl; + + retval = read_sysfs_file(micpower_ctl->counts); + + /* Set last access time for caching results */ + micpower_ctl->lastupdate = PAPI_get_real_usec(); + + return (retval)?PAPI_OK:PAPI_ESYS; +} + +static int +_micpower_start( hwd_context_t *ctx, hwd_control_state_t *ctl) +{ + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + +static int +_micpower_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long ** events, int flags) +{ + (void) flags; + (void) ctx; + int retval = 1; + + MICPOWER_control_state_t* control = (MICPOWER_control_state_t*) ctl; + long long now = PAPI_get_real_usec(); + + /* Only read the values from the kernel if enough time has passed */ + /* since the last read. Otherwise return cached values. 
*/ + + if ( now - control->lastupdate > REFRESH_LAT ) { + retval = read_sysfs_file(control->counts); + control->lastupdate = now; + } + + /* Pass back a pointer to our results */ + *events = control->counts; + + return (retval)?PAPI_OK:PAPI_ESYS; +} + +static int +_micpower_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + int retval = 1; + long long now = PAPI_get_real_usec(); + /* read values */ + MICPOWER_control_state_t* control = (MICPOWER_control_state_t*) ctl; + + if ( now - control->lastupdate > REFRESH_LAT ) { + retval = read_sysfs_file(control->counts); + control->lastupdate = now; + } + return (retval)?PAPI_OK:PAPI_ESYS; +} + +/* Shutdown a thread */ +static int +_micpower_shutdown_thread( hwd_context_t * ctx ) +{ + ( void ) ctx; + return PAPI_OK; +} + + +/* + * Clean up what was setup in micpower_init_component(). + */ +static int +_micpower_shutdown_component( ) +{ + if ( is_initialized ) { + is_initialized = 0; + num_events = 0; + } + return PAPI_OK; +} + + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +static int +_micpower_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + ( void ) ctx; + ( void ) code; + ( void ) option; + + return PAPI_OK; +} + + +static int +_micpower_update_control_state( hwd_control_state_t *ptr, + NativeInfo_t * native, int count, + hwd_context_t * ctx ) +{ + int i, index; + ( void ) ctx; + ( void ) ptr; + + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event&PAPI_NATIVE_AND_MASK; + native[i].ni_position = _micpower_native_events[index].resources.selector - 1; + } + return PAPI_OK; +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * 
PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +static int +_micpower_set_domain( hwd_control_state_t * cntl, int domain ) +{ + ( void ) cntl; + + if ( PAPI_DOM_ALL != domain ) + return PAPI_EINVAL; + + return PAPI_OK; +} + + +static int +_micpower_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + + +/* + * Native Event functions + */ +static int +_micpower_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + + int index; + + switch ( modifier ) { + + case PAPI_ENUM_FIRST: + + if (num_events==0) { + return PAPI_ENOEVNT; + } + *EventCode = 0; + + return PAPI_OK; + + + case PAPI_ENUM_EVENTS: + + index = *EventCode&PAPI_NATIVE_AND_MASK; + + if ( index < num_events - 1 ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + + default: + return PAPI_EINVAL; + } + return PAPI_EINVAL; +} + +/* + * + */ +static int +_micpower_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode&PAPI_NATIVE_AND_MASK; + + if ( index >= 0 && index < num_events ) { + strncpy( name, _micpower_native_events[index].name, len ); + return PAPI_OK; + } + return PAPI_ENOEVNT; +} + +/* + * + */ +static int +_micpower_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode&PAPI_NATIVE_AND_MASK; + + if ( index >= 0 && index < num_events ) { + strncpy( name, _micpower_native_events[index].description, len ); + return PAPI_OK; + } + return PAPI_ENOEVNT; +} + +static int +_micpower_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info) +{ + + int index = EventCode&PAPI_NATIVE_AND_MASK; + + if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT; + + strncpy( info->symbol, _micpower_native_events[index].name, 
sizeof(info->symbol)); + strncpy( info->long_descr, _micpower_native_events[index].description, sizeof(info->long_descr)); + strncpy( info->units, _micpower_native_events[index].units, sizeof(info->units)); + info->units[sizeof(info->units)-1] = '\0'; + + return PAPI_OK; +} + + + +/* + * + */ +papi_vector_t _micpower_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "micpower", + .short_name = "micpower", + .description = "Component for reading power on Intel Xeon Phi (MIC)", + .version = "5.1", + .num_mpx_cntrs = MICPOWER_NUMBER_OF_NATIVE_EVENTS, + .num_cntrs = MICPOWER_NUMBER_OF_NATIVE_EVENTS, + .default_domain = PAPI_DOM_ALL, + .available_domains = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + } + , + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( MICPOWER_context_t ), + .control_state = sizeof ( MICPOWER_control_state_t ), + .reg_value = sizeof ( MICPOWER_register_t ), + .reg_alloc = sizeof ( MICPOWER_reg_alloc_t ), + } + , + /* function pointers in this component */ + .init_thread = _micpower_init_thread, + .init_component = _micpower_init_component, + .init_control_state = _micpower_init_control_state, + .start = _micpower_start, + .stop = _micpower_stop, + .read = _micpower_read, + .shutdown_thread = _micpower_shutdown_thread, + .shutdown_component = _micpower_shutdown_component, + .ctl = _micpower_ctl, + + .update_control_state = _micpower_update_control_state, + .set_domain = _micpower_set_domain, + .reset = _micpower_reset, + + .ntv_enum_events = _micpower_ntv_enum_events, + .ntv_code_to_name = _micpower_ntv_code_to_name, + .ntv_code_to_descr = _micpower_ntv_code_to_descr, + .ntv_code_to_info = 
_micpower_ntv_code_to_info, +}; diff --git a/src/components/micpower/linux-micpower.h b/src/components/micpower/linux-micpower.h new file mode 100644 index 0000000..1a6ea5d --- /dev/null +++ b/src/components/micpower/linux-micpower.h @@ -0,0 +1,81 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-micpower.h + * @author James Ralph + * ralph@eecs.utk.edu + * + * @ingroup papi_components + * + * @brief Mic power component + * This file has the source code for a component that enables PAPI-C to access + * hardware monitoring sensors through a sysfs interface. This code + * will dynamically create a native events table for all the sensors that can + * be found under /sys/class/hwmon/hwmon[0-9]+. + * + * Notes: + * - Based heavily upon the lm-sensors component by Heike Jagode. + */ + +#ifndef _PAPI_MICPOWER_H_ +#define _PAPI_MICPOWER_H_ + +#include +#include + + + +/************************* DEFINES SECTION *********************************** + *******************************************************************************/ +/* this number assumes that there will never be more events than indicated */ +#define MICPOWER_MAX_COUNTERS 16 + +/** Structure that stores private information of each event */ +typedef struct { + unsigned int selector; +} MICPOWER_register_t; + +/* + * The following structures mimic the ones used by other components. It is more + * convenient to use them like that as programming with PAPI makes specific + * assumptions for them. 
+ */ + + + +/** This structure is used to build the table of events */ +typedef struct MICPOWER_native_event_entry +{ + char name[PAPI_MAX_STR_LEN]; + char units[PAPI_MIN_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + MICPOWER_register_t resources; +} MICPOWER_native_event_entry_t; + +typedef struct MICPOWER_reg_alloc +{ + MICPOWER_register_t ra_bits; +} MICPOWER_reg_alloc_t; + + +typedef struct MICPOWER_control_state +{ + long long counts[MICPOWER_MAX_COUNTERS]; // used for caching + long long lastupdate; +} MICPOWER_control_state_t; + + +typedef struct MICPOWER_context +{ + MICPOWER_control_state_t state; +} MICPOWER_context_t; + + + +/************************* GLOBALS SECTION *********************************** + *******************************************************************************/ + + +#endif /* _PAPI_MICPOWER_H_ */ diff --git a/src/components/micpower/tests/Makefile b/src/components/micpower/tests/Makefile new file mode 100644 index 0000000..b5df7e3 --- /dev/null +++ b/src/components/micpower/tests/Makefile @@ -0,0 +1,20 @@ +NAME=micpower +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = micpower_basic + +micpower_tests: $(TESTS) + +micpower_basic: micpower_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o micpower_basic micpower_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o + + + + + diff --git a/src/components/micpower/tests/micpower_basic.c b/src/components/micpower/tests/micpower_basic.c new file mode 100644 index 0000000..d50acf3 --- /dev/null +++ b/src/components/micpower/tests/micpower_basic.c @@ -0,0 +1,125 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Vince Weaver + * + * test case for micpower component + * Based on coretemp test code by Vince Weaver + * + * + * @brief + * Tests basic component functionality + */ + +#include +#include +#include + +#include 
"papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 + +int main (int argc, char **argv) +{ + + int retval,cid,numcmp; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int total_events=0; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname); + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + if (!strncmp(event_name,"micpower",8)) { + if (!TESTS_QUIET) printf("%#x %s ",code,event_name); + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + if (!TESTS_QUIET) printf(" value: %lld\n",values[0]); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_cleanup_eventset()",retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_destroy_eventset()",retval); + } + + total_events++; + } + r 
= PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + } + + if (total_events==0) { + + test_skip(__FILE__,__LINE__,"No coretemp events found",0); + } + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/components/mx/Rules.mx b/src/components/mx/Rules.mx new file mode 100644 index 0000000..72fb0ac --- /dev/null +++ b/src/components/mx/Rules.mx @@ -0,0 +1,7 @@ +# $Id$ + +COMPSRCS += components/mx/linux-mx.c +COMPOBJS += linux-mx.o + +linux-mx.o: components/mx/linux-mx.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/mx/linux-mx.c -o linux-mx.o \ No newline at end of file diff --git a/src/components/mx/linux-mx.c b/src/components/mx/linux-mx.c new file mode 100644 index 0000000..2da406d --- /dev/null +++ b/src/components/mx/linux-mx.c @@ -0,0 +1,547 @@ +/** + * @file linux-mx.c + * @brief A component for Myricom MX (Myrinet Express) + */ + + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" + +#include +#include +#include +#include +#include + +#define MX_MAX_COUNTERS 100 +#define MX_MAX_COUNTER_TERMS MX_MAX_COUNTERS + +#define LINELEN 128 + +typedef struct MX_register +{ + /* indicate which counters this event can live on */ + unsigned int selector; +} MX_register_t; + +typedef struct MX_native_event_entry +{ + /* description of the resources required by this native event */ + MX_register_t resources; + /* If it exists, then this is the name of this event */ + char *name; + /* If it exists, then this is the description of this event */ + char *description; +} MX_native_event_entry_t; + +typedef struct MX_reg_alloc +{ + MX_register_t ra_bits; +} MX_reg_alloc_t; + +typedef struct MX_control_state +{ + long long start_count[MX_MAX_COUNTERS]; + long long current_count[MX_MAX_COUNTERS]; + long long difference[MX_MAX_COUNTERS]; + int which_counter[MX_MAX_COUNTERS]; + int num_events; +} MX_control_state_t; + +typedef struct MX_context +{ + MX_control_state_t state; +} MX_context_t; + + +static const MX_native_event_entry_t 
mx_native_table[] = { + {{1, }, "LANAI_UPTIME", "Lanai uptime (seconds)"}, + {{2, }, "COUNTERS_UPTIME", "Counters uptime (seconds)"}, + {{3, }, "BAD_CRC8", "Bad CRC8 (Port 0)"}, + {{4, }, "BAD_CRC32", "Bad CRC32 (Port 0)"}, + {{5, }, "UNSTRIPPED_ROUTE", "Unstripped route (Port 0)"}, + {{6, }, "PKT_DESC_INVALID", "pkt_desc_invalid (Port 0)"}, + {{7, }, "RECV_PKT_ERRORS", "recv_pkt_errors (Port 0)"}, + {{8, }, "PKT_MISROUTED", "pkt_misrouted (Port 0)"}, + {{9, }, "DATA_SRC_UNKNOWN", "data_src_unknown"}, + {{10, }, "DATA_BAD_ENDPT", "data_bad_endpt"}, + {{11, }, "DATA_ENDPT_CLOSED", "data_endpt_closed"}, + {{12, }, "DATA_BAD_SESSION", "data_bad_session"}, + {{13, }, "PUSH_BAD_WINDOW", "push_bad_window"}, + {{14, }, "PUSH_DUPLICATE", "push_duplicate"}, + {{15, }, "PUSH_OBSOLETE", "push_obsolete"}, + {{16, }, "PUSH_RACE_DRIVER", "push_race_driver"}, + {{17, }, "PUSH_BAD_SEND_HANDLE_MAGIC", "push_bad_send_handle_magic"}, + {{18, }, "PUSH_BAD_SRC_MAGIC", "push_bad_src_magic"}, + {{19, }, "PULL_OBSOLETE", "pull_obsolete"}, + {{20, }, "PULL_NOTIFY_OBSOLETE", "pull_notify_obsolete"}, + {{21, }, "PULL_RACE_DRIVER", "pull_race_driver"}, + {{22, }, "ACK_BAD_TYPE", "ack_bad_type"}, + {{23, }, "ACK_BAD_MAGIC", "ack_bad_magic"}, + {{24, }, "ACK_RESEND_RACE", "ack_resend_race"}, + {{25, }, "LATE_ACK", "Late ack"}, + {{26, }, "ACK_NACK_FRAMES_IN_PIPE", "ack_nack_frames_in_pipe"}, + {{27, }, "NACK_BAD_ENDPT", "nack_bad_endpt"}, + {{28, }, "NACK_ENDPT_CLOSED", "nack_endpt_closed"}, + {{29, }, "NACK_BAD_SESSION", "nack_bad_session"}, + {{30, }, "NACK_BAD_RDMAWIN", "nack_bad_rdmawin"}, + {{31, }, "NACK_EVENTQ_FULL", "nack_eventq_full"}, + {{32, }, "SEND_BAD_RDMAWIN", "send_bad_rdmawin"}, + {{33, }, "CONNECT_TIMEOUT", "connect_timeout"}, + {{34, }, "CONNECT_SRC_UNKNOWN", "connect_src_unknown"}, + {{35, }, "QUERY_BAD_MAGIC", "query_bad_magic"}, + {{36, }, "QUERY_TIMED_OUT", "query_timed_out"}, + {{37, }, "QUERY_SRC_UNKNOWN", "query_src_unknown"}, + {{38, }, "RAW_SENDS", "Raw sends (Port 
0)"}, + {{39, }, "RAW_RECEIVES", "Raw receives (Port 0)"}, + {{40, }, "RAW_OVERSIZED_PACKETS", "Raw oversized packets (Port 0)"}, + {{41, }, "RAW_RECV_OVERRUN", "raw_recv_overrun"}, + {{42, }, "RAW_DISABLED", "raw_disabled"}, + {{43, }, "CONNECT_SEND", "connect_send"}, + {{44, }, "CONNECT_RECV", "connect_recv"}, + {{45, }, "ACK_SEND", "ack_send (Port 0)"}, + {{46, }, "ACK_RECV", "ack_recv (Port 0)"}, + {{47, }, "PUSH_SEND", "push_send (Port 0)"}, + {{48, }, "PUSH_RECV", "push_recv (Port 0)"}, + {{49, }, "QUERY_SEND", "query_send (Port 0)"}, + {{50, }, "QUERY_RECV", "query_recv (Port 0)"}, + {{51, }, "REPLY_SEND", "reply_send (Port 0)"}, + {{52, }, "REPLY_RECV", "reply_recv (Port 0)"}, + {{53, }, "QUERY_UNKNOWN", "query_unknown (Port 0)"}, +/* {{ 54, }, "QUERY_UNKNOWN", "query_unknown (Port 0)"},*/ + {{55, }, "DATA_SEND_NULL", "data_send_null (Port 0)"}, + {{56, }, "DATA_SEND_SMALL", "data_send_small (Port 0)"}, + {{57, }, "DATA_SEND_MEDIUM", "data_send_medium (Port 0)"}, + {{58, }, "DATA_SEND_RNDV", "data_send_rndv (Port 0)"}, + {{59, }, "DATA_SEND_PULL", "data_send_pull (Port 0)"}, + {{60, }, "DATA_RECV_NULL", "data_recv_null (Port 0)"}, + {{61, }, "DATA_RECV_SMALL_INLINE", "data_recv_small_inline (Port 0)"}, + {{62, }, "DATA_RECV_SMALL_COPY", "data_recv_small_copy (Port 0)"}, + {{63, }, "DATA_RECV_MEDIUM", "data_recv_medium (Port 0)"}, + {{64, }, "DATA_RECV_RNDV", "data_recv_rndv (Port 0)"}, + {{65, }, "DATA_RECV_PULL", "data_recv_pull (Port 0)"}, + {{66, }, "ETHER_SEND_UNICAST_CNT", "ether_send_unicast_cnt (Port 0)"}, + {{67, }, "ETHER_SEND_MULTICAST_CNT", "ether_send_multicast_cnt (Port 0)"}, + {{68, }, "ETHER_RECV_SMALL_CNT", "ether_recv_small_cnt (Port 0)"}, + {{69, }, "ETHER_RECV_BIG_CNT", "ether_recv_big_cnt (Port 0)"}, + {{70, }, "ETHER_OVERRUN", "ether_overrun"}, + {{71, }, "ETHER_OVERSIZED", "ether_oversized"}, + {{72, }, "DATA_RECV_NO_CREDITS", "data_recv_no_credits"}, + {{73, }, "PACKETS_RECENT", "Packets resent"}, + {{74, }, "PACKETS_DROPPED", 
"Packets dropped (data send side)"}, + {{75, }, "MAPPER_ROUTES_UPDATE", "Mapper routes update"}, + {{76, }, "ROUTE_DISPERSION", "Route dispersion (Port 0)"}, + {{77, }, "OUT_OF_SEND_HANDLES", "out_of_send_handles"}, + {{78, }, "OUT_OF_PULL_HANDLES", "out_of_pull_handles"}, + {{79, }, "OUT_OF_PUSH_HANDLES", "out_of_push_handles"}, + {{80, }, "MEDIUM_CONT_RACE", "medium_cont_race"}, + {{81, }, "CMD_TYPE_UNKNOWN", "cmd_type_unknown"}, + {{82, }, "UREQ_TYPE_UNKNOWN", "ureq_type_unknown"}, + {{83, }, "INTERRUPTS_OVERRUN", "Interrupts overrun"}, + {{84, }, "WAITING_FOR_INTERRUPT_DMA", "Waiting for interrupt DMA"}, + {{85, }, "WAITING_FOR_INTERRUPT_ACK", "Waiting for interrupt Ack"}, + {{86, }, "WAITING_FOR_INTERRUPT_TIMER", "Waiting for interrupt Timer"}, + {{87, }, "SLABS_RECYCLING", "Slabs recycling"}, + {{88, }, "SLABS_PRESSURE", "Slabs pressure"}, + {{89, }, "SLABS_STARVATION", "Slabs starvation"}, + {{90, }, "OUT_OF_RDMA_HANDLES", "out_of_rdma handles"}, + {{91, }, "EVENTQ_FULL", "eventq_full"}, + {{92, }, "BUFFER_DROP", "buffer_drop (Port 0)"}, + {{93, }, "MEMORY_DROP", "memory_drop (Port 0)"}, + {{94, }, "HARDWARE_FLOW_CONTROL", "Hardware flow control (Port 0)"}, + {{95, }, "SIMULATED_PACKETS_LOST", "(Devel) Simulated packets lost (Port 0)"}, + {{96, }, "LOGGING_FRAMES_DUMPED", "(Logging) Logging frames dumped"}, + {{97, }, "WAKE_INTERRUPTS", "Wake interrupts"}, + {{98, }, "AVERTED_WAKEUP_RACE", "Averted wakeup race"}, + {{99, }, "DMA_METADATA_RACE", "Dma metadata race"}, + {{0, }, "", ""} +}; + +static int num_events=0; +papi_vector_t _mx_vector; + +static char mx_counters_exe[BUFSIZ]; + +static int +read_mx_counters( long long *counters ) +{ + FILE *fp; + char line[LINELEN]; + int i, linenum; + + /* Open a pipe to the mx_counters executable */ + + fp = popen( mx_counters_exe, "r" ); + if ( !fp ) { + perror( "popen" ); + return PAPI_ECMP; + } + + + /* A line of output looks something similar to: */ + /* " Lanai uptime (seconds): 766268 (0xbb13c)" */ + + /* This 
code may fail if number of ports on card > 1 */ + + linenum = 0; + while ( fgets( line, LINELEN, fp ) ) { + // printf("%s",line); + for(i=0; line[i]!= '\0' && i=MX_MAX_COUNTERS) break; + } + + pclose( fp ); + + return PAPI_OK; +} + + + +/* + * Component setup and shutdown + */ + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +static int +_mx_init_component( int cidx ) +{ + + FILE *fff; + char test_string[BUFSIZ]; + + /* detect if MX available */ + + strncpy(mx_counters_exe,"mx_counters 2> /dev/null",BUFSIZ); + fff=popen(mx_counters_exe,"r"); + /* popen only returns NULL if "sh" fails, not the actual command */ + if (fgets(test_string,BUFSIZ,fff)==NULL) { + pclose(fff); + strncpy(mx_counters_exe,"./components/mx/utils/fake_mx_counters 2> /dev/null",BUFSIZ); + fff=popen(mx_counters_exe,"r"); + if (fgets(test_string,BUFSIZ,fff)==NULL) { + pclose(fff); + /* neither real nor fake found */ + strncpy(_mx_vector.cmp_info.disabled_reason, + "No MX utilities found",PAPI_MAX_STR_LEN); + return PAPI_ECMP; + } + } + pclose(fff); + + num_events=MX_MAX_COUNTERS; + _mx_vector.cmp_info.num_native_events=num_events; + + /* Export the component id */ + _mx_vector.cmp_info.CmpIdx = cidx; + + + return PAPI_OK; +} + + +/* + * This is called whenever a thread is initialized + */ +static int +_mx_init_thread( hwd_context_t * ctx ) +{ + ( void ) ctx; /*unused */ + return PAPI_OK; +} + + +static int +_mx_shutdown_component(void) +{ + return PAPI_OK; +} + +static int +_mx_shutdown_thread( hwd_context_t * ctx ) +{ + ( void ) ctx; /*unused */ + return PAPI_OK; +} + + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +static int +_mx_init_control_state( hwd_control_state_t *ctl ) +{ + ( void ) ctl; /*unused */ + + return PAPI_OK; +} + +static int +_mx_update_control_state( hwd_control_state_t *ctl, NativeInfo_t 
*native, + int count, hwd_context_t *ctx ) +{ + ( void ) ctx; /*unused */ + int i, index; + + MX_control_state_t *mx_ctl = (MX_control_state_t *)ctl; + + for(i=0; iwhich_counter[i]=index; + // printf("Mapping event# %d to HW counter %d (count=%d)\n", + // i,index,count); + native[i].ni_position = i; + } + + mx_ctl->num_events=count; + + return PAPI_OK; +} + + +static int +_mx_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + + long long mx_counters[MX_MAX_COUNTERS]; + + ( void ) ctx; /*unused */ + + MX_control_state_t *mx_ctl = (MX_control_state_t *)ctl; + int i; + + read_mx_counters( mx_counters ); + + // for(i=0;inum_events;i++) { + mx_ctl->current_count[i]= + mx_counters[mx_ctl->which_counter[i]]; + mx_ctl->start_count[i]=mx_ctl->current_count[i]; + } + + return PAPI_OK; +} + + +static int +_mx_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; /*unused */ + + long long mx_counters[MX_MAX_COUNTERS]; + MX_control_state_t *mx_ctl = (MX_control_state_t *)ctl; + int i; + + read_mx_counters( mx_counters ); + + for(i=0;inum_events;i++) { + mx_ctl->current_count[i]= + mx_counters[mx_ctl->which_counter[i]]; + } + + return PAPI_OK; +} + +static int +_mx_read( hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, + int flags ) +{ + ( void ) ctx; /*unused */ + ( void ) flags; /*unused */ + int i; + long long mx_counters[MX_MAX_COUNTERS]; + + MX_control_state_t *mx_ctl = (MX_control_state_t *)ctl; + + read_mx_counters( mx_counters ); + + for ( i = 0; i < mx_ctl->num_events; i++ ) { + mx_ctl->current_count[i]= + mx_counters[mx_ctl->which_counter[i]]; + mx_ctl->difference[i] = mx_ctl->current_count[i]- + mx_ctl->start_count[i]; + } + *events = mx_ctl->difference; + + return PAPI_OK; +} + + +static int +_mx_reset( hwd_context_t * ctx, hwd_control_state_t * ctrl ) +{ + _mx_start( ctx, ctrl ); + return PAPI_OK; +} + +/* Unused write function */ +/* static int */ +/* _mx_write( hwd_context_t * ctx, hwd_control_state_t * ctrl, long long 
*from ) */ +/* { */ +/* ( void ) ctx; /\*unused *\/ */ +/* ( void ) ctrl; /\*unused *\/ */ +/* ( void ) from; /\*unused *\/ */ + +/* return PAPI_OK; */ +/* } */ + +/* + * Functions for setting up various options + */ + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +static int +_mx_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ + ( void ) ctx; /*unused */ + ( void ) code; /*unused */ + ( void ) option; /*unused */ + + return PAPI_OK; +} + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +static int +_mx_set_domain( hwd_control_state_t * cntrl, int domain ) +{ + ( void ) cntrl; /*unused */ + if ( PAPI_DOM_ALL != domain ) { + return PAPI_EINVAL; + } + + return PAPI_OK; +} + + + +static int +_mx_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + + int event=EventCode; + + if (event >=0 && event < num_events) { + strncpy( name, mx_native_table[event].name, len ); + return PAPI_OK; + } + return PAPI_ENOEVNT; + + +} + +static int +_mx_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + int event=EventCode; + + if (event >=0 && event < num_events) { + strncpy( name, mx_native_table[event].description, len ); + return PAPI_OK; + } + return PAPI_ENOEVNT; +} + + + +static int +_mx_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + + if ( modifier == PAPI_ENUM_FIRST ) { + if (num_events==0) return PAPI_ENOEVNT; + *EventCode = 0; + return PAPI_OK; + } + + if ( 
modifier == PAPI_ENUM_EVENTS ) { + int index = *EventCode; + + if ( mx_native_table[index + 1].resources.selector ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + } + + return PAPI_EINVAL; +} + + +papi_vector_t _mx_vector = { + .cmp_info = { + .name = "mx", + .short_name = "mx", + .version = "1.4", + .description = "Myricom MX (Myrinet Express) statistics", + .num_mpx_cntrs = MX_MAX_COUNTERS, + .num_cntrs = MX_MAX_COUNTERS, + .default_domain = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + .available_domains = PAPI_DOM_ALL, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( MX_context_t ), + .control_state = sizeof ( MX_control_state_t ), + .reg_value = sizeof ( MX_register_t ), + .reg_alloc = sizeof ( MX_reg_alloc_t ), + }, + /* function pointers in this component */ + .init_thread = _mx_init_thread, + .init_component = _mx_init_component, + .init_control_state = _mx_init_control_state, + .start = _mx_start, + .stop = _mx_stop, + .read = _mx_read, + .shutdown_thread = _mx_shutdown_thread, + .shutdown_component = _mx_shutdown_component, + .ctl = _mx_ctl, + .update_control_state = _mx_update_control_state, + .set_domain = _mx_set_domain, + .reset = _mx_reset, + + .ntv_enum_events = _mx_ntv_enum_events, + .ntv_code_to_name = _mx_ntv_code_to_name, + .ntv_code_to_descr = _mx_ntv_code_to_descr, +}; diff --git a/src/components/mx/tests/Makefile b/src/components/mx/tests/Makefile new file mode 100644 index 0000000..4713080 --- /dev/null +++ b/src/components/mx/tests/Makefile @@ -0,0 +1,25 @@ +NAME=mx +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = mx_basic mx_elapsed + 
+mx_tests: $(TESTS) + +mx_basic: mx_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o mx_basic mx_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +mx_elapsed: mx_elapsed.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o mx_elapsed mx_elapsed.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +clean: + rm -f $(TESTS) *.o + + + + + diff --git a/src/components/mx/tests/mx_basic.c b/src/components/mx/tests/mx_basic.c new file mode 100644 index 0000000..8a81273 --- /dev/null +++ b/src/components/mx/tests/mx_basic.c @@ -0,0 +1,131 @@ +/** + * @author Vince Weaver + * + * test case for mx myrinet component + * + * + * @brief + * Tests basic mx myrinet functionality + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 + +int main (int argc, char **argv) +{ + + int retval,cid,numcmp; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int total_events=0; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + int quiet=0; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!quiet) { + printf("Trying all MX events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname,"mx")) { + if (!quiet) printf("\tFound Myrinet component %d - %s\n", cid, cmpinfo->name); + } + else { + continue; + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + if (!quiet) printf(" %s ",event_name); + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != 
PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + if (!quiet) printf(" value: %lld\n",values[0]); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_cleanup_eventset()",retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_destroy_eventset()",retval); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No myrinet events found",0); + } + + if (!quiet) { + printf("Note: for this test the values are expected to all be 0 as no I/O happens during the test.\n"); + } + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/components/mx/tests/mx_elapsed.c b/src/components/mx/tests/mx_elapsed.c new file mode 100644 index 0000000..d9d0495 --- /dev/null +++ b/src/components/mx/tests/mx_elapsed.c @@ -0,0 +1,138 @@ +/** + * @author Vince Weaver + * + * test case for mx myrinet component + * + * + * @brief + * Tests basic mx myrinet functionality + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 3 + +int main (int argc, char **argv) +{ + + int retval,cid,numcmp,our_cmp; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + int code; + const PAPI_component_info_t *cmpinfo = NULL; + int quiet=0; + + /* Set quiet variable */ + quiet=tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != 
PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!quiet) { + printf("Trying mutiple reads in MX component\n"); + } + + numcmp = PAPI_num_components(); + our_cmp=-1; + + for(cid=0; cidname,"mx")) { + if (!quiet) printf("\tFound Myrinet component %d - %s\n", cid, cmpinfo->name); + our_cmp=cid; + break; + } + + } + + if (our_cmp<0) { + test_skip(__FILE__, __LINE__,"MX component not found\n", 0); + } + + if (cmpinfo->num_native_events<=0) { + test_skip(__FILE__, __LINE__,"MX component not found\n", 0); + } + + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval=PAPI_event_name_to_code("mx:::COUNTERS_UPTIME",&code); + if (retval!=PAPI_OK) { + test_fail(__FILE__, __LINE__, + "could not add event COUNTERS_UPTIME",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval=PAPI_event_name_to_code("mx:::PUSH_OBSOLETE",&code); + if (retval!=PAPI_OK) { + test_fail(__FILE__, __LINE__, + "could not add event PUSH_OBSOLETE",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval=PAPI_event_name_to_code("mx:::PKT_MISROUTED",&code); + if (retval!=PAPI_OK) { + test_fail(__FILE__, __LINE__, + "could not add event PKT_MISROUTED",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_read( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_read()",retval); + } + + if (!quiet) printf("%lld %lld 
%lld\n",values[0],values[1],values[2]); + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + if (!quiet) printf("%lld %lld %lld\n",values[0],values[1],values[2]); + + + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/components/mx/utils/fake_mx_counters.c b/src/components/mx/utils/fake_mx_counters.c new file mode 100644 index 0000000..f043411 --- /dev/null +++ b/src/components/mx/utils/fake_mx_counters.c @@ -0,0 +1,136 @@ +#include + +/* This program fakes the output of the mx_counters util */ +/* for testing purposes */ + +#define MAX_LABELS 101 + +char labels[MAX_LABELS][100]={ +" Lanai uptime (seconds)", +" Counters uptime (seconds)", +" Bad CRC8 (Port 0)", +" Bad CRC32 (Port 0)", +" Unstripped route (Port 0)", +" pkt_desc_invalid (Port 0)", +" recv_pkt_errors (Port 0)", +" pkt_misrouted (Port 0)", +" data_src_unknown", +" data_bad_endpt", +" data_endpt_closed", +" data_bad_session", +" push_bad_window", +" push_duplicate", +" push_obsolete", +" push_race_driver", +" push_bad_send_handle_magic", +" push_bad_src_magic", +" pull_obsolete", +" pull_notify_obsolete", +" pull_race_driver", +" pull_notify_race", +" ack_bad_type", +" ack_bad_magic", +" ack_resend_race", +" Late ack", +" ack_nack_frames_in_pipe", +" nack_bad_endpt", +" nack_endpt_closed", +" nack_bad_session", +" nack_bad_rdmawin", +" nack_eventq_full", +" send_bad_rdmawin", +" connect_timeout", +" connect_src_unknown", +" query_bad_magic", +" query_timed_out", +" query_src_unknown", +" Raw sends (Port 0)", +" Raw receives (Port 0)", +" Raw oversized packets (Port 0)", +" raw_recv_overrun", +" raw_disabled", +" connect_send", +" connect_recv", +" ack_send (Port 0)", +" ack_recv (Port 0)", +" push_send (Port 0)", +" push_recv (Port 0)", +" query_send (Port 0)", +" query_recv (Port 0)", +" reply_send (Port 0)", +" reply_recv (Port 0)", +" query_unknown (Port 0)", +" query_unknown (Port 0)", +" data_send_null 
(Port 0)", +" data_send_small (Port 0)", +" data_send_medium (Port 0)", +" data_send_rndv (Port 0)", +" data_send_pull (Port 0)", +" data_recv_null (Port 0)", +" data_recv_small_inline (Port 0)", +" data_recv_small_copy (Port 0)", +" data_recv_medium (Port 0)", +" data_recv_rndv (Port 0)", +" data_recv_pull (Port 0)", +" ether_send_unicast_cnt (Port 0)", +" ether_send_multicast_cnt (Port 0)", +" ether_recv_small_cnt (Port 0)", +" ether_recv_big_cnt (Port 0)", +" ether_overrun", +" ether_oversized", +" data_recv_no_credits", +" Packets resent", +" Packets dropped (data send side)", +" Mapper routes update", +" Route dispersion (Port 0)", +" out_of_send_handles", +" out_of_pull_handles", +" out_of_push_handles", +" medium_cont_race", +" cmd_type_unknown", +" ureq_type_unknown", +" Interrupts overrun", +" Waiting for interrupt DMA", +" Waiting for interrupt Ack", +" Waiting for interrupt Timer", +" Slabs recycling", +" Slabs pressure", +" Slabs starvation", +" out_of_rdma handles", +" eventq_full", +" buffer_drop (Port 0)", +" memory_drop (Port 0)", +" Hardware flow control (Port 0)", +"(Devel) Simulated packets lost (Port 0)", +" (Logging) Logging frames dumped", +" Wake interrupts", +" Averted wakeup race", +" Dma metadata race", +" foo", +}; + +int main(int argc, char **argv) { + + int i,multiplier=1; + + FILE *fff; + + fff=fopen("state","r"); + if (fff!=NULL) { + fscanf(fff,"%d",&multiplier); + fclose(fff); + } + + fff=fopen("state","w"); + if (fff!=NULL) { + fprintf(fff,"%d\n",multiplier+1); + fclose(fff); + } + + printf("1 ports\n"); + for(i=0;i.rx.bytes", + ".rx.packets", + ".rx.errors", + ".rx.dropped", + ".rx.fifo", + ".rx.frame", + ".rx.compressed", + ".rx.multicast", + ".tx.bytes", + ".tx.packets", + ".tx.errors", + ".tx.dropped", + ".tx.fifo", + ".tx.colls", + ".tx.carrier", + ".tx.compressed" + + By default the Linux kernel only updates the network statistics + once every second (see the references listed in the "SEE ALSO" + section for some problems you 
may come across and for how to + change the default polling period). + + Note: The Linux network statistics are updated by code that + resides in the file net/core/dev.c. + +AUTHOR + + Initial written by Haihang You . + Re-written by Jose Pedro Oliveira in order + to source data directly from /proc/net/dev. + +SEE ALSO + + * Network Stats Anomaly + http://collectl.sourceforge.net/NetworkStats.html + + * Occasionally corrupted network stats in /proc/net/dev + http://kerneltrap.org/mailarchive/linux-netdev/2008/1/14/566936 + http://kerneltrap.org/mailarchive/linux-netdev/2008/1/14/567512 + + +# 2011-11-05 jpo +# vim:set ai ts=4 sw=4 sts=4 et: diff --git a/src/components/net/Rules.net b/src/components/net/Rules.net new file mode 100644 index 0000000..45f5d04 --- /dev/null +++ b/src/components/net/Rules.net @@ -0,0 +1,8 @@ +# $Id$ + +COMPSRCS += components/net/linux-net.c +COMPOBJS += linux-net.o + +linux-net.o: components/net/linux-net.c components/net/linux-net.h $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/net/linux-net.c -o $@ + diff --git a/src/components/net/linux-net.c b/src/components/net/linux-net.c new file mode 100644 index 0000000..7388aa3 --- /dev/null +++ b/src/components/net/linux-net.c @@ -0,0 +1,691 @@ +/** + * @file linux-net.c + * + * @author Haihang You + * you@cs.utk.edu + * + * @author Jose Pedro Oliveira + * jpo@di.uminho.pt + * + * @ingroup papi_components + * + * @brief net component + * This file contains the source code for a component that enables + * PAPI-C to access network statistics through the /proc file system. + * This component will dynamically create a native events table for + * all the interfaces listed in /proc/net/dev (16 entries for each + * interface). 
+ */ + + +#include +#include +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#include "linux-net.h" + + +papi_vector_t _net_vector; + +/********************************************************************* + * Private + ********************************************************************/ + +/* Network stats refresh latency in usec (default: 1 sec) */ +#define NET_REFRESH_LATENCY 1000000 + +#define NET_PROC_FILE "/proc/net/dev" + +/* /proc/net/dev line size + * interface name + 8 RX counters + 8 TX counters + separators + */ +#define NET_PROC_MAX_LINE (IFNAMSIZ + 16 * (20 + 1) + 16) + +#define NET_INVALID_RESULT -1 + + +static NET_native_event_entry_t * _net_native_events=NULL; + +static int num_events = 0; +static int is_initialized = 0; + +static long long _net_register_start[NET_MAX_COUNTERS]; +static long long _net_register_current[NET_MAX_COUNTERS]; + +/* temporary event */ +struct temp_event { + char name[PAPI_MAX_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + struct temp_event *next; +}; +static struct temp_event* root = NULL; + +/* /proc/net/dev: network counters by interface */ +#define NET_INTERFACE_COUNTERS 16 + +static const struct net_counters { + char *name; + char *description; +} _net_counter_info[NET_INTERFACE_COUNTERS] = { + /* Receive */ + { "rx:bytes", "receive bytes"}, + { "rx:packets", "receive packets"}, + { "rx:errors", "receive errors"}, + { "rx:dropped", "receive dropped"}, + { "rx:fifo", "receive fifo"}, + { "rx:frame", "receive frame"}, + { "rx:compressed", "receive compressed"}, + { "rx:multicast", "receive multicast"}, + /* Transmit */ + { "tx:bytes", "transmit bytes"}, + { "tx:packets", "transmit packets"}, + { "tx:errors", "transmit errors"}, + { "tx:dropped", "transmit dropped"}, + { "tx:fifo", "transmit fifo"}, + { "tx:colls", "transmit colls"}, + { "tx:carrier", "transmit carrier"}, + { "tx:compressed", "transmit 
compressed"}, +}; + + +/********************************************************************* + *** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT **** + ********************************************************************/ + +/* + * find all network interfaces listed in /proc/net/dev + */ +static int +generateNetEventList( void ) +{ + FILE *fin; + char line[NET_PROC_MAX_LINE]; + char *retval, *ifname; + int count = 0; + struct temp_event *temp; + struct temp_event *last = NULL; + int i, j; + + fin = fopen(NET_PROC_FILE, "r"); + if (fin == NULL) { + SUBDBG("Can't find %s, are you sure the /proc file-system is mounted?\n", + NET_PROC_FILE); + return 0; + } + + /* skip the 2 header lines */ + for (i=0; i<2; i++) { + retval = fgets (line, NET_PROC_MAX_LINE, fin); + if (retval == NULL) { + fclose(fin); + SUBDBG("Not enough lines in %s\n", NET_PROC_FILE); + return 0; + } + } + + while ((fgets (line, NET_PROC_MAX_LINE, fin)) == line) { + + /* split the interface name from the 16 counters */ + retval = strstr(line, ":"); + if (retval == NULL) { + SUBDBG("Wrong line format <%s>\n", line); + continue; + } + + *retval = '\0'; + ifname = line; + while (isspace(*ifname)) { ifname++; } + + for (j=0; jnext = NULL; + + if (root == NULL) { + root = temp; + } else if (last) { + last->next = temp; + } else { + free(temp); + fclose(fin); + PAPIERROR("This shouldn't be possible\n"); + return PAPI_ECMP; + } + last = temp; + + snprintf(temp->name, PAPI_MAX_STR_LEN, "%s:%s", + ifname, _net_counter_info[j].name); + snprintf(temp->description, PAPI_MAX_STR_LEN, "%s %s", + ifname, _net_counter_info[j].description); + + count++; + } + } + + fclose(fin); + + return count; +} + + +static int +getInterfaceBaseIndex(const char *ifname) +{ + int i; + + for ( i=0; i\n", line); + continue; + } + + *retval = '\0'; + data = retval + 1; + ifname = line; + while (isspace(*ifname)) { ifname++; } + + if_bidx = getInterfaceBaseIndex(ifname); + if (if_bidx < 0) { + SUBDBG("Interface <%s> not 
found\n", ifname); + } else { + nf = sscanf( data, + "%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", + &values[if_bidx + 0], &values[if_bidx + 1], + &values[if_bidx + 2], &values[if_bidx + 3], + &values[if_bidx + 4], &values[if_bidx + 5], + &values[if_bidx + 6], &values[if_bidx + 7], + &values[if_bidx + 8], &values[if_bidx + 9], + &values[if_bidx + 10], &values[if_bidx + 11], + &values[if_bidx + 12], &values[if_bidx + 13], + &values[if_bidx + 14], &values[if_bidx + 15]); + + SUBDBG("\nRead " + "%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", + values[if_bidx + 0], values[if_bidx + 1], + values[if_bidx + 2], values[if_bidx + 3], + values[if_bidx + 4], values[if_bidx + 5], + values[if_bidx + 6], values[if_bidx + 7], + values[if_bidx + 8], values[if_bidx + 9], + values[if_bidx + 10], values[if_bidx + 11], + values[if_bidx + 12], values[if_bidx + 13], + values[if_bidx + 14], values[if_bidx + 15]); + + if ( nf != NET_INTERFACE_COUNTERS ) { + /* This shouldn't happen */ + SUBDBG("/proc line with wrong number of fields\n"); + } + } + + } + + fclose(fin); + + return 0; +} + + +/********************************************************************* + *************** BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ********* + *********************************************************************/ + +/* + * This is called whenever a thread is initialized + */ +static int +_net_init_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + + return PAPI_OK; +} + + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +static int +_net_init_component( int cidx ) +{ + int i = 0; + struct temp_event *t, *last; + + if ( is_initialized ) + return PAPI_OK; + + memset(_net_register_start, 0, + NET_MAX_COUNTERS*sizeof(_net_register_start[0])); + memset(_net_register_current, 0, + 
NET_MAX_COUNTERS*sizeof(_net_register_current[0])); + + is_initialized = 1; /* NOTE(review): set before the event table is built, so a failure below still leaves the component marked as initialized */ + + /* The network interfaces are listed in /proc/net/dev */ + num_events = generateNetEventList(); + + if ( num_events < 0 ) /* PAPI errors */ + return num_events; + + if ( num_events == 0 ) /* No network interfaces found */ + return PAPI_OK; + + t = root; + _net_native_events = (NET_native_event_entry_t*) + papi_malloc(sizeof(NET_native_event_entry_t) * num_events); /* NOTE(review): the result is dereferenced below without a NULL check */ + do { /* copy each temporary list node into the table, then release the node */ + strncpy(_net_native_events[i].name, t->name, PAPI_MAX_STR_LEN-1); + _net_native_events[i].name[PAPI_MAX_STR_LEN-1] = '\0'; + strncpy(_net_native_events[i].description, t->description, PAPI_MAX_STR_LEN-1); + _net_native_events[i].description[PAPI_MAX_STR_LEN-1] = '\0'; + _net_native_events[i].resources.selector = i + 1; /* 1-based; _net_update_control_state() subtracts 1 again */ + last = t; + t = t->next; + papi_free(last); + i++; + } while (t != NULL); + root = NULL; + + /* Export the total number of events available */ + _net_vector.cmp_info.num_native_events = num_events; + + /* Export the component id */ + _net_vector.cmp_info.CmpIdx = cidx; + + return PAPI_OK; +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +static int +_net_init_control_state( hwd_control_state_t *ctl ) +{ + ( void ) ctl; /* nothing to set up per control state */ + + return PAPI_OK; +} + + +static int +_net_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + + NET_control_state_t *net_ctl = (NET_control_state_t *) ctl; + long long now = PAPI_get_real_usec(); + + read_net_counters(_net_register_start); /* snapshot used as the baseline for later reads */ + memcpy(_net_register_current, _net_register_start, + NET_MAX_COUNTERS * sizeof(_net_register_start[0])); + + /* set initial values to 0 */ + memset(net_ctl->values, 0, NET_MAX_COUNTERS*sizeof(net_ctl->values[0])); + + /* Set last access time for caching purposes */ + net_ctl->lastupdate = now; + + return PAPI_OK; +} + + +static int +_net_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long ** events, int flags ) +{ + (void) flags; + (void) ctx; + + 
NET_control_state_t *net_ctl = (NET_control_state_t *) ctl; + long long now = PAPI_get_real_usec(); + int i; + + /* Caching + * Only read new values from /proc if enough time has passed + * since the last read. + */ + if ( now - net_ctl->lastupdate > NET_REFRESH_LATENCY ) { + read_net_counters(_net_register_current); + for ( i=0; ivalues[i] = _net_register_current[i] - _net_register_start[i]; + } + net_ctl->lastupdate = now; + } + *events = net_ctl->values; + + return PAPI_OK; +} + + +static int +_net_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + + NET_control_state_t *net_ctl = (NET_control_state_t *) ctl; + long long now = PAPI_get_real_usec(); + int i; + + read_net_counters(_net_register_current); + for ( i=0; ivalues[i] = _net_register_current[i] - _net_register_start[i]; + } + net_ctl->lastupdate = now; + + return PAPI_OK; +} + + +/* + * Thread shutdown + */ +static int +_net_shutdown_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + + return PAPI_OK; +} + + +/* + * Clean up what was setup in net_init_component(). 
+ */ +static int +_net_shutdown_component( void ) +{ + if ( is_initialized ) + { + is_initialized = 0; + if (_net_native_events != NULL) + { + papi_free(_net_native_events); /* release the native event table */ + _net_native_events = NULL; + } + } + + return PAPI_OK; +} + + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and + * PAPI_SET_INHERIT + */ +static int +_net_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + ( void ) ctx; + ( void ) code; + ( void ) option; /* all arguments are ignored: every option code is accepted and nothing is changed */ + + return PAPI_OK; +} + +/* Assign each of the count requested native events its position in the values array (selector - 1). */ +static int +_net_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, int count, hwd_context_t *ctx ) +{ + ( void ) ctx; + ( void ) ctl; + + int i, index; + + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event; /* NOTE(review): used as a table index without a bounds check against num_events -- confirm the caller validates it */ + native[i].ni_position = _net_native_events[index].resources.selector - 1; + } + + return PAPI_OK; +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains; it is the only value this component accepts + */ +static int +_net_set_domain( hwd_control_state_t *ctl, int domain ) +{ + ( void ) ctl; + if ( PAPI_DOM_ALL != domain ) + return PAPI_EINVAL; + + return PAPI_OK; +} + +/* Reset hook: does nothing and reports success. NOTE(review): not declared static, unlike the other callbacks here -- confirm whether that is intentional. */ +int +_net_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + + +/* + * Native Event functions + */ +static int +_net_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + int index; + + switch ( modifier ) { + case PAPI_ENUM_FIRST: + if (num_events==0) { + return PAPI_ENOEVNT; + } + *EventCode = 0; + return PAPI_OK; + break; + + case 
PAPI_ENUM_EVENTS: + index = *EventCode; + if ( index < num_events - 1 ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + + default: + return PAPI_EINVAL; + break; + } + return PAPI_EINVAL; +} + + +/* + * + */ +static int +_net_ntv_name_to_code( const char *name, unsigned int *EventCode ) +{ + int i; + + for ( i=0; i= 0 && index < num_events ) { + snprintf( name, len, "%s", _net_native_events[index].name ); /* unlike strncpy, always NUL-terminates when len > 0 */ + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + + +/* + * Copy the description of native event EventCode into name (at most len bytes, NUL-terminated); PAPI_ENOEVNT if the code is out of range. + */ +static int +_net_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode; + + if ( index >= 0 && index < num_events ) { + snprintf( name, len, "%s", _net_native_events[index].description ); /* unlike strncpy, always NUL-terminates when len > 0 */ + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + + +/* + * Copy the register description (selector) of native event EventCode into bits; PAPI_ENOEVNT if the code is out of range. + */ +static int +_net_ntv_code_to_bits( unsigned int EventCode, hwd_register_t *bits ) +{ + int index = EventCode; + + if ( index >= 0 && index < num_events ) { + memcpy( ( NET_register_t * ) bits, + &( _net_native_events[index].resources ), + sizeof ( NET_register_t ) ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + + +/* + * Component descriptor: static component information, private structure sizes and the callback table. + */ +papi_vector_t _net_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "net", + .short_name = "net", + .version = "4.2.1", + .description = "Linux network driver statistics", + .num_mpx_cntrs = NET_MAX_COUNTERS, + .num_cntrs = NET_MAX_COUNTERS, + .default_domain = PAPI_DOM_ALL, + .available_domains = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( NET_context_t ), + .control_state = sizeof ( NET_control_state_t ), + .reg_value = sizeof ( 
NET_register_t ), + .reg_alloc = sizeof ( NET_reg_alloc_t ), + }, + + /* function pointers in this component */ + .init_thread = _net_init_thread, + .init_component = _net_init_component, + .init_control_state = _net_init_control_state, + .start = _net_start, + .stop = _net_stop, + .read = _net_read, + .shutdown_thread = _net_shutdown_thread, + .shutdown_component = _net_shutdown_component, + .ctl = _net_ctl, + + .update_control_state = _net_update_control_state, + .set_domain = _net_set_domain, + .reset = _net_reset, + + .ntv_enum_events = _net_ntv_enum_events, + .ntv_name_to_code = _net_ntv_name_to_code, + .ntv_code_to_name = _net_ntv_code_to_name, + .ntv_code_to_descr = _net_ntv_code_to_descr, + .ntv_code_to_bits = _net_ntv_code_to_bits, +}; + +/* vim:set ts=4 sw=4 sts=4 et: */ diff --git a/src/components/net/linux-net.h b/src/components/net/linux-net.h new file mode 100644 index 0000000..d615b94 --- /dev/null +++ b/src/components/net/linux-net.h @@ -0,0 +1,84 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file linux-net.h + * CVS: $Id$ + * + * @author Haihang You + * you@cs.utk.edu + * + * @author Jose Pedro Oliveira + * jpo@di.uminho.pt + * + * @ingroup papi_components + * + * @brief net component + * This file contains the source code for a component that enables + * PAPI-C to access network statistics through the /proc file system. + * This component will dynamically create a native events table for + * all the interfaces listed in /proc/net/dev (16 entries for each + * interface). 
+ */ + +#ifndef _PAPI_NET_H +#define _PAPI_NET_H + +#include + +/************************* DEFINES SECTION *********************************** + *******************************************************************************/ +/* this number assumes that there will never be more events than indicated + * 20 INTERFACES * 16 COUNTERS = 320 */ +#define NET_MAX_COUNTERS 320 + +/** Structure that stores private information of each event */ +typedef struct NET_register +{ + /* This is used by the framework. It likes it to be !=0 to do something */ + unsigned int selector; +} NET_register_t; + + +/* + * The following structures mimic the ones used by other components. It is more + * convenient to use them like that as programming with PAPI makes specific + * assumptions for them. + */ + + +/** This structure is used to build the table of events */ +typedef struct NET_native_event_entry +{ + NET_register_t resources; + char name[PAPI_MAX_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; +} NET_native_event_entry_t; + + +typedef struct NET_reg_alloc +{ + NET_register_t ra_bits; +} NET_reg_alloc_t; + +/** Control state: cached counter values plus the time they were last refreshed */ +typedef struct NET_control_state +{ + long long values[NET_MAX_COUNTERS]; // used for caching + long long lastupdate; +} NET_control_state_t; + + +typedef struct NET_context +{ + NET_control_state_t state; +} NET_context_t; + + +/************************* GLOBALS SECTION *********************************** + *******************************************************************************/ + +#endif /* _PAPI_NET_H */ + +/* vim:set ts=4 sw=4 sts=4 et: */ diff --git a/src/components/net/tests/Makefile b/src/components/net/tests/Makefile new file mode 100644 index 0000000..e1b939b --- /dev/null +++ b/src/components/net/tests/Makefile @@ -0,0 +1,23 @@ +NAME=net +include ../../Makefile_comp_tests.target + +TESTS = net_list_events net_values_by_code net_values_by_name + +net_tests: $(TESTS) + + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +net_list_events: 
net_list_events.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ net_list_events.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +net_values_by_code: net_values_by_code.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ net_values_by_code.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +net_values_by_name: net_values_by_name.o $(UTILOBJS) $(PAPILIB) + $(CC) $(CFLAGS) $(INCLUDE) -o $@ net_values_by_name.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o + diff --git a/src/components/net/tests/net_list_events.c b/src/components/net/tests/net_list_events.c new file mode 100644 index 0000000..69be654 --- /dev/null +++ b/src/components/net/tests/net_list_events.c @@ -0,0 +1,91 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Jose Pedro Oliveira + * + * test case for the linux-net component + * + * @brief + * List all net events codes and names + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int main (int argc, char **argv) +{ + int retval,cid,numcmp; + int total_events=0; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Listing all net events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname, "net") == NULL) { + continue; + } + + if (!TESTS_QUIET) { + printf("Component %d (%d) - %d events - %s\n", + cid, cmpinfo->CmpIdx, + cmpinfo->num_native_events, cmpinfo->name); + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, 
__LINE__, "PAPI_event_code_to_name", retval ); + } + + if (!TESTS_QUIET) { + printf("%#x %s\n", code, event_name); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No net events found", 0); + } + + test_pass( __FILE__ ); + + return 0; +} + +// vim:set ai ts=4 sw=4 sts=4 et: diff --git a/src/components/net/tests/net_values_by_code.c b/src/components/net/tests/net_values_by_code.c new file mode 100644 index 0000000..7b17d9c --- /dev/null +++ b/src/components/net/tests/net_values_by_code.c @@ -0,0 +1,139 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Jose Pedro Oliveira + * + * test case for the linux-net component + * + * @brief + * Prints the value of every net event (by code) + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define IFNAME "lo" +#define PINGADDR "127.0.0.1" + +int main (int argc, char **argv) +{ + int retval,cid,numcmp; + int EventSet = PAPI_NULL; + long long value; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int total_events=0; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Trying all net events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidnum_native_events, cmpinfo->name); + } + + if ( strstr(cmpinfo->name, "net") == NULL) { + continue; + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + if 
(!TESTS_QUIET) { + printf("%#x %-24s = ", code, event_name); + } + + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset()", retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_add_event()", retval); + } + + retval = PAPI_start( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()", retval); + } + + if (strstr(event_name, IFNAME ":rx:packets") != NULL) { /* the net component names events "<ifname>:rx:packets"; a substring match also tolerates a component prefix */ + /* generate some traffic + * the operation should take more than one second in order + * to guarantee that the network counters are updated */ + retval = system("ping -c 4 " PINGADDR " > /dev/null"); + if (retval < 0) { + test_fail(__FILE__, __LINE__, "Unable to start ping", retval); + } + } + + retval = PAPI_stop( EventSet, &value ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()", retval); + } + + if (!TESTS_QUIET) printf("%lld\n", value); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset()", retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset()", retval); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No net events found", 0); + } + + test_pass( __FILE__ ); + + return 0; +} + +// vim:set ai ts=4 sw=4 sts=4 et: diff --git a/src/components/net/tests/net_values_by_name.c b/src/components/net/tests/net_values_by_name.c new file mode 100644 index 0000000..bfafa87 --- /dev/null +++ b/src/components/net/tests/net_values_by_name.c @@ -0,0 +1,119 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Jose Pedro Oliveira + * + * test case for the linux-net 
component + * + * @brief + * Prints the values of several net events specified by names + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +/* +#define IFNAME "eth0" +*/ +#define IFNAME "lo" +#define PINGADDR "127.0.0.1" + +#define NUM_EVENTS 4 + +int main (int argc, char **argv) +{ + int i, retval; + int EventSet = PAPI_NULL; + char *event_name[NUM_EVENTS] = { + IFNAME ":rx:bytes", + IFNAME ":rx:packets", + IFNAME ":tx:bytes", + IFNAME ":tx:packets", + }; + int event_code[NUM_EVENTS] = { 0, 0, 0, 0}; + long long event_value[NUM_EVENTS]; + int total_events=0; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Net events by name\n"); + } + + /* Map names to codes */ + for ( i=0; i /dev/null"); + if (retval < 0) { + test_fail(__FILE__, __LINE__, "Unable to start ping", retval); + } + + retval = PAPI_stop( EventSet, event_value ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()", retval); /* name the call that actually failed */ + } + + if (!TESTS_QUIET) { + for ( i=0; i/src/components/nvml + % ./configure --with-nvml-libdir=> --with-nvml-incdir= --with-cuda-dir= + +For example, one configuration may look like this + %./configure --with-nvml-libdir=/usr/lib64/nvidia --with-nvml-incdir=/usr/local/cuda/include --with-cuda-dir=/usr/local/cuda + +The NVML component is added to PAPI during the configuration of PAPI +by adding the '--with-components=nvml' command line option to +configure. + + % ./configure --with-components="nvml" + +At build-time the nVidia compiler, nvcc, needs to be in your path, as does the cuda run-time library (libcudart.so). + +Please refer to http://developer.download.nvidia.com/assets/cuda/files/CUDADownloads/NVML/nvml.pdf +for details about NVML library. 
+ + + +Note: Power Limiting using NVML (aka power capping) requires root. + +PAPI has added support for power limiting using NVML (on supported +devices from the Kepler family or later). The executable needs to +have root permissions to change the power limits on the device. + +The power_management_limit can be written to set a limit (in +milliWatts) to the power consumption by DEVICE. The value that can +be written needs to be between the +power_management_limit_constraint_min and +power_management_limit_constraint_max. + +nvml:::DEVICE:power_management_limit +nvml:::DEVICE:power_management_limit_constraint_min +nvml:::DEVICE:power_management_limit_constraint_max + +A test for writing of the power_management_limit can be found in the +nvml/tests. diff --git a/src/components/nvml/Rules.nvml b/src/components/nvml/Rules.nvml new file mode 100644 index 0000000..f914b48 --- /dev/null +++ b/src/components/nvml/Rules.nvml @@ -0,0 +1,10 @@ +include components/nvml/Makefile.nvml + +COMPSRCS += components/nvml/linux-nvml.c +COMPOBJS += linux-nvml.o +CFLAGS += -I$(NVML_INCDIR) -I$(CUDA_DIR)/include +LDFLAGS += $(LDL) + +linux-nvml.o: components/nvml/linux-nvml.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/nvml/linux-nvml.c -o linux-nvml.o + diff --git a/src/components/nvml/configure b/src/components/nvml/configure new file mode 100755 index 0000000..f07690a --- /dev/null +++ b/src/components/nvml/configure @@ -0,0 +1,3983 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.59. +# +# Copyright (C) 2003 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## --------------------- ## +## M4sh Initialization. 
## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! 
/bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2 + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. 
The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. + exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +exec 6>&1 + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_config_libobj_dir=. +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Maximum number of lines to put in a shell here document. +# This variable seems obsolete. It should probably be removed, and +# only ac_max_sed_lines should be used. +: ${ac_max_here_lines=38} + +# Identity of this package. +PACKAGE_NAME= +PACKAGE_TARNAME= +PACKAGE_VERSION= +PACKAGE_STRING= +PACKAGE_BUGREPORT= + +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#if HAVE_SYS_TYPES_H +# include +#endif +#if HAVE_SYS_STAT_H +# include +#endif +#if STDC_HEADERS +# include +# include +#else +# if HAVE_STDLIB_H +# include +# endif +#endif +#if HAVE_STRING_H +# if !STDC_HEADERS && HAVE_MEMORY_H +# include +# endif +# include +#endif +#if HAVE_STRINGS_H +# include +#endif +#if HAVE_INTTYPES_H +# include +#else +# if HAVE_STDINT_H +# include +# endif +#endif +#if HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP NVML_INCDIR NVML_LIBDIR CUDA_DIR LIBOBJS LTLIBOBJS' +ac_subst_files='' + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. 
+bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datadir='${prefix}/share' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' +includedir='${prefix}/include' +oldincludedir='/usr/include' +infodir='${prefix}/info' +mandir='${prefix}/man' + +ac_prev= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval "$ac_prev=\$ac_option" + ac_prev= + continue + fi + + ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` + + # Accept the important Cygnus configure options, so we can diagnose typos. + + case $ac_option in + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) + datadir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + eval "enable_$ac_feature=no" ;; + + -enable-* | --enable-*) + ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "enable_$ac_feature='$ac_optarg'" ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst \ + | --locals | --local | --loca | --loc | --lo) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. 
+ with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | 
--program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + 
verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package| sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "with_$ac_package='$ac_optarg'" ;; + + -without-* | --without-*) + ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/-/_/g'` + eval "with_$ac_package=no" ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` + eval "$ac_envvar='$ac_optarg'" + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +# Be sure to have absolute paths. +for ac_var in exec_prefix prefix +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* | NONE | '' ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# Be sure to have absolute paths. +for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \ + localstatedir libdir includedir oldincludedir infodir mandir +do + eval ac_val=$`echo $ac_var` + case $ac_val in + [\\/$]* | ?:[\\/]* ) ;; + *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; };; + esac +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." 
>&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then its parent. + ac_confdir=`(dirname "$0") 2>/dev/null || +$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$0" : 'X\(//\)[^/]' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$0" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r $srcdir/$ac_unique_file; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r $srcdir/$ac_unique_file; then + if test "$ac_srcdir_defaulted" = yes; then + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." 
>&2 + { (exit 1); exit 1; }; } + else + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } + fi +fi +(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null || + { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2 + { (exit 1); exit 1; }; } +srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'` +ac_env_build_alias_set=${build_alias+set} +ac_env_build_alias_value=$build_alias +ac_cv_env_build_alias_set=${build_alias+set} +ac_cv_env_build_alias_value=$build_alias +ac_env_host_alias_set=${host_alias+set} +ac_env_host_alias_value=$host_alias +ac_cv_env_host_alias_set=${host_alias+set} +ac_cv_env_host_alias_value=$host_alias +ac_env_target_alias_set=${target_alias+set} +ac_env_target_alias_value=$target_alias +ac_cv_env_target_alias_set=${target_alias+set} +ac_cv_env_target_alias_value=$target_alias +ac_env_CC_set=${CC+set} +ac_env_CC_value=$CC +ac_cv_env_CC_set=${CC+set} +ac_cv_env_CC_value=$CC +ac_env_CFLAGS_set=${CFLAGS+set} +ac_env_CFLAGS_value=$CFLAGS +ac_cv_env_CFLAGS_set=${CFLAGS+set} +ac_cv_env_CFLAGS_value=$CFLAGS +ac_env_LDFLAGS_set=${LDFLAGS+set} +ac_env_LDFLAGS_value=$LDFLAGS +ac_cv_env_LDFLAGS_set=${LDFLAGS+set} +ac_cv_env_LDFLAGS_value=$LDFLAGS +ac_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_env_CPPFLAGS_value=$CPPFLAGS +ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set} +ac_cv_env_CPPFLAGS_value=$CPPFLAGS +ac_env_CPP_set=${CPP+set} +ac_env_CPP_value=$CPP +ac_cv_env_CPP_set=${CPP+set} +ac_cv_env_CPP_value=$CPP + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures this package to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. 
+ +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +_ACEOF + + cat <<_ACEOF +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --datadir=DIR read-only architecture-independent data [PREFIX/share] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --infodir=DIR info documentation [PREFIX/info] + --mandir=DIR man documentation [PREFIX/man] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + + cat <<\_ACEOF + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-nvml-dir= Specify prefix to nvml libraries and headers + --with-nvml-incdir= Specify directory of nvml header files (nvml.h) in a specific location + --with-nvml-libdir= Specify directory of nvml library (libnvidia-ml.so) in a specific location +--with-cuda-dir= Specify path to cuda root directory + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + CPPFLAGS C/C++ preprocessor flags, e.g. -I if you have + headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +_ACEOF +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + ac_popdir=`pwd` + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d $ac_dir || continue + ac_builddir=. 
+ +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + cd $ac_dir + # Check for guested configure; otherwise get Cygnus style configure. 
+ if test -f $ac_srcdir/configure.gnu; then + echo + $SHELL $ac_srcdir/configure.gnu --help=recursive + elif test -f $ac_srcdir/configure; then + echo + $SHELL $ac_srcdir/configure --help=recursive + elif test -f $ac_srcdir/configure.ac || + test -f $ac_srcdir/configure.in; then + echo + $ac_configure --help + else + echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi + cd $ac_popdir + done +fi + +test -n "$ac_init_help" && exit 0 +if $ac_init_version; then + cat <<\_ACEOF + +Copyright (C) 2003 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit 0 +fi +exec 5>config.log +cat >&5 <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by $as_me, which was +generated by GNU Autoconf 2.59. Invocation command line was + + $ $0 $@ + +_ACEOF +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +hostinfo = `(hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + echo "PATH: $as_dir" +done + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_sep= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; + 2) + ac_configure_args1="$ac_configure_args1 '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'" + # Get rid of the leading space. + ac_sep=" " + ;; + esac + done +done +$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } +$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Be sure not to use single quotes in there, as some shells, +# such as our DU 5.0 friend, will then `close' the trap. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + cat <<\_ASBOX +## ---------------- ## +## Cache variables. 
## +## ---------------- ## +_ASBOX + echo + # The following way of writing the cache mishandles newlines in values, +{ + (set) 2>&1 | + case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in + *ac_space=\ *) + sed -n \ + "s/'"'"'/'"'"'\\\\'"'"''"'"'/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p" + ;; + *) + sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} + echo + + cat <<\_ASBOX +## ----------------- ## +## Output variables. ## +## ----------------- ## +_ASBOX + echo + for ac_var in $ac_subst_vars + do + eval ac_val=$`echo $ac_var` + echo "$ac_var='"'"'$ac_val'"'"'" + done | sort + echo + + if test -n "$ac_subst_files"; then + cat <<\_ASBOX +## ------------- ## +## Output files. ## +## ------------- ## +_ASBOX + echo + for ac_var in $ac_subst_files + do + eval ac_val=$`echo $ac_var` + echo "$ac_var='"'"'$ac_val'"'"'" + done | sort + echo + fi + + if test -s confdefs.h; then + cat <<\_ASBOX +## ----------- ## +## confdefs.h. ## +## ----------- ## +_ASBOX + echo + sed "/^$/d" confdefs.h | sort + echo + fi + test "$ac_signal" != 0 && + echo "$as_me: caught signal $ac_signal" + echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core && + rm -rf conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status + ' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -rf conftest* confdefs.h +# AIX cpp loses on an empty file, so make sure it contains at least a newline. +echo >confdefs.h + +# Predefined preprocessor variables. 
+ +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer explicitly selected file to automatically selected ones. +if test -z "$CONFIG_SITE"; then + if test "x$prefix" != xNONE; then + CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" + else + CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" + fi +fi +for ac_site_file in $CONFIG_SITE; do + if test -r "$ac_site_file"; then + { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 +echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special + # files actually), so we avoid doing that. + if test -f "$cache_file"; then + { echo "$as_me:$LINENO: loading cache $cache_file" >&5 +echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . $cache_file;; + *) . ./$cache_file;; + esac + fi +else + { echo "$as_me:$LINENO: creating cache $cache_file" >&5 +echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in `(set) 2>&1 | + sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val="\$ac_cv_env_${ac_var}_value" + eval ac_new_val="\$ac_env_${ac_var}_value" + case $ac_old_set,$ac_new_set in + set,) + { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 +echo "$as_me: former value: $ac_old_val" >&2;} + { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 +echo "$as_me: current value: $ac_new_val" >&2;} + ac_cache_corrupted=: + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + + + + + + + + + + + + + + + +# Check whether --with-nvml_dir or --without-nvml_dir was given. +if test "${with_nvml_dir+set}" = set; then + withval="$with_nvml_dir" + nvml_dir=$withval +fi; + +# Check whether --with-nvml_incdir or --without-nvml_incdir was given. +if test "${with_nvml_incdir+set}" = set; then + withval="$with_nvml_incdir" + nvml_incdir=$withval +fi; + +# Check whether --with-nvml_libdir or --without-nvml_libdir was given. +if test "${with_nvml_libdir+set}" = set; then + withval="$with_nvml_libdir" + nvml_libdir=$withval +fi; + +nvml_dotest=0 +if test "x$nvml_dir" != "x"; then + nvml_incdir="$nvml_dir/include" + nvml_libdir="$nvml_dir/lib64" + nvml_dotest=1 +else + if test "x$nvml_libdir" != "x"; then + LIBS="-L$nvml_libdir" + nvml_dotest=1 + fi + if test "x$nvml_incdir" != "x"; then + CPPFLAGS="-I$nvml_incdir" + nvml_dotest=1 + fi +fi + +CFLAGS="$CFLAGS -I$nvml_incdir" +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$ac_ct_CC" && break +done + + CC=$ac_ct_CC +fi + +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. +echo "$as_me:$LINENO:" \ + "checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version &5\"") >&5 + (eval $ac_compiler --version &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v &5\"") >&5 + (eval $ac_compiler -v &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V &5\"") >&5 + (eval $ac_compiler -V &5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. 
+echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6 +ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5 + (eval $ac_link_default) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Find the output, starting from the most likely. This scheme is +# not robust to junk in `.', hence go to wildcards (a.*) only as a last +# resort. + +# Be careful to initialize this variable, since it used to be cached. +# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile. +ac_cv_exeext= +# b.out is created by i960 compilers. +for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) + ;; + conftest.$ac_ext ) + # This is the source file. + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + # FIXME: I believe we export ac_cv_exeext for Libtool, + # but it would be cool to find out if it's true. Does anybody + # maintain Libtool? --akim. + export ac_cv_exeext + break;; + * ) + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: C compiler cannot create executables +See \`config.log' for more details." >&5 +echo "$as_me: error: C compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } +fi + +ac_exeext=$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6 + +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. 
+echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6 +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + fi + fi +fi +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +rm -f a.out a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6 +echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6 + +echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. 
+for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + export ac_cv_exeext + break;; + * ) break;; + esac +done +else + { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6 + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6 +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6 +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6 +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_compiler_gnu=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6 +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +CFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... 
$ECHO_C" >&6 +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_prog_cc_g=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6 +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5 +echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_stdc=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std1 is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std1. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +# Don't try gcc -ansi; that turns off useful extensions and +# breaks some systems' header files. +# AIX -qlanglvl=ansi +# Ultrix and OSF/1 -std1 +# HP-UX 10.20 and later -Ae +# HP-UX older versions -Aa -D_HPUX_SOURCE +# SVR4 -Xc -D__EXTENSIONS__ +for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_stdc=$ac_arg +break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext +done +rm -f conftest.$ac_ext conftest.$ac_objext +CC=$ac_save_CC + +fi + +case "x$ac_cv_prog_cc_stdc" in + x|xno) + echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6 ;; + *) + echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6 + CC="$CC $ac_cv_prog_cc_stdc" ;; +esac + +# Some people use a C++ compiler to compile C. Since we use `exit', +# in C++ we need to declare it. In case someone uses the same compiler +# for both compiling C and C++ we need to have the C++ compiler decide +# the declaration of exit, since it's the most demanding environment. +cat >conftest.$ac_ext <<_ACEOF +#ifndef __cplusplus + choke me +#endif +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + for ac_declaration in \ + '' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +#include +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +continue +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6 +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +echo "$as_me:$LINENO: result: $CPP" >&5 +echo "${ECHO_T}$CPP" >&6 +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + : +else + { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&5 +echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +echo "$as_me:$LINENO: checking for egrep" >&5 +echo $ECHO_N "checking for egrep... $ECHO_C" >&6 +if test "${ac_cv_prog_egrep+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if echo a | (grep -E '(a|b)') >/dev/null 2>&1 + then ac_cv_prog_egrep='grep -E' + else ac_cv_prog_egrep='egrep' + fi +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5 +echo "${ECHO_T}$ac_cv_prog_egrep" >&6 + EGREP=$ac_cv_prog_egrep + + +echo "$as_me:$LINENO: checking for ANSI C header files" >&5 +echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6 +if test "${ac_cv_header_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_header_stdc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_header_stdc=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then + : +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + exit(2); + exit (0); +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_header_stdc=no +fi +rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 +echo "${ECHO_T}$ac_cv_header_stdc" >&6 +if test $ac_cv_header_stdc = yes; then + +cat >>confdefs.h <<\_ACEOF +#define STDC_HEADERS 1 +_ACEOF + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. + + + + + + + + + +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +if test "${ac_cv_header_nvml_h+set}" = set; then + echo "$as_me:$LINENO: checking for nvml.h" >&5 +echo $ECHO_N "checking for nvml.h... $ECHO_C" >&6 +if test "${ac_cv_header_nvml_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: $ac_cv_header_nvml_h" >&5 +echo "${ECHO_T}$ac_cv_header_nvml_h" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking nvml.h usability" >&5 +echo $ECHO_N "checking nvml.h usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking nvml.h presence" >&5 +echo $ECHO_N "checking nvml.h presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? 
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: nvml.h: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: nvml.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: nvml.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: nvml.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: nvml.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: nvml.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: nvml.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: nvml.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: nvml.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: nvml.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: nvml.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: nvml.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: nvml.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: nvml.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: nvml.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: nvml.h: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for nvml.h" >&5 +echo $ECHO_N "checking for nvml.h... 
$ECHO_C" >&6 +if test "${ac_cv_header_nvml_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_nvml_h=$ac_header_preproc +fi +echo "$as_me:$LINENO: result: $ac_cv_header_nvml_h" >&5 +echo "${ECHO_T}$ac_cv_header_nvml_h" >&6 + +fi +if test $ac_cv_header_nvml_h = yes; then + : +else + { { echo "$as_me:$LINENO: error: NVML component: nvml.h not found: use configure flags to set the path " >&5 +echo "$as_me: error: NVML component: nvml.h not found: use configure flags to set the path " >&2;} + { (exit 1); exit 1; }; } +fi + + +LDFLAGS="$LDFLAGS -L$nvml_libdir -Wl,-rpath,$nvml_libdir" + +echo "$as_me:$LINENO: checking for nvmlInit in -lnvidia-ml" >&5 +echo $ECHO_N "checking for nvmlInit in -lnvidia-ml... $ECHO_C" >&6 +if test "${ac_cv_lib_nvidia_ml_nvmlInit+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lnvidia-ml $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char nvmlInit (); +int +main () +{ +nvmlInit (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_nvidia_ml_nvmlInit=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_nvidia_ml_nvmlInit=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_nvidia_ml_nvmlInit" >&5 +echo "${ECHO_T}$ac_cv_lib_nvidia_ml_nvmlInit" >&6 +if test $ac_cv_lib_nvidia_ml_nvmlInit = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBNVIDIA_ML 1 +_ACEOF + + LIBS="-lnvidia-ml $LIBS" + +else + { { echo "$as_me:$LINENO: error: NVML component: libnvidia-ml.so not found: use configure flags to set the path" >&5 +echo "$as_me: error: NVML component: libnvidia-ml.so not found: use configure flags to set the path" >&2;} + { (exit 1); exit 1; }; } +fi + +NVML_INCDIR=$nvml_incdir +NVML_LIBDIR=$nvml_libdir + + +# Check whether --with-cuda-dir or --without-cuda-dir was given. +if test "${with_cuda_dir+set}" = set; then + withval="$with_cuda_dir" + CUDA_DIR=$withval + LDFLAGS="$LDFLAGS -L$withval/lib64 -Wl,-rpath,$withval/lib64" + +echo "$as_me:$LINENO: checking for cudaMalloc in -lcudart" >&5 +echo $ECHO_N "checking for cudaMalloc in -lcudart... $ECHO_C" >&6 +if test "${ac_cv_lib_cudart_cudaMalloc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. 
*/ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char cudaMalloc (); +int +main () +{ +cudaMalloc (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_cudart_cudaMalloc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_cudart_cudaMalloc=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_cudart_cudaMalloc" >&5 +echo "${ECHO_T}$ac_cv_lib_cudart_cudaMalloc" >&6 +if test $ac_cv_lib_cudart_cudaMalloc = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBCUDART 1 +_ACEOF + + LIBS="-lcudart $LIBS" + +else + { { echo "$as_me:$LINENO: error: CUDA cudart library not found!" >&5 +echo "$as_me: error: CUDA cudart library not found!" >&2;} + { (exit 1); exit 1; }; } +fi + +else + { { echo "$as_me:$LINENO: error: Component requires path to cuda library." >&5 +echo "$as_me: error: Component requires path to cuda library." 
>&2;} + { (exit 1); exit 1; }; } +fi; + + ac_config_files="$ac_config_files Makefile.nvml" + + + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +{ + (set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. 
+ sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} | + sed ' + t clear + : clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + : end' >>confcache +if diff $cache_file confcache >/dev/null 2>&1; then :; else + if test -w $cache_file; then + test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file" + cat confcache >$cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/; +s/:*\${srcdir}:*/:/; +s/:*@srcdir@:*/:/; +s/^\([^=]*=[ ]*\):*/\1/; +s/:*$//; +s/^[^=]*=[ ]*$//; +}' +fi + +# Transform confdefs.h into DEFS. +# Protect against shell expansion while executing Makefile rules. +# Protect against Makefile macro expansion. +# +# If the first sed substitution is executed (which looks for macros that +# take arguments), then we branch to the quote section. Otherwise, +# look for a macro that doesn't take arguments. +cat >confdef2opt.sed <<\_ACEOF +t clear +: clear +s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\),-D\1=\2,g +t quote +s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\),-D\1=\2,g +t quote +d +: quote +s,[ `~#$^&*(){}\\|;'"<>?],\\&,g +s,\[,\\&,g +s,\],\\&,g +s,\$,$$,g +p +_ACEOF +# We use echo to avoid assuming a particular line-breaking character. +# The extra dot is to prevent the shell from consuming trailing +# line-breaks from the sub-command output. 
A line-break within +# single-quotes doesn't work because, if this script is created in a +# platform that uses two characters for line-breaks (e.g., DOS), tr +# would break. +ac_LF_and_DOT=`echo; echo .` +DEFS=`sed -n -f confdef2opt.sed confdefs.h | tr "$ac_LF_and_DOT" ' .'` +rm -f confdef2opt.sed + + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_i=`echo "$ac_i" | + sed 's/\$U\././;s/\.o$//;s/\.obj$//'` + # 2. Add them. + ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: ${CONFIG_STATUS=./config.status} +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. 
+$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. 
+ case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 +echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. 
:-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 +echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. + exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. 
+as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + +exec 6>&1 + +# Open the log real soon, to keep \$[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. Logging --version etc. is OK. +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX +} >&5 +cat >&5 <<_CSEOF + +This file was extended by $as_me, which was +generated by GNU Autoconf 2.59. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +_CSEOF +echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 +echo >&5 +_ACEOF + +# Files that config.status was made for. +if test -n "$ac_config_files"; then + echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_headers"; then + echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_links"; then + echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_commands"; then + echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS +fi + +cat >>$CONFIG_STATUS <<\_ACEOF + +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + +Configuration files: +$config_files + +Report bugs to ." 
+_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.59, + with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2003 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." +srcdir=$srcdir +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If no file are specified by the user, then we need to provide default +# value. By we need to know if files were specified by the user. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "x$1" : 'x\([^=]*\)='` + ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` + ac_shift=: + ;; + -*) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + *) # This is not an option, so the user has probably given explicit + # arguments. + ac_option=$1 + ac_need_defaults=false;; + esac + + case $ac_option in + # Handling of the options. +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --vers* | -V ) + echo "$ac_cs_version"; exit 0 ;; + --he | --h) + # Conflict between --help and --header + { { echo "$as_me:$LINENO: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + CONFIG_FILES="$CONFIG_FILES $ac_optarg" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" + ac_need_defaults=false;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. 
+ -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +if \$ac_cs_recheck; then + echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 + exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion +fi + +_ACEOF + + + + + +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_config_target in $ac_config_targets +do + case "$ac_config_target" in + # Handling of arguments. + "Makefile.nvml" ) CONFIG_FILES="$CONFIG_FILES Makefile.nvml" ;; + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason to put it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Create a temporary directory, and hook for its removal unless debugging. +$debug || +{ + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} + +# Create a (secure) tmp directory for tmp files. 
+ +{ + tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./confstat$$-$RANDOM + (umask 077 && mkdir $tmp) +} || +{ + echo "$me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF + +# +# CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. +# This happens for instance when ./config.status config.h +if test -n "\$CONFIG_FILES"; then + # Protect against being on the right side of a sed subst in config.status. + sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g; + s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF +s,@SHELL@,$SHELL,;t t +s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t +s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t +s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t +s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t +s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t +s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t +s,@exec_prefix@,$exec_prefix,;t t +s,@prefix@,$prefix,;t t +s,@program_transform_name@,$program_transform_name,;t t +s,@bindir@,$bindir,;t t +s,@sbindir@,$sbindir,;t t +s,@libexecdir@,$libexecdir,;t t +s,@datadir@,$datadir,;t t +s,@sysconfdir@,$sysconfdir,;t t +s,@sharedstatedir@,$sharedstatedir,;t t +s,@localstatedir@,$localstatedir,;t t +s,@libdir@,$libdir,;t t +s,@includedir@,$includedir,;t t +s,@oldincludedir@,$oldincludedir,;t t +s,@infodir@,$infodir,;t t +s,@mandir@,$mandir,;t t +s,@build_alias@,$build_alias,;t t +s,@host_alias@,$host_alias,;t t +s,@target_alias@,$target_alias,;t t +s,@DEFS@,$DEFS,;t t +s,@ECHO_C@,$ECHO_C,;t t +s,@ECHO_N@,$ECHO_N,;t t +s,@ECHO_T@,$ECHO_T,;t t +s,@LIBS@,$LIBS,;t t +s,@CC@,$CC,;t t +s,@CFLAGS@,$CFLAGS,;t t +s,@LDFLAGS@,$LDFLAGS,;t t +s,@CPPFLAGS@,$CPPFLAGS,;t t +s,@ac_ct_CC@,$ac_ct_CC,;t t +s,@EXEEXT@,$EXEEXT,;t t +s,@OBJEXT@,$OBJEXT,;t t +s,@CPP@,$CPP,;t t +s,@EGREP@,$EGREP,;t t +s,@NVML_INCDIR@,$NVML_INCDIR,;t t +s,@NVML_LIBDIR@,$NVML_LIBDIR,;t t 
+s,@CUDA_DIR@,$CUDA_DIR,;t t +s,@LIBOBJS@,$LIBOBJS,;t t +s,@LTLIBOBJS@,$LTLIBOBJS,;t t +CEOF + +_ACEOF + + cat >>$CONFIG_STATUS <<\_ACEOF + # Split the substitutions into bite-sized pieces for seds with + # small command number limits, like on Digital OSF/1 and HP-UX. + ac_max_sed_lines=48 + ac_sed_frag=1 # Number of current file. + ac_beg=1 # First line for current file. + ac_end=$ac_max_sed_lines # Line after last line for current file. + ac_more_lines=: + ac_sed_cmds= + while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + else + sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + fi + if test ! -s $tmp/subs.frag; then + ac_more_lines=false + else + # The purpose of the label and of the branching condition is to + # speed up the sed processing (if there are no `@' at all, there + # is no need to browse any of the substitutions). + # These are the two extra sed commands mentioned above. + (echo ':t + /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" + else + ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" + fi + ac_sed_frag=`expr $ac_sed_frag + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_lines` + fi + done + if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat + fi +fi # test -n "$CONFIG_FILES" + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. 
+ ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! -n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. 
+case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + + + if test x"$ac_file" != x-; then + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + rm -f "$ac_file" + fi + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + configure_input= + else + configure_input="$ac_file. " + fi + configure_input=$configure_input"Generated from `echo $ac_file_in | + sed 's,.*/,,'` by configure." + + # First look for the input files in the build tree, otherwise in the + # src tree. 
+ ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + sed "$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s,@configure_input@,$configure_input,;t t +s,@srcdir@,$ac_srcdir,;t t +s,@abs_srcdir@,$ac_abs_srcdir,;t t +s,@top_srcdir@,$ac_top_srcdir,;t t +s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t +s,@builddir@,$ac_builddir,;t t +s,@abs_builddir@,$ac_abs_builddir,;t t +s,@top_builddir@,$ac_top_builddir,;t t +s,@abs_top_builddir@,$ac_abs_top_builddir,;t t +" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out + rm -f $tmp/stdin + if test x"$ac_file" != x-; then + mv $tmp/out $ac_file + else + cat $tmp/out + rm -f $tmp/out + fi + +done +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. 
When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || { (exit 1); exit 1; } +fi + diff --git a/src/components/nvml/configure.in b/src/components/nvml/configure.in new file mode 100644 index 0000000..7f2d5a4 --- /dev/null +++ b/src/components/nvml/configure.in @@ -0,0 +1,50 @@ +# Process this file with autoconf to produce a configure script. +# File: components/nvml/configure.in + +AC_INIT + +AC_ARG_WITH(nvml_dir, + [ --with-nvml-dir= Specify prefix to nvml libraries and headers ], + [nvml_dir=$withval]) +AC_ARG_WITH(nvml_incdir, + [ --with-nvml-incdir= Specify directory of nvml header files (nvml.h) in a specific location ], + [nvml_incdir=$withval]) +AC_ARG_WITH(nvml_libdir, + [ --with-nvml-libdir= Specify directory of nvml library (libnvidia-ml.so) in a specific location ], + [nvml_libdir=$withval]) + +nvml_dotest=0 +if test "x$nvml_dir" != "x"; then + nvml_incdir="$nvml_dir/include" + nvml_libdir="$nvml_dir/lib64" + nvml_dotest=1 +else + if test "x$nvml_libdir" != "x"; then + LIBS="-L$nvml_libdir" + nvml_dotest=1 + fi + if test "x$nvml_incdir" != "x"; then + CPPFLAGS="-I$nvml_incdir" + nvml_dotest=1 + fi +fi + +CFLAGS="$CFLAGS -I$nvml_incdir" +AC_CHECK_HEADER( [nvml.h], [], [AC_MSG_ERROR([NVML component: nvml.h not found: use configure flags to set the path ])], ) +LDFLAGS="$LDFLAGS -L$nvml_libdir -Wl,-rpath,$nvml_libdir" +AC_CHECK_LIB( [nvidia-ml], [nvmlInit], [], [AC_MSG_ERROR([NVML component: libnvidia-ml.so not found: use configure flags to set the path])] ) +NVML_INCDIR=$nvml_incdir +NVML_LIBDIR=$nvml_libdir + 
+AC_ARG_WITH(cuda-dir, + [--with-cuda-dir= Specify path to cuda root directory], + [ CUDA_DIR=$withval + LDFLAGS="$LDFLAGS -L$withval/lib64 -Wl,-rpath,$withval/lib64" + AC_CHECK_LIB( [cudart], [cudaMalloc], [], [AC_MSG_ERROR([CUDA cudart library not found!])])], + [ AC_MSG_ERROR([Component requires path to cuda library.])]) + +AC_CONFIG_FILES([Makefile.nvml]) +AC_SUBST(NVML_INCDIR) +AC_SUBST(NVML_LIBDIR) +AC_SUBST(CUDA_DIR) +AC_OUTPUT diff --git a/src/components/nvml/linux-nvml.c b/src/components/nvml/linux-nvml.c new file mode 100644 index 0000000..e6003b5 --- /dev/null +++ b/src/components/nvml/linux-nvml.c @@ -0,0 +1,1689 @@ +/**************************** +THIS IS OPEN SOURCE CODE + +Part of the PAPI software library. Copyright (c) 2005 - 2017, +Innovative Computing Laboratory, Dept of Electrical Engineering & +Computer Science University of Tennessee, Knoxville, TN. + +The open source software license conforms to the 2-clause BSD License +template. + +****************************/ + +/** + * @file linux-nvml.c + * @author Kiran Kumar Kasichayanula + * kkasicha@utk.edu + * @author James Ralph + * ralph@eecs.utk.edu + * @ingroup papi_components + * + * @brief This is an NVML component, it demos the component interface + * and implements a number of counters from the Nvidia Management + * Library. Please refer to NVML documentation for details about + * nvmlDeviceGetPowerUsage, nvmlDeviceGetTemperature. Power is + * reported in mW and temperature in Celcius. The counter + * descriptions should contain the units that the measurement + * returns. 
+ */ +#include + +#include +#include +#include +#include +#include +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#include "linux-nvml.h" + +#include "nvml.h" +#include "cuda.h" +#include "cuda_runtime_api.h" + +void (*_dl_non_dynamic_init)(void) __attribute__((weak)); + +/***** CHANGE PROTOTYPES TO DECLARE CUDA AND NVML LIBRARY SYMBOLS AS WEAK ***** + * This is done so that a version of PAPI built with the nvml component can * + * be installed on a system which does not have the cuda libraries installed. * + * * + * If this is done without these prototypes, then all papi services on the * + * system without the cuda libraries installed will fail. The PAPI libraries * + * contain references to the cuda libraries which are not installed. The * + * load of PAPI commands fails because the cuda library references can not be * + * resolved. * + * * + * This also defines pointers to the cuda library functions that we call. * + * These function pointers will be resolved with dlopen/dlsym calls at * + * component initialization time. The component then calls the cuda library * + * functions through these function pointers. 
* + ********************************************************************************/ +#undef CUDAAPI +#define CUDAAPI __attribute__((weak)) +CUresult CUDAAPI cuInit(unsigned int); + +CUresult(*cuInitPtr)(unsigned int); + +#undef CUDARTAPI +#define CUDARTAPI __attribute__((weak)) +cudaError_t CUDARTAPI cudaGetDevice(int *); +cudaError_t CUDARTAPI cudaGetDeviceCount(int *); +cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *, int, int); + +cudaError_t (*cudaGetDevicePtr)(int *); +cudaError_t (*cudaGetDeviceCountPtr)(int *); +cudaError_t (*cudaDeviceGetPCIBusIdPtr)(char *, int, int); + +#undef DECLDIR +#define DECLDIR __attribute__((weak)) +nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo(nvmlDevice_t, nvmlClockType_t, unsigned int *); +const char* DECLDIR nvmlErrorString(nvmlReturn_t); +nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *); +nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed(nvmlDevice_t, unsigned int *); +nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t, nvmlMemory_t *); +nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState(nvmlDevice_t, nvmlPstates_t *); +nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage(nvmlDevice_t, unsigned int *); +nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *); +nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *); +nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates(nvmlDevice_t, nvmlUtilization_t *); +nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex(unsigned int, nvmlDevice_t *); +nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo(nvmlDevice_t, nvmlPciInfo_t *); +nvmlReturn_t DECLDIR nvmlDeviceGetName(nvmlDevice_t, char *, unsigned int); +nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int); +nvmlReturn_t DECLDIR nvmlDeviceGetEccMode(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *); 
+nvmlReturn_t DECLDIR nvmlInit(void); +nvmlReturn_t DECLDIR nvmlDeviceGetCount(unsigned int *); +nvmlReturn_t DECLDIR nvmlShutdown(void); +nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, unsigned int* limit); +nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, unsigned int limit); +nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimitConstraints(nvmlDevice_t device, unsigned int* minLimit, unsigned int* maxLimit); + +nvmlReturn_t (*nvmlDeviceGetClockInfoPtr)(nvmlDevice_t, nvmlClockType_t, unsigned int *); +char* (*nvmlErrorStringPtr)(nvmlReturn_t); +nvmlReturn_t (*nvmlDeviceGetDetailedEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *); +nvmlReturn_t (*nvmlDeviceGetFanSpeedPtr)(nvmlDevice_t, unsigned int *); +nvmlReturn_t (*nvmlDeviceGetMemoryInfoPtr)(nvmlDevice_t, nvmlMemory_t *); +nvmlReturn_t (*nvmlDeviceGetPerformanceStatePtr)(nvmlDevice_t, nvmlPstates_t *); +nvmlReturn_t (*nvmlDeviceGetPowerUsagePtr)(nvmlDevice_t, unsigned int *); +nvmlReturn_t (*nvmlDeviceGetTemperaturePtr)(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *); +nvmlReturn_t (*nvmlDeviceGetTotalEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *); +nvmlReturn_t (*nvmlDeviceGetUtilizationRatesPtr)(nvmlDevice_t, nvmlUtilization_t *); +nvmlReturn_t (*nvmlDeviceGetHandleByIndexPtr)(unsigned int, nvmlDevice_t *); +nvmlReturn_t (*nvmlDeviceGetPciInfoPtr)(nvmlDevice_t, nvmlPciInfo_t *); +nvmlReturn_t (*nvmlDeviceGetNamePtr)(nvmlDevice_t, char *, unsigned int); +nvmlReturn_t (*nvmlDeviceGetInforomVersionPtr)(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int); +nvmlReturn_t (*nvmlDeviceGetEccModePtr)(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *); +nvmlReturn_t (*nvmlInitPtr)(void); +nvmlReturn_t (*nvmlDeviceGetCountPtr)(unsigned int *); +nvmlReturn_t (*nvmlShutdownPtr)(void); +nvmlReturn_t (*nvmlDeviceGetPowerManagementLimitPtr)(nvmlDevice_t device, 
unsigned int* limit); +nvmlReturn_t (*nvmlDeviceSetPowerManagementLimitPtr)(nvmlDevice_t device, unsigned int limit); +nvmlReturn_t (*nvmlDeviceGetPowerManagementLimitConstraintsPtr)(nvmlDevice_t device, unsigned int* minLimit, unsigned int* maxLimit); + +// file handles used to access cuda libraries with dlopen +static void* dl1 = NULL; +static void* dl2 = NULL; +static void* dl3 = NULL; + +static int linkCudaLibraries(); + +/* Declare our vector in advance */ +papi_vector_t _nvml_vector; + +/* upto 25 events per card how many cards per system should we allow for?! */ +#define NVML_MAX_COUNTERS 100 + +/** Holds control flags. Usually there's one of these per event-set. + * Usually this is out-of band configuration of the hardware + */ +typedef struct nvml_control_state { + int num_events; + int which_counter[NVML_MAX_COUNTERS]; + long long counter[NVML_MAX_COUNTERS]; /**< Copy of counts, holds results when stopped */ +} nvml_control_state_t; + +/** Holds per-thread information */ +typedef struct nvml_context { + nvml_control_state_t state; +} nvml_context_t; + +/** This table contains the native events */ +static nvml_native_event_entry_t *nvml_native_table = NULL; + +/** Number of devices detected at component_init time */ +static int device_count = 0; + +/** number of events in the table*/ +static int num_events = 0; + +static nvmlDevice_t* devices = NULL; +static int* features = NULL; +static unsigned int *power_management_initial_limit = NULL; +static unsigned int *power_management_limit_constraint_min = NULL; +static unsigned int *power_management_limit_constraint_max = NULL; + +unsigned long long +getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one) +{ + unsigned int ret = 0; + nvmlReturn_t bad; + bad = (*nvmlDeviceGetClockInfoPtr)(dev, which_one, &ret); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + + return (unsigned long long)ret; +} + +unsigned long long +getEccLocalErrors(nvmlDevice_t dev, 
nvmlEccBitType_t bits, int which_one) +{ + nvmlEccErrorCounts_t counts; + + nvmlReturn_t bad; + bad = (*nvmlDeviceGetDetailedEccErrorsPtr)(dev, bits, NVML_VOLATILE_ECC , &counts); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + switch (which_one) { + case LOCAL_ECC_REGFILE: + return counts.registerFile; + case LOCAL_ECC_L1: + return counts.l1Cache; + case LOCAL_ECC_L2: + return counts.l2Cache; + case LOCAL_ECC_MEM: + return counts.deviceMemory; + default: + ; + } + return (unsigned long long) - 1; +} + +unsigned long long +getFanSpeed(nvmlDevice_t dev) +{ + unsigned int ret = 0; + nvmlReturn_t bad; + bad = (*nvmlDeviceGetFanSpeedPtr)(dev, &ret); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + return (unsigned long long)ret; +} + +unsigned long long +getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one) +{ + unsigned int ret = 0; + nvmlReturn_t bad; + bad = (*nvmlDeviceGetClockInfoPtr)(dev, which_one, &ret); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + return (unsigned long long) ret; +} + +unsigned long long +getMemoryInfo(nvmlDevice_t dev, int which_one) +{ + nvmlMemory_t meminfo; + nvmlReturn_t bad; + bad = (*nvmlDeviceGetMemoryInfoPtr)(dev, &meminfo); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + + switch (which_one) { + case MEMINFO_TOTAL_MEMORY: + return meminfo.total; + case MEMINFO_UNALLOCED: + return meminfo.free; + case MEMINFO_ALLOCED: + return meminfo.used; + default: + ; + } + return (unsigned long long) - 1; +} + +unsigned long long +getPState(nvmlDevice_t dev) +{ + unsigned int ret = 0; + nvmlPstates_t state = NVML_PSTATE_15; + nvmlReturn_t bad; + bad = (*nvmlDeviceGetPerformanceStatePtr)(dev, &state); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + switch (state) 
{ + case NVML_PSTATE_15: + ret++; + case NVML_PSTATE_14: + ret++; + case NVML_PSTATE_13: + ret++; + case NVML_PSTATE_12: + ret++; + case NVML_PSTATE_11: + ret++; + case NVML_PSTATE_10: + ret++; + case NVML_PSTATE_9: + ret++; + case NVML_PSTATE_8: + ret++; + case NVML_PSTATE_7: + ret++; + case NVML_PSTATE_6: + ret++; + case NVML_PSTATE_5: + ret++; + case NVML_PSTATE_4: + ret++; + case NVML_PSTATE_3: + ret++; + case NVML_PSTATE_2: + ret++; + case NVML_PSTATE_1: + ret++; + case NVML_PSTATE_0: + break; + case NVML_PSTATE_UNKNOWN: + default: + /* This should never happen? + * The API docs just state Unknown performance state... */ + return (unsigned long long) - 1; + } + return (unsigned long long)ret; +} + +unsigned long long +getPowerUsage(nvmlDevice_t dev) +{ + unsigned int power; + nvmlReturn_t bad; + bad = (*nvmlDeviceGetPowerUsagePtr)(dev, &power); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + return (unsigned long long) power; +} + +unsigned long long +getTemperature(nvmlDevice_t dev) +{ + unsigned int ret = 0; + nvmlReturn_t bad; + bad = (*nvmlDeviceGetTemperaturePtr)(dev, NVML_TEMPERATURE_GPU, &ret); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + return (unsigned long long)ret; +} + +unsigned long long +getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits) +{ + unsigned long long counts = 0; + nvmlReturn_t bad; + bad = (*nvmlDeviceGetTotalEccErrorsPtr)(dev, bits, NVML_VOLATILE_ECC , &counts); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + return counts; +} + +/* 0 => gpu util + 1 => memory util + */ +unsigned long long +getUtilization(nvmlDevice_t dev, int which_one) +{ + nvmlUtilization_t util; + nvmlReturn_t bad; + bad = (*nvmlDeviceGetUtilizationRatesPtr)(dev, &util); + + if (NVML_SUCCESS != bad) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); + } + + switch 
(which_one) { + case GPU_UTILIZATION: + return (unsigned long long) util.gpu; + case MEMORY_UTILIZATION: + return (unsigned long long) util.memory; + default: + ; + } + + return (unsigned long long) - 1; +} + +unsigned long long getPowerManagementLimit(nvmlDevice_t dev) +{ + unsigned int limit; + nvmlReturn_t rv; + rv = (*nvmlDeviceGetPowerManagementLimitPtr)(dev, &limit); + if (NVML_SUCCESS != rv) { + SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(rv)); + return (unsigned long long) 0; + } + return (unsigned long long) limit; +} + +static void +nvml_hardware_reset() +{ + /* nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access, so while + * possible to implement a reset on the ECC counters, we pass */ + /* + for ( i=0; i < device_count; i++ ) + nvmlDeviceClearEccErrorCounts( device[i], NVML_VOLATILE_ECC ); + */ + int i; + nvmlReturn_t ret; + unsigned int templimit = 0; + for (i = 0; i < device_count; i++) { + if (HAS_FEATURE(features[i], FEATURE_POWER_MANAGEMENT)) { + // if power management is available + if (power_management_initial_limit[i] != 0) { + ret = (*nvmlDeviceGetPowerManagementLimitPtr)(devices[i], &templimit); + if ((ret == NVML_SUCCESS) && (templimit != power_management_initial_limit[i])) { + SUBDBG("Reset power_management_limit on device %d to initial value of %d \n", i, power_management_initial_limit[i]); + // if power is not at its initial value + // reset to initial value + ret = (*nvmlDeviceSetPowerManagementLimitPtr)(devices[i], power_management_initial_limit[i]); + if (ret != NVML_SUCCESS) + SUBDBG("Unable to reset the NVML power management limit on device %i to %ull (return code %d) \n", i, power_management_initial_limit[i] , ret); + } + } + } + } +} + +/** Code that reads event values. */ +/* You might replace this with code that accesses */ +/* hardware or reads values from the operatings system. 
*/ +static int +nvml_hardware_read(long long *value, int which_one) +//, nvml_context_t *ctx) +{ + nvml_native_event_entry_t *entry; + nvmlDevice_t handle; + int cudaIdx = -1; + + entry = &nvml_native_table[which_one]; + *value = (long long) - 1; + /* replace entry->resources with the current cuda_device->nvml device */ + (*cudaGetDevicePtr)(&cudaIdx); + + if (cudaIdx < 0 || cudaIdx > device_count) + return PAPI_EINVAL; + + /* Make sure the device we are running on has the requested event */ + if (!HAS_FEATURE(features[cudaIdx] , entry->type)) + return PAPI_EINVAL; + + handle = devices[cudaIdx]; + + switch (entry->type) { + case FEATURE_CLOCK_INFO: + *value = getClockSpeed(handle, (nvmlClockType_t)entry->options.clock); + break; + case FEATURE_ECC_LOCAL_ERRORS: + *value = getEccLocalErrors(handle, + (nvmlEccBitType_t)entry->options.ecc_opts.bits, + (int)entry->options.ecc_opts.which_one); + break; + case FEATURE_FAN_SPEED: + *value = getFanSpeed(handle); + break; + case FEATURE_MAX_CLOCK: + *value = getMaxClockSpeed(handle, + (nvmlClockType_t)entry->options.clock); + break; + case FEATURE_MEMORY_INFO: + *value = getMemoryInfo(handle, + (int)entry->options.which_one); + break; + case FEATURE_PERF_STATES: + *value = getPState(handle); + break; + case FEATURE_POWER: + *value = getPowerUsage(handle); + break; + case FEATURE_TEMP: + *value = getTemperature(handle); + break; + case FEATURE_ECC_TOTAL_ERRORS: + *value = getTotalEccErrors(handle, + (nvmlEccBitType_t)entry->options.ecc_opts.bits); + break; + case FEATURE_UTILIZATION: + *value = getUtilization(handle, + (int)entry->options.which_one); + break; + case FEATURE_POWER_MANAGEMENT: + *value = getPowerManagementLimit(handle); + break; + + case FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MIN: + *value = power_management_limit_constraint_min[cudaIdx]; + break; + + case FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MAX: + *value = power_management_limit_constraint_max[cudaIdx]; + break; + + default: + return 
PAPI_EINVAL; + } + + return PAPI_OK; +} + +/** Code that reads event values. */ +/* You might replace this with code that accesses */ +/* hardware or reads values from the operatings system. */ +static int nvml_hardware_write(long long *value, int which_one) +{ + nvml_native_event_entry_t *entry; + nvmlDevice_t handle; + int cudaIdx = -1; + nvmlReturn_t nvret; + + entry = &nvml_native_table[which_one]; + /* replace entry->resources with the current cuda_device->nvml device */ + (*cudaGetDevicePtr)(&cudaIdx); + + if (cudaIdx < 0 || cudaIdx > device_count) + return PAPI_EINVAL; + + /* Make sure the device we are running on has the requested event */ + if (!HAS_FEATURE(features[cudaIdx] , entry->type)) + return PAPI_EINVAL; + + handle = devices[cudaIdx]; + + switch (entry->type) { + case FEATURE_POWER_MANAGEMENT: { + unsigned int setToPower = (unsigned int) * value; + if (setToPower < power_management_limit_constraint_min[cudaIdx]) { + SUBDBG("Error: Desired power %u mW < minimum %u mW on device %d\n", setToPower, power_management_limit_constraint_min[cudaIdx], cudaIdx); + return PAPI_EINVAL; + } + if (setToPower > power_management_limit_constraint_max[cudaIdx]) { + SUBDBG("Error: Desired power %u mW > maximum %u mW on device %d\n", setToPower, power_management_limit_constraint_max[cudaIdx], cudaIdx); + return PAPI_EINVAL; + } + if ((nvret = (*nvmlDeviceSetPowerManagementLimitPtr)(handle, setToPower)) != NVML_SUCCESS) { + SUBDBG("Error: %s\n", (*nvmlErrorStringPtr)(nvret)); + return PAPI_EINVAL; + } + } + break; + + default: + return PAPI_EINVAL; + } + + return PAPI_OK; +} + +/********************************************************************/ +/* Below are the functions required by the PAPI component interface */ +/********************************************************************/ + +/** This is called whenever a thread is initialized */ +int +_papi_nvml_init_thread(hwd_context_t * ctx) +{ + (void) ctx; + + SUBDBG("Enter: ctx: %p\n", ctx); + + return PAPI_OK; +} 
+ +static int +detectDevices() +{ + nvmlReturn_t ret; + nvmlEnableState_t mode = NVML_FEATURE_DISABLED; + + char name[64]; + char inforomECC[16]; + char inforomPower[16]; + char names[device_count][64]; + + float ecc_version = 0.0; + float power_version = 0.0; + + int i = 0; + int isTesla = 0; + int isFermi = 0; + + unsigned int temp = 0; + + memset(names, 0x0, device_count * 64); + + /* So for each card, check whats querable */ + for (i = 0; i < device_count; i++) { + isTesla = 0; + isFermi = 1; + features[i] = 0; + + ret = (*nvmlDeviceGetHandleByIndexPtr)(i, &devices[i]); + if (NVML_SUCCESS != ret) { + SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", i, i); + return PAPI_ESYS; + } + + ret = (*nvmlDeviceGetNamePtr)(devices[i], name, sizeof(name) - 1); + if (NVML_SUCCESS != ret) { + SUBDBG("nvmlDeviceGetName failed \n"); + strncpy(name, "deviceNameUnknown", 17); + } + + name[sizeof(name) - 1] = '\0'; // to safely use strstr operation below, the variable 'name' must be null terminated + + ret = (*nvmlDeviceGetInforomVersionPtr)(devices[i], NVML_INFOROM_ECC, inforomECC, 16); + if (NVML_SUCCESS != ret) { + SUBDBG("nvmlGetInforomVersion fails %s\n", (*nvmlErrorStringPtr)(ret)); + isFermi = 0; + } + ret = (*nvmlDeviceGetInforomVersionPtr)(devices[i], NVML_INFOROM_POWER, inforomPower, 16); + if (NVML_SUCCESS != ret) { + /* This implies the card is older then Fermi */ + SUBDBG("nvmlGetInforomVersion fails %s\n", (*nvmlErrorStringPtr)(ret)); + SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n"); + isFermi = 0; + } + + ecc_version = strtof(inforomECC, NULL); + power_version = strtof(inforomPower, NULL); + + isTesla = (NULL == strstr(name, "Tesla")) ? 0 : 1; + + /* For Tesla and Quadro products from Fermi and Kepler families. */ + if (isFermi) { + features[i] |= FEATURE_CLOCK_INFO; + num_events += 3; + } + + /* For Tesla and Quadro products from Fermi and Kepler families. 
+ requires NVML_INFOROM_ECC 2.0 or higher for location-based counts + requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts + requires ECC mode to be enabled. */ + ret = (*nvmlDeviceGetEccModePtr)(devices[i], &mode, NULL); + if (NVML_SUCCESS == ret) { + if (NVML_FEATURE_ENABLED == mode) { + if (ecc_version >= 2.0) { + features[i] |= FEATURE_ECC_LOCAL_ERRORS; + num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */ + } + if (ecc_version >= 1.0) { + features[i] |= FEATURE_ECC_TOTAL_ERRORS; + num_events += 2; /* single bit errors, double bit errors */ + } + } + } else { + SUBDBG("nvmlDeviceGetEccMode does not appear to be supported. (nvml return code %d)\n", ret); + } + + /* For all discrete products with dedicated fans */ + features[i] |= FEATURE_FAN_SPEED; + num_events++; + + /* For Tesla and Quadro products from Fermi and Kepler families. */ + if (isFermi) { + features[i] |= FEATURE_MAX_CLOCK; + num_events += 3; + } + + /* For all products */ + features[i] |= FEATURE_MEMORY_INFO; + num_events += 3; /* total, free, used */ + + /* For Tesla and Quadro products from the Fermi and Kepler families. */ + if (isFermi) { + features[i] |= FEATURE_PERF_STATES; + num_events++; + } + + /* For "GF11x" Tesla and Quadro products from the Fermi family + requires NVML_INFOROM_POWER 3.0 or higher + For Tesla and Quadro products from the Kepler family + does not require NVML_INFOROM_POWER */ + /* Just try reading power, if it works, enable it*/ + ret = (*nvmlDeviceGetPowerUsagePtr)(devices[i], &temp); + if (NVML_SUCCESS == ret) { + features[i] |= FEATURE_POWER; + num_events++; + } else { + SUBDBG("nvmlDeviceGetPowerUsage does not appear to be supported on this card. (nvml return code %d)\n", ret); + } + + /* For all discrete and S-class products. 
*/ + features[i] |= FEATURE_TEMP; + num_events++; + + // For power_management_limit + { + // Just try the call to see if it works + unsigned int templimit = 0; + ret = (*nvmlDeviceGetPowerManagementLimitPtr)(devices[i], &templimit); + if (ret == NVML_SUCCESS && templimit > 0) { + power_management_initial_limit[i] = templimit; + features[i] |= FEATURE_POWER_MANAGEMENT; + num_events += 1; + } else { + power_management_initial_limit[i] = 0; + SUBDBG("nvmlDeviceGetPowerManagementLimit not appear to be supported on this card. (NVML code %d)\n", ret); + } + } + + // For power_management_limit_constraints, minimum and maximum + { + unsigned int minLimit = 0, maxLimit = 0; + ret = (*nvmlDeviceGetPowerManagementLimitConstraintsPtr)(devices[i], &minLimit, &maxLimit); + if (ret == NVML_SUCCESS) { + power_management_limit_constraint_min[i] = minLimit; + features[i] |= FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MIN; + num_events += 1; + power_management_limit_constraint_max[i] = maxLimit; + features[i] |= FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MAX; + num_events += 1; + } else { + power_management_limit_constraint_min[i] = 0; + power_management_limit_constraint_max[i] = INT_MAX; + } + SUBDBG("Done nvmlDeviceGetPowerManagementLimitConstraintsPtr\n"); + } + + /* For Tesla and Quadro products from the Fermi and Kepler families */ + if (isFermi) { + features[i] |= FEATURE_UTILIZATION; + num_events += 2; + } + + int retval = snprintf(names[i], sizeof(name), "%s:device:%d", name, i); + if (retval > (int)sizeof(name)) { + SUBDBG("Device name is too long %s:device%d", name, i); + return (PAPI_EINVAL); + } + names[i][sizeof(name) - 1] = '\0'; + } + return PAPI_OK; +} + +static void +createNativeEvents() +{ + char name[64]; + char sanitized_name[PAPI_MAX_STR_LEN]; + char names[device_count][64]; + + int i, nameLen = 0, j; + + nvml_native_event_entry_t* entry; + nvmlReturn_t ret; + + nvml_native_table = (nvml_native_event_entry_t*) papi_malloc( + 
sizeof(nvml_native_event_entry_t) * num_events); + memset(nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events); + entry = &nvml_native_table[0]; + + for (i = 0; i < device_count; i++) { + memset(names[i], 0x0, 64); + ret = (*nvmlDeviceGetNamePtr)(devices[i], name, sizeof(name) - 1); + if (NVML_SUCCESS != ret) { + SUBDBG("nvmlDeviceGetName failed \n"); + strncpy(name, "deviceNameUnknown", 17); + } + name[sizeof(name) - 1] = '\0'; // to safely use strlen operation below, the variable 'name' must be null terminated + + nameLen = strlen(name); + strncpy(sanitized_name, name, PAPI_MAX_STR_LEN); + + int retval = snprintf(sanitized_name, sizeof(name), "%s:device_%d", name, i); + if (retval > (int)sizeof(name)) { + SUBDBG("Device name is too long %s:device%d", name, i); + return; + } + sanitized_name[sizeof(name) - 1] = '\0'; + + for (j = 0; j < nameLen; j++) + if (' ' == sanitized_name[j]) + sanitized_name[j] = '_'; + + if (HAS_FEATURE(features[i], FEATURE_CLOCK_INFO)) { + sprintf(entry->name, "%s:graphics_clock", sanitized_name); + strncpy(entry->description, "Graphics clock domain (MHz).", PAPI_MAX_STR_LEN); + entry->options.clock = NVML_CLOCK_GRAPHICS; + entry->type = FEATURE_CLOCK_INFO; + entry++; + + sprintf(entry->name, "%s:sm_clock", sanitized_name); + strncpy(entry->description, "SM clock domain (MHz).", PAPI_MAX_STR_LEN); + entry->options.clock = NVML_CLOCK_SM; + entry->type = FEATURE_CLOCK_INFO; + entry++; + + sprintf(entry->name, "%s:memory_clock", sanitized_name); + strncpy(entry->description, "Memory clock domain (MHz).", PAPI_MAX_STR_LEN); + entry->options.clock = NVML_CLOCK_MEM; + entry->type = FEATURE_CLOCK_INFO; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_ECC_LOCAL_ERRORS)) { + sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name); + strncpy(entry->description, "L1 cache single bit ECC", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_SINGLE_BIT_ECC, + .which_one = LOCAL_ECC_L1, + 
}; + entry->type = FEATURE_ECC_LOCAL_ERRORS; + entry++; + + sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name); + strncpy(entry->description, "L2 cache single bit ECC", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_SINGLE_BIT_ECC, + .which_one = LOCAL_ECC_L2, + }; + entry->type = FEATURE_ECC_LOCAL_ERRORS; + entry++; + + sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name); + strncpy(entry->description, "Device memory single bit ECC", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_SINGLE_BIT_ECC, + .which_one = LOCAL_ECC_MEM, + }; + entry->type = FEATURE_ECC_LOCAL_ERRORS; + entry++; + + sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name); + strncpy(entry->description, "Register file single bit ECC", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_SINGLE_BIT_ECC, + .which_one = LOCAL_ECC_REGFILE, + }; + entry->type = FEATURE_ECC_LOCAL_ERRORS; + entry++; + + sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name); + strncpy(entry->description, "L1 cache double bit ECC", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_DOUBLE_BIT_ECC, + .which_one = LOCAL_ECC_L1, + }; + entry->type = FEATURE_ECC_LOCAL_ERRORS; + entry++; + + sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name); + strncpy(entry->description, "L2 cache double bit ECC", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_DOUBLE_BIT_ECC, + .which_one = LOCAL_ECC_L2, + }; + entry->type = FEATURE_ECC_LOCAL_ERRORS; + entry++; + + sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name); + strncpy(entry->description, "Device memory double bit ECC", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_DOUBLE_BIT_ECC, + .which_one = LOCAL_ECC_MEM, + }; + entry->type = FEATURE_ECC_LOCAL_ERRORS; + entry++; + + sprintf(entry->name, 
"%s:regfile_double_ecc_errors", sanitized_name); + strncpy(entry->description, "Register file double bit ECC", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_DOUBLE_BIT_ECC, + .which_one = LOCAL_ECC_REGFILE, + }; + entry->type = FEATURE_ECC_LOCAL_ERRORS; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_FAN_SPEED)) { + sprintf(entry->name, "%s:fan_speed", sanitized_name); + strncpy(entry->description, "The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN); + entry->type = FEATURE_FAN_SPEED; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_MAX_CLOCK)) { + sprintf(entry->name, "%s:graphics_max_clock", sanitized_name); + strncpy(entry->description, "Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN); + entry->options.clock = NVML_CLOCK_GRAPHICS; + entry->type = FEATURE_MAX_CLOCK; + entry++; + + sprintf(entry->name, "%s:sm_max_clock", sanitized_name); + strncpy(entry->description, "Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN); + entry->options.clock = NVML_CLOCK_SM; + entry->type = FEATURE_MAX_CLOCK; + entry++; + + sprintf(entry->name, "%s:memory_max_clock", sanitized_name); + strncpy(entry->description, "Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN); + entry->options.clock = NVML_CLOCK_MEM; + entry->type = FEATURE_MAX_CLOCK; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_MEMORY_INFO)) { + sprintf(entry->name, "%s:total_memory", sanitized_name); + strncpy(entry->description, "Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN); + entry->options.which_one = MEMINFO_TOTAL_MEMORY; + entry->type = FEATURE_MEMORY_INFO; + entry++; + + sprintf(entry->name, "%s:unallocated_memory", sanitized_name); + strncpy(entry->description, "Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN); + entry->options.which_one = MEMINFO_UNALLOCED; + entry->type = FEATURE_MEMORY_INFO; + entry++; + + sprintf(entry->name, "%s:allocated_memory", sanitized_name); + 
strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN); + entry->options.which_one = MEMINFO_ALLOCED; + entry->type = FEATURE_MEMORY_INFO; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_PERF_STATES)) { + sprintf(entry->name, "%s:pstate", sanitized_name); + strncpy(entry->description, "The performance state of the device.", PAPI_MAX_STR_LEN); + entry->type = FEATURE_PERF_STATES; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_POWER)) { + sprintf(entry->name, "%s:power", sanitized_name); + // set the power event units value to "mW" for miliwatts + strncpy(entry->units, "mW", PAPI_MIN_STR_LEN); + strncpy(entry->description, "Power usage reading for the device, in miliwatts. This is the power draw (+/-5 watts) for the entire board: GPU, memory, etc.", PAPI_MAX_STR_LEN); + entry->type = FEATURE_POWER; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_TEMP)) { + sprintf(entry->name, "%s:temperature", sanitized_name); + strncpy(entry->description, "Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN); + entry->type = FEATURE_TEMP; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_ECC_TOTAL_ERRORS)) { + sprintf(entry->name, "%s:total_ecc_errors", sanitized_name); + strncpy(entry->description, "Total single bit errors.", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_SINGLE_BIT_ECC, + }; + entry->type = FEATURE_ECC_TOTAL_ERRORS; + entry++; + + sprintf(entry->name, "%s:total_ecc_errors", sanitized_name); + strncpy(entry->description, "Total double bit errors.", PAPI_MAX_STR_LEN); + entry->options.ecc_opts = (struct local_ecc) { + .bits = NVML_DOUBLE_BIT_ECC, + }; + entry->type = FEATURE_ECC_TOTAL_ERRORS; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_UTILIZATION)) { + sprintf(entry->name, "%s:gpu_utilization", sanitized_name); + strncpy(entry->description, "Percent of 
time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN); + entry->options.which_one = GPU_UTILIZATION; + entry->type = FEATURE_UTILIZATION; + entry++; + + sprintf(entry->name, "%s:memory_utilization", sanitized_name); + strncpy(entry->description, "Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN); + entry->options.which_one = MEMORY_UTILIZATION; + entry->type = FEATURE_UTILIZATION; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_POWER_MANAGEMENT)) { + sprintf(entry->name, "%s:power_management_limit", sanitized_name); + // set the power event units value to "mW" for milliwatts + strncpy(entry->units, "mW", PAPI_MIN_STR_LEN); + strncpy(entry->description, "Power management limit in milliwatts associated with the device. The power limit defines the upper boundary for the cards power draw. If the cards total power draw reaches this limit the power management algorithm kicks in. This should be writable (with appropriate privileges) on supported Kepler or later (unit milliWatts). 
", PAPI_MAX_STR_LEN); + entry->type = FEATURE_POWER_MANAGEMENT; + entry++; + } + if (HAS_FEATURE(features[i], FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MIN)) { + sprintf(entry->name, "%s:power_management_limit_constraint_min", sanitized_name); + strncpy(entry->units, "mW", PAPI_MIN_STR_LEN); + strncpy(entry->description, "The minimum power management limit in milliwatts.", PAPI_MAX_STR_LEN); + entry->type = FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MIN; + entry++; + } + + if (HAS_FEATURE(features[i], FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MAX)) { + sprintf(entry->name, "%s:power_management_limit_constraint_max", sanitized_name); + strncpy(entry->units, "mW", PAPI_MIN_STR_LEN); + strncpy(entry->description, "The maximum power management limit in milliwatts.", PAPI_MAX_STR_LEN); + entry->type = FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MAX; + entry++; + } + + strncpy(names[i], name, sizeof(names[0]) - 1); + names[i][sizeof(names[0]) - 1] = '\0'; + } +} + +/** Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +int +_papi_nvml_init_component(int cidx) +{ + SUBDBG("Entry: cidx: %d\n", cidx); + nvmlReturn_t ret; + cudaError_t cuerr; + int papi_errorcode; + + int cuda_count = 0; + unsigned int nvml_count = 0; + + /* link in the cuda and nvml libraries and resolve the symbols we need to use */ + if (linkCudaLibraries() != PAPI_OK) { + SUBDBG("Dynamic link of CUDA libraries failed, component will be disabled.\n"); + SUBDBG("See disable reason in papi_component_avail output for more details.\n"); + return (PAPI_ENOSUPP); + } + + ret = (*nvmlInitPtr)(); + if (NVML_SUCCESS != ret) { + strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize."); + return PAPI_ENOSUPP; + } + + cuerr = (*cuInitPtr)(0); + if (cudaSuccess != cuerr) { + strcpy(_nvml_vector.cmp_info.disabled_reason, 
"The CUDA library failed to initialize."); + return PAPI_ENOSUPP; + } + + /* Figure out the number of CUDA devices in the system */ + ret = (*nvmlDeviceGetCountPtr)(&nvml_count); + if (NVML_SUCCESS != ret) { + strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library."); + return PAPI_ENOSUPP; + } + + cuerr = (*cudaGetDeviceCountPtr)(&cuda_count); + if (cudaSuccess != cuerr) { + strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA."); + return PAPI_ENOSUPP; + } + + /* We can probably recover from this, when we're clever */ + if ((cuda_count > 0) && (nvml_count != (unsigned int)cuda_count)) { + strcpy(_nvml_vector.cmp_info.disabled_reason, "CUDA and the NVIDIA managament library have different device counts."); + return PAPI_ENOSUPP; + } + + device_count = cuda_count; + SUBDBG("Need to setup NVML with %d devices\n", device_count); + + /* A per device representation of what events are present */ + features = (int*)papi_malloc(sizeof(int) * device_count); + + /* Handles to each device */ + devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count); + + /* For each device, store the intial power value to enable reset if power is altered */ + power_management_initial_limit = (unsigned int*)papi_malloc(sizeof(unsigned int) * device_count); + power_management_limit_constraint_min = (unsigned int*)papi_malloc(sizeof(unsigned int) * device_count); + power_management_limit_constraint_max = (unsigned int*)papi_malloc(sizeof(unsigned int) * device_count); + + /* Figure out what events are supported on each card. 
*/ + if ((papi_errorcode = detectDevices()) != PAPI_OK) { + papi_free(features); + papi_free(devices); + sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install."); + return PAPI_ENOSUPP; + } + + /* The assumption is that if everything went swimmingly in detectDevices, + all nvml calls here should be fine. */ + createNativeEvents(); + + /* Export the total number of events available */ + _nvml_vector.cmp_info.num_native_events = num_events; + + /* Export the component id */ + _nvml_vector.cmp_info.CmpIdx = cidx; + + /* Export the number of 'counters' */ + _nvml_vector.cmp_info.num_cntrs = num_events; + _nvml_vector.cmp_info.num_mpx_cntrs = num_events; + + return PAPI_OK; +} + +/* + * Link the necessary CUDA libraries to use the cuda component. If any of them can not be found, then + * the CUDA component will just be disabled. This is done at runtime so that a version of PAPI built + * with the CUDA component can be installed and used on systems which have the CUDA libraries installed + * and on systems where these libraries are not installed. 
+ */ +static int +linkCudaLibraries() +{ + /* Attempt to guess if we were statically linked to libc, if so bail */ + if (_dl_non_dynamic_init != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + + /* Need to link in the cuda libraries, if not found disable the component */ + dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL); + if (!dl1) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + cuInitPtr = dlsym(dl1, "cuInit"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + + dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE); + if (!dl2) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + + dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL); + if (!dl3) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetClockInfoPtr = dlsym(dl3, 
"nvmlDeviceGetClockInfo"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, 
"nvmlDeviceGetTotalEccErrors"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlInitPtr = dlsym(dl3, "nvmlInit"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount"); + if (dlerror() != NULL) { + 
strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetPowerManagementLimitPtr = dlsym(dl3, "nvmlDeviceGetPowerManagementLimit"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerManagementLimit not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceSetPowerManagementLimitPtr = dlsym(dl3, "nvmlDeviceSetPowerManagementLimit"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceSetPowerManagementLimit not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + nvmlDeviceGetPowerManagementLimitConstraintsPtr = dlsym(dl3, "nvmlDeviceGetPowerManagementLimitConstraints"); + if (dlerror() != NULL) { + strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerManagementLimitConstraints not found.", PAPI_MAX_STR_LEN); + return (PAPI_ENOSUPP); + } + return (PAPI_OK); +} + +/** Setup a counter control state. + * In general a control state holds the hardware info for an + * EventSet. + */ + +int +_papi_nvml_init_control_state(hwd_control_state_t * ctl) +{ + SUBDBG("nvml_init_control_state... 
%p\n", ctl); + nvml_control_state_t *nvml_ctl = (nvml_control_state_t *) ctl; + memset(nvml_ctl, 0, sizeof(nvml_control_state_t)); + + return PAPI_OK; +} + +/** Triggered by eventset operations like add or remove */ +int +_papi_nvml_update_control_state(hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, + hwd_context_t *ctx) +{ + SUBDBG("Enter: ctl: %p, ctx: %p\n", ctl, ctx); + int i, index; + + nvml_control_state_t *nvml_ctl = (nvml_control_state_t *) ctl; + (void) ctx; + + /* if no events, return */ + if (count == 0) return PAPI_OK; + + for (i = 0; i < count; i++) { + index = native[i].ni_event; + nvml_ctl->which_counter[i] = index; + /* We have no constraints on event position, so any event */ + /* can be in any slot. */ + native[i].ni_position = i; + } + nvml_ctl->num_events = count; + return PAPI_OK; +} +/** Triggered by PAPI_start() */ +int +_papi_nvml_start(hwd_context_t *ctx, hwd_control_state_t *ctl) +{ + SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl); + + (void) ctx; + (void) ctl; + + /* anything that would need to be set at counter start time */ + + /* reset */ + /* start the counting */ + + return PAPI_OK; +} + +/** Triggered by PAPI_stop() */ +int +_papi_nvml_stop(hwd_context_t *ctx, hwd_control_state_t *ctl) +{ + SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl); + + int i; + (void) ctx; + (void) ctl; + int ret; + + nvml_control_state_t* nvml_ctl = (nvml_control_state_t*) ctl; + + for (i = 0; i < nvml_ctl->num_events; i++) { + if (PAPI_OK != + (ret = nvml_hardware_read(&nvml_ctl->counter[i], + nvml_ctl->which_counter[i]))) + return ret; + + } + + return PAPI_OK; +} + +/** Triggered by PAPI_read() */ +int +_papi_nvml_read(hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags) +{ + SUBDBG("Enter: ctx: %p, flags: %d\n", ctx, flags); + + (void) ctx; + (void) flags; + int i; + int ret; + nvml_control_state_t* nvml_ctl = (nvml_control_state_t*) ctl; + + for (i = 0; i < nvml_ctl->num_events; i++) { + if (PAPI_OK != + (ret = 
nvml_hardware_read(&nvml_ctl->counter[i], + nvml_ctl->which_counter[i]))) + return ret; + + } + /* return pointer to the values we read */ + *events = nvml_ctl->counter; + return PAPI_OK; +} + +/** Triggered by PAPI_write(), but only if the counters are running */ +/* otherwise, the updated state is written to ESI->hw_start */ +int +_papi_nvml_write(hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events) +{ + SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl); + (void) ctx; + nvml_control_state_t* nvml_ctl = (nvml_control_state_t*) ctl; + int i; + int ret; + + /* You can change ECC mode and compute exclusivity modes on the cards */ + /* But I don't see this as a function of a PAPI component at this time */ + /* All implementation issues aside. */ + + // Currently POWER_MANAGEMENT can be written + for (i = 0; i < nvml_ctl->num_events; i++) { + if (PAPI_OK != (ret = nvml_hardware_write(&events[i], nvml_ctl->which_counter[i]))) + return ret; + } + + /* return pointer to the values we read */ + return PAPI_OK; +} + +/** Triggered by PAPI_reset() but only if the EventSet is currently running */ +/* If the eventset is not currently running, then the saved value in the */ +/* EventSet is set to zero without calling this routine. 
*/ +int +_papi_nvml_reset(hwd_context_t * ctx, hwd_control_state_t * ctl) +{ + SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl); + + (void) ctx; + (void) ctl; + + /* Reset the hardware */ + nvml_hardware_reset(); + + return PAPI_OK; +} + +/** Triggered by PAPI_shutdown() */ +int +_papi_nvml_shutdown_component() +{ + SUBDBG("Enter:\n"); + nvml_hardware_reset(); + if (nvml_native_table != NULL) papi_free(nvml_native_table); + if (devices != NULL) papi_free(devices); + if (features != NULL) papi_free(features); + if (power_management_initial_limit) papi_free(power_management_initial_limit); + if (power_management_limit_constraint_min) papi_free(power_management_limit_constraint_min); + if (power_management_limit_constraint_max) papi_free(power_management_limit_constraint_max); + (*nvmlShutdownPtr)(); + + device_count = 0; + num_events = 0; + + // close the dynamic libraries needed by this component (opened in the init component call) + if (dl3) dlclose(dl3); dl3=NULL; + if (dl2) dlclose(dl2); dl2=NULL; + if (dl1) dlclose(dl1); dl1=NULL; + + return PAPI_OK; +} + +/** Called at thread shutdown */ +int +_papi_nvml_shutdown_thread(hwd_context_t *ctx) +{ + SUBDBG("Enter: ctx: %p\n", ctx); + + (void) ctx; + + /* Last chance to clean up thread */ + + return PAPI_OK; +} + +/** This function sets various options in the component + @param code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT + */ +int +_papi_nvml_ctl(hwd_context_t * ctx, int code, _papi_int_option_t * option) +{ + SUBDBG("Enter: ctx: %p, code: %d\n", ctx, code); + + (void) ctx; + (void) code; + (void) option; + + /* FIXME. This should maybe set up more state, such as which counters are active and */ + /* counter mappings. 
*/ + + return PAPI_OK; +} + +/** This function has to set the bits needed to count different domains + In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + By default return PAPI_EINVAL if none of those are specified + and PAPI_OK with success + PAPI_DOM_USER is only user context is counted + PAPI_DOM_KERNEL is only the Kernel/OS context is counted + PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + PAPI_DOM_ALL is all of the domains + */ +int +_papi_nvml_set_domain(hwd_control_state_t * cntrl, int domain) +{ + SUBDBG("Enter: cntrl: %p, domain: %d\n", cntrl, domain); + + (void) cntrl; + + int found = 0; + + if (PAPI_DOM_USER & domain) { + SUBDBG(" PAPI_DOM_USER \n"); + found = 1; + } + if (PAPI_DOM_KERNEL & domain) { + SUBDBG(" PAPI_DOM_KERNEL \n"); + found = 1; + } + if (PAPI_DOM_OTHER & domain) { + SUBDBG(" PAPI_DOM_OTHER \n"); + found = 1; + } + if (PAPI_DOM_ALL & domain) { + SUBDBG(" PAPI_DOM_ALL \n"); + found = 1; + } + if (!found) + return (PAPI_EINVAL); + + return PAPI_OK; +} + +/**************************************************************/ +/* Naming functions, used to translate event numbers to names */ +/**************************************************************/ + +/** Enumerate Native Events + * @param EventCode is the event of interest + * @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS + * If your component has attribute masks then these need to + * be handled here as well. 
+ */ +int +_papi_nvml_ntv_enum_events(unsigned int *EventCode, int modifier) +{ + int index; + + switch (modifier) { + + /* return EventCode of first event */ + case PAPI_ENUM_FIRST: + /* return the first event that we support */ + + *EventCode = 0; + return PAPI_OK; + + /* return EventCode of next available event */ + case PAPI_ENUM_EVENTS: + index = *EventCode; + + /* Make sure we are in range */ + if (index < num_events - 1) { + + /* This assumes a non-sparse mapping of the events */ + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + + default: + return PAPI_EINVAL; + } + + return PAPI_EINVAL; +} + +/** Takes a native event code and passes back the name + * @param EventCode is the native event code + * @param name is a pointer for the name to be copied to + * @param len is the size of the name string + */ +int +_papi_nvml_ntv_code_to_name(unsigned int EventCode, char *name, int len) +{ + SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len); + int index; + + index = EventCode; + + /* Make sure we are in range */ + if (index >= num_events) return PAPI_ENOEVNT; + + strncpy(name, nvml_native_table[index].name, len); + + return PAPI_OK; +} + +/** Takes a native event code and passes back the event description + * @param EventCode is the native event code + * @param descr is a pointer for the description to be copied to + * @param len is the size of the descr string + */ +int +_papi_nvml_ntv_code_to_descr(unsigned int EventCode, char *descr, int len) +{ + int index; + index = EventCode; + + if (index >= num_events) return PAPI_ENOEVNT; + + strncpy(descr, nvml_native_table[index].description, len); + + return PAPI_OK; +} + +/** Takes a native event code and passes back the event info + * @param EventCode is the native event code + * @param info is a pointer for the info to be copied to + */ +int +_papi_nvml_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info) +{ + + int index = EventCode; 
+ + if ((index < 0) || (index >= num_events)) return PAPI_ENOEVNT; + + strncpy(info->symbol, nvml_native_table[index].name, sizeof(info->symbol) - 1); + info->symbol[sizeof(info->symbol) - 1] = '\0'; + + strncpy(info->units, nvml_native_table[index].units, sizeof(info->units) - 1); + info->units[sizeof(info->units) - 1] = '\0'; + + strncpy(info->long_descr, nvml_native_table[index].description, sizeof(info->long_descr) - 1); + info->long_descr[sizeof(info->long_descr) - 1] = '\0'; + +// info->data_type = nvml_native_table[index].return_type; + + return PAPI_OK; +} + +/** Vector that points to entry points for our component */ +papi_vector_t _nvml_vector = { + .cmp_info = { + /* default component information */ + /* (unspecified values are initialized to 0) */ + + .name = "nvml", + .short_name = "nvml", + .version = "1.0", + .description = "NVML provides the API for monitoring NVIDIA hardware (power usage, temperature, fan speed, etc)", + .support_version = "n/a", + .kernel_version = "n/a", + + .num_preset_events = 0, + .num_native_events = 0, /* set by init_component */ + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .hardware_intr = 0, + .precise_intr = 0, + .posix1b_timers = 0, + .kernel_profile = 0, + .kernel_multiplex = 0, + .fast_counter_read = 0, + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + .cntr_umasks = 0, + .cpu = 0, + .inherit = 0, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof(nvml_context_t), + .control_state = sizeof(nvml_control_state_t), + .reg_value = sizeof(nvml_register_t), + // .reg_alloc = sizeof ( nvml_reg_alloc_t ), + }, + + /* function pointers */ + + /* Used for general PAPI interactions */ + .start = _papi_nvml_start, + .stop = 
_papi_nvml_stop, + .read = _papi_nvml_read, + .reset = _papi_nvml_reset, + .write = _papi_nvml_write, + .init_component = _papi_nvml_init_component, + .init_thread = _papi_nvml_init_thread, + .init_control_state = _papi_nvml_init_control_state, + .update_control_state = _papi_nvml_update_control_state, + .ctl = _papi_nvml_ctl, + .shutdown_thread = _papi_nvml_shutdown_thread, + .shutdown_component = _papi_nvml_shutdown_component, + .set_domain = _papi_nvml_set_domain, + .cleanup_eventset = NULL, + /* called in add_native_events() */ + .allocate_registers = NULL, + + /* Used for overflow/profiling */ + .dispatch_timer = NULL, + .get_overflow_address = NULL, + .stop_profiling = NULL, + .set_overflow = NULL, + .set_profile = NULL, + + /* Name Mapping Functions */ + .ntv_enum_events = _papi_nvml_ntv_enum_events, + .ntv_name_to_code = NULL, + .ntv_code_to_name = _papi_nvml_ntv_code_to_name, + .ntv_code_to_descr = _papi_nvml_ntv_code_to_descr, + .ntv_code_to_info = _papi_nvml_ntv_code_to_info, + +}; + diff --git a/src/components/nvml/linux-nvml.h b/src/components/nvml/linux-nvml.h new file mode 100644 index 0000000..8a1b795 --- /dev/null +++ b/src/components/nvml/linux-nvml.h @@ -0,0 +1,56 @@ +#ifndef __LINUX_NVML_H__ +#define __LINUX_NVML_H__ + +#include "nvml.h" + +#define FEATURE_CLOCK_INFO 1 +#define FEATURE_ECC_LOCAL_ERRORS 2 +#define FEATURE_FAN_SPEED 4 +#define FEATURE_MAX_CLOCK 8 +#define FEATURE_MEMORY_INFO 16 +#define FEATURE_PERF_STATES 32 +#define FEATURE_POWER 64 +#define FEATURE_TEMP 128 +#define FEATURE_ECC_TOTAL_ERRORS 256 +#define FEATURE_UTILIZATION 512 +#define FEATURE_POWER_MANAGEMENT 1024 +#define FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MIN 2048 +#define FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MAX 4096 + +#define HAS_FEATURE( features, query ) ( features & query ) + +#define MEMINFO_TOTAL_MEMORY 0 +#define MEMINFO_UNALLOCED 1 +#define MEMINFO_ALLOCED 2 + +#define LOCAL_ECC_REGFILE 0 +#define LOCAL_ECC_L1 1 +#define LOCAL_ECC_L2 2 
+#define LOCAL_ECC_MEM 3 + +#define GPU_UTILIZATION 0 +#define MEMORY_UTILIZATION 1 + +/* we lookup which card we are on at read time; this is a place holder */ +typedef int nvml_register_t; + +struct local_ecc { + nvmlEccBitType_t bits; + int which_one; +}; + +typedef union { + nvmlClockType_t clock; /* used in get[Max]ClockSpeed */ + struct local_ecc ecc_opts; /* local ecc errors, total ecc errors */ + int which_one; /* memory_info , utilization*/ +} nvml_resource_options_t; + +typedef struct nvml_native_event_entry { + nvml_resource_options_t options; + char name[PAPI_MAX_STR_LEN]; + char units[PAPI_MIN_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + int type; +} nvml_native_event_entry_t; + +#endif diff --git a/src/components/nvml/tests/HelloWorld.cu b/src/components/nvml/tests/HelloWorld.cu new file mode 100644 index 0000000..1c39df7 --- /dev/null +++ b/src/components/nvml/tests/HelloWorld.cu @@ -0,0 +1,163 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file HelloWorld.c + * CVS: $Id$ + * @author Heike Jagode + * jagode@eecs.utk.edu + * Mods: + * + * test case for Example component + * + * + * @brief + * This file is a very simple HelloWorld C example which serves (together + * with its Makefile) as a guideline on how to add tests to components. + * The papi configure and papi Makefile will take care of the compilation + * of the component tests (if all tests are added to a directory named + * 'tests' in the specific component dir). + * See components/README for more details. + * + * The string "Hello World!" is mangled and then restored. 
+ */ + +#include +#include +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 +#define PAPI + +// Prototypes +__global__ void helloWorld(char*); + + +// Host function +int main(int argc, char** argv) +{ +#ifdef PAPI + int retval, i; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + /* REPLACE THE EVENT NAME 'PAPI_FP_OPS' WITH A CUDA EVENT + FOR THE CUDA DEVICE YOU ARE RUNNING ON. + RUN papi_native_avail to get a list of CUDA events that are + supported on your machine */ + // e.g. on a P100 nvml:::Tesla_P100-SXM2-16GB:power + char *EventName[] = { "PAPI_FP_OPS" }; + int events[NUM_EVENTS]; + int eventCount = 0; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if( retval != PAPI_VER_CURRENT ) + fprintf( stderr, "PAPI_library_init failed\n" ); + + printf( "PAPI_VERSION : %4d %6d %7d\n", + PAPI_VERSION_MAJOR( PAPI_VERSION ), + PAPI_VERSION_MINOR( PAPI_VERSION ), + PAPI_VERSION_REVISION( PAPI_VERSION ) ); + + /* convert PAPI native events to PAPI code */ + for( i = 0; i < NUM_EVENTS; i++ ){ + retval = PAPI_event_name_to_code( EventName[i], &events[i] ); + if( retval != PAPI_OK ) { + fprintf( stderr, "PAPI_event_name_to_code failed\n" ); + continue; + } + eventCount++; + printf( "Name %s --- Code: %#x\n", EventName[i], events[i] ); + } + + /* if we did not find any valid events, just report test failed. 
*/ + if (eventCount == 0) { + printf( "Test FAILED: no valid events found.\n"); + return 1; + } + + retval = PAPI_create_eventset( &EventSet ); + if( retval != PAPI_OK ) + fprintf( stderr, "PAPI_create_eventset failed\n" ); + + retval = PAPI_add_events( EventSet, events, eventCount ); + if( retval != PAPI_OK ) + fprintf( stderr, "PAPI_add_events failed\n" ); +#endif + + + int j; + int count; + int cuda_device; + + cudaGetDeviceCount( &count ); + for ( cuda_device = 0; cuda_device < count; cuda_device++ ) { + cudaSetDevice( cuda_device ); +#ifdef PAPI + retval = PAPI_start( EventSet ); + if( retval != PAPI_OK ) + fprintf( stderr, "PAPI_start failed\n" ); +#endif + + // desired output + char str[] = "Hello World!"; + + // mangle contents of output + // the null character is left intact for simplicity + for(j = 0; j < 12; j++) { + str[j] -= j; + //printf("str=%s\n", str); + } + + + // allocate memory on the device + char *d_str; + size_t size = sizeof(str); + cudaMalloc((void**)&d_str, size); + + // copy the string to the device + cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice); + + // set the grid and block sizes + dim3 dimGrid(2); // one block per word + dim3 dimBlock(6); // one thread per character + + + // invoke the kernel + helloWorld<<< dimGrid, dimBlock >>>(d_str); + + // retrieve the results from the device + cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost); + + // free up the allocated memory on the device + cudaFree(d_str); + + printf("END: %s\n", str); + + +#ifdef PAPI + retval = PAPI_stop( EventSet, values ); + if( retval != PAPI_OK ) + fprintf( stderr, "PAPI_stop failed\n" ); + + for( i = 0; i < eventCount; i++ ) + printf( "On device %d: %12lld \t\t --> %s \n", cuda_device, values[i], EventName[i] ); +#endif + } + + return 0; +} + + +// Device kernel +__global__ void +helloWorld(char* str) +{ + // determine where in the thread grid we are + int idx = blockIdx.x * blockDim.x + threadIdx.x; + // unmangle output + str[idx] += idx; +} + diff --git 
a/src/components/nvml/tests/Makefile b/src/components/nvml/tests/Makefile new file mode 100644 index 0000000..34ab9e3 --- /dev/null +++ b/src/components/nvml/tests/Makefile @@ -0,0 +1,27 @@ +NAME=nvml +include ../../Makefile_comp_tests +include ../Makefile.nvml + +NVCC = $(CUDA_DIR)/bin/nvcc +NVCFLAGS = -L$(NVML_INCDIR) -ccbin=$(CC) +CUDALIBS = -L$(NVML_LIBDIR) -L$(CUDA_DIR)/lib64 -lcuda -lcudart -lnvidia-ml +PAPILIB := ../../../libpapi.a -ldl + +%.o:%.cu + $(NVCC) $(NVCFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = HelloWorld nvml_power_limiting_test + +nvml_tests: $(TESTS) + +HelloWorld: HelloWorld.o $(UTILOBJS) + $(NVCC) $(NVCFLAGS) $(INCLUDE) -o HelloWorld HelloWorld.o $(UTILOBJS) $(PAPILIB) $(CUDALIBS) + +nvml_power_limiting_test: nvml_power_limiting_test.o $(UTILOBJS) + $(NVCC) $(NVCFLAGS) $(INCLUDE) -o nvml_power_limiting_test nvml_power_limiting_test.o $(UTILOBJS) $(PAPILIB) $(CUDALIBS) + cp nvml_power_limiting_test ~/nvml_power_limiting_test + +clean: + rm -f $(TESTS) *.o + +FORCE: diff --git a/src/components/nvml/tests/nvml_power_limiting_test.cu b/src/components/nvml/tests/nvml_power_limiting_test.cu new file mode 100644 index 0000000..407a3f6 --- /dev/null +++ b/src/components/nvml/tests/nvml_power_limiting_test.cu @@ -0,0 +1,234 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @file HelloWorld.c + * CVS: $Id$ + * @author Asim YarKhan (yarkhan@icl.utk.edu) HelloWorld altered to test power capping (October 2017) + * @author Heike Jagode (jagode@icl.utk.edu) + * Mods: + * + * @brief + + * This file is a very simple HelloWorld C example which serves + * (together with its Makefile) as a guideline on how to add tests to + * components. This file tests the ability to do power control using + * NVML. + + * The papi configure and papi Makefile will take care of the + * compilation of the component tests (if all tests are added to a + * directory named 'tests' in the specific component dir). 
See + * components/README for more details. + * + * The string "Hello World!" is mangled and then restored. + */ + +#include +#include +#include "papi.h" +#include "papi_test.h" + +#define PAPI + +// Prototypes +__global__ void helloWorld( char* ); + +// Host function +int main( int argc, char** argv ) +{ + +#ifdef PAPI +#define NUM_EVENTS 1 + int retval, i; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + /* REPLACE THE EVENT NAME 'PAPI_FP_OPS' WITH A CUDA EVENT + FOR THE CUDA DEVICE YOU ARE RUNNING ON. + RUN papi_native_avail to get a list of CUDA events that are + supported on your machine */ + // e.g. on a P100 nvml:::Tesla_P100-SXM2-16GB:power + char *EventName[NUM_EVENTS]; + int events[NUM_EVENTS]; + int eventCount = 0; + const PAPI_component_info_t *cmpinfo; + char event_name[PAPI_MAX_STR_LEN]; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if( retval != PAPI_VER_CURRENT ) fprintf( stderr, "PAPI_library_init failed\n" ); + + printf( "PAPI_VERSION : %4d %6d %7d\n", + PAPI_VERSION_MAJOR( PAPI_VERSION ), + PAPI_VERSION_MINOR( PAPI_VERSION ), + PAPI_VERSION_REVISION( PAPI_VERSION ) ); + + int numcmp = PAPI_num_components(); + // printf( "Searching for nvml component among %d components\n", numcmp ); + int cid = 0; + for( cid=0; cidCmpIdx, cmpinfo->name, cmpinfo->num_native_events ); + if ( cmpinfo == NULL ) + test_fail( __FILE__, __LINE__,"PAPI_get_component_info failed\n",-1 ); + else if ( strstr( cmpinfo->name, "nvml" ) ) + break; + } + if ( cid==numcmp ) + test_skip( __FILE__, __LINE__,"Component nvml is not present\n",-1 ); + + printf( "nvml component found: Component Index %d: %s: %d events\n", cmpinfo->CmpIdx, cmpinfo->name, cmpinfo->num_native_events ); + if ( cmpinfo->disabled ) + test_skip( __FILE__,__LINE__,"Component nvml is disabled", 0 ); + + int code = PAPI_NATIVE_MASK; + int ii=0; + int event_modifier = PAPI_ENUM_FIRST; + for ( ii=0; iinum_native_events; ii++ ) { + retval = PAPI_enum_cmp_event( 
&code, event_modifier, cid ); + event_modifier = PAPI_ENUM_EVENTS; + if ( retval != PAPI_OK ) test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + retval = PAPI_event_code_to_name( code, event_name ); + // printf( "Look at event %d %d %s \n", ii, code, event_name ); + if ( strstr( event_name, "power_management_limit" ) ) + break; + } + if ( ii==cmpinfo->num_native_events ) + test_skip( __FILE__,__LINE__,"Component nvml does not have a power_management_limit event", 0 ); + printf( "nvml power_management_limit event found (%s)\n", event_name ); + + EventName[0] = event_name; + + /* convert PAPI native events to PAPI code */ + for( i = 0; i < NUM_EVENTS; i++ ) { + retval = PAPI_event_name_to_code( ( char * )EventName[i], &events[i] ); + if( retval != PAPI_OK ) + test_fail( __FILE__,__LINE__,"PAPI_event_name_to_code failed", retval ); + eventCount++; + // printf( "Event: %s: Code: %#x\n", EventName[i], events[i] ); + } + + /* if we did not find any valid events, just report test failed. 
*/ + if ( eventCount == 0 ) + test_skip( __FILE__,__LINE__,"No valid events found", retval ); + + retval = PAPI_create_eventset( &EventSet ); + if( retval != PAPI_OK ) + test_fail( __FILE__,__LINE__,"PAPI_create_eventset failed", retval ); + + retval = PAPI_add_events( EventSet, events, eventCount ); + if( retval != PAPI_OK ) + test_fail( __FILE__,__LINE__,"PAPI_add_events failed", retval ); +#endif + + int j; + int device_count; + int cuda_device; + + cudaGetDeviceCount( &device_count ); + printf( "Found %d cuda devices\n", device_count ); + + +/////////////////////// AYK + for ( cuda_device = 0; cuda_device < device_count; cuda_device++ ) { + // for ( cuda_device = 0; cuda_device < 1; cuda_device++ ) { + printf( "cuda_device %d is being used\n", cuda_device ); + cudaSetDevice( cuda_device ); + +#ifdef PAPI + retval = PAPI_start( EventSet ); + if( retval != PAPI_OK ) + test_fail( __FILE__,__LINE__,"PAPI_start failed", retval ); + + retval = PAPI_read( EventSet, values ); + if( retval != PAPI_OK ) fprintf( stderr, "PAPI_read failed\n" ); + for( i = 0; i < eventCount; i++ ) + printf( "%s = %lld (read initial power management limit)\n", EventName[i], values[i]); + long long int initial_power_management_limit = values[0]; + + if ( cuda_device==0 ) { + printf("On device_num %d the power_management_limit is going to be reduced by 30\n", cuda_device); + // values[0] = 235000 + values[0] = initial_power_management_limit - 30; + retval = PAPI_write( EventSet, values ); + if ( retval!=PAPI_OK ) { + test_skip( __FILE__,__LINE__,"Attempted write of power_management_limit failed: Possible reasons: Insufficient permissions; Power management unavailable. 
Outside min/max limits", retval ); + } else { + printf( "Set power_management_limit to %llu milliWatts\n", values[0] ); + } + } + + + +#endif + + // desired output + char str[] = "Hello World!"; + + // mangle contents of output + // the null character is left intact for simplicity + for(j = 0; j < 12; j++) + str[j] -= j; + printf( "This mangled string need to be fixed=%s\n", str ); + + // allocate memory on the device + char *d_str; + size_t size = sizeof( str ); + cudaMalloc( ( void** )&d_str, size ); + + // copy the string to the device + cudaMemcpy( d_str, str, size, cudaMemcpyHostToDevice ); + + // set the grid and block sizes + dim3 dimGrid( 2 ); // one block per word + dim3 dimBlock( sizeof( str )/2 ); // one thread per character + + // invoke the kernel + helloWorld<<< dimGrid, dimBlock >>>( d_str ); + + // retrieve the results from the device + cudaMemcpy( str, d_str, size, cudaMemcpyDeviceToHost ); + + // free up the allocated memory on the device + cudaFree( d_str ); + + printf( "Device %d Unmangled string = %s\n", cuda_device, str ); + +#ifdef PAPI + if ( cuda_device==0 ) { + retval = PAPI_read( EventSet, values ); + if( retval != PAPI_OK ) fprintf( stderr, "PAPI_read failed\n" ); + for( i = 0; i < eventCount; i++ ) + printf( "%s = %lld (read power management limit after reducing it... was it reduced?) 
\n", EventName[i], values[i] ); + + if ( values[0] != initial_power_management_limit - 30 ) { + printf( "Mismatch: power_management_limit on device %d set to %llu but read as %llu\n", cuda_device, initial_power_management_limit-30, values[0] ); + test_fail( __FILE__,__LINE__,"Mismatch: power_management_limit on device set to one value but read as a different value", -1 ); + + } + + // AYK papi_reset + long long resetvalues[NUM_EVENTS]; + resetvalues[0] = initial_power_management_limit; + retval = PAPI_write( EventSet, resetvalues ); + retval = PAPI_stop( EventSet, values ); + } +#endif + + } + + test_pass( __FILE__); + return 0; +} + + +// Device kernel +__global__ void +helloWorld( char* str ) +{ + // determine where in the thread grid we are + int idx = blockIdx.x * blockDim.x + threadIdx.x; + // unmangle output + str[idx] += idx; +} + diff --git a/src/components/perf_event/Rules.perf_event b/src/components/perf_event/Rules.perf_event new file mode 100644 index 0000000..a26861b --- /dev/null +++ b/src/components/perf_event/Rules.perf_event @@ -0,0 +1,9 @@ + +COMPSRCS += components/perf_event/perf_event.c components/perf_event/pe_libpfm4_events.c +COMPOBJS += perf_event.o pe_libpfm4_events.o + +perf_event.o: components/perf_event/perf_event.c components/perf_event/perf_event_lib.h components/perf_event/perf_helpers.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perf_event/perf_event.c -o perf_event.o + +pe_libpfm4_events.o: components/perf_event/pe_libpfm4_events.c + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perf_event/pe_libpfm4_events.c -o pe_libpfm4_events.o diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c new file mode 100644 index 0000000..9bb1fa6 --- /dev/null +++ b/src/components/perf_event/pe_libpfm4_events.c @@ -0,0 +1,1329 @@ +/* +* File: pe_libpfm4_events.c +* Author: Vince Weaver vincent.weaver@maine.edu +* Mods: Gary Mohr +* gary.mohr@bull.com +* Modified the perf_event component to use 
PFM_OS_PERF_EVENT_EXT mode in libpfm4. +* This adds several new event masks, including cpu=, u=, and k= which give the user +* the ability to set cpu number to use or control the domain (user, kernel, or both) +* in which the counter should be incremented. These are event masks so it is now +* possible to have multiple events in the same event set that count activity from +* differennt cpu's or count activity in different domains. +* +* Handle the libpfm4 event interface for the perf_event component +*/ + +#include + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" + +#include "papi_libpfm4_events.h" +#include "pe_libpfm4_events.h" +#include "perf_event_lib.h" + +#include "perfmon/pfmlib.h" +#include "perfmon/pfmlib_perf_event.h" + +#define NATIVE_EVENT_CHUNK 1024 + +// used to step through the attributes when enumerating events +static int attr_idx; + +/** @class find_existing_event + * @brief looks up an event, returns it if it exists + * + * @param[in] name + * -- name of the event + * @param[in] event_table + * -- native_event_table structure + * + * @returns returns offset in array + * + */ + +static int find_existing_event(const char *name, + struct native_event_table_t *event_table) { + SUBDBG("Entry: name: %s, event_table: %p, num_native_events: %d\n", name, event_table, event_table->num_native_events); + + int i,event=PAPI_ENOEVNT; + + _papi_hwi_lock( NAMELIB_LOCK ); + + for(i=0;inum_native_events;i++) { + // Most names passed in will contain the pmu name, so first we compare to the allocated name (it has pmu name on front) + if (!strcmp(name,event_table->native_events[i].allocated_name)) { + SUBDBG("Found allocated_name: %s, libpfm4_idx: %#x, papi_event_code: %#x\n", + event_table->native_events[i].allocated_name, event_table->native_events[i].libpfm4_idx, event_table->native_events[i].papi_event_code); + event=i; + break; + } + // some callers have an event name without the pmu name on the front, so we also compare to the base 
name (just the event name part) + if (!strcmp(name,event_table->native_events[i].base_name)) { + int nameLen = strlen(event_table->native_events[i].base_name); + // the name we are looking for must be the same length as this event table entry name for them to match + if (strlen(name) != nameLen + strlen(event_table->native_events[i].mask_string) + 1) { + continue; + } + if(!strcmp(name+nameLen+1, event_table->native_events[i].mask_string)) { + SUBDBG("Found base_name: %s, mask_string: %s, libpfm4_idx: %#x, papi_event_code: %#x\n", + event_table->native_events[i].base_name, event_table->native_events[i].mask_string , event_table->native_events[i].libpfm4_idx, event_table->native_events[i].papi_event_code); + event=i; + break; + } + } + } + _papi_hwi_unlock( NAMELIB_LOCK ); + + SUBDBG("EXIT: returned: %#x\n", event); + return event; +} + + +static int pmu_is_present_and_right_type(pfm_pmu_info_t *pinfo, int type) { + SUBDBG("ENTER: pinfo: %s %p, pinfo->is_present: %d, " + "pinfo->type: %#x, type: %#x\n", + pinfo->name, pinfo, pinfo->is_present, pinfo->type, type); + if (!pinfo->is_present) { +// SUBDBG("EXIT: not present\n"); + return 0; + } + + if ((pinfo->type==PFM_PMU_TYPE_UNCORE) && (type&PMU_TYPE_UNCORE)) { +// SUBDBG("EXIT: found PFM_PMU_TYPE_UNCORE\n"); + return 1; + } + if ((pinfo->type==PFM_PMU_TYPE_CORE) && (type&PMU_TYPE_CORE)) { +// SUBDBG("EXIT: found PFM_PMU_TYPE_CORE\n"); + return 1; + } + if ((pinfo->type==PFM_PMU_TYPE_OS_GENERIC) && (type&PMU_TYPE_OS)) { +// SUBDBG("EXIT: found PFM_PMU_TYPE_OS_GENERIC\n"); + return 1; + } + +// SUBDBG("EXIT: not right type\n"); + return 0; +} + + +/** @class allocate_native_event + * @brief Allocates a native event + * + * @param[in] name + * -- name of the event + * @param[in] libpfm4_idx + * -- libpfm4 identifier for the event + * @param[in] cidx + * -- PAPI component index + * @param[in] event_table + * -- native event table struct + * + * @returns returns a native_event_t or NULL + * + */ + +static struct 
native_event_t *allocate_native_event( + const char *name, + int libpfm4_index, int cidx, + struct native_event_table_t *event_table) { + + SUBDBG("ENTER: name: %s, libpfm4_index: %#x, event_table: %p, " + "event_table->pmu_type: %d\n", + name, libpfm4_index, event_table, event_table->pmu_type); + + int nevt_idx; + int event_num; + int encode_failed=0; + + pfm_err_t ret; + char *event_string=NULL; + char *pmu_name; + char *event; + char *masks; + char fullname[BUFSIZ]; + struct native_event_t *ntv_evt; + + pfm_perf_encode_arg_t perf_arg; + pfm_event_info_t einfo; + pfm_event_attr_info_t ainfo; + pfm_pmu_info_t pinfo; + + // if no place to put native events, report that allocate failed + if (event_table->native_events==NULL) { + SUBDBG("EXIT: no place to put native events\n"); + return NULL; + } + + // find out if this event is already known + event_num=find_existing_event(name, event_table); + + /* add the event to our event table */ + _papi_hwi_lock( NAMELIB_LOCK ); + + // if we already know this event name, + // it was created as part of setting up the preset tables + // we need to use the event table which is already created + if (event_num >= 0) { + nevt_idx = event_num; + ntv_evt = &(event_table->native_events[event_num]); + } else { + // set to use a new event table + // (count of used events not bumped + // until we are sure setting it up does not get an errror) + nevt_idx = event_table->num_native_events; + ntv_evt = &(event_table->native_events[nevt_idx]); + } + + SUBDBG("event_num: %d, nevt_idx: %d, ntv_evt: %p\n", + event_num, nevt_idx, ntv_evt); + + /* clear the argument and attribute structures */ + memset(&perf_arg,0,sizeof(pfm_perf_encode_arg_t)); + memset(&(ntv_evt->attr),0,sizeof(struct perf_event_attr)); + + // set argument structure fields so the encode + // function can give us what we need + perf_arg.attr=&ntv_evt->attr; + perf_arg.fstr=&event_string; + + /* use user provided name of the event to get the */ + /* perf_event encoding and a fully 
qualified event string */ + ret = pfm_get_os_event_encoding(name, + PFM_PLM0 | PFM_PLM3, + PFM_OS_PERF_EVENT_EXT, + &perf_arg); + + // If the encode function failed, skip processing of the event_string + if ((ret != PFM_SUCCESS) || (event_string == NULL)) { + SUBDBG("encode failed for event: %s, returned: %d\n", + name, ret); + + // we need to remember that this event encoding failed + // but still create the native event table + // the event table is used by the list so we put what we + // can get into it + // but the failure doing the encode causes us to + // return null to our caller + encode_failed = 1; + + // Noting the encode_failed error in the attr.config allows + // any later validate attempts to return an error value + + // ??? .config is 64-bits? --vmw + ntv_evt->attr.config = 0xFFFFFF; + + // we also want to make it look like a cpu number + // was not provided as an event mask + perf_arg.cpu = -1; + + // Why don't we just return NULL here? --vmw + //return NULL; + } + + // get a copy of the event name and break it up into its parts + event_string = strdup(name); + + SUBDBG("event_string: %s\n", event_string); + + // get the pmu name, event name and mask list pointers + // from the event string + event = strstr (event_string, "::"); + if (event != NULL) { + *event = 0; // null terminate pmu name + event += 2; // event name follows '::' + pmu_name = strdup(event_string); + } else { + // no pmu name in event string + pmu_name = malloc(2); + pmu_name[0] = 0; + event = event_string; + } + masks = strstr (event, ":"); + if (masks != NULL) { + *masks = 0; // null terminate event name + masks += 1; // masks follow : + } else { + masks = ""; + } + + // build event name to find, put a pmu name on it if we have one + if (strlen(pmu_name) == 0) { + sprintf(fullname,"%s", event); + } else { + sprintf(fullname,"%s::%s", pmu_name, event); + } + + SUBDBG("pmu_name: %s, event: %s, masks: %s, fullname: %s\n", + pmu_name, event, masks, fullname); + + // if the libpfm4 
index was not provided, + // try to get one based on the event name passed in. + + /* This may return a value for a disabled PMU */ + if (libpfm4_index == -1) { + libpfm4_index = pfm_find_event(fullname); + if (libpfm4_index < 0) { + free(event_string); + free(pmu_name); + _papi_hwi_unlock( NAMELIB_LOCK ); + SUBDBG("EXIT: error from libpfm4 find event\n"); + return NULL; + } + SUBDBG("libpfm4_index: %#x\n", libpfm4_index); + } + + // get this events information from libpfm4, + // if unavailable return event not found (structure be zeroed) + memset( &einfo, 0, sizeof( pfm_event_info_t )); + einfo.size = sizeof(pfm_event_info_t); + if ((ret = pfm_get_event_info(libpfm4_index, + PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { + free(event_string); + free(pmu_name); + _papi_hwi_unlock( NAMELIB_LOCK ); + SUBDBG("EXIT: pfm_get_event_info failed with %d\n", ret); + return NULL; + } + + // if pmu type is not one supported by this component, + // return event not found (structure be zeroed) + memset(&pinfo,0,sizeof(pfm_pmu_info_t)); + pinfo.size = sizeof(pfm_pmu_info_t); + pfm_get_pmu_info(einfo.pmu, &pinfo); + if (pmu_is_present_and_right_type(&pinfo, event_table->pmu_type) == 0) { + free(event_string); + free(pmu_name); + _papi_hwi_unlock( NAMELIB_LOCK ); + SUBDBG("EXIT: PMU not supported by this component: einfo.pmu: %d, PFM_PMU_TYPE_CORE: %d\n", einfo.pmu, PFM_PMU_TYPE_CORE); + return NULL; + } + + ntv_evt->allocated_name=strdup(name); + ntv_evt->mask_string=strdup(masks); + ntv_evt->component=cidx; + ntv_evt->pmu=pmu_name; + ntv_evt->base_name=strdup(event); + ntv_evt->pmu_plus_name=strdup(fullname); + ntv_evt->libpfm4_idx=libpfm4_index; + ntv_evt->event_description=strdup(einfo.desc); + ntv_evt->users=0; /* is this needed? 
*/ + ntv_evt->cpu=perf_arg.cpu; + + SUBDBG("ntv_evt->mask_string: %p (%s)\n", + ntv_evt->mask_string, ntv_evt->mask_string); + char *msk_ptr = strdup(masks); // get a work copy of the mask string before we free the space it was in + free(event_string); + + char mask_desc[PAPI_HUGE_STR_LEN] = ""; + + // if there is any mask data, collect their descriptions + if ((msk_ptr != NULL) && (strlen(msk_ptr) > 0)) { + // go get the descriptions for each of the + // masks provided with this event + char *ptr = msk_ptr; + SUBDBG("ptr: %p (%s)\n", ptr, ptr); + while (ptr != NULL) { + char *ptrm = strstr(ptr, ":"); + if (ptrm != NULL) { + *ptrm = '\0'; + ptrm++; + } + + // get the length of the mask name + char *wrk = strchr(ptr, '='); + unsigned int msk_name_len; + if (wrk != NULL) { + msk_name_len = wrk - ptr; + SUBDBG("Found =, length=%d\n",msk_name_len); + } else { + msk_name_len = strlen (ptr); + SUBDBG("No =, length=%d\n",msk_name_len); + } + + int i, mask_found=0; + for (i=0 ; i 0) { + strcat (mask_desc, ":"); + mskleft--; + } + // if new description will not all fit in buffer, report truncation + if (mskleft < (strlen(ainfo.desc) + 1)) { + SUBDBG("EXIT: Attribute description truncated: %s\n", ainfo.desc); + } + // move as much of this description as will fit + strncat (mask_desc, ainfo.desc, mskleft-1); + mask_desc[mskleft-1] = '\0'; + break; + } + } + + /* See if we had a mask that wasn't found */ + if (!mask_found) { + SUBDBG("Mask not found! %s\n",ptr); + /* FIXME: do we need to unlock here? 
*/ + return NULL; + } + + // if we have filled the work buffer, we can quit now + if ( (sizeof(mask_desc) - strlen(mask_desc)) <= 1) { + break; + } + ptr = ptrm; + } + } + ntv_evt->mask_description=strdup(mask_desc); + SUBDBG("ntv_evt->mask_description: %p (%s)\n", ntv_evt->mask_description, ntv_evt->mask_description); + + // give back space if we got any + if (msk_ptr != NULL) { + free (msk_ptr); + } + + // create a papi table for this native event, put the index into the event sets array of native events into the papi table + int new_event_code = _papi_hwi_native_to_eventcode(cidx, libpfm4_index, nevt_idx, ntv_evt->allocated_name); + _papi_hwi_set_papi_event_string((const char *)ntv_evt->allocated_name); + _papi_hwi_set_papi_event_code(new_event_code, 1); + + ntv_evt->papi_event_code=new_event_code; + + SUBDBG("Using %#x as index for %s\n", ntv_evt->libpfm4_idx, fullname); + SUBDBG("num_native_events: %d, allocated_native_events: %d\n", event_table->num_native_events, event_table->allocated_native_events); + SUBDBG("Native Event: papi_event_code: %#x, libpfm4_idx: %#x, pmu: %s, base_name: %s, mask_string: %s, allocated_name: %s\n", + ntv_evt->papi_event_code, ntv_evt->libpfm4_idx, ntv_evt->pmu, ntv_evt->base_name, ntv_evt->mask_string, ntv_evt->allocated_name); + SUBDBG("event_table->native_events[%d]: %p, cpu: %d, attr.config: 0x%"PRIx64", attr.config1: 0x%"PRIx64", attr.config2: 0x%"PRIx64", attr.type: 0x%"PRIx32", attr.exclude_user: %d, attr.exclude_kernel: %d, attr.exclude_guest: %d\n", + nevt_idx, &(event_table->native_events[nevt_idx]), ntv_evt->cpu, ntv_evt->attr.config, + ntv_evt->attr.config1, ntv_evt->attr.config2, ntv_evt->attr.type, + ntv_evt->attr.exclude_user, ntv_evt->attr.exclude_kernel, ntv_evt->attr.exclude_guest); + + /* If we've used all of the allocated native events, */ + /* then allocate more room */ + if (event_table->num_native_events >= + event_table->allocated_native_events-1) { + + SUBDBG("Allocating more room for native events (%d 
%ld)\n", + (event_table->allocated_native_events+NATIVE_EVENT_CHUNK), + (long)sizeof(struct native_event_t) * + (event_table->allocated_native_events+NATIVE_EVENT_CHUNK)); + + event_table->native_events=realloc(event_table->native_events, + sizeof(struct native_event_t) * + (event_table->allocated_native_events+NATIVE_EVENT_CHUNK)); + + event_table->allocated_native_events+=NATIVE_EVENT_CHUNK; + + // we got new space so we need to reset + // the pointer to the correct native event in the new space + ntv_evt = &(event_table->native_events[nevt_idx]); + } + + + // if getting more space for native events failed, + // report that allocate failed + if (event_table->native_events==NULL) { + SUBDBG("EXIT: attempt to get more space for " + "native events failed\n"); + return NULL; + } + + // if we created a new event, bump the number used + if (event_num < 0) { + event_table->num_native_events++; + } + + _papi_hwi_unlock( NAMELIB_LOCK ); + + if (encode_failed != 0) { + SUBDBG("EXIT: encoding event failed\n"); + return NULL; + } + + SUBDBG("EXIT: new_event: %p\n", ntv_evt); + return ntv_evt; +} + + +/** @class get_first_event_next_pmu + * @brief return the first available event that's on an active PMU + * + * @returns returns a libpfm event number + * @retval PAPI_ENOEVENT Could not find an event + * + */ + +static int +get_first_event_next_pmu(int pmu_idx, int pmu_type) +{ + SUBDBG("ENTER: pmu_idx: %d, pmu_type: %d\n", pmu_idx, pmu_type); + int pidx, ret; + + pfm_pmu_info_t pinfo; + + // start looking at the next pmu in the list + pmu_idx++; + + /* We loop forever here and exit if pfm_get_pmu_info() fails. */ + /* Before we only went up to PFM_PMU_MAX but this is set at */ + /* compile time and might not reflect the number of PMUs if */ + /* PAPI is dynamically linked against libpfm4. 
*/ + while(1) { + + /* clear the PMU structure (required by libpfm4) */ + memset(&pinfo,0,sizeof(pfm_pmu_info_t)); + pinfo.size = sizeof(pfm_pmu_info_t); + ret=pfm_get_pmu_info(pmu_idx, &pinfo); + + if (ret==PFM_ERR_INVAL) { + break; + } + + if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) { + + pidx=pinfo.first_event; + SUBDBG("First event in pmu: %s is %#x\n", pinfo.name, pidx); + + if (pidx<0) { + /* For some reason no events available */ + /* despite the PMU being active. */ + /* This can happen, for example with ix86arch */ + /* inside of VMware */ + } + else { + SUBDBG("EXIT: pidx: %#x\n", pidx); + return pidx; + } + } + + pmu_idx++; + + } + + SUBDBG("EXIT: PAPI_ENOEVNT\n"); + return PAPI_ENOEVNT; + +} + + +/***********************************************************/ +/* Exported functions */ +/***********************************************************/ + +/** @class _pe_libpfm4_ntv_name_to_code + * @brief Take an event name and convert it to an event code. 
+ * + * @param[in] *name + * -- name of event to convert + * @param[out] *event_code + * -- pointer to an integer to hold the event code + * @param[in] *cidx + * -- PAPI component index + * @param[in] event_table + * -- native event table struct + * + * @retval PAPI_OK event was found and an event assigned + * @retval PAPI_ENOEVENT event was not found + */ + +int +_pe_libpfm4_ntv_name_to_code( const char *name, unsigned int *event_code, + int cidx, + struct native_event_table_t *event_table) +{ + SUBDBG( "ENTER: name: %s, event_code: %p, *event_code: %#x, event_table: %p\n", name, event_code, *event_code, event_table); + + struct native_event_t *our_event; + int event_num; + + // if we already know this event name, just return its native code + event_num=find_existing_event(name, event_table); + if (event_num >= 0) { + *event_code=event_table->native_events[event_num].libpfm4_idx; + // the following call needs to happen to prevent the internal layer from creating a new papi native event table + _papi_hwi_set_papi_event_code(event_table->native_events[event_num].papi_event_code, 1); + SUBDBG("EXIT: Found papi_event_code: %#x, libpfm4_idx: %#x\n", event_table->native_events[event_num].papi_event_code, event_table->native_events[event_num].libpfm4_idx); + return PAPI_OK; + } + + // Try to allocate this event to see if it is known by libpfm4, if allocate fails tell the caller it is not valid + our_event=allocate_native_event(name, -1, cidx, event_table); + if (our_event==NULL) { + SUBDBG("EXIT: Allocating event: '%s' failed\n", name); + return PAPI_ENOEVNT; + } + + *event_code = our_event->libpfm4_idx; + SUBDBG("EXIT: Found code: %#x\n",*event_code); + return PAPI_OK; +} + + +/** @class _pe_libpfm4_ntv_code_to_name + * @brief Take an event code and convert it to a name + * + * @param[in] EventCode + * -- PAPI event code + * @param[out] *ntv_name + * -- pointer to a string to hold the name + * @param[in] len + * -- length of ntv_name string + * @param[in] event_table + 
* -- native event table struct + * + * @retval PAPI_OK The event was found and converted to a name + * @retval PAPI_ENOEVENT The event does not exist + * @retval PAPI_EBUF The event name was too big for ntv_name + */ + +int +_pe_libpfm4_ntv_code_to_name(unsigned int EventCode, + char *ntv_name, int len, + struct native_event_table_t *event_table) +{ + SUBDBG("ENTER: EventCode: %#x, ntv_name: %p, len: %d, event_table: %p\n", EventCode, ntv_name, len, event_table); + + int eidx; + int papi_event_code; + + // get the attribute index for this papi event + papi_event_code = _papi_hwi_get_papi_event_code(); + + // a papi event code less than 0 is invalid, return error + if (papi_event_code <= 0) { + SUBDBG("EXIT: PAPI_ENOEVNT\n"); + return PAPI_ENOEVNT; + } + + // find our native event table for this papi event code (search list backwards because it improves chances of finding it quickly) + for (eidx=event_table->num_native_events-1 ; eidx>=0 ; eidx--) { + if ((papi_event_code == event_table->native_events[eidx].papi_event_code) && (EventCode == ((unsigned)event_table->native_events[eidx].libpfm4_idx))) { + SUBDBG("Found native_event[%d]: papi_event_code: %#x, libpfm4_idx: %#x\n", eidx, event_table->native_events[eidx].papi_event_code, event_table->native_events[eidx].libpfm4_idx); + break; + } + } + + // if we did not find a match, return an error + if (eidx < 0) { + // If we did not find a match in our native event table, then the code passed in has not been + // allocated yet It should not be possible to get to this code. The user has to call the papi + // code_to_name api with a papi event code for a native event. But the only way to get one of + // those is to call either name_to_code or enum_cmp_events first. When one of these calls is + // done we allocate the event so it should always be there. 
+ + SUBDBG("EXIT: PAPI_ENOEVNT\n"); + return PAPI_ENOEVNT; + } + + // if this event is defined by the default pmu, then use only the event name + // if it is not defined by the default pmu, then use both the pmu name and event name + char *ename; + if ((event_table->default_pmu.name) && (strcmp(event_table->default_pmu.name, event_table->native_events[eidx].pmu) == 0)) { + ename = event_table->native_events[eidx].base_name; + } else { + ename = event_table->native_events[eidx].pmu_plus_name; + } + + // if it will not fit, return error + if (strlen (ename) >= (unsigned)len) { + SUBDBG("EXIT: event name %s will not fit in buffer provided\n", ename); + return PAPI_EBUF; + } + strcpy (ntv_name, ename); + + // if this event had masks, also add their names + char *mname = event_table->native_events[eidx].mask_string; + if ((mname != NULL) && (strlen(mname) > 0)) { + if ((strlen(ename) + 8 + strlen(mname)) >= (unsigned)len) { + SUBDBG("EXIT: Not enough room for event and mask descriptions: need: %u, have: %u", (unsigned)(strlen(ename) + 8 + strlen(mname)), (unsigned)len); + return PAPI_EBUF; + } + strcat (ntv_name, ":"); + strcat (ntv_name, mname); + } + + SUBDBG("EXIT: event name: %s\n", ntv_name); + return PAPI_OK; +} + + +/** @class _pe_libpfm4_ntv_code_to_descr + * @brief Take an event code and convert it to a description + * + * @param[in] EventCode + * -- PAPI event code + * @param[out] *ntv_descr + * -- pointer to a string to hold the description + * @param[in] len + * -- length of ntv_descr string + * @param[in] event_table + * -- native event table struct + * + * @retval PAPI_OK The event was found and converted to a description + * @retval PAPI_ENOEVENT The event does not exist + * @retval PAPI_EBUF The event name was too big for ntv_descr + * + * Return the event description. + * If the event has umasks, then include ", masks" and the + * umask descriptions follow, separated by commas. 
+ */ + + +int +_pe_libpfm4_ntv_code_to_descr( unsigned int EventCode, + char *ntv_descr, int len, + struct native_event_table_t *event_table) +{ + SUBDBG("ENTER: EventCode: %#x, ntv_descr: %p, len: %d: event_table: %p\n", EventCode, ntv_descr, len, event_table); + + int eidx; + int papi_event_code; + char *mdesc; + char *edesc; + + // get the attribute index for this papi event + papi_event_code = _papi_hwi_get_papi_event_code(); + + // a papi event code less than 0 is invalid, return error + if (papi_event_code <= 0) { + SUBDBG("EXIT: PAPI_ENOEVNT\n"); + return PAPI_ENOEVNT; + } + + // find our native event table for this papi event code (search list backwards because it improves chances of finding it quickly) + for (eidx=event_table->num_native_events-1 ; eidx>=0 ; eidx--) { + SUBDBG("native_event[%d]: papi_event_code: %#x, libpfm4_idx: %#x\n", eidx, event_table->native_events[eidx].papi_event_code, event_table->native_events[eidx].libpfm4_idx); + if ((papi_event_code == event_table->native_events[eidx].papi_event_code) && (EventCode == ((unsigned)event_table->native_events[eidx].libpfm4_idx))) { + break; + } + } + + // if we did not find a match, return an error + if (eidx < 0) { + // If we did not find a match in our native event table, then the code passed in has not been + // allocated yet It should not be possible to get to this code. The user has to call the papi + // code_to_name api with a papi event code for a native event. But the only way to get one of + // those is to call either name_to_code or enum_cmp_events first. When one of these calls is + // done we allocate the event so it should always be there. 
+ + SUBDBG("EXIT: PAPI_ENOEVNT\n"); + return PAPI_ENOEVNT; + } + + edesc = event_table->native_events[eidx].event_description; + + // if it will not fit, return error + if (strlen (edesc) >= (unsigned)len) { + SUBDBG("EXIT: event name %s will not fit in buffer provided\n", edesc); + return PAPI_EBUF; + } + strcpy (ntv_descr, edesc); + + // if this event had masks, also add their descriptions + mdesc = event_table->native_events[eidx].mask_description; + if ((mdesc != NULL) && (strlen(mdesc) > 0)) { + if ((strlen(edesc) + 8 + strlen(mdesc)) >= (unsigned)len) { + SUBDBG("EXIT: Not enough room for event and mask descriptions: need: %u, have: %u", (unsigned)(strlen(edesc) + 8 + strlen(mdesc)), (unsigned)len); + return PAPI_EBUF; + } + strcat (ntv_descr, ", masks:"); + strcat (ntv_descr, mdesc); + } + + SUBDBG("EXIT: event description: %s\n", ntv_descr); + return PAPI_OK; +} + + +int +_pe_libpfm4_ntv_code_to_info(unsigned int EventCode, + PAPI_event_info_t *info, + struct native_event_table_t *event_table) +{ + SUBDBG("ENTER: EventCode: %#x, info: %p, event_table: %p\n", EventCode, info, event_table); + + int ret; + + // get the event name first + if ((ret = _pe_libpfm4_ntv_code_to_name(EventCode, info->symbol, sizeof(info->symbol), event_table)) != PAPI_OK) { + SUBDBG("EXIT: _pe_libpfm4_ntv_code_to_name returned: %d\n", ret); + return PAPI_ENOEVNT; + } + + if ((ret = _pe_libpfm4_ntv_code_to_descr(EventCode, info->long_descr, sizeof(info->long_descr), event_table)) != PAPI_OK) { + SUBDBG("EXIT: _pe_libpfm4_ntv_code_to_descr returned: %d\n", ret); + return PAPI_ENOEVNT; + } + + SUBDBG("EXIT: EventCode: %#x, name: %s, desc: %s\n", EventCode, info->symbol, info->long_descr); + return PAPI_OK; +} + + +/** @class _pe_libpfm4_ntv_enum_events + * @brief Walk through all events in a pre-defined order + * + * @param[in,out] *PapiEventCode + * -- PAPI event code to start with + * @param[in] modifier + * -- describe how to enumerate + * @param[in] event_table + * -- native event 
table struct + * + * @retval PAPI_OK The event was found and converted to a description + * @retval PAPI_ENOEVENT The event does not exist + * @retval PAPI_ENOIMPL The enumeration method requested in not implemented + * + */ + +int +_pe_libpfm4_ntv_enum_events( unsigned int *PapiEventCode, + int modifier, int cidx, + struct native_event_table_t *event_table) { + + SUBDBG("ENTER: PapiEventCode: %p, *PapiEventCode: %#x, modifier: %d, event_table: %p\n", PapiEventCode, *PapiEventCode, modifier, event_table); + + int code,ret, pnum; + int max_umasks; + char event_string[BUFSIZ]; + pfm_pmu_info_t pinfo; + pfm_event_info_t einfo; + struct native_event_t *our_event; + + /* return first event if so specified */ + if ( modifier == PAPI_ENUM_FIRST ) { + attr_idx = 0; // set so if they want attribute information, it will start with the first attribute + code=get_first_event_next_pmu(-1, event_table->pmu_type); + if (code < 0 ) { + SUBDBG("EXIT: Invalid component first event code: %d\n", code); + return code; + } + + // get the event information from libpfm4 (must zero structure) + memset( &einfo, 0, sizeof( pfm_event_info_t )); + einfo.size = sizeof(pfm_event_info_t); + if ((ret = pfm_get_event_info(code, PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { + SUBDBG("EXIT: pfm_get_event_info returned: %d\n", ret); + return PAPI_ENOIMPL; + } + + // get the pmu information from libpfm4 (must zero structure) + memset( &pinfo, 0, sizeof(pfm_pmu_info_t) ); + pinfo.size = sizeof(pfm_pmu_info_t); + ret=pfm_get_pmu_info(einfo.pmu, &pinfo); + if (ret!=PFM_SUCCESS) { + SUBDBG("EXIT: pfm_get_pmu_info returned: %d\n", ret); + return ret; + } + + // build full event name + sprintf (event_string, "%s::%s", pinfo.name, einfo.name); + SUBDBG("code: %#x, pmu: %s, event: %s, event_string: %s\n", code, pinfo.name, einfo.name, event_string); + + // go allocate this event, need to create tables used by the get event info call that will probably follow + if ((our_event = 
allocate_native_event(event_string, code, cidx, event_table)) == NULL) { + // allocate may have created the event table but returned NULL to tell the caller the event string was invalid (attempt to encode it failed). + // if the caller wants to use this event to count something, it will report an error + // but if the caller is just interested in listing the event, then we need an event table with an event name and libpfm4 index + int evt_idx; + if ((evt_idx = find_existing_event(event_string, event_table)) < 0) { + SUBDBG("EXIT: Allocating event: '%s' failed\n", event_string); + return PAPI_ENOEVNT; + } + + // give back the new event code + *PapiEventCode = event_table->native_events[evt_idx].libpfm4_idx; + SUBDBG("EXIT: event code: %#x\n", *PapiEventCode); + return PAPI_OK; + } + + *PapiEventCode = our_event->libpfm4_idx; + + SUBDBG("EXIT: *PapiEventCode: %#x\n", *PapiEventCode); + return PAPI_OK; + } + + /* Handle looking for the next event */ + if ( modifier == PAPI_ENUM_EVENTS ) { + attr_idx = 0; // set so if they want attribute information, it will start with the first attribute + + // get the next event code from libpfm4, if there are no more in this pmu find first event in next pmu + if ((code = pfm_get_event_next(*PapiEventCode)) < 0) { + + // get this events information from libpfm4, we need the pmu number of the last event we processed (table must be cleared) + memset( &einfo, 0, sizeof( pfm_event_info_t )); + einfo.size = sizeof(pfm_event_info_t); + if ((ret = pfm_get_event_info(*PapiEventCode, PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { + SUBDBG("EXIT: pfm_get_event_info returned: %d\n", ret); + return PAPI_ENOIMPL; + } + SUBDBG("*PapiEventCode: %#x, event: %s\n", *PapiEventCode, einfo.name); + + // get the pmu number of the last event + pnum = einfo.pmu; + + SUBDBG("pnum: %d\n", pnum); + code=get_first_event_next_pmu(pnum, event_table->pmu_type); + if (code < 0) { + SUBDBG("EXIT: No more PMUs to list, returning: %d\n", code); + return code; + } 
+ } + + + // get the event information from libpfm4 (must zero structure) + memset( &einfo, 0, sizeof( pfm_event_info_t )); + einfo.size = sizeof(pfm_event_info_t); + if ((ret = pfm_get_event_info(code, PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { + SUBDBG("EXIT: pfm_get_event_info returned: %d\n", ret); + return PAPI_ENOIMPL; + } + + // get the pmu information from libpfm4 (must zero structure) + memset( &pinfo, 0, sizeof(pfm_pmu_info_t) ); + pinfo.size = sizeof(pfm_pmu_info_t); + ret=pfm_get_pmu_info(einfo.pmu, &pinfo); + if (ret!=PFM_SUCCESS) { + SUBDBG("EXIT: pfm_get_pmu_info returned: %d\n", ret); + return ret; + } + + // build full event name + sprintf (event_string, "%s::%s", pinfo.name, einfo.name); + SUBDBG("code: %#x, pmu: %s, event: %s, event_string: %s\n", code, pinfo.name, einfo.name, event_string); + + // go allocate this event, need to create tables used by the get event info call that will follow + if ((our_event = allocate_native_event(event_string, code, cidx, event_table)) == NULL) { + // allocate may have created the event table but returned NULL to tell the caller the event string was invalid (attempt to encode it failed). 
+ // if the caller wants to use this event to count something, it will report an error + // but if the caller is just interested in listing the event, then we need an event table with an event name and libpfm4 index + int evt_idx; + if ((evt_idx = find_existing_event(event_string, event_table)) < 0) { + SUBDBG("EXIT: Allocating event: '%s' failed\n", event_string); + return PAPI_ENOEVNT; + } + + // give back the new event code + *PapiEventCode = event_table->native_events[evt_idx].libpfm4_idx; + SUBDBG("EXIT: event code: %#x\n", *PapiEventCode); + return PAPI_OK; + } + + // give back the new event code + *PapiEventCode = our_event->libpfm4_idx; + + SUBDBG("EXIT: *PapiEventCode: %#x\n", *PapiEventCode); + return PAPI_OK; + } + + /* We don't handle PAPI_NTV_ENUM_UMASK_COMBOS */ + if ( modifier == PAPI_NTV_ENUM_UMASK_COMBOS ) { + SUBDBG("EXIT: do not support umask combos yet\n"); + return PAPI_ENOIMPL; + } + + /* Enumerate PAPI_NTV_ENUM_UMASKS (umasks on an event) */ + if ( modifier == PAPI_NTV_ENUM_UMASKS ) { + // get this events information from libpfm4, we need the number of masks this event knows about (table must be cleared) + memset( &einfo, 0, sizeof( pfm_event_info_t )); + einfo.size = sizeof(pfm_event_info_t); + if ((ret = pfm_get_event_info(*PapiEventCode, PFM_OS_PERF_EVENT_EXT, &einfo)) != PFM_SUCCESS) { + SUBDBG("EXIT: pfm_get_event_info returned: %d\n", ret); + return PAPI_ENOIMPL; + } +// SUBDBG("*PapiEventCode: %#x, einfo.name: %s, einfo.code: %#x, einfo.nattrs: %d\n", *PapiEventCode, einfo.name, einfo.code, einfo.nattrs); + + // set max number of masks + max_umasks = einfo.nattrs; + + // if we reached last attribute, return error to show we are done with this events masks + if (attr_idx == max_umasks) { + SUBDBG("EXIT: already processed all umasks: attr_idx: %d\n", attr_idx); + return PAPI_ENOEVNT; + } + + // find the event table for this event, we need the pmu name and event name without any masks + int ntv_idx = 
_papi_hwi_get_ntv_idx(_papi_hwi_get_papi_event_code()); + if (ntv_idx < 0) { + SUBDBG("EXIT: _papi_hwi_get_ntv_idx returned: %d\n", ntv_idx); + return ntv_idx; + } + char *ename = event_table->native_events[ntv_idx].pmu_plus_name; + if ((ename == NULL) || (strlen(ename) >= sizeof(event_string))) { + SUBDBG("EXIT: Event name will not fit into buffer\n"); + return PAPI_EBUF; + } + strcpy (event_string, ename); + SUBDBG("event_string: %s\n", event_string); + + // go get the attribute information for this event + // libpfm4 likes the table cleared + pfm_event_attr_info_t ainfo; + memset (&ainfo, 0, sizeof(pfm_event_attr_info_t)); + ainfo.size = sizeof(pfm_event_attr_info_t); + ret = pfm_get_event_attr_info(*PapiEventCode, attr_idx, PFM_OS_PERF_EVENT_EXT, &ainfo); + if (ret != PFM_SUCCESS) { + SUBDBG("EXIT: Attribute info not found, EventCode: %#x, attr_idx: %d, ret: %d\n", *PapiEventCode, attr_idx, _papi_libpfm4_error(ret)); + return _papi_libpfm4_error(ret); + } + SUBDBG("*PapiEventCode: %#x, attr_idx: %d, type: %d, name: %s, description: %s\n", *PapiEventCode, attr_idx, ainfo.type, ainfo.name, ainfo.desc); + + if (strlen(event_string) + strlen(ainfo.name) + 35 > sizeof(event_string)) { + SUBDBG("EXIT: Event name and mask will not fit into buffer\n"); + return PAPI_EBUF; + } + + strcat (event_string, ":"); + strcat (event_string, ainfo.name); + switch (ainfo.type) { + case PFM_ATTR_UMASK: + break; + case PFM_ATTR_MOD_BOOL: + case PFM_ATTR_MOD_INTEGER: + // a few attributes require a non-zero value to encode correctly (most would accept zero here) + strcat(event_string,"=0"); + break; + default: + SUBDBG("EXIT: Unsupported attribute type: %d", ainfo.type); + return PAPI_EATTR; + } + + // go allocate this event, need to create tables used by the get event info call that will follow + if ((our_event = allocate_native_event(event_string, *PapiEventCode, cidx, event_table)) == NULL) { + // allocate may have created the event table but returned NULL to tell the caller the 
event string was invalid. + // if the caller wants to use this event to count something, it must report the error + // but if the caller is just interested in listing the event (like this code), then find the table that was created and return its libpfm4 index + int evt_idx; + if ((evt_idx = find_existing_event(event_string, event_table)) < 0) { + SUBDBG("EXIT: Allocating event: '%s' failed\n", event_string); + return PAPI_ENOEVNT; + } + // bump so next time we will use next attribute + attr_idx++; + // give back the new event code + *PapiEventCode = event_table->native_events[evt_idx].libpfm4_idx; + SUBDBG("EXIT: event code: %#x\n", *PapiEventCode); + return PAPI_OK; + } + + // bump so next time we will use next attribute + attr_idx++; + + // give back the new event code + *PapiEventCode = our_event->libpfm4_idx; + + SUBDBG("EXIT: event code: %#x\n", *PapiEventCode); + return PAPI_OK; + } + + /* Enumerate PAPI_NTV_ENUM_GROUPS (groups on an event) */ + if ( modifier == PAPI_NTV_ENUM_GROUPS ) { + SUBDBG("EXIT: do not support enumerating groups in this component\n"); + return PAPI_ENOIMPL; + } + + /* An unknown enumeration method was indicated */ + + SUBDBG("EXIT: Invalid modifier argument provided\n"); + return PAPI_ENOIMPL; +} + + +/** @class _pe_libpfm4_shutdown + * @brief Shutdown any initialization done by the libpfm4 code + * + * @param[in] event_table + * -- native event table struct + * + * @retval PAPI_OK We always return PAPI_OK + * + */ + +int +_pe_libpfm4_shutdown(papi_vector_t *my_vector, + struct native_event_table_t *event_table) { + SUBDBG("ENTER: event_table: %p\n", event_table); + + int i; + + for (i=0 ; icmp_info.pmu_names[i] != NULL) { + free (my_vector->cmp_info.pmu_names[i]); + } + } + + /* clean out and free the native events structure */ + _papi_hwi_lock( NAMELIB_LOCK ); + + /* free memory allocated with strdup or malloc */ + for( i=0; inum_native_events; i++) { + free(event_table->native_events[i].base_name); + 
free(event_table->native_events[i].pmu_plus_name); + free(event_table->native_events[i].pmu); + free(event_table->native_events[i].allocated_name); + free(event_table->native_events[i].mask_string); + free(event_table->native_events[i].event_description); + if (event_table->native_events[i].mask_description != NULL) { + free(event_table->native_events[i].mask_description); + } + } + + free(event_table->native_events); + + _papi_hwi_unlock( NAMELIB_LOCK ); + + SUBDBG("EXIT: PAPI_OK\n"); + return PAPI_OK; +} + + +/** @class _pe_libpfm4_init + * @brief Initialize the libpfm4 code + * + * @param[in] component + * -- pointer to component structure + * @param[in] event_table + * -- native event table structure + * + * @retval PAPI_OK We initialized correctly + * @retval PAPI_ECMP There was an error initializing the component + * + */ + +int +_pe_libpfm4_init(papi_vector_t *component, int cidx, + struct native_event_table_t *event_table, + int pmu_type) { + + int detected_pmus=0, found_default=0; + int i; + int j=0; + unsigned int ncnt; + + pfm_err_t retval = PFM_SUCCESS; + pfm_pmu_info_t pinfo; + + /* allocate the native event structure */ + event_table->num_native_events=0; + event_table->pmu_type=pmu_type; + + event_table->native_events=calloc(NATIVE_EVENT_CHUNK, + sizeof(struct native_event_t)); + if (event_table->native_events==NULL) { + strncpy(component->cmp_info.disabled_reason, + "calloc NATIVE_EVENT_CHUNK failed",PAPI_MAX_STR_LEN); + return PAPI_ENOMEM; + } + + event_table->allocated_native_events=NATIVE_EVENT_CHUNK; + + /* Count number of present PMUs */ + detected_pmus=0; + ncnt=0; + + /* init default pmu */ + /* need to init pinfo or pfmlib might complain */ + memset(&(event_table->default_pmu), 0, sizeof(pfm_pmu_info_t)); + event_table->default_pmu.size = sizeof(pfm_pmu_info_t); + retval=pfm_get_pmu_info(0, &(event_table->default_pmu)); + + SUBDBG("Detected pmus:\n"); + i=0; + while(1) { + memset(&pinfo,0,sizeof(pfm_pmu_info_t)); + pinfo.size = 
sizeof(pfm_pmu_info_t); + retval=pfm_get_pmu_info(i, &pinfo); + + /* We're done if we hit an invalid PMU entry */ + /* We can't check against PFM_PMU_MAX as that might not */ + /* match if libpfm4 is dynamically linked */ + + if (retval==PFM_ERR_INVAL) { + break; + } + + if ((retval==PFM_SUCCESS) && (pinfo.name != NULL) && + (pmu_is_present_and_right_type(&pinfo,pmu_type))) { + + SUBDBG("\t%d %s %s %d\n",i, + pinfo.name,pinfo.desc,pinfo.type); + + detected_pmus++; + ncnt+=pinfo.nevents; + + if (j < PAPI_PMU_MAX) { + component->cmp_info.pmu_names[j++] = + strdup(pinfo.name); + } + + if (pmu_type & PMU_TYPE_CORE) { + + /* Hack to have "default core" PMU */ + if ( (pinfo.type==PFM_PMU_TYPE_CORE) && + strcmp(pinfo.name,"ix86arch")) { + + SUBDBG("\t %s is default\n",pinfo.name); + memcpy(&(event_table->default_pmu), + &pinfo,sizeof(pfm_pmu_info_t)); + found_default++; + } + } + + if (pmu_type==PMU_TYPE_UNCORE) { + /* To avoid confusion, no "default" CPU for uncore */ + found_default=1; + } + } + i++; + } + SUBDBG("%d native events detected on %d pmus\n",ncnt,detected_pmus); + + if (detected_pmus==0) { + SUBDBG("Could not find any PMUs\n"); + return PAPI_ENOSUPP; + } + + if (!found_default) { + strncpy(component->cmp_info.disabled_reason, + "could not find default PMU",PAPI_MAX_STR_LEN); + return PAPI_ECMP; + } + + if (found_default>1) { + strncpy(component->cmp_info.disabled_reason, + "found more than one default PMU",PAPI_MAX_STR_LEN); + return PAPI_ECOUNT; + } + + component->cmp_info.num_native_events = ncnt; + + component->cmp_info.num_cntrs = event_table->default_pmu.num_cntrs+ + event_table->default_pmu.num_fixed_cntrs; + + SUBDBG( "num_counters: %d\n", component->cmp_info.num_cntrs ); + + /* Setup presets, only if Component 0 and default core PMU */ + if ((cidx==0) && (found_default)) { + retval = _papi_load_preset_table( (char *)event_table->default_pmu.name, + event_table->default_pmu.pmu, cidx ); + if ( retval!=PAPI_OK ) { + 
strncpy(component->cmp_info.disabled_reason,"_papi_load_preset_table failed",PAPI_MAX_STR_LEN); + return PAPI_ENOEVNT; + } + } + + return PAPI_OK; +} + +/** @class _peu_libpfm4_init + * @brief Initialize the libpfm4 code + * + * @param[in] event_table + * -- native event table struct + * + * @retval PAPI_OK We initialized correctly + * @retval PAPI_ECMP There was an error initializing the component + * + */ + +int +_peu_libpfm4_init(papi_vector_t *my_vector, int cidx, + struct native_event_table_t *event_table, + int pmu_type) { + + int detected_pmus=0; + int i; + int j=0; + pfm_err_t retval = PFM_SUCCESS; + unsigned int ncnt; + pfm_pmu_info_t pinfo; + + (void)cidx; + + /* allocate the native event structure */ + + event_table->num_native_events=0; + event_table->pmu_type=pmu_type; + + event_table->native_events=calloc(NATIVE_EVENT_CHUNK, + sizeof(struct native_event_t)); + if (event_table->native_events==NULL) { + return PAPI_ENOMEM; + } + event_table->allocated_native_events=NATIVE_EVENT_CHUNK; + + /* Count number of present PMUs */ + detected_pmus=0; + ncnt=0; + + my_vector->cmp_info.num_cntrs=0; + + SUBDBG("Detected pmus:\n"); + i=0; + while(1) { + memset(&pinfo,0,sizeof(pfm_pmu_info_t)); + pinfo.size = sizeof(pfm_pmu_info_t); + retval=pfm_get_pmu_info(i, &pinfo); + + /* We're done if we hit an invalid PMU entry */ + /* We can't check against PFM_PMU_MAX */ + /* as that might not match if libpfm4 is dynamically linked */ + + if (retval==PFM_ERR_INVAL) { + break; + } + + if ((retval==PFM_SUCCESS) && (pinfo.name != NULL) && + (pmu_is_present_and_right_type(&pinfo,pmu_type))) { + + SUBDBG("\t%d %s %s %d\n",i,pinfo.name,pinfo.desc,pinfo.type); + + detected_pmus++; + ncnt+=pinfo.nevents; + + if ((j < PAPI_PMU_MAX) && (pinfo.name != NULL)) { + my_vector->cmp_info.pmu_names[j++] = strdup(pinfo.name); + } + my_vector->cmp_info.num_cntrs += pinfo.num_cntrs+ + pinfo.num_fixed_cntrs; + } + i++; + } + SUBDBG("%d native events detected on %d pmus\n",ncnt,detected_pmus); + + 
my_vector->cmp_info.num_native_events = ncnt; + + SUBDBG( "num_counters: %d\n", my_vector->cmp_info.num_cntrs ); + + return PAPI_OK; +} + diff --git a/src/components/perf_event/pe_libpfm4_events.h b/src/components/perf_event/pe_libpfm4_events.h new file mode 100644 index 0000000..801dd54 --- /dev/null +++ b/src/components/perf_event/pe_libpfm4_events.h @@ -0,0 +1,32 @@ +/* +* File: pe_libpfm4_events.h +*/ + +/* Prototypes for libpfm name library access */ + +int _pe_libpfm4_setup_presets( char *name, int type, int cidx ); +int _pe_libpfm4_ntv_enum_events( unsigned int *EventCode, int modifier, int cidx, + struct native_event_table_t *event_table); +int _pe_libpfm4_ntv_name_to_code( const char *ntv_name, + unsigned int *EventCode, int cidx, + struct native_event_table_t *event_table); +int _pe_libpfm4_ntv_code_to_name( unsigned int EventCode, char *name, + int len, + struct native_event_table_t *event_table); +int _pe_libpfm4_ntv_code_to_descr( unsigned int EventCode, char *name, + int len, + struct native_event_table_t *event_table); +int _pe_libpfm4_shutdown(papi_vector_t *my_vector, + struct native_event_table_t *event_table); + +int _pe_libpfm4_ntv_code_to_info(unsigned int EventCode, + PAPI_event_info_t *info, + struct native_event_table_t *event_table); + +int _pe_libpfm4_init(papi_vector_t *my_vector, int cidx, + struct native_event_table_t *event_table, + int pmu_type); + +int _peu_libpfm4_init(papi_vector_t *my_vector, int cidx, + struct native_event_table_t *event_table, + int pmu_type); diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c new file mode 100644 index 0000000..9e9c852 --- /dev/null +++ b/src/components/perf_event/perf_event.c @@ -0,0 +1,2610 @@ +/* +* File: perf_event.c +* +* Author: Corey Ashford +* cjashfor@us.ibm.com +* - based upon perfmon.c written by - +* Philip Mucci +* mucci@cs.utk.edu +* Mods: Gary Mohr +* gary.mohr@bull.com +* Mods: Vince Weaver +* vweaver1@eecs.utk.edu +* Mods: Philip Mucci 
+* mucci@eecs.utk.edu +* Mods: Gary Mohr +* gary.mohr@bull.com +* Modified the perf_event component to use PFM_OS_PERF_EVENT_EXT mode in libpfm4. +* This adds several new event masks, including cpu=, u=, and k= which give the user +* the ability to set cpu number to use or control the domain (user, kernel, or both) +* in which the counter should be incremented. These are event masks so it is now +* possible to have multiple events in the same event set that count activity from +* different CPUs or count activity in different domains. +*/ + + +#include +#include +#include +#include +#include +#include +#include +#include + +/* PAPI-specific includes */ +#include "papi.h" +#include "papi_memory.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "extras.h" + +/* libpfm4 includes */ +#include "papi_libpfm4_events.h" +#include "pe_libpfm4_events.h" +#include "perfmon/pfmlib.h" +#include PEINCLUDE + +/* Linux-specific includes */ +#include "mb.h" +#include "linux-memory.h" +#include "linux-timer.h" +#include "linux-common.h" +#include "linux-context.h" + +#include "perf_event_lib.h" +#include "perf_helpers.h" + +/* Set to enable pre-Linux 2.6.34 perf_event workarounds */ +/* If disabling them gets no complaints then we can remove */ +/* These in a future version of PAPI. */ +#define OBSOLETE_WORKAROUNDS 0 + +/* Defines for ctx->state */ +#define PERF_EVENTS_OPENED 0x01 +#define PERF_EVENTS_RUNNING 0x02 + +/* Forward declaration */ +papi_vector_t _perf_event_vector; + +/* Globals */ +struct native_event_table_t perf_native_event_table; +static int our_cidx; +static int exclude_guest_unsupported; + +/* The kernel developers say to never use a refresh value of 0 */ +/* See https://lkml.org/lkml/2011/5/24/172 */ +/* However, on some platforms (like Power) a value of 1 does not work */ +/* We're still tracking down why this happens. 
*/ + +#if defined(__powerpc__) +#define PAPI_REFRESH_VALUE 0 +#else +#define PAPI_REFRESH_VALUE 1 +#endif + +static int _pe_set_domain( hwd_control_state_t *ctl, int domain); + +#if (OBSOLETE_WORKAROUNDS==1) + +/* Check for processor support */ +/* Can be used for generic checking, though in general we only */ +/* check for pentium4 here because support was broken for multiple */ +/* kernel releases and the usual standard detections did not */ +/* handle this. So we check for pentium 4 explicitly. */ +static int +processor_supported(int vendor, int family) { + + /* Error out if kernel too early to support p4 */ + if (( vendor == PAPI_VENDOR_INTEL ) && (family == 15)) { + if (_papi_os_info.os_version < LINUX_VERSION(2,6,35)) { + PAPIERROR("Pentium 4 not supported on kernels before 2.6.35"); + return PAPI_ENOSUPP; + } + } + return PAPI_OK; +} + +#endif + +/* Fix up the config based on what CPU/Vendor we are running on */ +static int +pe_vendor_fixups(papi_vector_t *vector) +{ + /* powerpc */ + /* On IBM and Power6 Machines default domain should include supervisor */ + if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_IBM ) { + vector->cmp_info.available_domains |= + PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR; + if (strcmp(_papi_hwi_system_info.hw_info.model_string, "POWER6" ) == 0 ) { + vector->cmp_info.default_domain = + PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR; + } + } + + if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_MIPS ) { + vector->cmp_info.available_domains |= PAPI_DOM_KERNEL; + } + + if ((_papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL) || + (_papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_AMD)) { + vector->cmp_info.fast_real_timer = 1; + } + + /* ARM */ + if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM) { + + /* Some ARMv7 and earlier could not measure */ + /* KERNEL and USER separately. 
*/ + + /* Whitelist CortexA7 and CortexA15 */ + /* There might be more */ + + if ((_papi_hwi_system_info.hw_info.cpuid_family < 8) && + (_papi_hwi_system_info.hw_info.cpuid_model!=0xc07) && + (_papi_hwi_system_info.hw_info.cpuid_model!=0xc0f)) { + + vector->cmp_info.available_domains |= + PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR; + vector->cmp_info.default_domain = + PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR; + } + } + + /* CRAY */ + if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_CRAY ) { + vector->cmp_info.available_domains |= PAPI_DOM_OTHER; + } + + return PAPI_OK; +} + + + +/******************************************************************/ +/******** Kernel Version Dependent Routines **********************/ +/******************************************************************/ + + +/* PERF_FORMAT_GROUP allows reading an entire group's counts at once */ +/* before 2.6.34 PERF_FORMAT_GROUP did not work when reading results */ +/* from attached processes. We are lazy and disable it for all cases */ +/* commit was: 050735b08ca8a016bbace4445fa025b88fee770b */ + +static int +bug_format_group(void) { + + +#if (OBSOLETE_WORKAROUNDS==1) + if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) return 1; +#endif + + /* MIPS, as of version 3.1, does not support this properly */ + /* FIXME: is this still true? */ + +#if defined(__mips__) + return 1; +#endif + + return 0; + +} + +#if (OBSOLETE_WORKAROUNDS==1) + + +/* There's a bug prior to Linux 2.6.33 where if you are using */ +/* PERF_FORMAT_GROUP, the TOTAL_TIME_ENABLED and */ +/* TOTAL_TIME_RUNNING fields will be zero unless you disable */ +/* the counters first */ +static int +bug_sync_read(void) { + + if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1; + + return 0; + +} + +#endif + +/* Set the F_SETOWN_EX flag on the fd. 
*/ +/* This affects which thread an overflow signal gets sent to */ +/* Handled in a subroutine to handle the fact that the behavior */ +/* is dependent on kernel version. */ +static int +fcntl_setown_fd(int fd) { + + int ret; + struct f_owner_ex fown_ex; + + /* F_SETOWN_EX is not available until 2.6.32 */ + /* but PAPI perf_event support didn't work on 2.6.31 anyay */ + + /* set ownership of the descriptor */ + fown_ex.type = F_OWNER_TID; + fown_ex.pid = mygettid(); + ret = fcntl(fd, F_SETOWN_EX, (unsigned long)&fown_ex ); + + if ( ret == -1 ) { + PAPIERROR( "cannot fcntl(F_SETOWN_EX) on %d: %s", + fd, strerror( errno ) ); + return PAPI_ESYS; + } + return PAPI_OK; +} + +/* The read format on perf_event varies based on various flags that */ +/* are passed into it. This helper avoids copying this logic */ +/* multiple places. */ +static unsigned int +get_read_format( unsigned int multiplex, + unsigned int inherit, + int format_group ) +{ + unsigned int format = 0; + + /* if we need read format options for multiplexing, add them now */ + if (multiplex) { + format |= PERF_FORMAT_TOTAL_TIME_ENABLED; + format |= PERF_FORMAT_TOTAL_TIME_RUNNING; + } + + /* if our kernel supports it and we are not using inherit, */ + /* add the group read options */ + if ( (!bug_format_group()) && !inherit) { + if (format_group) { + format |= PERF_FORMAT_GROUP; + } + } + + SUBDBG("multiplex: %d, inherit: %d, group_leader: %d, format: %#x\n", + multiplex, inherit, format_group, format); + + return format; +} + + +/* attr.exclude_guest is enabled by default in recent libpfm4 */ +/* however older kernels will reject events with it set */ +/* because the reserved field is not all zeros */ +static int +check_exclude_guest( void ) +{ + int ev_fd; + struct perf_event_attr attr; + + exclude_guest_unsupported=0; + + /* First check that we can open a plain instructions event */ + memset(&attr, 0 , sizeof(attr)); + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + + ev_fd = sys_perf_event_open( &attr, 0, 
-1, -1, 0 ); + if ( ev_fd == -1 ) { + PAPIERROR("Couldn't open hw_instructions in exclude_guest=0 test"); + return -1; + } + close(ev_fd); + + /* Now try again with excude_guest */ + memset(&attr, 0 , sizeof(attr)); + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.exclude_guest=1; + + ev_fd = sys_perf_event_open( &attr, 0, -1, -1, 0 ); + if ( ev_fd == -1 ) { + if (errno==EINVAL) { + exclude_guest_unsupported=1; + } + else { + PAPIERROR("Couldn't open hw_instructions in exclude_guest=1 test"); + } + } else { + exclude_guest_unsupported=0; + close(ev_fd); + } + + return PAPI_OK; +} + +/*****************************************************************/ +/********* End Kernel-version Dependent Routines ****************/ +/*****************************************************************/ + +/*****************************************************************/ +/********* Begin perf_event low-level code ***********************/ +/*****************************************************************/ + +static void perf_event_dump_attr( struct perf_event_attr *hw_event, + pid_t pid, int cpu, int group_fd, unsigned long int flags) { + + /* Mark parameters as not used */ + /* In the common case (no SUBDBG) the function */ + /* compiles into an empty function and complains */ + /* about unused variables. 
*/ + (void)hw_event; + (void)pid; + (void)cpu; + (void)group_fd; + (void)flags; + + SUBDBG("sys_perf_event_open(hw_event: %p, pid: %d, cpu: %d, " + "group_fd: %d, flags: %lx\n", + hw_event, pid, cpu, group_fd, flags); + SUBDBG(" type: %d\n",hw_event->type); + SUBDBG(" size: %d\n",hw_event->size); + SUBDBG(" config: %"PRIx64" (%"PRIu64")\n", + hw_event->config, hw_event->config); + SUBDBG(" sample_period: %"PRIu64"\n",hw_event->sample_period); + SUBDBG(" sample_type: %"PRIu64"\n",hw_event->sample_type); + SUBDBG(" read_format: %"PRIu64"\n",hw_event->read_format); + SUBDBG(" disabled: %d\n",hw_event->disabled); + SUBDBG(" inherit: %d\n",hw_event->inherit); + SUBDBG(" pinned: %d\n",hw_event->pinned); + SUBDBG(" exclusive: %d\n",hw_event->exclusive); + SUBDBG(" exclude_user: %d\n",hw_event->exclude_user); + SUBDBG(" exclude_kernel: %d\n",hw_event->exclude_kernel); + SUBDBG(" exclude_hv: %d\n",hw_event->exclude_hv); + SUBDBG(" exclude_idle: %d\n",hw_event->exclude_idle); + SUBDBG(" mmap: %d\n",hw_event->mmap); + SUBDBG(" comm: %d\n",hw_event->comm); + SUBDBG(" freq: %d\n",hw_event->freq); + SUBDBG(" inherit_stat: %d\n",hw_event->inherit_stat); + SUBDBG(" enable_on_exec: %d\n",hw_event->enable_on_exec); + SUBDBG(" task: %d\n",hw_event->task); + SUBDBG(" watermark: %d\n",hw_event->watermark); + SUBDBG(" precise_ip: %d\n",hw_event->precise_ip); + SUBDBG(" mmap_data: %d\n",hw_event->mmap_data); + SUBDBG(" sample_id_all: %d\n",hw_event->sample_id_all); + SUBDBG(" exclude_host: %d\n",hw_event->exclude_host); + SUBDBG(" exclude_guest: %d\n",hw_event->exclude_guest); + SUBDBG(" exclude_callchain_kernel: %d\n", + hw_event->exclude_callchain_kernel); + SUBDBG(" exclude_callchain_user: %d\n", + hw_event->exclude_callchain_user); + SUBDBG(" wakeup_events: %"PRIx32" (%"PRIu32")\n", + hw_event->wakeup_events, hw_event->wakeup_events); + SUBDBG(" bp_type: %"PRIx32" (%"PRIu32")\n", + hw_event->bp_type, hw_event->bp_type); + SUBDBG(" config1: %"PRIx64" (%"PRIu64")\n", + 
hw_event->config1, hw_event->config1); + SUBDBG(" config2: %"PRIx64" (%"PRIu64")\n", + hw_event->config2, hw_event->config2); + SUBDBG(" branch_sample_type: %"PRIx64" (%"PRIu64")\n", + hw_event->branch_sample_type, hw_event->branch_sample_type); + SUBDBG(" sample_regs_user: %"PRIx64" (%"PRIu64")\n", + hw_event->sample_regs_user, hw_event->sample_regs_user); + SUBDBG(" sample_stack_user: %"PRIx32" (%"PRIu32")\n", + hw_event->sample_stack_user, hw_event->sample_stack_user); +} + + +static int map_perf_event_errors_to_papi(int perf_event_error) { + + int ret; + + /* These mappings are approximate. + EINVAL in particular can mean lots of different things */ + switch(perf_event_error) { + case EPERM: + case EACCES: + ret = PAPI_EPERM; + break; + case ENODEV: + case EOPNOTSUPP: + ret = PAPI_ENOSUPP; + break; + case ENOENT: + ret = PAPI_ENOEVNT; + break; + case ENOSYS: + case EAGAIN: + case EBUSY: + case E2BIG: /* Only happens if attr is the wrong size somehow */ + case EBADF: /* We are attempting to group with an invalid file descriptor */ + ret = PAPI_ESYS; + break; + case ENOMEM: + ret = PAPI_ENOMEM; + break; + case EMFILE: /* Out of file descriptors. Typically max out at 1024 */ + ret = PAPI_ECOUNT; + break; + case EINVAL: + default: + ret = PAPI_EINVAL; + break; + } + return ret; +} + + +/** Check if the current set of options is supported by */ +/* perf_events. */ +/* We do this by temporarily opening an event with the */ +/* desired options then closing it again. We use the */ +/* PERF_COUNT_HW_INSTRUCTION event as a dummy event */ +/* on the assumption it is available on all */ +/* platforms. 
*/ + +static int +check_permissions( unsigned long tid, + unsigned int cpu_num, + unsigned int domain, + unsigned int granularity, + unsigned int multiplex, + unsigned int inherit ) +{ + int ev_fd; + struct perf_event_attr attr; + + long pid; + + /* clearing this will set a type of hardware and to count all domains */ + memset(&attr, '\0', sizeof(attr)); + attr.read_format = get_read_format(multiplex, inherit, 1); + + /* set the event id (config field) to instructios */ + /* (an event that should always exist) */ + /* This was cycles but that is missing on Niagara */ + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + + /* now set up domains this event set will be counting */ + if (!(domain & PAPI_DOM_SUPERVISOR)) { + attr.exclude_hv = 1; + } + if (!(domain & PAPI_DOM_USER)) { + attr.exclude_user = 1; + } + if (!(domain & PAPI_DOM_KERNEL)) { + attr.exclude_kernel = 1; + } + + if (granularity==PAPI_GRN_SYS) { + pid = -1; + } else { + pid = tid; + } + + SUBDBG("Calling sys_perf_event_open() from check_permissions\n"); + + perf_event_dump_attr( &attr, pid, cpu_num, -1, 0 ); + + ev_fd = sys_perf_event_open( &attr, pid, cpu_num, -1, 0 ); + if ( ev_fd == -1 ) { + SUBDBG("sys_perf_event_open returned error. Linux says, %s", + strerror( errno ) ); + return map_perf_event_errors_to_papi(errno); + } + + /* now close it, this was just to make sure we have permissions */ + /* to set these options */ + close(ev_fd); + return PAPI_OK; +} + +/* Maximum size we ever expect to read from a perf_event fd */ +/* (this is the number of 64-bit values) */ +/* We use this to size the read buffers */ +/* The three is for event count, time_enabled, time_running */ +/* and the counter term is count value and count id for each */ +/* possible counter value. */ +#define READ_BUFFER_SIZE (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS)) + + + +/* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch */ +/* implementations (e.g. 
x86 before 2.6.33) which don't do a static event */ +/* scheduability check in sys_perf_event_open. It is also needed if the */ +/* kernel is stealing an event, such as when NMI watchdog is enabled. */ + +static int +check_scheduability( pe_context_t *ctx, pe_control_t *ctl, int idx ) +{ + int retval = 0, cnt = -1; + ( void ) ctx; /*unused */ + long long papi_pe_buffer[READ_BUFFER_SIZE]; + int i,group_leader_fd; + + /* If the kernel isn't tracking scheduability right */ + /* Then we need to start/stop/read to force the event */ + /* to be scheduled and see if an error condition happens. */ + + /* get the proper fd to start */ + group_leader_fd=ctl->events[idx].group_leader_fd; + if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd; + + /* start the event */ + retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL ); + if (retval == -1) { + PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed"); + return PAPI_ESYS; + } + + /* stop the event */ + retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL ); + if (retval == -1) { + PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" ); + return PAPI_ESYS; + } + + /* See if a read returns any results */ + cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer)); + if ( cnt == -1 ) { + SUBDBG( "read returned an error! Should never happen.\n" ); + return PAPI_ESYS; + } + + if ( cnt == 0 ) { + /* We read 0 bytes if we could not schedule the event */ + /* The kernel should have detected this at open */ + /* but various bugs (including NMI watchdog) */ + /* result in this behavior */ + + return PAPI_ECNFLCT; + + } else { + + /* Reset all of the counters (opened so far) back to zero */ + /* from the above brief enable/disable call pair. */ + + /* We have to reset all events because reset of group leader */ + /* does not reset all. 
*/ + /* we assume that the events are being added one by one and that */ + /* we do not need to reset higher events (doing so may reset ones */ + /* that have not been initialized yet. */ + + /* Note... PERF_EVENT_IOC_RESET does not reset time running */ + /* info if multiplexing, so we should avoid coming here if */ + /* we are multiplexing the event. */ + for( i = 0; i < idx; i++) { + retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL ); + if (retval == -1) { + PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d " + "(fd %d)failed", + i,ctl->num_events,idx,ctl->events[i].event_fd); + return PAPI_ESYS; + } + } + } + return PAPI_OK; +} + + +/* Do some extra work on a perf_event fd if we're doing sampling */ +/* This mostly means setting up the mmap buffer. */ +static int +configure_fd_for_sampling( pe_control_t *ctl, int evt_idx ) +{ + int ret; + int fd = ctl->events[evt_idx].event_fd; + + /* Register that we would like a SIGIO notification when a mmap'd page */ + /* becomes full. */ + ret = fcntl( fd, F_SETFL, O_ASYNC | O_NONBLOCK ); + if ( ret ) { + PAPIERROR ( "fcntl(%d, F_SETFL, O_ASYNC | O_NONBLOCK) " + "returned error: %s", fd, strerror( errno ) ); + return PAPI_ESYS; + } + + /* Set the F_SETOWN_EX flag on the fd. */ + /* This affects which thread an overflow signal gets sent to. */ + ret=fcntl_setown_fd(fd); + if (ret!=PAPI_OK) return ret; + + /* Set FD_CLOEXEC. Otherwise if we do an exec with an overflow */ + /* running, the overflow handler will continue into the exec()'d*/ + /* process and kill it because no signal handler is set up. */ + ret=fcntl(fd, F_SETFD, FD_CLOEXEC); + if (ret) { + return PAPI_ESYS; + } + + /* when you explicitely declare that you want a particular signal, */ + /* even with you use the default signal, the kernel will send more */ + /* information concerning the event to the signal handler. 
*/ + /* */ + /* In particular, it will send the file descriptor from which the */ + /* event is originating which can be quite useful when monitoring */ + /* multiple tasks from a single thread. */ + ret = fcntl( fd, F_SETSIG, ctl->overflow_signal ); + if ( ret == -1 ) { + PAPIERROR( "cannot fcntl(F_SETSIG,%d) on %d: %s", + ctl->overflow_signal, fd, + strerror( errno ) ); + return PAPI_ESYS; + } + + return PAPI_OK; +} + +static int +set_up_mmap( pe_control_t *ctl, int evt_idx) +{ + + void *buf_addr; + int fd = ctl->events[evt_idx].event_fd; + + /* mmap() the sample buffer */ + buf_addr = mmap( NULL, + ctl->events[evt_idx].nr_mmap_pages * getpagesize(), + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, 0 ); + + /* This may happen if we go over the limit in */ + /* /proc/sys/kernel/perf_event_mlock_kb */ + /* which defaults to 516k */ + /* with regular rdpmc events on 4k page archs */ + /* this is roughly 128 events */ + + /* We sholdn't fail, just fall back to non-rdpmc */ + /* Although not sure what happens if it's a sample */ + /* event that fails to mmap. */ + + if ( buf_addr == MAP_FAILED ) { + SUBDBG( "mmap(NULL,%d,%d,%d,%d,0): %s", + ctl->events[evt_idx].nr_mmap_pages * getpagesize(), + PROT_READ | PROT_WRITE, + MAP_SHARED, + fd, strerror( errno ) ); + + ctl->events[evt_idx].mmap_buf = NULL; + + /* Easier to just globally disable this, as it should */ + /* be a fairly uncommon case hopefully. 
*/ + if (_perf_event_vector.cmp_info.fast_counter_read) { + PAPIERROR("Can't mmap, disabling fast_counter_read\n"); + _perf_event_vector.cmp_info.fast_counter_read=0; + } + return PAPI_ESYS; + } + + SUBDBG( "Sample buffer for fd %d is located at %p\n", fd, buf_addr ); + + /* Set up the mmap buffer and its associated helpers */ + ctl->events[evt_idx].mmap_buf = (struct perf_counter_mmap_page *) buf_addr; + ctl->events[evt_idx].tail = 0; + ctl->events[evt_idx].mask = + ( ctl->events[evt_idx].nr_mmap_pages - 1 ) * getpagesize() - 1; + + return PAPI_OK; +} + + + +/* Open all events in the control state */ +static int +open_pe_events( pe_context_t *ctx, pe_control_t *ctl ) +{ + + int i, ret = PAPI_OK; + long pid; + + if (ctl->granularity==PAPI_GRN_SYS) { + pid = -1; + } + else { + pid = ctl->tid; + } + + for( i = 0; i < ctl->num_events; i++ ) { + + ctl->events[i].event_opened=0; + + /* set up the attr structure. */ + /* We don't set up all fields here */ + /* as some have already been set up previously. */ + + /* Handle the broken exclude_guest problem */ + /* libpfm4 sets this by default (PEBS events depend on it) */ + /* but on older kernels that dont know about exclude_guest */ + /* perf_event_open() will error out as a "reserved" */ + /* unknown bit is set to 1. */ + /* Do we need to also watch for exclude_host, exclude_idle */ + /* exclude_callchain*? 
*/ + if ((ctl->events[i].attr.exclude_guest) && + (exclude_guest_unsupported)) { + SUBDBG("Disabling exclude_guest in event %d\n",i); + ctl->events[i].attr.exclude_guest=0; + } + + /* group leader (event 0) is special */ + /* If we're multiplexed, everyone is a group leader */ + if (( i == 0 ) || (ctl->multiplexed)) { + ctl->events[i].attr.pinned = !ctl->multiplexed; + ctl->events[i].attr.disabled = 1; + ctl->events[i].group_leader_fd=-1; + ctl->events[i].attr.read_format = get_read_format( + ctl->multiplexed, + ctl->inherit, + !ctl->multiplexed ); + } else { + ctl->events[i].attr.pinned=0; + ctl->events[i].attr.disabled = 0; + ctl->events[i].group_leader_fd=ctl->events[0].event_fd; + ctl->events[i].attr.read_format = get_read_format( + ctl->multiplexed, + ctl->inherit, + 0 ); + } + + /* try to open */ + perf_event_dump_attr( + &ctl->events[i].attr, + pid, + ctl->events[i].cpu, + ctl->events[i].group_leader_fd, + 0 /* flags */ ); + + ctl->events[i].event_fd = sys_perf_event_open( + &ctl->events[i].attr, + pid, + ctl->events[i].cpu, + ctl->events[i].group_leader_fd, + 0 /* flags */ ); + + /* Try to match Linux errors to PAPI errors */ + if ( ctl->events[i].event_fd == -1 ) { + SUBDBG("sys_perf_event_open returned error " + "on event #%d. Error: %s\n", + i, strerror( errno ) ); + ret=map_perf_event_errors_to_papi(errno); + + goto open_pe_cleanup; + } + + SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d," + " group_leader/fd: %d, event_fd: %d," + " read_format: %"PRIu64"\n", + pid, ctl->events[i].cpu, + ctl->events[i].group_leader_fd, + ctl->events[i].event_fd, + ctl->events[i].attr.read_format); + + + /* in many situations the kernel will indicate we opened fine */ + /* yet things will fail later. So we need to double check */ + /* we actually can use the events we've set up. 
*/ + + /* This is not necessary if we are multiplexing, and in fact */ + /* we cannot do this properly if multiplexed because */ + /* PERF_EVENT_IOC_RESET does not reset the time running info */ + if (!ctl->multiplexed) { + ret = check_scheduability( ctx, ctl, i ); + + if ( ret != PAPI_OK ) { + /* the last event did open, so we need to */ + /* bump the counter before doing the cleanup */ + i++; + goto open_pe_cleanup; + } + } + ctl->events[i].event_opened=1; + } + + /* Now that we've successfully opened all of the events, do whatever */ + /* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */ + /* and so on. */ + + + /* Make things easier and give each event a mmap() buffer */ + /* Keeping separate tracking for rdpmc vs regular events */ + /* Would be a pain. Also perf always gives every event a */ + /* mmap buffer. */ + + for ( i = 0; i < ctl->num_events; i++ ) { + + /* Can't mmap() inherited events :( */ + if (ctl->inherit) { + ctl->events[i].nr_mmap_pages = 0; + ctl->events[i].mmap_buf = NULL; + } + else { + /* Just a guess at how many pages would make this */ + /* relatively efficient. */ + /* Note that it's "1 +" because of the need for a */ + /* control page, and the number following the "+" */ + /* must be a power of 2 (1, 4, 8, 16, etc) or zero. */ + /* This is required to optimize dealing with */ + /* circular buffer wrapping of the mapped pages. 
*/ + if (ctl->events[i].sampling) { + ctl->events[i].nr_mmap_pages = 1 + 2; + } + else if (_perf_event_vector.cmp_info.fast_counter_read) { + ctl->events[i].nr_mmap_pages = 1; + } + else { + ctl->events[i].nr_mmap_pages = 0; + } + + /* Set up the MMAP sample pages */ + if (ctl->events[i].nr_mmap_pages) { + set_up_mmap(ctl,i); + } else { + ctl->events[i].mmap_buf = NULL; + } + } + } + + for ( i = 0; i < ctl->num_events; i++ ) { + + /* If sampling is enabled, hook up signal handler */ + if (ctl->events[i].attr.sample_period) { + + ret = configure_fd_for_sampling( ctl, i ); + if ( ret != PAPI_OK ) { + /* We failed, and all of the fds are open */ + /* so we need to clean up all of them */ + i = ctl->num_events; + goto open_pe_cleanup; + } + } + } + + /* Set num_evts only if completely successful */ + ctx->state |= PERF_EVENTS_OPENED; + + return PAPI_OK; + +open_pe_cleanup: + /* We encountered an error, close up the fds we successfully opened. */ + /* We go backward in an attempt to close group leaders last, although */ + /* That's probably not strictly necessary. 
*/ + while ( i > 0 ) { + i--; + if (ctl->events[i].event_fd>=0) { + close( ctl->events[i].event_fd ); + ctl->events[i].event_opened=0; + } + } + + return ret; +} + +/* TODO: make code clearer -- vmw */ +static int +close_event( pe_event_info_t *event ) +{ + int munmap_error=0,close_error=0; + + if ( event->mmap_buf ) { + if (event->nr_mmap_pages==0) { + PAPIERROR("munmap and num pages is zero"); + } + if ( munmap ( event->mmap_buf, + event->nr_mmap_pages * getpagesize() ) ) { + PAPIERROR( "munmap of fd = %d returned error: %s", + event->event_fd, + strerror( errno ) ); + event->mmap_buf=NULL; + munmap_error=1; + } + } + if ( close( event->event_fd ) ) { + PAPIERROR( "close of fd = %d returned error: %s", + event->event_fd, strerror( errno ) ); + close_error=1; + } + + event->event_opened=0; + + if ((close_error || munmap_error)) { + return PAPI_ESYS; + } + + return 0; +} + +/* Close all of the opened events */ +static int +close_pe_events( pe_context_t *ctx, pe_control_t *ctl ) +{ + int i,result; + int num_closed=0; + int events_not_opened=0; + + /* should this be a more serious error? */ + if ( ctx->state & PERF_EVENTS_RUNNING ) { + SUBDBG("Closing without stopping first\n"); + } + + /* Close child events first */ + /* Is that necessary? 
-- vmw */ + for( i=0; inum_events; i++ ) { + if (ctl->events[i].event_opened) { + if (ctl->events[i].group_leader_fd!=-1) { + result=close_event(&ctl->events[i]); + if (result!=0) return result; + else num_closed++; + } + } + else { + events_not_opened++; + } + } + + /* Close the group leaders last */ + for( i=0; inum_events; i++ ) { + if (ctl->events[i].event_opened) { + if (ctl->events[i].group_leader_fd==-1) { + result=close_event(&ctl->events[i]); + if (result!=0) return result; + else num_closed++; + } + } + } + + if (ctl->num_events!=num_closed) { + if (ctl->num_events!=(num_closed+events_not_opened)) { + PAPIERROR("Didn't close all events: " + "Closed %d Not Opened: %d Expected %d", + num_closed,events_not_opened,ctl->num_events); + return PAPI_EBUG; + } + } + + ctl->num_events=0; + + ctx->state &= ~PERF_EVENTS_OPENED; + + return PAPI_OK; +} + + +/********************************************************************/ +/********************************************************************/ +/* Functions that are exported via the component interface */ +/********************************************************************/ +/********************************************************************/ + +/********************* DOMAIN RELATED *******************************/ + + +/* set the domain. */ +/* perf_events allows per-event control of this, */ +/* papi allows it to be set at the event level or at the event set level. */ +/* this will set the event set level domain values */ +/* but they only get used if no event level domain mask (u= or k=) */ +/* was specified. 
*/ +static int +_pe_set_domain( hwd_control_state_t *ctl, int domain) +{ + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + SUBDBG("old control domain %d, new domain %d\n", pe_ctl->domain,domain); + pe_ctl->domain = domain; + return PAPI_OK; +} + + +/********************* THREAD RELATED *******************************/ + + +/* Shutdown a thread */ +static int +_pe_shutdown_thread( hwd_context_t *ctx ) +{ + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + + pe_ctx->initialized=0; + + return PAPI_OK; +} + +/* Initialize a thread */ +static int +_pe_init_thread( hwd_context_t *hwd_ctx ) +{ + + pe_context_t *pe_ctx = ( pe_context_t *) hwd_ctx; + + /* clear the context structure and mark as initialized */ + memset( pe_ctx, 0, sizeof ( pe_context_t ) ); + pe_ctx->initialized=1; + pe_ctx->event_table=&perf_native_event_table; + pe_ctx->cidx=our_cidx; + + return PAPI_OK; +} + + + +/**************************** COUNTER RELATED *******************/ + + +/* reset the hardware counters */ +/* Note: PAPI_reset() does not necessarily call this */ +/* unless the events are actually running. */ +static int +_pe_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + int i, ret; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + ( void ) ctx; /*unused */ + + /* We need to reset all of the events, not just the group leaders */ + for( i = 0; i < pe_ctl->num_events; i++ ) { + ret = ioctl( pe_ctl->events[i].event_fd, + PERF_EVENT_IOC_RESET, NULL ); + if ( ret == -1 ) { + PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) " + "returned error, Linux says: %s", + pe_ctl->events[i].event_fd, + strerror( errno ) ); + return PAPI_ESYS; + } + } + + return PAPI_OK; +} + + +/* write (set) the hardware counters */ +/* Currently we do not support this. */ +static int +_pe_write( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long *from ) +{ + ( void ) ctx; /*unused */ + ( void ) ctl; /*unused */ + ( void ) from; /*unused */ + /* + * Counters cannot be written. 
Do we need to virtualize the + * counters so that they can be written, or perhaps modify code so that + * they can be written? FIXME ? + */ + + return PAPI_ENOSUPP; +} + +/* + * perf_event provides a complicated read interface. + * the info returned by read() varies depending on whether + * you have PERF_FORMAT_GROUP, PERF_FORMAT_TOTAL_TIME_ENABLED, + * PERF_FORMAT_TOTAL_TIME_RUNNING, or PERF_FORMAT_ID set + * + * To simplify things we just always ask for everything. This might + * lead to overhead when reading more than we need, but it makes the + * read code a lot simpler than the original implementation we had here. + * + * For more info on the layout see include/uapi/linux/perf_event.h + * + */ + + +/* When we read with rdpmc, we must read each counter individually */ +/* Because of this we don't need separate multiplexing support */ +/* This is all handled by mmap_read_self() */ +static int +_pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n", + ctx, ctl, events, flags); + + ( void ) flags; /*unused */ + ( void ) ctx; /*unused */ + int i; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + unsigned long long count, enabled, running, adjusted; + + /* we must read each counter individually */ + for ( i = 0; i < pe_ctl->num_events; i++ ) { + + count = mmap_read_self(pe_ctl->events[i].mmap_buf, + &enabled,&running); + + /* TODO: error checking? 
*/ + + /* Handle multiplexing case */ + if (enabled!=running) { + adjusted = (enabled * 128LL) / running; + adjusted = adjusted * count; + adjusted = adjusted / 128LL; + count = adjusted; + } + + pe_ctl->counts[i] = count; + } + /* point PAPI to the values we read */ + *events = pe_ctl->counts; + + SUBDBG("EXIT: *events: %p\n", *events); + + return PAPI_OK; +} + + +static int +_pe_read_multiplexed( pe_control_t *pe_ctl ) +{ + int i,ret=-1; + long long papi_pe_buffer[READ_BUFFER_SIZE]; + long long tot_time_running, tot_time_enabled, scale; + + /* perf_event does not support FORMAT_GROUP on multiplex */ + /* so we have to handle separate events when multiplexing */ + + for ( i = 0; i < pe_ctl->num_events; i++ ) { + + ret = read( pe_ctl->events[i].event_fd, + papi_pe_buffer, + sizeof ( papi_pe_buffer ) ); + if ( ret == -1 ) { + PAPIERROR("read returned an error: ", + strerror( errno )); + return PAPI_ESYS; + } + + /* We should read 3 64-bit values from the counter */ + if (ret<(signed)(3*sizeof(long long))) { + PAPIERROR("Error! short read"); + return PAPI_ESYS; + } + + SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", + pe_ctl->events[i].event_fd, + (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret); + SUBDBG("read: %lld %lld %lld\n", + papi_pe_buffer[0], + papi_pe_buffer[1], + papi_pe_buffer[2]); + + tot_time_enabled = papi_pe_buffer[1]; + tot_time_running = papi_pe_buffer[2]; + + SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * " + "tot_time_enabled %lld) / " + "tot_time_running %lld\n", + i, 0,papi_pe_buffer[0], + tot_time_enabled,tot_time_running); + + if (tot_time_running == tot_time_enabled) { + /* No scaling needed */ + pe_ctl->counts[i] = papi_pe_buffer[0]; + } else if (tot_time_running && tot_time_enabled) { + /* Scale to give better results */ + /* avoid truncation. */ + /* Why use 100? Would 128 be faster? 
*/ + scale = (tot_time_enabled * 100LL) / tot_time_running; + scale = scale * papi_pe_buffer[0]; + scale = scale / 100LL; + pe_ctl->counts[i] = scale; + } else { + /* This should not happen, but Phil reports it sometime does. */ + SUBDBG("perf_event kernel bug(?) count, enabled, " + "running: %lld, %lld, %lld\n", + papi_pe_buffer[0],tot_time_enabled, + tot_time_running); + + pe_ctl->counts[i] = papi_pe_buffer[0]; + } + } + return PAPI_OK; +} + +/* For cases where we can't group counters together */ +/* But must read them out individually */ +/* This includes when INHERIT is set, as well as various bugs */ + +static int +_pe_read_nogroup( pe_control_t *pe_ctl ) { + + int i,ret=-1; + long long papi_pe_buffer[READ_BUFFER_SIZE]; + + /* we must read each counter individually */ + for ( i = 0; i < pe_ctl->num_events; i++ ) { + ret = read( pe_ctl->events[i].event_fd, + papi_pe_buffer, + sizeof ( papi_pe_buffer ) ); + if ( ret == -1 ) { + PAPIERROR("read returned an error: ", + strerror( errno )); + return PAPI_ESYS; + } + + /* we should read one 64-bit value from each counter */ + if (ret!=sizeof(long long)) { + PAPIERROR("Error! 
short read"); + PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d", + pe_ctl->events[i].event_fd, + (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret); + return PAPI_ESYS; + } + + SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", + pe_ctl->events[i].event_fd, (long)pe_ctl->tid, + pe_ctl->events[i].cpu, ret); + SUBDBG("read: %lld\n",papi_pe_buffer[0]); + + pe_ctl->counts[i] = papi_pe_buffer[0]; + } + + return PAPI_OK; + +} + +static int +_pe_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n", + ctx, ctl, events, flags); + + ( void ) flags; /*unused */ + ( void ) ctx; /*unused */ + int i, j, ret = -1; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + long long papi_pe_buffer[READ_BUFFER_SIZE]; + + /* Handle fast case */ + if ((_perf_event_vector.cmp_info.fast_counter_read) && (!pe_ctl->inherit)) { + return _pe_rdpmc_read( ctx, ctl, events, flags); + } + + /* Handle case where we are multiplexing */ + if (pe_ctl->multiplexed) { + _pe_read_multiplexed(pe_ctl); + } + + /* Handle cases where we cannot use FORMAT GROUP */ + else if (bug_format_group() || pe_ctl->inherit) { + _pe_read_nogroup(pe_ctl); + } + + /* Handle common case where we are using FORMAT_GROUP */ + /* We assume only one group leader, in position 0 */ + + /* By reading the leader file descriptor, we get a series */ + /* of 64-bit values. The first is the total number of */ + /* events, followed by the counts for them. 
*/ + + else { + if (pe_ctl->events[0].group_leader_fd!=-1) { + PAPIERROR("Was expecting group leader"); + } + + ret = read( pe_ctl->events[0].event_fd, + papi_pe_buffer, + sizeof ( papi_pe_buffer ) ); + + if ( ret == -1 ) { + PAPIERROR("read returned an error: ", + strerror( errno )); + return PAPI_ESYS; + } + + /* we read 1 64-bit value (number of events) then */ + /* num_events more 64-bit values that hold the counts */ + if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) { + PAPIERROR("Error! short read"); + return PAPI_ESYS; + } + + SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", + pe_ctl->events[0].event_fd, + (long)pe_ctl->tid, pe_ctl->events[0].cpu, ret); + + for(j=0;jnum_events) { + PAPIERROR("Error! Wrong number of events"); + return PAPI_ESYS; + } + + /* put the count values in their proper location */ + for(i=0;inum_events;i++) { + pe_ctl->counts[i] = papi_pe_buffer[1+i]; + } + } + + /* point PAPI to the values we read */ + *events = pe_ctl->counts; + + SUBDBG("EXIT: *events: %p\n", *events); + + return PAPI_OK; +} + +#if (OBSOLETE_WORKAROUNDS==1) +/* On kernels before 2.6.33 the TOTAL_TIME_ENABLED and TOTAL_TIME_RUNNING */ +/* fields are always 0 unless the counter is disabled. So if we are on */ +/* one of these kernels, then we must disable events before reading. */ +/* Elsewhere though we disable multiplexing on kernels before 2.6.34 */ +/* so maybe this isn't even necessary. 
*/ +static int +_pe_read_bug_sync( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + + ( void ) flags; /*unused */ + int i, ret = -1; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + int result; + + if ( pe_ctx->state & PERF_EVENTS_RUNNING ) { + for ( i = 0; i < pe_ctl->num_events; i++ ) { + /* disable only the group leaders */ + if ( pe_ctl->events[i].group_leader_fd == -1 ) { + ret = ioctl( pe_ctl->events[i].event_fd, + PERF_EVENT_IOC_DISABLE, NULL ); + if ( ret == -1 ) { + PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) " + "returned an error: ", strerror( errno )); + return PAPI_ESYS; + } + } + } + } + + result=_pe_read( ctx, ctl, events, flags ); + + /* If we disabled the counters due to the sync_read_bug(), */ + /* then we need to re-enable them now. */ + + if ( pe_ctx->state & PERF_EVENTS_RUNNING ) { + for ( i = 0; i < pe_ctl->num_events; i++ ) { + if ( pe_ctl->events[i].group_leader_fd == -1 ) { + /* this should refresh any overflow counters too */ + ret = ioctl( pe_ctl->events[i].event_fd, + PERF_EVENT_IOC_ENABLE, NULL ); + if ( ret == -1 ) { + /* Should never happen */ + PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) returned an error: ", + strerror( errno )); + return PAPI_ESYS; + } + } + } + } + + return result; +} + +#endif + +/* Start counting events */ +static int +_pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + int ret; + int i; + int did_something = 0; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + /* Reset the counters first. Is this necessary? 
*/ + ret = _pe_reset( pe_ctx, pe_ctl ); + if ( ret ) { + return ret; + } + + /* Enable all of the group leaders */ + /* All group leaders have a group_leader_fd of -1 */ + for( i = 0; i < pe_ctl->num_events; i++ ) { + if (pe_ctl->events[i].group_leader_fd == -1) { + SUBDBG("ioctl(enable): fd: %d\n", + pe_ctl->events[i].event_fd); + ret=ioctl( pe_ctl->events[i].event_fd, + PERF_EVENT_IOC_ENABLE, NULL) ; + + /* ioctls always return -1 on failure */ + if (ret == -1) { + PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed"); + return PAPI_ESYS; + } + + did_something++; + } + } + + if (!did_something) { + PAPIERROR("Did not enable any counters"); + return PAPI_EBUG; + } + + pe_ctx->state |= PERF_EVENTS_RUNNING; + + return PAPI_OK; + +} + +/* Stop all of the counters */ +static int +_pe_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + SUBDBG( "ENTER: ctx: %p, ctl: %p\n", ctx, ctl); + + int ret; + int i; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + /* Just disable the group leaders */ + for ( i = 0; i < pe_ctl->num_events; i++ ) { + if ( pe_ctl->events[i].group_leader_fd == -1 ) { + ret=ioctl( pe_ctl->events[i].event_fd, + PERF_EVENT_IOC_DISABLE, NULL); + if ( ret == -1 ) { + PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) " + "returned error, Linux says: %s", + pe_ctl->events[i].event_fd, strerror( errno ) ); + return PAPI_EBUG; + } + } + } + + pe_ctx->state &= ~PERF_EVENTS_RUNNING; + + SUBDBG( "EXIT:\n"); + + return PAPI_OK; +} + + + + + +/*********************** CONTROL STATE RELATED *******************/ + + +/* This function clears the current contents of the control structure and + updates it with whatever resources are allocated for all the native events + in the native info structure array. 
*/ + +static int +_pe_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, hwd_context_t *ctx ) +{ + SUBDBG( "ENTER: ctl: %p, native: %p, count: %d, ctx: %p\n", + ctl, native, count, ctx); + int i; + int j; + int ret; + int skipped_events=0; + struct native_event_t *ntv_evt; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + /* close all of the existing fds and start over again */ + /* In theory we could have finer-grained control and know if */ + /* things were changed, but it's easier to tear things down and rebuild. */ + close_pe_events( pe_ctx, pe_ctl ); + + /* Calling with count==0 should be OK, it's how things are deallocated */ + /* when an eventset is destroyed. */ + if ( count == 0 ) { + SUBDBG( "EXIT: Called with count == 0\n" ); + return PAPI_OK; + } + + /* set up all the events */ + for( i = 0; i < count; i++ ) { + if ( native ) { + /* get the native event pointer used for this papi event */ + int ntv_idx = _papi_hwi_get_ntv_idx((unsigned)(native[i].ni_papi_code)); + if (ntv_idx < -1) { + SUBDBG("papi_event_code: %#x known by papi but not by the component\n", native[i].ni_papi_code); + continue; + } + /* if native index is -1, then we have an event without a mask and need to find the right native index to use */ + if (ntv_idx == -1) { + /* find the native event index we want by matching for the right papi event code */ + for (j=0 ; jevent_table->num_native_events ; j++) { + if (pe_ctx->event_table->native_events[j].papi_event_code == native[i].ni_papi_code) { + ntv_idx = j; + } + } + } + + /* if native index is still negative, we did not find event we wanted so just return error */ + if (ntv_idx < 0) { + SUBDBG("papi_event_code: %#x not found in native event tables\n", native[i].ni_papi_code); + continue; + } + + /* this native index is positive so there was a mask with the event, the ntv_idx identifies which native event to use */ + ntv_evt = (struct native_event_t 
*)(&(pe_ctx->event_table->native_events[ntv_idx])); + SUBDBG("ntv_evt: %p\n", ntv_evt); + + SUBDBG("i: %d, pe_ctx->event_table->num_native_events: %d\n", i, pe_ctx->event_table->num_native_events); + + /* Move this events hardware config values and other attributes to the perf_events attribute structure */ + memcpy (&pe_ctl->events[i].attr, &ntv_evt->attr, sizeof(perf_event_attr_t)); + + /* may need to update the attribute structure with information from event set level domain settings (values set by PAPI_set_domain) */ + /* only done if the event mask which controls each counting domain was not provided */ + + /* get pointer to allocated name, will be NULL when adding preset events to event set */ + char *aName = ntv_evt->allocated_name; + if ((aName == NULL) || (strstr(aName, ":u=") == NULL)) { + SUBDBG("set exclude_user attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_user, !(pe_ctl->domain & PAPI_DOM_USER)); + pe_ctl->events[i].attr.exclude_user = !(pe_ctl->domain & PAPI_DOM_USER); + } + if ((aName == NULL) || (strstr(aName, ":k=") == NULL)) { + SUBDBG("set exclude_kernel attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_kernel, !(pe_ctl->domain & PAPI_DOM_KERNEL)); + pe_ctl->events[i].attr.exclude_kernel = !(pe_ctl->domain & PAPI_DOM_KERNEL); + } + + // libpfm4 supports mh (monitor host) and mg (monitor guest) event masks + // perf_events supports exclude_hv and exclude_idle attributes + // PAPI_set_domain supports PAPI_DOM_SUPERVISOR and PAPI_DOM_OTHER domain attributes + // not sure how these perf_event attributes, and PAPI domain attributes relate to each other + // if that can be figured out then there should probably be code here to set some perf_events attributes based on what was set in a PAPI_set_domain call + // the code sample below is one possibility +// if (strstr(ntv_evt->allocated_name, ":mg=") == NULL) { +// SUBDBG("set exclude_hv 
attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_hv, !(pe_ctl->domain & PAPI_DOM_SUPERVISOR)); +// pe_ctl->events[i].attr.exclude_hv = !(pe_ctl->domain & PAPI_DOM_SUPERVISOR); +// } + + + // set the cpu number provided with an event mask if there was one (will be -1 if mask not provided) + pe_ctl->events[i].cpu = ntv_evt->cpu; + // if cpu event mask not provided, then set the cpu to use to what may have been set on call to PAPI_set_opt (will still be -1 if not called) + if (pe_ctl->events[i].cpu == -1) { + pe_ctl->events[i].cpu = pe_ctl->cpu; + } + } else { + /* This case happens when called from _pe_set_overflow and _pe_ctl */ + /* Those callers put things directly into the pe_ctl structure so it is already set for the open call */ + } + + /* Copy the inherit flag into the attribute block that will be passed to the kernel */ + pe_ctl->events[i].attr.inherit = pe_ctl->inherit; + + /* Set the position in the native structure */ + /* We just set up events linearly */ + if ( native ) { + native[i].ni_position = i; + SUBDBG( "&native[%d]: %p, ni_papi_code: %#x, ni_event: %#x, ni_position: %d, ni_owners: %d\n", + i, &(native[i]), native[i].ni_papi_code, native[i].ni_event, native[i].ni_position, native[i].ni_owners); + } + } + + if (count <= skipped_events) { + SUBDBG("EXIT: No events to count, they all contained invalid umasks\n"); + return PAPI_ENOEVNT; + } + + pe_ctl->num_events = count - skipped_events; + + /* actually open the events */ + ret = open_pe_events( pe_ctx, pe_ctl ); + if ( ret != PAPI_OK ) { + SUBDBG("EXIT: open_pe_events returned: %d\n", ret); + /* Restore values ? 
*/ + return ret; + } + + SUBDBG( "EXIT: PAPI_OK\n" ); + return PAPI_OK; +} + +/* Set various options on a control state */ +static int +_pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + int ret; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = NULL; + + switch ( code ) { + case PAPI_MULTIPLEX: + pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state ); + ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain, + pe_ctl->granularity, + 1, pe_ctl->inherit ); + if (ret != PAPI_OK) { + return ret; + } + + /* looks like we are allowed, so set multiplexed attribute */ + pe_ctl->multiplexed = 1; + ret = _pe_update_control_state( pe_ctl, NULL, + pe_ctl->num_events, pe_ctx ); + if (ret != PAPI_OK) { + pe_ctl->multiplexed = 0; + } + return ret; + + case PAPI_ATTACH: + pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state ); + ret = check_permissions( option->attach.tid, pe_ctl->cpu, + pe_ctl->domain, pe_ctl->granularity, + pe_ctl->multiplexed, + pe_ctl->inherit ); + if (ret != PAPI_OK) { + return ret; + } + + pe_ctl->tid = option->attach.tid; + + /* If events have been already been added, something may */ + /* have been done to the kernel, so update */ + ret =_pe_update_control_state( pe_ctl, NULL, + pe_ctl->num_events, pe_ctx); + + return ret; + + case PAPI_DETACH: + pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state ); + + pe_ctl->tid = 0; + return PAPI_OK; + + case PAPI_CPU_ATTACH: + pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state ); + ret = check_permissions( pe_ctl->tid, option->cpu.cpu_num, + pe_ctl->domain, pe_ctl->granularity, + pe_ctl->multiplexed, + pe_ctl->inherit ); + if (ret != PAPI_OK) { + return ret; + } + /* looks like we are allowed so set cpu number */ + + /* this tells the kernel not to count for a thread */ + /* should we warn if we try to set both? perf_event */ + /* will reject it. 
*/ + pe_ctl->tid = -1; + + pe_ctl->cpu = option->cpu.cpu_num; + + return PAPI_OK; + + case PAPI_DOMAIN: + pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state ); + ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, + option->domain.domain, + pe_ctl->granularity, + pe_ctl->multiplexed, + pe_ctl->inherit ); + if (ret != PAPI_OK) { + return ret; + } + /* looks like we are allowed, so set event set level counting domains */ + pe_ctl->domain = option->domain.domain; + return PAPI_OK; + + case PAPI_GRANUL: + pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state ); + + /* FIXME: we really don't support this yet */ + + switch ( option->granularity.granularity ) { + case PAPI_GRN_PROCG: + case PAPI_GRN_SYS_CPU: + case PAPI_GRN_PROC: + return PAPI_ECMP; + + /* Currently we only support thread and CPU granularity */ + case PAPI_GRN_SYS: + pe_ctl->granularity=PAPI_GRN_SYS; + pe_ctl->cpu=_papi_getcpu(); + break; + + case PAPI_GRN_THR: + pe_ctl->granularity=PAPI_GRN_THR; + break; + + + default: + return PAPI_EINVAL; + } + return PAPI_OK; + + case PAPI_INHERIT: + pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state ); + ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain, + pe_ctl->granularity, pe_ctl->multiplexed, + option->inherit.inherit ); + if (ret != PAPI_OK) { + return ret; + } + /* looks like we are allowed, so set the requested inheritance */ + if (option->inherit.inherit) { + /* children will inherit counters */ + pe_ctl->inherit = 1; + } else { + /* children won't inherit counters */ + pe_ctl->inherit = 0; + } + return PAPI_OK; + + case PAPI_DATA_ADDRESS: + return PAPI_ENOSUPP; +#if 0 + pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state); + ret = set_default_domain( pe_ctl, option->address_range.domain ); + if ( ret != PAPI_OK ) { + return ret; + } + set_drange( pe_ctx, pe_ctl, option ); + return PAPI_OK; +#endif + case PAPI_INSTR_ADDRESS: + return PAPI_ENOSUPP; +#if 0 + pe_ctl = (pe_control_t *) 
(option->address_range.ESI->ctl_state); + ret = set_default_domain( pe_ctl, option->address_range.domain ); + if ( ret != PAPI_OK ) { + return ret; + } + set_irange( pe_ctx, pe_ctl, option ); + return PAPI_OK; +#endif + + case PAPI_DEF_ITIMER: + /* What should we be checking for here? */ + /* This seems like it should be OS-specific not component */ + /* specific. */ + + return PAPI_OK; + + case PAPI_DEF_MPX_NS: + /* Defining a given ns per set is not current supported */ + return PAPI_ENOSUPP; + + case PAPI_DEF_ITIMER_NS: + /* We don't support this... */ + return PAPI_OK; + + default: + return PAPI_ENOSUPP; + } +} + + +/* Initialize a new control state */ +static int +_pe_init_control_state( hwd_control_state_t *ctl ) +{ + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + /* clear the contents */ + memset( pe_ctl, 0, sizeof ( pe_control_t ) ); + + /* Set the domain */ + _pe_set_domain( ctl, _perf_event_vector.cmp_info.default_domain ); + + /* default granularity */ + pe_ctl->granularity= _perf_event_vector.cmp_info.default_granularity; + + /* overflow signal */ + pe_ctl->overflow_signal=_perf_event_vector.cmp_info.hardware_intr_sig; + + pe_ctl->cidx=our_cidx; + + /* Set cpu number in the control block to show events */ + /* are not tied to specific cpu */ + pe_ctl->cpu = -1; + + return PAPI_OK; +} + + +/****************** EVENT NAME HANDLING CODE *****************/ + +static int +_pe_ntv_enum_events( unsigned int *PapiEventCode, int modifier ) +{ + return _pe_libpfm4_ntv_enum_events(PapiEventCode, modifier, our_cidx, + &perf_native_event_table); +} + +static int +_pe_ntv_name_to_code( const char *name, unsigned int *event_code) +{ + return _pe_libpfm4_ntv_name_to_code(name,event_code, our_cidx, + &perf_native_event_table); +} + +static int +_pe_ntv_code_to_name(unsigned int EventCode, + char *ntv_name, int len) +{ + return _pe_libpfm4_ntv_code_to_name(EventCode, + ntv_name, len, + &perf_native_event_table); +} + +static int +_pe_ntv_code_to_descr( unsigned int 
EventCode, + char *ntv_descr, int len) +{ + + return _pe_libpfm4_ntv_code_to_descr(EventCode,ntv_descr,len, + &perf_native_event_table); +} + +static int +_pe_ntv_code_to_info(unsigned int EventCode, + PAPI_event_info_t *info) { + + return _pe_libpfm4_ntv_code_to_info(EventCode, info, + &perf_native_event_table); +} + + +/*********************** SAMPLING / PROFILING *******************/ + + +/* Find a native event specified by a profile index */ +static int +find_profile_index( EventSetInfo_t *ESI, int evt_idx, int *flags, + unsigned int *native_index, int *profile_index ) +{ + int pos, esi_index, count; + + for ( count = 0; count < ESI->profile.event_counter; count++ ) { + esi_index = ESI->profile.EventIndex[count]; + pos = ESI->EventInfoArray[esi_index].pos[0]; + + if ( pos == evt_idx ) { + *profile_index = count; + *native_index = ESI->NativeInfoArray[pos].ni_event & + PAPI_NATIVE_AND_MASK; + *flags = ESI->profile.flags; + SUBDBG( "Native event %d is at profile index %d, flags %d\n", + *native_index, *profile_index, *flags ); + return PAPI_OK; + } + } + PAPIERROR( "wrong count: %d vs. ESI->profile.event_counter %d", + count, ESI->profile.event_counter ); + return PAPI_EBUG; +} + + +/* What exactly does this do? 
*/ +static int +process_smpl_buf( int evt_idx, ThreadInfo_t **thr, int cidx ) +{ + int ret, flags, profile_index; + unsigned native_index; + pe_control_t *ctl; + + ret = find_profile_index( ( *thr )->running_eventset[cidx], evt_idx, + &flags, &native_index, &profile_index ); + if ( ret != PAPI_OK ) { + return ret; + } + + ctl= (*thr)->running_eventset[cidx]->ctl_state; + + mmap_read( cidx, thr, &(ctl->events[evt_idx]), profile_index ); + + return PAPI_OK; +} + +/* + * This function is used when hardware overflows are working or when + * software overflows are forced + */ + +static void +_pe_dispatch_timer( int n, hwd_siginfo_t *info, void *uc) +{ + ( void ) n; /*unused */ + _papi_hwi_context_t hw_context; + int found_evt_idx = -1, fd = info->si_fd; + caddr_t address; + ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 ); + int i; + pe_control_t *ctl; + int cidx = _perf_event_vector.cmp_info.CmpIdx; + + if ( thread == NULL ) { + PAPIERROR( "thread == NULL in _papi_pe_dispatch_timer for fd %d!", fd ); + return; + } + + if ( thread->running_eventset[cidx] == NULL ) { + PAPIERROR( "thread->running_eventset == NULL in " + "_papi_pe_dispatch_timer for fd %d!",fd ); + return; + } + + if ( thread->running_eventset[cidx]->overflow.flags == 0 ) { + PAPIERROR( "thread->running_eventset->overflow.flags == 0 in " + "_papi_pe_dispatch_timer for fd %d!", fd ); + return; + } + + hw_context.si = info; + hw_context.ucontext = ( hwd_ucontext_t * ) uc; + + if ( thread->running_eventset[cidx]->overflow.flags & + PAPI_OVERFLOW_FORCE_SW ) { + address = GET_OVERFLOW_ADDRESS( hw_context ); + _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context, + address, NULL, 0, + 0, &thread, cidx ); + return; + } + + if ( thread->running_eventset[cidx]->overflow.flags != + PAPI_OVERFLOW_HARDWARE ) { + PAPIERROR( "thread->running_eventset->overflow.flags " + "is set to something other than " + "PAPI_OVERFLOW_HARDWARE or " + "PAPI_OVERFLOW_FORCE_SW for fd %d (%#x)", + fd, + 
thread->running_eventset[cidx]->overflow.flags); + } + + /* convoluted way to get ctl */ + ctl= thread->running_eventset[cidx]->ctl_state; + + /* See if the fd is one that's part of the this thread's context */ + for( i=0; i < ctl->num_events; i++ ) { + if ( fd == ctl->events[i].event_fd ) { + found_evt_idx = i; + break; + } + } + + if ( found_evt_idx == -1 ) { + PAPIERROR( "Unable to find fd %d among the open event fds " + "_papi_hwi_dispatch_timer!", fd ); + return; + } + + if (ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ) == -1 ) { + PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) failed"); + } + + if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) && + !( thread->running_eventset[cidx]->profile.flags & + PAPI_PROFIL_FORCE_SW ) ) { + process_smpl_buf( found_evt_idx, &thread, cidx ); + } + else { + uint64_t ip; + unsigned int head; + pe_event_info_t *pe = &(ctl->events[found_evt_idx]); + unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize( ); + + /* + * Read up the most recent IP from the sample in the mmap buffer. To + * do this, we make the assumption that all of the records in the + * mmap buffer are the same size, and that they all contain the IP as + * their only record element. This means that we can use the + * data_head element from the user page and move backward one record + * from that point and read the data. Since we don't actually need + * to access the header of the record, we can just subtract 8 (size + * of the IP) from data_head and read up that word from the mmap + * buffer. After we subtract 8, we account for mmap buffer wrapping + * by AND'ing this offset with the buffer mask. + */ + head = mmap_read_head( pe ); + + if ( head == 0 ) { + PAPIERROR( "Attempting to access memory " + "which may be inaccessable" ); + return; + } + ip = *( uint64_t * ) ( data + ( ( head - 8 ) & pe->mask ) ); + /* + * Update the tail to the current head pointer. 
+ * + * Note: that if we were to read the record at the tail pointer, + * rather than the one at the head (as you might otherwise think + * would be natural), we could run into problems. Signals don't + * stack well on Linux, particularly if not using RT signals, and if + * they come in rapidly enough, we can lose some. Overtime, the head + * could catch up to the tail and monitoring would be stopped, and + * since no more signals are coming in, this problem will never be + * resolved, resulting in a complete loss of overflow notification + * from that point on. So the solution we use here will result in + * only the most recent IP value being read every time there are two + * or more samples in the buffer (for that one overflow signal). But + * the handler will always bring up the tail, so the head should + * never run into the tail. + */ + mmap_write_tail( pe, head ); + + /* + * The fourth parameter is supposed to be a vector of bits indicating + * the overflowed hardware counters, but it's not really clear that + * it's useful, because the actual hardware counters used are not + * exposed to the PAPI user. For now, I'm just going to set the bit + * that indicates which event register in the array overflowed. The + * result is that the overflow vector will not be identical to the + * perfmon implementation, and part of that is due to the fact that + * which hardware register is actually being used is opaque at the + * user level (the kernel event dispatcher hides that info). + */ + + _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context, + ( caddr_t ) ( unsigned long ) ip, + NULL, ( 1 << found_evt_idx ), 0, + &thread, cidx ); + + } + + /* Restart the counters */ + if (ioctl( fd, PERF_EVENT_IOC_REFRESH, PAPI_REFRESH_VALUE ) == -1) { + PAPIERROR( "overflow refresh failed", 0 ); + } +} + +/* Stop profiling */ +/* FIXME: does this actually stop anything? 
*/ +/* It looks like it is only actually called from PAPI_stop() */ +/* So the event will be destroyed soon after anyway. */ +static int +_pe_stop_profiling( ThreadInfo_t *thread, EventSetInfo_t *ESI ) +{ + int i, ret = PAPI_OK; + pe_control_t *ctl; + int cidx; + + ctl=ESI->ctl_state; + + cidx=ctl->cidx; + + /* Loop through all of the events and process those which have mmap */ + /* buffers attached. */ + for ( i = 0; i < ctl->num_events; i++ ) { + /* Use the mmap_buf field as an indicator */ + /* of this fd being used for profiling. */ + if ( ctl->events[i].profiling ) { + /* Process any remaining samples in the sample buffer */ + ret = process_smpl_buf( i, &thread, cidx ); + if ( ret ) { + PAPIERROR( "process_smpl_buf returned error %d", ret ); + return ret; + } + ctl->events[i].profiling=0; + } + } + + return ret; +} + +/* Set up an event to cause overflow */ +/* If threshold==0 then disable overflow for that event */ +static int +_pe_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold ) +{ + SUBDBG("ENTER: ESI: %p, EventIndex: %d, threshold: %d\n", + ESI, EventIndex, threshold); + + pe_context_t *ctx; + pe_control_t *ctl = (pe_control_t *) ( ESI->ctl_state ); + int i, evt_idx, found_non_zero_sample_period = 0, retval = PAPI_OK; + int cidx; + + cidx = ctl->cidx; + ctx = ( pe_context_t *) ( ESI->master->context[cidx] ); + + /* pos[0] is the first native event */ + /* derived events might be made up of multiple native events */ + evt_idx = ESI->EventInfoArray[EventIndex].pos[0]; + + SUBDBG("Attempting to set overflow for index %d (%d) of EventSet %d\n", + evt_idx,EventIndex,ESI->EventSetIndex); + + if (evt_idx<0) { + SUBDBG("EXIT: evt_idx: %d\n", evt_idx); + return PAPI_EINVAL; + } + + /* It's an error to disable overflow if it wasn't set in the */ + /* first place. 
*/ + if (( threshold == 0 ) && + ( ctl->events[evt_idx].attr.sample_period == 0 ) ) { + SUBDBG("EXIT: PAPI_EINVAL, Tried to clear " + "sample threshold when it was not set\n"); + return PAPI_EINVAL; + } + + /* Set the sample period to threshold */ + ctl->events[evt_idx].attr.sample_period = threshold; + + if (threshold == 0) { + ctl->events[evt_idx].sampling = 0; + } + else { + ctl->events[evt_idx].sampling = 1; + + /* Setting wakeup_events to one means issue a wakeup on every */ + /* counter overflow (not mmap page overflow). */ + ctl->events[evt_idx].attr.wakeup_events = 1; + /* We need the IP to pass to the overflow handler */ + ctl->events[evt_idx].attr.sample_type = PERF_SAMPLE_IP; + } + + + /* Check to see if any events in the EventSet are setup to sample */ + /* Do we actually handle multiple overflow events at once? --vmw */ + for ( i = 0; i < ctl->num_events; i++ ) { + if ( ctl->events[i].attr.sample_period ) { + found_non_zero_sample_period = 1; + break; + } + } + + if ( found_non_zero_sample_period ) { + /* turn on internal overflow flag for this event set */ + ctl->overflow = 1; + + /* Enable the signal handler */ + retval = _papi_hwi_start_signal( + ctl->overflow_signal, + 1, ctl->cidx ); + if (retval != PAPI_OK) { + SUBDBG("Call to _papi_hwi_start_signal " + "returned: %d\n", retval); + } + } else { + + /* turn off internal overflow flag for this event set */ + ctl->overflow = 0; + + /* Remove the signal handler, if there are no remaining */ + /* non-zero sample_periods set */ + retval = _papi_hwi_stop_signal(ctl->overflow_signal); + if ( retval != PAPI_OK ) { + SUBDBG("Call to _papi_hwi_stop_signal " + "returned: %d\n", retval); + return retval; + } + } + + retval = _pe_update_control_state( ctl, NULL, + ((pe_control_t *)(ESI->ctl_state) )->num_events, + ctx ); + + SUBDBG("EXIT: return: %d\n", retval); + + return retval; +} + +/* Enable/disable profiling */ +/* If threshold is zero, we disable */ +static int +_pe_set_profile( EventSetInfo_t *ESI, int 
EventIndex, int threshold ) +{ + int ret; + int evt_idx; + pe_control_t *ctl = ( pe_control_t *) ( ESI->ctl_state ); + + /* Since you can't profile on a derived event, */ + /* the event is always the first and only event */ + /* in the native event list. */ + evt_idx = ESI->EventInfoArray[EventIndex].pos[0]; + + /* If threshold is zero we want to *disable* */ + /* profiling on the event */ + if ( threshold == 0 ) { +// SUBDBG( "MUNMAP(%p,%"PRIu64")\n", +// ctl->events[evt_idx].mmap_buf, +// ( uint64_t ) ctl->events[evt_idx].nr_mmap_pages * +// getpagesize() ); + +// if ( ctl->events[evt_idx].mmap_buf ) { +// munmap( ctl->events[evt_idx].mmap_buf, +// ctl->events[evt_idx].nr_mmap_pages * +// getpagesize() ); +// } +// ctl->events[evt_idx].mmap_buf = NULL; +// ctl->events[evt_idx].nr_mmap_pages = 0; + + /* no longer sample on IP */ + ctl->events[evt_idx].attr.sample_type &= ~PERF_SAMPLE_IP; + + /* Clear any residual overflow flags */ + /* ??? old warning says "This should be handled somewhere else" */ + ESI->state &= ~( PAPI_OVERFLOWING ); + ESI->overflow.flags &= ~( PAPI_OVERFLOW_HARDWARE ); + + ctl->events[evt_idx].profiling=0; + + } else { + + /* Otherwise, we are *enabling* profiling */ + + /* Look up the native event code */ + + if ( ESI->profile.flags & (PAPI_PROFIL_DATA_EAR | + PAPI_PROFIL_INST_EAR)) { + /* Not supported yet... */ + return PAPI_ENOSUPP; + } + + if ( ESI->profile.flags & PAPI_PROFIL_RANDOM ) { + /* This requires an ability to randomly alter the */ + /* sample_period within a given range. */ + /* Linux currently does not have this ability. 
FIXME */ + return PAPI_ENOSUPP; + } + ctl->events[evt_idx].profiling=1; + } + + ret = _pe_set_overflow( ESI, EventIndex, threshold ); + if ( ret != PAPI_OK ) return ret; + + return PAPI_OK; +} + + +/************ INITIALIZATION / SHUTDOWN CODE *********************/ + + +/* Shutdown the perf_event component */ +static int +_pe_shutdown_component( void ) { + + /* deallocate our event table */ + _pe_libpfm4_shutdown(&_perf_event_vector, &perf_native_event_table); + + /* Shutdown libpfm4 */ + _papi_libpfm4_shutdown(&_perf_event_vector); + + return PAPI_OK; +} + + +/* Check the mmap page for rdpmc support */ +static int _pe_detect_rdpmc(void) { + + struct perf_event_attr pe; + int fd,rdpmc_exists=1; + void *addr; + struct perf_event_mmap_page *our_mmap; + int page_size=getpagesize(); + +#if defined(__i386__) || defined (__x86_64__) +#else + /* We only support rdpmc on x86 for now */ + return 0; +#endif + + /* There were various subtle bugs in rdpmc support before */ + /* the Linux 4.13 release. */ + if (_papi_os_info.os_version < LINUX_VERSION(4,13,0)) { + return 0; + } + + /* Create a fake instructions event so we can read a mmap page */ + memset(&pe,0,sizeof(struct perf_event_attr)); + + pe.type=PERF_TYPE_HARDWARE; + pe.size=sizeof(struct perf_event_attr); + pe.config=PERF_COUNT_HW_INSTRUCTIONS; + pe.exclude_kernel=1; + pe.disabled=1; + + perf_event_dump_attr(&pe,0,-1,-1,0); + fd=sys_perf_event_open(&pe,0,-1,-1,0); + + /* This hopefully won't happen? 
*/ + /* Though there is a chance this is the first */ + /* attempt to open a perf_event */ + if (fd<0) { + SUBDBG("FAILED perf_event_open trying to detect rdpmc support"); + return PAPI_ESYS; + } + + /* create the mmap page */ + addr=mmap(NULL, page_size, PROT_READ, MAP_SHARED,fd,0); + if (addr == MAP_FAILED) { + SUBDBG("FAILED mmap trying to detect rdpmc support"); + close(fd); + return PAPI_ESYS; + } + + /* get the rdpmc info from the mmap page */ + our_mmap=(struct perf_event_mmap_page *)addr; + + /* If cap_usr_rdpmc bit is set to 1, we have support! */ + if (our_mmap->cap_usr_rdpmc!=0) { + rdpmc_exists=1; + } + else if ((!our_mmap->cap_bit0_is_deprecated) && (our_mmap->cap_bit0)) { + /* 3.4 to 3.11 had somewhat broken rdpmc support */ + /* This convoluted test is the "official" way to detect this */ + /* To make things easier we don't support these kernels */ + rdpmc_exists=0; + } + else { + rdpmc_exists=0; + } + + /* close the fake event */ + munmap(addr,page_size); + close(fd); + + return rdpmc_exists; + +} + + +static int +_pe_handle_paranoid(papi_vector_t *component) { + + FILE *fff; + int paranoid_level; + int retval; + + /* This is the official way to detect if perf_event support exists */ + /* The file is called perf_counter_paranoid on 2.6.31 */ + /* currently we are lazy and do not support 2.6.31 kernels */ + + fff=fopen("/proc/sys/kernel/perf_event_paranoid","r"); + if (fff==NULL) { + strncpy(component->cmp_info.disabled_reason, + "perf_event support not detected",PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + + /* 3 (vendor patch) means completely disabled */ + /* 2 means no kernel measurements allowed */ + /* 1 means normal counter access */ + /* 0 means you can access CPU-specific data */ + /* -1 means no restrictions */ + retval=fscanf(fff,"%d",&paranoid_level); + if (retval!=1) fprintf(stderr,"Error reading paranoid level\n"); + fclose(fff); + + if (paranoid_level==3) { + strncpy(component->cmp_info.disabled_reason, + "perf_event support disabled by 
Linux with paranoid=3",PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + + if ((paranoid_level==2) && (getuid()!=0)) { + SUBDBG("/proc/sys/kernel/perf_event_paranoid prohibits kernel counts"); + component->cmp_info.available_domains &=~PAPI_DOM_KERNEL; + } + + return PAPI_OK; + +} + +#if (OBSOLETE_WORKAROUNDS==1) +/* Version based workarounds */ +/* perf_event has many bugs */ +/* PAPI has to work around a number of them, but for the most part */ +/* all of those were fixed by Linux 2.6.34 (May 2010) */ +/* Unfortunately it's not easy to auto-detect for these so we were */ +/* going by uname() version number */ +/* To complicate things, some vendors like Redhat backport fixes */ +/* So even though their kernel reports as 2.6.32 it has the fixes */ +/* As of PAPI 5.6 we're going to default to disabling the workarounds */ +/* I'm going to leave them here, ifdefed out, for the time being */ +static int +_pe_version_workarounds(papi_vector_t *component) { + + /* Kernel multiplexing is broken prior to kernel 2.6.34 */ + /* The fix was probably git commit: */ + /* 45e16a6834b6af098702e5ea6c9a40de42ff77d8 */ + if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) { + component->cmp_info.kernel_multiplex = 0; + component->cmp_info.num_mpx_cntrs = PAPI_MAX_SW_MPX_EVENTS; + } + + /* Check that processor is supported */ + if (processor_supported(_papi_hwi_system_info.hw_info.vendor, + _papi_hwi_system_info.hw_info.cpuid_family)!=PAPI_OK) { + fprintf(stderr,"warning, your processor is unsupported\n"); + /* should not return error, as software events should still work */ + } + + /* Update the default function pointers */ + /* Based on features/bugs */ + if (bug_sync_read()) { + component->read = _pe_read_bug_sync; + } + + return PAPI_OK; + +} + +#endif + + + + +/* Initialize the perf_event component */ +static int +_pe_init_component( int cidx ) +{ + + int retval; + + our_cidx=cidx; + + /* Update component behavior based on paranoid setting */ + 
retval=_pe_handle_paranoid(_papi_hwd[cidx]); + if (retval!=PAPI_OK) return retval; + +#if (OBSOLETE_WORKAROUNDS==1) + /* Handle any kernel version related workarounds */ + _pe_version_workarounds(_papi_hwd[cidx]); +#endif + + /* Setup mmtimers, if appropriate */ + retval=mmtimer_setup(); + if (retval) { + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Error initializing mmtimer",PAPI_MAX_STR_LEN); + return retval; + } + + /* Set the overflow signal */ + _papi_hwd[cidx]->cmp_info.hardware_intr_sig = SIGRTMIN + 2; + + /* Run Vendor-specific fixups */ + pe_vendor_fixups(_papi_hwd[cidx]); + + /* Detect if we can use rdpmc (or equivalent) */ + retval=_pe_detect_rdpmc(); + _papi_hwd[cidx]->cmp_info.fast_counter_read = retval; + if (retval < 0 ) { + /* Don't actually fail here, as could be a survivable bug? */ + /* If perf_event_open/mmap truly are failing we will */ + /* likely catch it pretty quickly elsewhere. */ + _papi_hwd[cidx]->cmp_info.fast_counter_read = 0; + } + +#if (USE_PERFEVENT_RDPMC==1) + +#else + /* Force fast_counter_read off if --enable-perfevent-rdpmc=no */ + _papi_hwd[cidx]->cmp_info.fast_counter_read = 0; +#endif + + /* Run the libpfm4-specific setup */ + retval = _papi_libpfm4_init(_papi_hwd[cidx]); + if (retval) { + + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Error initializing libpfm4",PAPI_MAX_STR_LEN); + return retval; + + } + + /* Now that libpfm4 is initialized */ + /* Try to setup the perf_event component events */ + + retval = _pe_libpfm4_init(_papi_hwd[cidx], cidx, + &perf_native_event_table, + PMU_TYPE_CORE | PMU_TYPE_OS); + if (retval) { + switch(retval) { + case PAPI_ENOMEM: + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Error libpfm4 memory allocation", + PAPI_MAX_STR_LEN); + break; + case PAPI_ENOSUPP: + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Error libpfm4 no PMUs found", + PAPI_MAX_STR_LEN); + break; + case PAPI_ENOCMP: + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Error libpfm4 no 
default PMU found", + PAPI_MAX_STR_LEN); + break; + case PAPI_ECOUNT: + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Error libpfm4 too many default PMUs found", + PAPI_MAX_STR_LEN); + break; + case PAPI_ENOEVNT: + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Error loading preset events", + PAPI_MAX_STR_LEN); + break; + default: + printf("PAPI error %d\n",retval); + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Unknown libpfm4 related error", + PAPI_MAX_STR_LEN); + + } + return retval; + } + + /* Detect NMI watchdog which can steal counters */ + /* FIXME: on Intel we should also halve the count if SMT enabled */ + if (_linux_detect_nmi_watchdog()) { + if (_papi_hwd[cidx]->cmp_info.num_cntrs>0) { + _papi_hwd[cidx]->cmp_info.num_cntrs--; + } + SUBDBG("The Linux nmi_watchdog is using one of the performance " + "counters, reducing the total number available.\n"); + } + + /* check for exclude_guest issue */ + check_exclude_guest(); + + return PAPI_OK; + +} + + + +/* Our component vector */ + +papi_vector_t _perf_event_vector = { + .cmp_info = { + /* component information (unspecified values initialized to 0) */ + .name = "perf_event", + .short_name = "perf", + .version = "5.0", + .description = "Linux perf_event CPU counters", + + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR | PAPI_GRN_SYS, + + .hardware_intr = 1, + .kernel_profile = 1, + + /* component specific cmp_info initializations */ + .fast_virtual_timer = 0, + .attach = 1, + .attach_must_ptrace = 1, + .cpu = 1, + .inherit = 1, + .cntr_umasks = 1, + + .kernel_multiplex = 1, + .num_mpx_cntrs = PERF_EVENT_MAX_MPX_COUNTERS, + + + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( pe_context_t ), + .control_state = sizeof ( pe_control_t ), + .reg_value = sizeof ( int ), + .reg_alloc = sizeof ( int 
), + }, + + /* function pointers in this component */ + .init_component = _pe_init_component, + .shutdown_component = _pe_shutdown_component, + .init_thread = _pe_init_thread, + .init_control_state = _pe_init_control_state, + .dispatch_timer = _pe_dispatch_timer, + + /* function pointers from the shared perf_event lib */ + .start = _pe_start, + .stop = _pe_stop, + .read = _pe_read, + .shutdown_thread = _pe_shutdown_thread, + .ctl = _pe_ctl, + .update_control_state = _pe_update_control_state, + .set_domain = _pe_set_domain, + .reset = _pe_reset, + .set_overflow = _pe_set_overflow, + .set_profile = _pe_set_profile, + .stop_profiling = _pe_stop_profiling, + .write = _pe_write, + + + /* from counter name mapper */ + .ntv_enum_events = _pe_ntv_enum_events, + .ntv_name_to_code = _pe_ntv_name_to_code, + .ntv_code_to_name = _pe_ntv_code_to_name, + .ntv_code_to_descr = _pe_ntv_code_to_descr, + .ntv_code_to_info = _pe_ntv_code_to_info, +}; diff --git a/src/components/perf_event/perf_event_lib.h b/src/components/perf_event/perf_event_lib.h new file mode 100644 index 0000000..f4ad0c5 --- /dev/null +++ b/src/components/perf_event/perf_event_lib.h @@ -0,0 +1,48 @@ +/* Various definitions */ + +/* This is arbitrary. 
Typically you can add up to ~1000 before */ +/* you run out of fds */ +#define PERF_EVENT_MAX_MPX_COUNTERS 384 + +/* We really don't need fancy definitions for these */ + +typedef struct +{ + int group_leader_fd; /* fd of group leader */ + int event_fd; /* fd of event */ + int event_opened; /* event successfully opened */ + int profiling; /* event is profiling */ + int sampling; /* event is a sampling event */ + uint32_t nr_mmap_pages; /* number pages in the mmap buffer */ + void *mmap_buf; /* used for control/profiling */ + uint64_t tail; /* current read location in mmap buffer */ + uint64_t mask; /* mask used for wrapping the pages */ + int cpu; /* cpu associated with this event */ + struct perf_event_attr attr; /* perf_event config structure */ +} pe_event_info_t; + + +typedef struct { + int num_events; /* number of events in control state */ + unsigned int domain; /* control-state wide domain */ + unsigned int granularity; /* granularity */ + unsigned int multiplexed; /* multiplexing enable */ + unsigned int overflow; /* overflow enable */ + unsigned int inherit; /* inherit enable */ + unsigned int overflow_signal; /* overflow signal */ + int cidx; /* current component */ + int cpu; /* which cpu to measure */ + pid_t tid; /* thread we are monitoring */ + pe_event_info_t events[PERF_EVENT_MAX_MPX_COUNTERS]; + long long counts[PERF_EVENT_MAX_MPX_COUNTERS]; +} pe_control_t; + + +typedef struct { + int initialized; /* are we initialized? */ + int state; /* are we opened and/or running? 
*/ + int cidx; /* our component id */ + struct native_event_table_t *event_table; /* our event table */ +} pe_context_t; + + diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h new file mode 100644 index 0000000..20dfbac --- /dev/null +++ b/src/components/perf_event/perf_helpers.h @@ -0,0 +1,286 @@ +/*****************************************************************/ +/********* Begin perf_event low-level code ***********************/ +/*****************************************************************/ + +/* In case headers aren't new enough to have __NR_perf_event_open */ +#ifndef __NR_perf_event_open + +#ifdef __powerpc__ +#define __NR_perf_event_open 319 +#elif defined(__x86_64__) +#define __NR_perf_event_open 298 +#elif defined(__i386__) +#define __NR_perf_event_open 336 +#elif defined(__arm__) +#define __NR_perf_event_open 364 +#endif + +#endif + +static long +sys_perf_event_open( struct perf_event_attr *hw_event, + pid_t pid, int cpu, int group_fd, unsigned long flags ) +{ + int ret; + + ret = syscall( __NR_perf_event_open, + hw_event, pid, cpu, group_fd, flags ); + + return ret; +} + +#if defined(__x86_64__) || defined(__i386__) + + +static inline unsigned long long rdtsc(void) { + + unsigned a,d; + + __asm__ volatile("rdtsc" : "=a" (a), "=d" (d)); + + return ((unsigned long long)a) | (((unsigned long long)d) << 32); +} + +static inline unsigned long long rdpmc(unsigned int counter) { + + unsigned int low, high; + + __asm__ volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter)); + + return (unsigned long long)low | ((unsigned long long)high) <<32; +} + +#define barrier() __asm__ volatile("" ::: "memory") + +/* based on the code in include/uapi/linux/perf_event.h */ +static inline unsigned long long mmap_read_self(void *addr, + unsigned long long *en, + unsigned long long *ru) { + + struct perf_event_mmap_page *pc = addr; + + uint32_t seq, time_mult, time_shift, index, width; + int64_t count; + uint64_t 
enabled, running; + uint64_t cyc, time_offset; + int64_t pmc = 0; + uint64_t quot, rem; + uint64_t delta = 0; + + + do { + /* The kernel increments pc->lock any time */ + /* perf_event_update_userpage() is called */ + /* So by checking now, and the end, we */ + /* can see if an update happened while we */ + /* were trying to read things, and re-try */ + /* if something changed */ + /* The barrier ensures we get the most up to date */ + /* version of the pc->lock variable */ + + seq=pc->lock; + barrier(); + + /* For multiplexing */ + /* time_enabled is time the event was enabled */ + enabled = pc->time_enabled; + /* time_running is time the event was actually running */ + running = pc->time_running; + + /* if cap_user_time is set, we can use rdtsc */ + /* to calculate more exact enabled/running time */ + /* for more accurate multiplex calculations */ + if ( (pc->cap_user_time) && (enabled != running)) { + cyc = rdtsc(); + time_offset = pc->time_offset; + time_mult = pc->time_mult; + time_shift = pc->time_shift; + + quot=(cyc>>time_shift); + rem = cyc & (((uint64_t)1 << time_shift) - 1); + delta = time_offset + (quot * time_mult) + + ((rem * time_mult) >> time_shift); + } + enabled+=delta; + + /* actually do the measurement */ + + /* Index of register to read */ + /* 0 means stopped/not-active */ + /* Need to subtract 1 to get actual index to rdpmc() */ + index = pc->index; + + /* count is the value of the counter the last time */ + /* the kernel read it */ + /* If we don't sign extend it, we get large negative */ + /* numbers which break if an IOC_RESET is done */ + width = pc->pmc_width; + count = pc->offset; + count<<=(64-width); + count>>=(64-width); + + /* Ugh, libpfm4 perf_event.h has cap_usr_rdpmc */ + /* while actual perf_event.h has cap_user_rdpmc */ + + /* Only read if rdpmc enabled and event index valid */ + /* Otherwise return the older (out of date?) 
count value */ + if (pc->cap_usr_rdpmc && index) { + + /* Read counter value */ + pmc = rdpmc(index-1); + + /* sign extend result */ + pmc<<=(64-width); + pmc>>=(64-width); + + /* add current count into the existing kernel count */ + count+=pmc; + + /* Only adjust if index is valid */ + running+=delta; + } + + barrier(); + + } while (pc->lock != seq); + + if (en) *en=enabled; + if (ru) *ru=running; + + return count; +} + +#else +static inline unsigned long long mmap_read_self(void *addr, + unsigned long long *en, + unsigned long long *ru) { + + (void)addr; + + *en=0; + *ru=0; + + return (unsigned long long)(-1); +} + +#endif + +/* These functions are based on builtin-record.c in the */ +/* kernel's tools/perf directory. */ +/* This code is from a really ancient version of perf */ +/* And should be updated/commented properly */ + + +static uint64_t +mmap_read_head( pe_event_info_t *pe ) +{ + struct perf_event_mmap_page *pc = pe->mmap_buf; + int head; + + if ( pc == NULL ) { + PAPIERROR( "perf_event_mmap_page is NULL" ); + return 0; + } + + head = pc->data_head; + rmb(); + + return head; +} + +static void +mmap_write_tail( pe_event_info_t *pe, uint64_t tail ) +{ + struct perf_event_mmap_page *pc = pe->mmap_buf; + + /* ensure all reads are done before we write the tail out. */ + pc->data_tail = tail; +} + +/* Does the kernel define these somewhere? */ +struct ip_event { + struct perf_event_header header; + uint64_t ip; +}; +struct lost_event { + struct perf_event_header header; + uint64_t id; + uint64_t lost; +}; +typedef union event_union { + struct perf_event_header header; + struct ip_event ip; + struct lost_event lost; +} perf_sample_event_t; + +/* Should re-write with comments if we ever figure out what's */ +/* going on here. 
*/ +static void +mmap_read( int cidx, ThreadInfo_t **thr, pe_event_info_t *pe, + int profile_index ) +{ + uint64_t head = mmap_read_head( pe ); + uint64_t old = pe->tail; + unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize(); + int diff; + + diff = head - old; + if ( diff < 0 ) { + SUBDBG( "WARNING: failed to keep up with mmap data. head = %" PRIu64 + ", tail = %" PRIu64 ". Discarding samples.\n", head, old ); + /* head points to a known good entry, start there. */ + old = head; + } + + for( ; old != head; ) { + perf_sample_event_t *event = ( perf_sample_event_t * )& data[old & pe->mask]; + perf_sample_event_t event_copy; + size_t size = event->header.size; + + /* Event straddles the mmap boundary -- header should always */ + /* be inside due to u64 alignment of output. */ + if ( ( old & pe->mask ) + size != ( ( old + size ) & pe->mask ) ) { + uint64_t offset = old; + uint64_t len = min( sizeof ( *event ), size ), cpy; + void *dst = &event_copy; + + do { + cpy = min( pe->mask + 1 - ( offset & pe->mask ), len ); + memcpy( dst, &data[offset & pe->mask], cpy ); + offset += cpy; + dst = ((unsigned char*)dst) + cpy; + len -= cpy; + } while ( len ); + + event = &event_copy; + } + old += size; + + SUBDBG( "event->type = %08x\n", event->header.type ); + SUBDBG( "event->size = %d\n", event->header.size ); + + switch ( event->header.type ) { + case PERF_RECORD_SAMPLE: + _papi_hwi_dispatch_profile( ( *thr )->running_eventset[cidx], + ( caddr_t ) ( unsigned long ) event->ip.ip, + 0, profile_index ); + break; + + case PERF_RECORD_LOST: + SUBDBG( "Warning: because of a mmap buffer overrun, %" PRId64 + " events were lost.\n" + "Loss was recorded when counter id %#"PRIx64 + " overflowed.\n", event->lost.lost, event->lost.id ); + break; + default: + SUBDBG( "Error: unexpected header type - %d\n", + event->header.type ); + break; + } + } + + pe->tail = old; + mmap_write_tail( pe, old ); +} + + diff --git a/src/components/perf_event/tests/Makefile 
b/src/components/perf_event/tests/Makefile new file mode 100644 index 0000000..4db8705 --- /dev/null +++ b/src/components/perf_event/tests/Makefile @@ -0,0 +1,52 @@ +NAME=perf_event +include ../../Makefile_comp_tests.target + +TESTS = broken_events nmi_watchdog perf_event_offcore_response perf_event_system_wide perf_event_user_kernel + +DOLOOPS= $(testlibdir)/do_loops.o + +perf_event_tests: $(TESTS) + + +event_name_lib.o: event_name_lib.c event_name_lib.h + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c event_name_lib.c + + +broken_events.o: broken_events.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c broken_events.c + +broken_events: broken_events.o event_name_lib.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o broken_events broken_events.o event_name_lib.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +nmi_watchdog.o: nmi_watchdog.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c nmi_watchdog.c + +nmi_watchdog: nmi_watchdog.o event_name_lib.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o nmi_watchdog nmi_watchdog.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +perf_event_offcore_response.o: perf_event_offcore_response.c event_name_lib.h + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c perf_event_offcore_response.c + +perf_event_offcore_response: perf_event_offcore_response.o event_name_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) -o perf_event_offcore_response perf_event_offcore_response.o event_name_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) + + +perf_event_system_wide.o: perf_event_system_wide.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c perf_event_system_wide.c + +perf_event_system_wide: perf_event_system_wide.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) -o perf_event_system_wide perf_event_system_wide.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) + + +perf_event_user_kernel.o: perf_event_user_kernel.c event_name_lib.h + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c perf_event_user_kernel.c + +perf_event_user_kernel: perf_event_user_kernel.o event_name_lib.o 
$(UTILOBJS) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) -o perf_event_user_kernel perf_event_user_kernel.o event_name_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) + + +clean: + rm -f $(TESTS) *.o *~ + diff --git a/src/components/perf_event/tests/broken_events.c b/src/components/perf_event/tests/broken_events.c new file mode 100644 index 0000000..c4e5513 --- /dev/null +++ b/src/components/perf_event/tests/broken_events.c @@ -0,0 +1,64 @@ +/* + * This tests adding invalid events + */ + +#include <stdio.h> + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#include "event_name_lib.h" + +int main( int argc, char **argv ) { + + int retval; + + int EventSet = PAPI_NULL; + int quiet=0; + char user_event[4096]; + long long values[1]; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if (get_invalid_event_name(user_event,4096)==NULL) { + if (!quiet) { + printf("No sample invalid event defined for this architecture\n"); + } + test_skip( __FILE__, __LINE__, "No event", 0); + } + + retval = PAPI_create_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_add_named_event(EventSet, user_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Correctly failed adding invalid event %s %s\n",user_event,PAPI_strerror(retval)); + } + test_pass(__FILE__); + + } + + PAPI_start(EventSet); + + PAPI_stop(EventSet,&values[0]); + + if (!quiet) { + printf("Read result: %lld\n",values[0]); + } + + test_fail( __FILE__, __LINE__,"Added comma separated event somehow",0); + + return 0; +} diff --git a/src/components/perf_event/tests/event_name_lib.c b/src/components/perf_event/tests/event_name_lib.c new file mode 100644 index 0000000..d7cd5e2 --- /dev/null +++ 
b/src/components/perf_event/tests/event_name_lib.c @@ -0,0 +1,165 @@ +#include +#include +#include + +#include "papi.h" + +char *get_offcore_event(char *event, int size) { + + const PAPI_hw_info_t *hwinfo; + + hwinfo = PAPI_get_hardware_info(); + if ( hwinfo == NULL ) { + return NULL; + } + + if (hwinfo->vendor == PAPI_VENDOR_INTEL) { + + if ( hwinfo->cpuid_family == 6) { + switch(hwinfo->cpuid_model) { + + case 26: + case 30: + case 31: /* Nehalem */ + case 46: /* Nehalem EX */ + strncpy(event, + "OFFCORE_RESPONSE_0:DMND_DATA_RD:LOCAL_DRAM",size); + return event; + break; + + case 37: + case 44: /* Westmere */ + case 47: /* Westmere EX */ + strncpy(event, + "OFFCORE_RESPONSE_0:DMND_DATA_RD:LOCAL_DRAM",size); + return event; + break; + + case 45: /* SandyBridge EP */ + case 42: /* SandyBridge */ + strncpy(event, + "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); + return event; + break; + + case 58: /* IvyBridge */ + case 62: /* Ivy Trail */ + strncpy(event, + "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); + return event; + break; + + case 60: /* Haswell */ + case 69: + case 70: + case 63: /* Haswell EP */ + strncpy(event, + "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); + return event; + break; + + case 61: /* Broadwell */ + case 71: + case 86: + case 79: /* Broadwell EP */ + strncpy(event, + "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); + return event; + break; + + case 78: /* Skylake */ + case 94: + case 85: /* Skylake-X */ + strncpy(event, + "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); + return event; + break; + + case 142: /* Kabylake */ + case 158: + strncpy(event, + "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); + return event; + break; + + case 87: /* Knights Landing */ + strncpy(event, + "OFFCORE_RESPONSE_0:DMND_DATA_RD:ANY_RESPONSE",size); + return event; + break; + + } + } + return NULL; + } + else if (hwinfo->vendor == PAPI_VENDOR_AMD) { + return NULL; + } + + return NULL; +} + +char 
*get_instructions_event(char *event, int size) { + + const PAPI_hw_info_t *hwinfo; + + hwinfo = PAPI_get_hardware_info(); + if ( hwinfo == NULL ) { + return NULL; + } + + if (hwinfo->vendor == PAPI_VENDOR_INTEL) { + + if ( hwinfo->cpuid_family == 6) { + strncpy(event,"INSTRUCTIONS_RETIRED",size); + return event; + } + + if ( hwinfo->cpuid_family == 15) { + strncpy(event,"INSTR_RETIRED:NBOGUSNTAG",size); + return event; + } + + return NULL; + } + else if (hwinfo->vendor == PAPI_VENDOR_AMD) { + strncpy(event,"RETIRED_INSTRUCTIONS",size); + return event; + } + + return NULL; +} + +char *get_invalid_event_name(char *event, int size) { + + const PAPI_hw_info_t *hwinfo; + + hwinfo = PAPI_get_hardware_info(); + if ( hwinfo == NULL ) { + return NULL; + } + + if (hwinfo->vendor == PAPI_VENDOR_INTEL) { + + if ( hwinfo->cpuid_family == 6) { + switch(hwinfo->cpuid_model) { + + case 78: /* Skylake */ + case 94: + case 85: /* Skylake-X */ + strncpy(event, + "DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK:u=1",size); + return event; + break; + + } + } + return NULL; + } + else if (hwinfo->vendor == PAPI_VENDOR_AMD) { + return NULL; + } + + return NULL; +} + diff --git a/src/components/perf_event/tests/event_name_lib.h b/src/components/perf_event/tests/event_name_lib.h new file mode 100644 index 0000000..275933f --- /dev/null +++ b/src/components/perf_event/tests/event_name_lib.h @@ -0,0 +1,3 @@ +char *get_instructions_event(char *event, int size); +char *get_offcore_event(char *event, int size); +char *get_invalid_event_name(char *event, int size); diff --git a/src/components/perf_event/tests/nmi_watchdog.c b/src/components/perf_event/tests/nmi_watchdog.c new file mode 100644 index 0000000..fbebd03 --- /dev/null +++ b/src/components/perf_event/tests/nmi_watchdog.c @@ -0,0 +1,82 @@ +/* If the NMI watchdog is enabled it will steal a performance counter. 
*/ +/* There is a bug that if you try to use the maximum number of counters */ +/* (not counting the stolen one) with a group leader, sys_perf_open() */ +/* will indicate success, as will starting the count, but you will fail */ +/* at read time. */ + +/* This bug still exists in 3.x */ +/* The perf NMI watchdog was not introduced until 2.6.34 */ + +/* This also triggers in the case of the schedulability bug */ +/* but since that was fixed in 2.6.34 then in theory there is */ +/* no overlap in the tests. */ + +#include <stdio.h> + +#include "papi.h" +#include "papi_test.h" + + +static int detect_nmi_watchdog(void) { + + int watchdog_detected=0,watchdog_value=0; + FILE *fff; + + fff=fopen("/proc/sys/kernel/nmi_watchdog","r"); + if (fff!=NULL) { + if (fscanf(fff,"%d",&watchdog_value)==1) { + if (watchdog_value>0) watchdog_detected=1; + } + fclose(fff); + } + else { + watchdog_detected=-1; + } + + return watchdog_detected; +} + +int main( int argc, char **argv ) { + + int retval,watchdog_active=0; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + watchdog_active=detect_nmi_watchdog(); + + if (watchdog_active<0) { + test_skip( __FILE__, __LINE__, + "nmi_watchdog file does not exist\n", 0); + } + + if (watchdog_active) { + if (!quiet) { + printf("\nOn perf_event kernels with the nmi_watchdog enabled\n"); + printf("the watchdog steals an event, but the scheduability code\n"); + printf("is not notified. 
Thus adding a full complement of events\n"); + printf("seems to pass, but then fails at read time.\n"); + printf("Because of this, PAPI has to do some slow workarounds.\n"); + printf("For best PAPI performance, you may wish to disable\n"); + printf("the watchdog by running (as root)\n"); + printf("\techo \"0\" > /proc/sys/kernel/nmi_watchdog\n\n"); + } + + test_warn( __FILE__, __LINE__, + "NMI Watchdog Active, enabling slow workarounds", 0 ); + } + + test_pass( __FILE__ ); + + return 0; +} + + + diff --git a/src/components/perf_event/tests/perf_event_offcore_response.c b/src/components/perf_event/tests/perf_event_offcore_response.c new file mode 100644 index 0000000..ed2bc06 --- /dev/null +++ b/src/components/perf_event/tests/perf_event_offcore_response.c @@ -0,0 +1,79 @@ +/* + * This tests the use of offcore_response events + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#include "event_name_lib.h" + +int main( int argc, char **argv ) { + + int quiet; + + char *offcore_event=NULL; + char event_name[BUFSIZ]; + + int retval; + int EventSet1 = PAPI_NULL; + + long long total_values[1]; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + retval = PAPI_create_eventset(&EventSet1); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + /* Get a relevant event name */ + offcore_event=get_offcore_event(event_name, BUFSIZ); + if (offcore_event==NULL) { + if (!quiet) { + printf("No test event available on this processor\n"); + } + test_skip( __FILE__, __LINE__, + "PAPI does not support offcore on this processor", + PAPI_ENOSUPP ); + } + + retval = PAPI_add_named_event(EventSet1, offcore_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to 
add %s\n",offcore_event); + } + test_fail(__FILE__, __LINE__, "adding offcore event ",retval); + } + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet1, total_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t%s count = %lld\n",offcore_event,total_values[0]); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/components/perf_event/tests/perf_event_system_wide.c b/src/components/perf_event/tests/perf_event_system_wide.c new file mode 100644 index 0000000..9a6f5c6 --- /dev/null +++ b/src/components/perf_event/tests/perf_event_system_wide.c @@ -0,0 +1,875 @@ +/* + * This tests the measuring of events using a system-wide granularity + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#ifndef __USE_GNU +#define __USE_GNU +#endif + +/* For sched_setaffinity() */ +#include + + +int main( int argc, char **argv ) { + + int retval; + int EventSetDefault = PAPI_NULL; + int EventSetUser = PAPI_NULL; + int EventSetKernel = PAPI_NULL; + int EventSetUserKernel = PAPI_NULL; + int EventSetAll = PAPI_NULL; + int EventSet4 = PAPI_NULL; + int EventSet5 = PAPI_NULL; + int EventSet6 = PAPI_NULL; + int EventSet7 = PAPI_NULL; + int EventSet8 = PAPI_NULL; + int EventSet9 = PAPI_NULL; + int EventSet10 = PAPI_NULL; + + int quiet=0; + + PAPI_domain_option_t domain_opt; + PAPI_granularity_option_t gran_opt; + PAPI_cpu_option_t cpu_opt; + cpu_set_t mask; + + long long dom_default_values[1], + dom_user_values[1], + dom_kernel_values[1], + dom_userkernel_values[1], + dom_all_values[1]; + long long grn_thr_values[1],grn_proc_values[1]; + long long grn_sys_values[1],grn_sys_cpu_values[1]; + long long total_values[1],total_affinity_values[1]; + long long total_all_values[1]; + + dom_user_values[0]=0; + 
dom_userkernel_values[0]=0; + dom_all_values[0]=0; + grn_thr_values[0]=0; + grn_proc_values[0]=0; + grn_sys_values[0]=0; + grn_sys_cpu_values[0]=0; + total_values[0]=0; + total_affinity_values[0]=0; + total_all_values[0]=0; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if (!quiet) { + printf("\nTrying PAPI_TOT_INS with different domains:\n"); + } + + /***************************/ + /***************************/ + /* Default */ + /***************************/ + /***************************/ + + retval = PAPI_create_eventset(&EventSetDefault); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_add_named_event(EventSetDefault, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add PAPI_TOT_INS\n"); + } + test_skip(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + if (!quiet) { + printf("\tDefault:\t\t\t"); + } + + retval = PAPI_start( EventSetDefault ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSetDefault, dom_default_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",dom_default_values[0]); + } + + /***************************/ + /***************************/ + /* user events */ + /***************************/ + /***************************/ + + retval = PAPI_create_eventset(&EventSetUser); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSetUser, 0); + + /* we need to set domain to be as inclusive as possible */ + domain_opt.def_cidx=0; + 
domain_opt.eventset=EventSetUser; + domain_opt.domain=PAPI_DOM_USER; + + retval = PAPI_set_opt(PAPI_DOMAIN,(PAPI_option_t*)&domain_opt); + if (retval != PAPI_OK) { + if (retval==PAPI_EPERM) { + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_DOM_ALL; need to run as root", + retval); + } + else { + test_fail(__FILE__, __LINE__, "setting PAPI_DOM_KERNEL",retval); + } + } + + retval = PAPI_add_named_event(EventSetUser, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add PAPI_TOT_INS\n"); + } + test_skip(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + if (!quiet) { + printf("\tPAPI_DOM_USER:\t\t\t"); + } + + retval = PAPI_start( EventSetUser ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSetUser, dom_user_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",dom_user_values[0]); + } + + + /***************************/ + /***************************/ + /* kernel events */ + /***************************/ + /***************************/ + + retval = PAPI_create_eventset(&EventSetKernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSetKernel, 0); + + /* we need to set domain to be as inclusive as possible */ + domain_opt.def_cidx=0; + domain_opt.eventset=EventSetKernel; + domain_opt.domain=PAPI_DOM_KERNEL; + + retval = PAPI_set_opt(PAPI_DOMAIN,(PAPI_option_t*)&domain_opt); + if (retval != PAPI_OK) { + if (retval==PAPI_EPERM) { + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_DOM_ALL; need to run as root", + retval); + } + else { + test_fail(__FILE__, __LINE__, "setting PAPI_DOM_KERNEL",retval); + } + } + + retval = PAPI_add_named_event(EventSetKernel, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( 
!quiet ) { + fprintf(stderr,"Error trying to add PAPI_TOT_INS\n"); + } + test_skip(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + if (!quiet) { + printf("\tPAPI_DOM_KERNEL:\t\t"); + } + + retval = PAPI_start( EventSetKernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSetKernel, dom_kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",dom_kernel_values[0]); + } + + + /***************************/ + /***************************/ + /* User+Kernel events */ + /***************************/ + /***************************/ + + if (!quiet) { + printf("\tPAPI_DOM_USER|PAPI_DOM_KERNEL:\t"); + } + + retval = PAPI_create_eventset(&EventSetUserKernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSetUserKernel, 0); + + /* we need to set domain to be as inclusive as possible */ + + domain_opt.def_cidx=0; + domain_opt.eventset=EventSetUserKernel; + domain_opt.domain=PAPI_DOM_USER|PAPI_DOM_KERNEL; + + retval = PAPI_set_opt(PAPI_DOMAIN,(PAPI_option_t*)&domain_opt); + if (retval != PAPI_OK) { + + if (retval==PAPI_EPERM) { + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_DOM_ALL; need to run as root", + retval); + } + else { + test_fail(__FILE__, __LINE__, "setting PAPI_DOM_ALL",retval); + } + } + + + retval = PAPI_add_named_event(EventSetUserKernel, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add PAPI_TOT_INS\n"); + } + test_fail(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + retval = PAPI_start( EventSetUserKernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSetUserKernel, dom_userkernel_values ); 
+ if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",dom_userkernel_values[0]); + } + + /***************************/ + /***************************/ + /* DOMAIN_ALL events */ + /***************************/ + /***************************/ + + if (!quiet) { + printf("\tPAPI_DOM_ALL:\t\t\t"); + } + + retval = PAPI_create_eventset(&EventSetAll); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSetAll, 0); + + /* we need to set domain to be as inclusive as possible */ + + domain_opt.def_cidx=0; + domain_opt.eventset=EventSetAll; + domain_opt.domain=PAPI_DOM_ALL; + + retval = PAPI_set_opt(PAPI_DOMAIN,(PAPI_option_t*)&domain_opt); + if (retval != PAPI_OK) { + + if (retval==PAPI_EPERM) { + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_DOM_ALL; need to run as root", + retval); + } + else { + test_fail(__FILE__, __LINE__, "setting PAPI_DOM_ALL",retval); + } + } + + + retval = PAPI_add_named_event(EventSetAll, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add PAPI_TOT_INS\n"); + } + test_fail(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + retval = PAPI_start( EventSetAll ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSetAll, dom_all_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",dom_all_values[0]); + } + + + /***************************/ + /***************************/ + /* PAPI_GRN_THR events */ + /***************************/ + /***************************/ + + if ( !quiet ) { + printf("\nTrying different granularities:\n"); + } + + if ( !quiet ) { + printf("\tPAPI_GRN_THR:\t\t\t"); + } + + retval = PAPI_create_eventset(&EventSet4); + if 
(retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSet4, 0); + + /* Set the granularity to individual thread */ + + gran_opt.def_cidx=0; + gran_opt.eventset=EventSet4; + gran_opt.granularity=PAPI_GRN_THR; + + retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_GRN_THR", + retval); + } + + + retval = PAPI_add_named_event(EventSet4, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add PAPI_TOT_INS\n"); + } + test_fail(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + retval = PAPI_start( EventSet4 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet4, grn_thr_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",grn_thr_values[0]); + } + + + /***************************/ + /***************************/ + /* PAPI_GRN_PROC events */ + /***************************/ + /***************************/ + + if ( !quiet ) { + printf("\tPAPI_GRN_PROC:\t\t\t"); + } + + retval = PAPI_create_eventset(&EventSet5); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSet5, 0); + + /* Set the granularity to process */ + + gran_opt.def_cidx=0; + gran_opt.eventset=EventSet5; + gran_opt.granularity=PAPI_GRN_PROC; + + retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Unable to set PAPI_GRN_PROC\n"); + } + } + else { + retval = PAPI_add_named_event(EventSet5, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + printf("Error trying to add PAPI_TOT_INS\n"); + } + test_fail(__FILE__, __LINE__, "adding 
PAPI_TOT_INS ",retval); + } + + retval = PAPI_start( EventSet5 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet5, grn_proc_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",grn_proc_values[0]); + } + } + + + + /***************************/ + /***************************/ + /* PAPI_GRN_SYS events */ + /***************************/ + /***************************/ + + if ( !quiet ) { + printf("\tPAPI_GRN_SYS:\t\t\t"); + } + + retval = PAPI_create_eventset(&EventSet6); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSet6, 0); + + /* Set the granularity to current cpu */ + + gran_opt.def_cidx=0; + gran_opt.eventset=EventSet6; + gran_opt.granularity=PAPI_GRN_SYS; + + retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Unable to set PAPI_GRN_SYS\n"); + } + } + else { + + retval = PAPI_add_named_event(EventSet6, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + + if (retval == PAPI_EPERM) { + /* FIXME: read perf_event_paranoid and see */ + if (!quiet) printf("SYS granularity not allowed, probably perf_event_paranoid permissions\n"); + } + else { + if ( !quiet ) { + printf("Error adding PAPI_TOT_INS with system granularity\n"); + } + test_fail(__FILE__, __LINE__, "adding PAPI_TOT_INS with system granularity",retval); + } + } else { + + retval = PAPI_start( EventSet6 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet6, grn_sys_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",grn_sys_values[0]); + } + } + + } + + + /****************************/ + 
/****************************/ + /* PAPI_GRN_SYS_CPU events */ + /****************************/ + /****************************/ + + if ( !quiet ) { + printf("\tPAPI_GRN_SYS_CPU:\t\t"); + } + + retval = PAPI_create_eventset(&EventSet7); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSet7, 0); + + /* Set the granularity to all cpus */ + + gran_opt.def_cidx=0; + gran_opt.eventset=EventSet7; + gran_opt.granularity=PAPI_GRN_SYS_CPU; + + retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Unable to set PAPI_GRN_SYS_CPU\n"); + } + } + else { + retval = PAPI_add_named_event(EventSet7, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + printf("Error trying to add PAPI_TOT_INS\n"); + } + test_fail(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + retval = PAPI_start( EventSet7 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet7, grn_sys_cpu_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",grn_sys_cpu_values[0]); + } + } + + + /***************************/ + /***************************/ + /* SYS and ATTACH events */ + /***************************/ + /***************************/ + + if ( !quiet ) { + printf("\nPAPI_GRN_SYS plus CPU attach:\n"); + } + + if ( !quiet ) { + printf("\tGRN_SYS, DOM_USER, CPU 0 attach:\t"); + } + + retval = PAPI_create_eventset(&EventSet8); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSet8, 0); + + /* Set the granularity to system-wide */ + + gran_opt.def_cidx=0; + gran_opt.eventset=EventSet8; + gran_opt.granularity=PAPI_GRN_SYS; + + retval = 
PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Unable to set PAPI_GRN_SYS\n"); + } + } + else { + /* we need to set to a certain cpu */ + + cpu_opt.eventset=EventSet8; + cpu_opt.cpu_num=0; + + retval = PAPI_set_opt(PAPI_CPU_ATTACH,(PAPI_option_t*)&cpu_opt); + if (retval != PAPI_OK) { + if (retval==PAPI_EPERM) { + if (!quiet) { + printf("Permission error trying to CPU_ATTACH; need to run as root\n"); + } + test_skip( __FILE__, __LINE__, + "this test; trying to CPU_ATTACH; need to run as root", + retval); + } + + test_fail(__FILE__, __LINE__, "PAPI_CPU_ATTACH",retval); + } + + retval = PAPI_add_named_event(EventSet8, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + printf("Error trying to add PAPI_TOT_INS\n"); + } + test_fail(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + retval = PAPI_start( EventSet8 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet8, total_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",total_values[0]); + } + } + + + /***************************/ + /***************************/ + /* SYS and ATTACH, bind CPU events */ + /***************************/ + /***************************/ + + if ( !quiet ) { + printf("\tGRN_SYS, DOM_USER, CPU 0 affinity:\t"); + } + + /* Set affinity to CPU 0 */ + CPU_ZERO(&mask); + CPU_SET(0,&mask); + retval=sched_setaffinity(0, sizeof(mask), &mask); + + if (retval<0) { + if (!quiet) { + printf("Setting affinity failed: %s\n",strerror(errno)); + } + } else { + + retval = PAPI_create_eventset(&EventSet9); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSet9, 0); + + /* Set the granularity to system-wide */ + + gran_opt.def_cidx=0; + 
gran_opt.eventset=EventSet9; + gran_opt.granularity=PAPI_GRN_SYS; + + retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Unable to set PAPI_GRN_SYS\n"); + } + } + else { + /* we need to set to a certain cpu for uncore to work */ + + cpu_opt.eventset=EventSet9; + cpu_opt.cpu_num=0; + + retval = PAPI_set_opt(PAPI_CPU_ATTACH,(PAPI_option_t*)&cpu_opt); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_CPU_ATTACH",retval); + } + + retval = PAPI_add_named_event(EventSet9, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + printf("Error trying to add PAPI_TOT_INS\n"); + } + test_fail(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + retval = PAPI_start( EventSet9 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet9, total_affinity_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",total_affinity_values[0]); + } + } + } + + /***************************/ + /***************************/ + /* SYS and ATTACH, bind CPU events */ + /***************************/ + /***************************/ + + if ( !quiet ) { + printf("\tGRN_SYS, DOM_ALL, CPU 0 affinity:\t"); + } + + + + /* Set affinity to CPU 0 */ + CPU_ZERO(&mask); + CPU_SET(0,&mask); + retval=sched_setaffinity(0, sizeof(mask), &mask); + + if (retval<0) { + if (!quiet) { + printf("Setting affinity failed: %s\n",strerror(errno)); + } + } else { + + retval = PAPI_create_eventset(&EventSet10); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + retval = PAPI_assign_eventset_component(EventSet10, 0); + + /* Set DOM_ALL */ + domain_opt.def_cidx=0; + domain_opt.eventset=EventSet10; + domain_opt.domain=PAPI_DOM_ALL; + + retval = PAPI_set_opt(PAPI_DOMAIN,(PAPI_option_t*)&domain_opt); + if (retval != 
PAPI_OK) { + + if (retval==PAPI_EPERM) { + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_DOM_ALL; need to run as root", + retval); + } + else { + test_fail(__FILE__, __LINE__, "setting PAPI_DOM_ALL",retval); + } + } + + /* Set the granularity to system-wide */ + + gran_opt.def_cidx=0; + gran_opt.eventset=EventSet10; + gran_opt.granularity=PAPI_GRN_SYS; + + retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Unable to set PAPI_GRN_SYS\n"); + } + } + else { + /* we need to set to a certain cpu for uncore to work */ + + cpu_opt.eventset=EventSet10; + cpu_opt.cpu_num=0; + + retval = PAPI_set_opt(PAPI_CPU_ATTACH,(PAPI_option_t*)&cpu_opt); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_CPU_ATTACH",retval); + } + + retval = PAPI_add_named_event(EventSet10, "PAPI_TOT_INS"); + if (retval != PAPI_OK) { + if ( !quiet ) { + printf("Error trying to add PAPI_TOT_INS\n"); + } + test_fail(__FILE__, __LINE__, "adding PAPI_TOT_INS ",retval); + } + + retval = PAPI_start( EventSet10 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet10, total_all_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("%lld\n",total_all_values[0]); + } + } + } + + /**************/ + /* Validation */ + /**************/ + + if ( !quiet ) { + printf("\n"); + } + + if ( !quiet ) { + printf("Validating:\n"); + printf("\tDOM_USER|DOM_KERNEL (%lld) > DOM_USER (%lld)\n", + dom_userkernel_values[0],dom_user_values[0]); + } + if (dom_user_values[0] > dom_userkernel_values[0]) { + test_fail( __FILE__, __LINE__, "DOM_USER too high", 0 ); + } + + if ( !quiet ) { + printf("\n"); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/components/perf_event/tests/perf_event_user_kernel.c 
b/src/components/perf_event/tests/perf_event_user_kernel.c new file mode 100644 index 0000000..4310de2 --- /dev/null +++ b/src/components/perf_event/tests/perf_event_user_kernel.c @@ -0,0 +1,723 @@ +/* + * This tests the use of offcore_response events + */ + +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#include "event_name_lib.h" + +int main( int argc, char **argv ) { + + + char *instructions_event=NULL; + char event_name[BUFSIZ]; + + char user_event[BUFSIZ]; + char kernel_event[BUFSIZ]; + char user_kernel_event[BUFSIZ]; + + + int retval; + + /* Default Domain */ + int EventSet_default = PAPI_NULL; + int EventSet_default_user = PAPI_NULL; + int EventSet_default_kernel = PAPI_NULL; + int EventSet_default_user_kernel = PAPI_NULL; + long long default_values[1]; + long long default_user_values[1]; + long long default_kernel_values[1]; + long long default_user_kernel_values[1]; + + /* User Domain */ + int EventSet_user = PAPI_NULL; + int EventSet_user_user = PAPI_NULL; + int EventSet_user_kernel = PAPI_NULL; + int EventSet_user_user_kernel = PAPI_NULL; + long long user_values[1]; + long long user_user_values[1]; + long long user_kernel_values[1]; + long long user_user_kernel_values[1]; + + /* Kernel Domain */ + int EventSet_kernel = PAPI_NULL; + int EventSet_kernel_user = PAPI_NULL; + int EventSet_kernel_kernel = PAPI_NULL; + int EventSet_kernel_user_kernel = PAPI_NULL; + long long kernel_values[1]; + long long kernel_user_values[1]; + long long kernel_kernel_values[1]; + long long kernel_user_kernel_values[1]; + + /* All Domain */ + int EventSet_all = PAPI_NULL; + int EventSet_all_user = PAPI_NULL; + int EventSet_all_kernel = PAPI_NULL; + int EventSet_all_user_kernel = PAPI_NULL; + long long all_values[1]; + long long all_user_values[1]; + long long all_kernel_values[1]; + long long all_user_kernel_values[1]; + + /* Two Events */ + int EventSet_two = PAPI_NULL; + long long two_values[2]; + + int quiet=0; + + /* Set TESTS_QUIET 
variable */ + quiet=tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + + /* Get a relevant event name */ + instructions_event=get_instructions_event(event_name, BUFSIZ); + if (instructions_event==NULL) { + test_skip( __FILE__, __LINE__, + "No instructions event definition for this arch", + PAPI_ENOSUPP ); + } + + sprintf(user_event,"%s:u=1",instructions_event); + sprintf(kernel_event,"%s:k=1",instructions_event); + sprintf(user_kernel_event,"%s:u=1:k=1",instructions_event); + + /*********************************/ + /* Two Events */ + /*********************************/ + + if (!quiet) { + printf("\tTwo Events in same EventSet\n"); + } + + retval = PAPI_create_eventset(&EventSet_two); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_two, user_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s %s\n",user_event,PAPI_strerror(retval)); + } + test_skip( __FILE__, __LINE__,"Could not add event",PAPI_ENOSUPP); + } + retval = PAPI_add_named_event(EventSet_two, kernel_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",kernel_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_two ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_two, two_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld, %s count = %lld\n", + user_event,two_values[0], + kernel_event,two_values[1]); + } + + + /*********************************/ + /* Default Domain, Default Event */ + 
/*********************************/ + + if (!quiet) { + printf("\tDefault Domain\n"); + } + + retval = PAPI_create_eventset(&EventSet_default); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_default, instructions_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",instructions_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_default ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_default, default_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",instructions_event,default_values[0]); + } + + + /*********************************/ + /* Default Domain, User Event */ + /*********************************/ + + retval = PAPI_create_eventset(&EventSet_default_user); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_default_user, user_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_default_user ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_default_user, default_user_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",user_event,default_user_values[0]); + } + + /*********************************/ + /* Default Domain, Kernel Event */ + /*********************************/ + + retval = 
PAPI_create_eventset(&EventSet_default_kernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_default_kernel, kernel_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",kernel_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_default_kernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_default_kernel, default_kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",kernel_event,default_kernel_values[0]); + } + + /*****************************************/ + /* Default Domain, user and Kernel Event */ + /*****************************************/ + + + retval = PAPI_create_eventset(&EventSet_default_user_kernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_default_user_kernel, user_kernel_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_kernel_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_default_user_kernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_default_user_kernel, default_user_kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",user_kernel_event,default_user_kernel_values[0]); + } + + /*********************************/ + /* User Domain, Default Event */ + /*********************************/ + + if (!quiet) { + 
printf("\tPAPI_DOM_USER Domain\n"); + } + + retval=PAPI_set_cmp_domain(PAPI_DOM_USER, 0); + + retval = PAPI_create_eventset(&EventSet_user); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_user, instructions_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",instructions_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_user ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_user, user_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",instructions_event,user_values[0]); + } + + + /*********************************/ + /* User Domain, User Event */ + /*********************************/ + + retval = PAPI_create_eventset(&EventSet_user_user); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_user_user, user_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_user_user ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_user_user, user_user_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",user_event,user_user_values[0]); + } + + /*********************************/ + /* User Domain, Kernel Event */ + /*********************************/ + + retval = PAPI_create_eventset(&EventSet_user_kernel); + if (retval != 
PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_user_kernel, kernel_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_user_kernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_user_kernel, user_kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",kernel_event,user_kernel_values[0]); + } + + /*****************************************/ + /* User Domain, user and Kernel Event */ + /*****************************************/ + + retval = PAPI_create_eventset(&EventSet_user_user_kernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_user_user_kernel, user_kernel_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_kernel_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_user_user_kernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_user_user_kernel, user_user_kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",user_kernel_event,user_user_kernel_values[0]); + } + + /*********************************/ + /* Kernel Domain, Default Event */ + /*********************************/ + + if (!quiet) { + printf("\tPAPI_DOM_KERNEL Domain\n"); + } + + retval=PAPI_set_cmp_domain(PAPI_DOM_KERNEL, 0); + + retval = 
PAPI_create_eventset(&EventSet_kernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_kernel, instructions_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",instructions_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_kernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_kernel, kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",instructions_event,kernel_values[0]); + } + + + /*********************************/ + /* Kernel Domain, User Event */ + /*********************************/ + + retval = PAPI_create_eventset(&EventSet_kernel_user); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_kernel_user, user_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_kernel_user ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_kernel_user, kernel_user_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",user_event,kernel_user_values[0]); + } + + /*********************************/ + /* Kernel Domain, Kernel Event */ + /*********************************/ + + retval = PAPI_create_eventset(&EventSet_kernel_kernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + 
} + + + retval = PAPI_add_named_event(EventSet_kernel_kernel, kernel_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_kernel_kernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_kernel_kernel, kernel_kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",kernel_event,kernel_kernel_values[0]); + } + + /*****************************************/ + /* Kernel Domain, user and Kernel Event */ + /*****************************************/ + + retval = PAPI_create_eventset(&EventSet_kernel_user_kernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_kernel_user_kernel, user_kernel_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_kernel_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_kernel_user_kernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_kernel_user_kernel, kernel_user_kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",user_kernel_event,kernel_user_kernel_values[0]); + } + + /*********************************/ + /* All Domain, Default Event */ + /*********************************/ + + if (!quiet) { + printf("\tPAPI_DOM_ALL Domain\n"); + } + + retval=PAPI_set_cmp_domain(PAPI_DOM_ALL, 0); + + retval = PAPI_create_eventset(&EventSet_all); + if (retval != PAPI_OK) { 
+ test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_all, instructions_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",instructions_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_all ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_all, all_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",instructions_event,all_values[0]); + } + + + /*********************************/ + /* All Domain, User Event */ + /*********************************/ + + retval = PAPI_create_eventset(&EventSet_all_user); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_all_user, user_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_all_user ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_all_user, all_user_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",user_event,all_user_values[0]); + } + + /*********************************/ + /* All Domain, Kernel Event */ + /*********************************/ + + retval = PAPI_create_eventset(&EventSet_all_kernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_all_kernel, kernel_event); + if (retval != PAPI_OK) { + if ( 
!quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_all_kernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_all_kernel, all_kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",kernel_event,all_kernel_values[0]); + } + + /*****************************************/ + /* All Domain, user and Kernel Event */ + /*****************************************/ + + retval = PAPI_create_eventset(&EventSet_all_user_kernel); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + + retval = PAPI_add_named_event(EventSet_all_user_kernel, user_kernel_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to add %s\n",user_kernel_event); + } + test_fail(__FILE__, __LINE__, "adding instructions event ",retval); + } + + retval = PAPI_start( EventSet_all_user_kernel ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet_all_user_kernel, all_user_kernel_values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\t\t%s count = %lld\n",user_kernel_event,all_user_kernel_values[0]); + } + + /**************/ + /* Validation */ + /**************/ + + //TODO + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/components/perf_event_uncore/Rules.perf_event_uncore b/src/components/perf_event_uncore/Rules.perf_event_uncore new file mode 100644 index 0000000..26d1335 --- /dev/null +++ b/src/components/perf_event_uncore/Rules.perf_event_uncore @@ -0,0 +1,9 @@ +# Note, this component can only be built if perf_event 
component also built + +COMPSRCS += components/perf_event_uncore/perf_event_uncore.c + +COMPOBJS += perf_event_uncore.o + +perf_event_uncore.o: components/perf_event_uncore/perf_event_uncore.c components/perf_event/perf_event_lib.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -Icomponents/perf_event/ -c components/perf_event_uncore/perf_event_uncore.c -o perf_event_uncore.o + diff --git a/src/components/perf_event_uncore/perf_event_uncore.c b/src/components/perf_event_uncore/perf_event_uncore.c new file mode 100644 index 0000000..278cdb7 --- /dev/null +++ b/src/components/perf_event_uncore/perf_event_uncore.c @@ -0,0 +1,1345 @@ +/* +* File: perf_event_uncore.c +* +* Author: Vince Weaver +* vincent.weaver@maine.edu +* Mods: Gary Mohr +* gary.mohr@bull.com +* Modified the perf_event_uncore component to use PFM_OS_PERF_EVENT_EXT mode in libpfm4. +* This adds several new event masks, including cpu=, u=, and k= which give the user +* the ability to set cpu number to use or control the domain (user, kernel, or both) +* in which the counter should be incremented. These are event masks so it is now +* possible to have multiple events in the same event set that count activity from +* different CPUs or count activity in different domains. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* PAPI-specific includes */ +#include "papi.h" +#include "papi_memory.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "extras.h" + +/* libpfm4 includes */ +#include "papi_libpfm4_events.h" +#include "components/perf_event/pe_libpfm4_events.h" +#include "perfmon/pfmlib.h" +#include PEINCLUDE + +/* Linux-specific includes */ +#include "mb.h" +#include "linux-memory.h" +#include "linux-timer.h" +#include "linux-common.h" +#include "linux-context.h" + +#include "components/perf_event/perf_event_lib.h" + +/* Forward declaration */ +papi_vector_t _perf_event_uncore_vector; + +/* Globals */ +struct native_event_table_t uncore_native_event_table; +static int our_cidx; +//int +//_peu_libpfm4_get_cidx() { +// return our_cidx; +//} + +/* Defines for ctx->state */ +#define PERF_EVENTS_OPENED 0x01 +#define PERF_EVENTS_RUNNING 0x02 + +static int _peu_set_domain( hwd_control_state_t *ctl, int domain); + + + +/******************************************************************/ +/******** Kernel Version Dependent Routines **********************/ +/******************************************************************/ + +/* The read format on perf_event varies based on various flags that */ +/* are passed into it. This helper avoids copying this logic */ +/* multiple places. 
*/ +static unsigned int +get_read_format( unsigned int multiplex, + unsigned int inherit, + int format_group ) +{ + unsigned int format = 0; + + /* if we need read format options for multiplexing, add them now */ + if (multiplex) { + format |= PERF_FORMAT_TOTAL_TIME_ENABLED; + format |= PERF_FORMAT_TOTAL_TIME_RUNNING; + } + + /* If we are not using inherit, add the group read options */ + if (!inherit) { + if (format_group) { + format |= PERF_FORMAT_GROUP; + } + } + + SUBDBG("multiplex: %d, inherit: %d, group_leader: %d, format: %#x\n", + multiplex, inherit, format_group, format); + + return format; +} + +/*****************************************************************/ +/********* End Kernel-version Dependent Routines ****************/ +/*****************************************************************/ + +/********************************************************************/ +/* Low-level perf_event calls */ +/********************************************************************/ + +/* In case headers aren't new enough to have __NR_perf_event_open */ +#ifndef __NR_perf_event_open + +#ifdef __powerpc__ +#define __NR_perf_event_open 319 +#elif defined(__x86_64__) +#define __NR_perf_event_open 298 +#elif defined(__i386__) +#define __NR_perf_event_open 336 +#elif defined(__arm__) 366+0x900000 +#define __NR_perf_event_open +#endif + +#endif + +static long +sys_perf_event_open( struct perf_event_attr *hw_event, pid_t pid, int cpu, + int group_fd, unsigned long flags ) +{ + int ret; + + SUBDBG("sys_perf_event_open(hw_event: %p, pid: %d, cpu: %d, group_fd: %d, flags: %lx\n",hw_event,pid,cpu,group_fd,flags); + SUBDBG(" type: %d\n",hw_event->type); + SUBDBG(" size: %d\n",hw_event->size); + SUBDBG(" config: %#"PRIx64" (%"PRIu64")\n",hw_event->config, + hw_event->config); + SUBDBG(" sample_period: %"PRIu64"\n",hw_event->sample_period); + SUBDBG(" sample_type: %"PRIu64"\n",hw_event->sample_type); + SUBDBG(" read_format: %"PRIu64"\n",hw_event->read_format); + SUBDBG(" 
disabled: %d\n",hw_event->disabled); + SUBDBG(" inherit: %d\n",hw_event->inherit); + SUBDBG(" pinned: %d\n",hw_event->pinned); + SUBDBG(" exclusive: %d\n",hw_event->exclusive); + SUBDBG(" exclude_user: %d\n",hw_event->exclude_user); + SUBDBG(" exclude_kernel: %d\n",hw_event->exclude_kernel); + SUBDBG(" exclude_hv: %d\n",hw_event->exclude_hv); + SUBDBG(" exclude_idle: %d\n",hw_event->exclude_idle); + SUBDBG(" mmap: %d\n",hw_event->mmap); + SUBDBG(" comm: %d\n",hw_event->comm); + SUBDBG(" freq: %d\n",hw_event->freq); + SUBDBG(" inherit_stat: %d\n",hw_event->inherit_stat); + SUBDBG(" enable_on_exec: %d\n",hw_event->enable_on_exec); + SUBDBG(" task: %d\n",hw_event->task); + SUBDBG(" watermark: %d\n",hw_event->watermark); + SUBDBG(" precise_ip: %d\n",hw_event->precise_ip); + SUBDBG(" mmap_data: %d\n",hw_event->mmap_data); + SUBDBG(" sample_id_all: %d\n",hw_event->sample_id_all); + SUBDBG(" exclude_host: %d\n",hw_event->exclude_host); + SUBDBG(" exclude_guest: %d\n",hw_event->exclude_guest); + SUBDBG(" exclude_callchain_kernel: %d\n",hw_event->exclude_callchain_kernel); + SUBDBG(" exclude_callchain_user: %d\n",hw_event->exclude_callchain_user); + SUBDBG(" wakeup_watermark: %d\n",hw_event->wakeup_watermark); + SUBDBG(" bp_type: %d\n",hw_event->bp_type); + SUBDBG(" config1: %#lx (%lu)\n",hw_event->config1,hw_event->config1); + SUBDBG(" config2: %#lx (%lu)\n",hw_event->config2,hw_event->config2); + SUBDBG(" branch_sample_type: %lu\n",hw_event->branch_sample_type); + SUBDBG(" sample_regs_user: %lu\n",hw_event->sample_regs_user); + SUBDBG(" sample_stack_user: %d\n",hw_event->sample_stack_user); + + ret = syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags ); + SUBDBG("Returned %d %d %s\n",ret, + ret<0?errno:0, + ret<0?strerror(errno):" "); + return ret; +} + + +static int map_perf_event_errors_to_papi(int perf_event_error) { + + int ret; + + /* These mappings are approximate. 
+ EINVAL in particular can mean lots of different things */ + switch(perf_event_error) { + case EPERM: + case EACCES: + ret = PAPI_EPERM; + break; + case ENODEV: + case EOPNOTSUPP: + ret = PAPI_ENOSUPP; + break; + case ENOENT: + ret = PAPI_ENOEVNT; + break; + case ENOSYS: + case EAGAIN: + case EBUSY: + case E2BIG: + ret = PAPI_ESYS; + break; + case ENOMEM: + ret = PAPI_ENOMEM; + break; + case EINVAL: + default: + ret = PAPI_EINVAL; + break; + } + return ret; +} + +/* Maximum size we ever expect to read from a perf_event fd */ +/* (this is the number of 64-bit values) */ +/* We use this to size the read buffers */ +/* The three is for event count, time_enabled, time_running */ +/* and the counter term is count value and count id for each */ +/* possible counter value. */ +#define READ_BUFFER_SIZE (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS)) + + + +/* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch */ +/* implementations (e.g. x86 before 2.6.33) which don't do a static event */ +/* scheduability check in sys_perf_event_open. It is also needed if the */ +/* kernel is stealing an event, such as when NMI watchdog is enabled. */ + +static int +check_scheduability( pe_context_t *ctx, pe_control_t *ctl) +{ + SUBDBG("ENTER: ctx: %p, ctl: %p\n", ctx, ctl); + int retval = 0, cnt = -1; + ( void ) ctx; /*unused */ + long long papi_pe_buffer[READ_BUFFER_SIZE]; + int i; + + /* If the kernel isn't tracking scheduability right */ + /* Then we need to start/stop/read to force the event */ + /* to be scheduled and see if an error condition happens. 
*/ + + /* start all events */ + for( i = 0; i < ctl->num_events; i++) { + retval = ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_ENABLE, NULL ); + if (retval == -1) { + SUBDBG("EXIT: Enable failed event index: %d, num_events: %d, return PAPI_ESYS\n", i, ctl->num_events); + return PAPI_ESYS; + } + } + + /* stop all events */ + for( i = 0; i < ctl->num_events; i++) { + retval = ioctl(ctl->events[i].event_fd, PERF_EVENT_IOC_DISABLE, NULL ); + if (retval == -1) { + SUBDBG("EXIT: Disable failed: event index: %d, num_events: %d, return PAPI_ESYS\n", i, ctl->num_events); + return PAPI_ESYS; + } + } + + /* See if a read of each event returns results */ + for( i = 0; i < ctl->num_events; i++) { + cnt = read( ctl->events[i].event_fd, papi_pe_buffer, sizeof(papi_pe_buffer)); + if ( cnt == -1 ) { + SUBDBG( "EXIT: read failed: event index: %d, num_events: %d, return PAPI_ESYS. Should never happen.\n", i, ctl->num_events); + return PAPI_ESYS; + } + + if ( cnt == 0 ) { + /* We read 0 bytes if we could not schedule the event */ + /* The kernel should have detected this at open */ + /* but various bugs (including NMI watchdog) */ + /* result in this behavior */ + + SUBDBG( "EXIT: read returned 0: event index: %d, num_events: %d, return PAPI_ECNFLCT.\n", i, ctl->num_events); + return PAPI_ECNFLCT; + } + } + + /* Reset all of the counters (opened so far) back to zero */ + /* from the above brief enable/disable call pair. */ + + /* We have to reset all events because reset of group leader */ + /* does not reset all. */ + /* we assume that the events are being added one by one and that */ + /* we do not need to reset higher events (doing so may reset ones */ + /* that have not been initialized yet. */ + + /* Note... PERF_EVENT_IOC_RESET does not reset time running */ + /* info if multiplexing, so we should avoid coming here if */ + /* we are multiplexing the event. 
*/ + for( i = 0; i < ctl->num_events; i++) { + retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL ); + if (retval == -1) { + SUBDBG("EXIT: Reset failed: event index: %d, num_events: %d, return PAPI_ESYS\n", i, ctl->num_events); + return PAPI_ESYS; + } + } + SUBDBG("EXIT: return PAPI_OK\n"); + return PAPI_OK; +} + + +/* Open all events in the control state */ +static int +open_pe_events( pe_context_t *ctx, pe_control_t *ctl ) +{ + + int i, ret = PAPI_OK; + long pid; + + if (ctl->granularity==PAPI_GRN_SYS) { + pid = -1; + } + else { + pid = ctl->tid; + } + + for( i = 0; i < ctl->num_events; i++ ) { + + ctl->events[i].event_opened=0; + + /* set up the attr structure. We don't set up all fields here */ + /* as some have already been set up previously. */ + +/* + * The following code controls how the uncore component interfaces with the + * kernel for uncore events. The code inside the ifdef will use grouping of + * uncore events which can make the cost of reading the results more efficient. + * The problem with it is that the uncore component supports 20 different uncore + * PMU's. The kernel requires that all events in a group must be for the same PMU. + * This means that with grouping enabled papi applications can count events on only + * one of the 20 PMU's during a run. + * + * The code inside the else clause treats each event in the event set as + * independent. When running in this mode the kernel allows the papi multiple + * uncore PMU's at the same time. + * + * Example: + * An application wants to measure all the L3 cache write requests. + * The event to do this is part of a cbox pmu (there are 8 cbox pmu's). + * When built with the code in the ifdef, the application would have to be + * run 8 times and count write requests from one pmu at a time. + * When built with the code in the else, the write requests in all 8 cbox + * pmu's could be counted in the same run. 
+ * + */ +// #define GROUPIT 1 // remove the comment on this line to force event grouping +#ifdef GROUPIT + /* group leader (event 0) is special */ + /* If we're multiplexed, everyone is a group leader */ + if (( i == 0 ) || (ctl->multiplexed)) { + ctl->events[i].attr.pinned = !ctl->multiplexed; + ctl->events[i].attr.disabled = 1; + ctl->events[i].group_leader_fd=-1; + ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed, + ctl->inherit, + !ctl->multiplexed ); + } else { + ctl->events[i].attr.pinned=0; + ctl->events[i].attr.disabled = 0; + ctl->events[i].group_leader_fd=ctl->events[0].event_fd, + ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed, + ctl->inherit, + 0 ); + } +#else + ctl->events[i].attr.pinned = !ctl->multiplexed; + ctl->events[i].attr.disabled = 1; + ctl->inherit = 1; + ctl->events[i].group_leader_fd=-1; + ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed, ctl->inherit, 0 ); +#endif + + + /* try to open */ + ctl->events[i].event_fd = sys_perf_event_open( &ctl->events[i].attr, + pid, + ctl->events[i].cpu, + ctl->events[i].group_leader_fd, + 0 /* flags */ + ); + + /* Try to match Linux errors to PAPI errors */ + if ( ctl->events[i].event_fd == -1 ) { + SUBDBG("sys_perf_event_open returned error on event #%d." + " Error: %s\n", + i, strerror( errno ) ); + ret=map_perf_event_errors_to_papi(errno); + + goto open_peu_cleanup; + } + + SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d," + " group_leader/fd: %d, event_fd: %d," + " read_format: %"PRIu64"\n", + pid, ctl->events[i].cpu, ctl->events[i].group_leader_fd, + ctl->events[i].event_fd, ctl->events[i].attr.read_format); + + ctl->events[i].event_opened=1; + } + + + /* in many situations the kernel will indicate we opened fine */ + /* yet things will fail later. So we need to double check */ + /* we actually can use the events we've set up. 
*/ + + /* This is not necessary if we are multiplexing, and in fact */ + /* we cannot do this properly if multiplexed because */ + /* PERF_EVENT_IOC_RESET does not reset the time running info */ + if (!ctl->multiplexed) { + ret = check_scheduability( ctx, ctl); + + if ( ret != PAPI_OK ) { + /* the last event did open, so we need to bump the counter */ + /* before doing the cleanup */ + i++; + goto open_peu_cleanup; + } + } + + /* Now that we've successfully opened all of the events, do whatever */ + /* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */ + /* and so on. */ + for ( i = 0; i < ctl->num_events; i++ ) { + + /* No sampling if uncore */ + ctl->events[i].mmap_buf = NULL; + } + + /* Set num_evts only if completely successful */ + ctx->state |= PERF_EVENTS_OPENED; + + return PAPI_OK; + +open_peu_cleanup: + /* We encountered an error, close up the fds we successfully opened. */ + /* We go backward in an attempt to close group leaders last, although */ + /* That's probably not strictly necessary. */ + while ( i > 0 ) { + i--; + if (ctl->events[i].event_fd>=0) { + close( ctl->events[i].event_fd ); + ctl->events[i].event_opened=0; + } + } + + return ret; +} + +/* Close all of the opened events */ +static int +close_pe_events( pe_context_t *ctx, pe_control_t *ctl ) +{ + int i; + int num_closed=0; + int events_not_opened=0; + + /* should this be a more serious error? 
*/ + if ( ctx->state & PERF_EVENTS_RUNNING ) { + SUBDBG("Closing without stopping first\n"); + } + + /* Close child events first */ + for( i=0; inum_events; i++ ) { + + if (ctl->events[i].event_opened) { + + if (ctl->events[i].group_leader_fd!=-1) { + if ( ctl->events[i].mmap_buf ) { + if ( munmap ( ctl->events[i].mmap_buf, + ctl->events[i].nr_mmap_pages * getpagesize() ) ) { + PAPIERROR( "munmap of fd = %d returned error: %s", + ctl->events[i].event_fd, strerror( errno ) ); + return PAPI_ESYS; + } + } + + if ( close( ctl->events[i].event_fd ) ) { + PAPIERROR( "close of fd = %d returned error: %s", + ctl->events[i].event_fd, strerror( errno ) ); + return PAPI_ESYS; + } else { + num_closed++; + } + ctl->events[i].event_opened=0; + } + } + else { + events_not_opened++; + } + } + + /* Close the group leaders last */ + for( i=0; inum_events; i++ ) { + + if (ctl->events[i].event_opened) { + + if (ctl->events[i].group_leader_fd==-1) { + if ( ctl->events[i].mmap_buf ) { + if ( munmap ( ctl->events[i].mmap_buf, + ctl->events[i].nr_mmap_pages * getpagesize() ) ) { + PAPIERROR( "munmap of fd = %d returned error: %s", + ctl->events[i].event_fd, strerror( errno ) ); + return PAPI_ESYS; + } + } + + + if ( close( ctl->events[i].event_fd ) ) { + PAPIERROR( "close of fd = %d returned error: %s", + ctl->events[i].event_fd, strerror( errno ) ); + return PAPI_ESYS; + } else { + num_closed++; + } + ctl->events[i].event_opened=0; + } + } + } + + + if (ctl->num_events!=num_closed) { + if (ctl->num_events!=(num_closed+events_not_opened)) { + PAPIERROR("Didn't close all events: " + "Closed %d Not Opened: %d Expected %d\n", + num_closed,events_not_opened,ctl->num_events); + return PAPI_EBUG; + } + } + + ctl->num_events=0; + + ctx->state &= ~PERF_EVENTS_OPENED; + + return PAPI_OK; +} + + + + +/********************************************************************/ +/* Component Interface */ +/********************************************************************/ + + + +/* Initialize a thread 
*/ +static int +_peu_init_thread( hwd_context_t *hwd_ctx ) +{ + + pe_context_t *pe_ctx = ( pe_context_t *) hwd_ctx; + + /* clear the context structure and mark as initialized */ + memset( pe_ctx, 0, sizeof ( pe_context_t ) ); + pe_ctx->initialized=1; + + pe_ctx->event_table=&uncore_native_event_table; + pe_ctx->cidx=our_cidx; + + return PAPI_OK; +} + +/* Initialize a new control state */ +static int +_peu_init_control_state( hwd_control_state_t *ctl ) +{ + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + /* clear the contents */ + memset( pe_ctl, 0, sizeof ( pe_control_t ) ); + + /* Set the default domain */ + _peu_set_domain( ctl, _perf_event_uncore_vector.cmp_info.default_domain ); + + /* Set the default granularity */ + pe_ctl->granularity=_perf_event_uncore_vector.cmp_info.default_granularity; + + pe_ctl->cidx=our_cidx; + + /* Set cpu number in the control block to show events */ + /* are not tied to specific cpu */ + pe_ctl->cpu = -1; + return PAPI_OK; +} + + + +/* Initialize the perf_event uncore component */ +static int +_peu_init_component( int cidx ) +{ + + int retval; + int paranoid_level; + + FILE *fff; + + our_cidx=cidx; + + /* The is the official way to detect if perf_event support exists */ + /* The file is called perf_counter_paranoid on 2.6.31 */ + /* currently we are lazy and do not support 2.6.31 kernels */ + + fff=fopen("/proc/sys/kernel/perf_event_paranoid","r"); + if (fff==NULL) { + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "perf_event support not detected",PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + retval=fscanf(fff,"%d",¶noid_level); + if (retval!=1) fprintf(stderr,"Error reading paranoid level\n"); + fclose(fff); + + + /* Run the libpfm4-specific setup */ + + retval = _papi_libpfm4_init(_papi_hwd[cidx]); + if (retval) { + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Error initializing libpfm4",PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + + + /* Run the uncore specific libpfm4 setup */ + + retval = 
_peu_libpfm4_init(_papi_hwd[cidx], our_cidx, + &uncore_native_event_table, + PMU_TYPE_UNCORE); + if (retval) { + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Error setting up libpfm4",PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + + /* Check if no uncore events found */ + + if (_papi_hwd[cidx]->cmp_info.num_native_events==0) { + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "No uncore PMUs or events found",PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + + /* Check if we have enough permissions for uncore */ + + /* 2 means no kernel measurements allowed */ + /* 1 means normal counter access */ + /* 0 means you can access CPU-specific data */ + /* -1 means no restrictions */ + + if ((paranoid_level>0) && (getuid()!=0)) { + strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason, + "Insufficient permissions for uncore access. Set /proc/sys/kernel/perf_event_paranoid to 0 or run as root.", + PAPI_MAX_STR_LEN); + return PAPI_ENOCMP; + } + + return PAPI_OK; + +} + +/* Shutdown the perf_event component */ +static int +_peu_shutdown_component( void ) { + + /* deallocate our event table */ + _pe_libpfm4_shutdown(&_perf_event_uncore_vector, + &uncore_native_event_table); + + /* Shutdown libpfm4 */ + _papi_libpfm4_shutdown(&_perf_event_uncore_vector); + + return PAPI_OK; +} + +/* This function clears the current contents of the control structure and + updates it with whatever resources are allocated for all the native events + in the native info structure array. */ + +int +_peu_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, hwd_context_t *ctx ) +{ + int i; + int j; + int ret; + int skipped_events=0; + struct native_event_t *ntv_evt; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + /* close all of the existing fds and start over again */ + /* In theory we could have finer-grained control and know if */ + /* things were changed, but it's easier to tear things down and rebuild. 
*/ + close_pe_events( pe_ctx, pe_ctl ); + + /* Calling with count==0 should be OK, it's how things are deallocated */ + /* when an eventset is destroyed. */ + if ( count == 0 ) { + SUBDBG( "Called with count == 0\n" ); + return PAPI_OK; + } + + /* set up all the events */ + for( i = 0; i < count; i++ ) { + if ( native ) { + // get the native event pointer used for this papi event + int ntv_idx = _papi_hwi_get_ntv_idx((unsigned)(native[i].ni_papi_code)); + if (ntv_idx < -1) { + SUBDBG("papi_event_code: %#x known by papi but not by the component\n", native[i].ni_papi_code); + continue; + } + // if native index is -1, then we have an event without a mask and need to find the right native index to use + if (ntv_idx == -1) { + // find the native event index we want by matching for the right papi event code + for (j=0 ; jevent_table->num_native_events ; j++) { + if (pe_ctx->event_table->native_events[j].papi_event_code == native[i].ni_papi_code) { + ntv_idx = j; + } + } + } + + // if native index is still negative, we did not find event we wanted so just return error + if (ntv_idx < 0) { + SUBDBG("papi_event_code: %#x not found in native event tables\n", native[i].ni_papi_code); + continue; + } + + // this native index is positive so there was a mask with the event, the ntv_idx identifies which native event to use + ntv_evt = (struct native_event_t *)(&(pe_ctx->event_table->native_events[ntv_idx])); + + SUBDBG("ntv_evt: %p\n", ntv_evt); + + SUBDBG("i: %d, pe_ctx->event_table->num_native_events: %d\n", i, pe_ctx->event_table->num_native_events); + + // Move this events hardware config values and other attributes to the perf_events attribute structure + memcpy (&pe_ctl->events[i].attr, &ntv_evt->attr, sizeof(perf_event_attr_t)); + + // may need to update the attribute structure with information from event set level domain settings (values set by PAPI_set_domain) + // only done if the event mask which controls each counting domain was not provided + + // get pointer to 
allocated name, will be NULL when adding preset events to event set + char *aName = ntv_evt->allocated_name; + if ((aName == NULL) || (strstr(aName, ":u=") == NULL)) { + SUBDBG("set exclude_user attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_user, !(pe_ctl->domain & PAPI_DOM_USER)); + pe_ctl->events[i].attr.exclude_user = !(pe_ctl->domain & PAPI_DOM_USER); + } + if ((aName == NULL) || (strstr(aName, ":k=") == NULL)) { + SUBDBG("set exclude_kernel attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_kernel, !(pe_ctl->domain & PAPI_DOM_KERNEL)); + pe_ctl->events[i].attr.exclude_kernel = !(pe_ctl->domain & PAPI_DOM_KERNEL); + } + + // set the cpu number provided with an event mask if there was one (will be -1 if mask not provided) + pe_ctl->events[i].cpu = ntv_evt->cpu; + // if cpu event mask not provided, then set the cpu to use to what may have been set on call to PAPI_set_opt (will still be -1 if not called) + if (pe_ctl->events[i].cpu == -1) { + pe_ctl->events[i].cpu = pe_ctl->cpu; + } + } else { + // This case happens when called from _pe_set_overflow and _pe_ctl + // Those callers put things directly into the pe_ctl structure so it is already set for the open call + } + + // Copy the inherit flag into the attribute block that will be passed to the kernel + pe_ctl->events[i].attr.inherit = pe_ctl->inherit; + + /* Set the position in the native structure */ + /* We just set up events linearly */ + if ( native ) { + native[i].ni_position = i; + SUBDBG( "&native[%d]: %p, ni_papi_code: %#x, ni_event: %#x, ni_position: %d, ni_owners: %d\n", + i, &(native[i]), native[i].ni_papi_code, native[i].ni_event, native[i].ni_position, native[i].ni_owners); + } + } + + if (count <= skipped_events) { + SUBDBG("EXIT: No events to count, they all contained invalid umasks\n"); + return PAPI_ENOEVNT; + } + + pe_ctl->num_events = count - skipped_events; + + /* actuall open the 
events */ + /* (why is this a separate function?) */ + ret = open_pe_events( pe_ctx, pe_ctl ); + if ( ret != PAPI_OK ) { + SUBDBG("open_pe_events failed\n"); + /* Restore values ? */ + return ret; + } + + SUBDBG( "EXIT: PAPI_OK\n" ); + return PAPI_OK; +} + +/********************************************************************/ +/********************************************************************/ +/* Start with functions that are exported via the module interface */ +/********************************************************************/ +/********************************************************************/ + + +/* set the domain. perf_events allows per-event control of this, papi allows it to be set at the event level or at the event set level. */ +/* this will set the event set level domain values but they only get used if no event level domain mask (u= or k=) was specified. */ +static int +_peu_set_domain( hwd_control_state_t *ctl, int domain) +{ + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + SUBDBG("old control domain %d, new domain %d\n", + pe_ctl->domain,domain); + + pe_ctl->domain = domain; + return PAPI_OK; +} + +/* Shutdown a thread */ +static int +_peu_shutdown_thread( hwd_context_t *ctx ) +{ + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + + pe_ctx->initialized=0; + + return PAPI_OK; +} + + +/* reset the hardware counters */ +/* Note: PAPI_reset() does not necessarily call this */ +/* unless the events are actually running. 
*/ /* Resets every event in the control state with PERF_EVENT_IOC_RESET; returns PAPI_ESYS on the first failing ioctl, PAPI_OK otherwise. */ +static int +_peu_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + int i, ret; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + ( void ) ctx; /*unused */ + + /* We need to reset all of the events, not just the group leaders */ + for( i = 0; i < pe_ctl->num_events; i++ ) { + ret = ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL ); + if ( ret == -1 ) { + PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) " + "returned error, Linux says: %s", + pe_ctl->events[i].event_fd, strerror( errno ) ); + return PAPI_ESYS; + } + } + + return PAPI_OK; +} + + +/* write (set) the hardware counters */ +/* Currently we do not support this: all three arguments are ignored and PAPI_ENOSUPP is always returned. */ +static int +_peu_write( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long *from ) +{ + ( void ) ctx; /*unused */ + ( void ) ctl; /*unused */ + ( void ) from; /*unused */ + /* + * Counters cannot be written. Do we need to virtualize the + * counters so that they can be written, or perhaps modify code so that + * they can be written? FIXME ? + */ + + return PAPI_ENOSUPP; +} + +/* + * perf_event provides a complicated read interface. + * the info returned by read() varies depending on whether + * you have PERF_FORMAT_GROUP, PERF_FORMAT_TOTAL_TIME_ENABLED, + * PERF_FORMAT_TOTAL_TIME_RUNNING, or PERF_FORMAT_ID set + * + * To simplify things we just always ask for everything. This might + * lead to overhead when reading more than we need, but it makes the + * read code a lot simpler than the original implementation we had here.
+ * + * For more info on the layout see include/linux/perf_event.h + * + */ + +static int +_peu_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n", ctx, ctl, events, flags); + + ( void ) flags; /*unused */ + int i, ret = -1; + /* pe_context_t *pe_ctx = ( pe_context_t *) ctx; */ + (void) ctx; /*unused*/ + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + long long papi_pe_buffer[READ_BUFFER_SIZE]; + long long tot_time_running, tot_time_enabled, scale; + + /* Handle case where we are multiplexing */ + if (pe_ctl->multiplexed) { + + /* currently we handle multiplexing by having individual events */ + /* so we read from each in turn. */ + + for ( i = 0; i < pe_ctl->num_events; i++ ) { + + ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer, + sizeof ( papi_pe_buffer ) ); + if ( ret == -1 ) { + PAPIERROR("read returned an error: ", strerror( errno )); + SUBDBG("EXIT: PAPI_ESYS\n"); + return PAPI_ESYS; + } + + /* We should read 3 64-bit values from the counter */ + if (ret<(signed)(3*sizeof(long long))) { + PAPIERROR("Error! 
short read!\n"); + SUBDBG("EXIT: PAPI_ESYS\n"); + return PAPI_ESYS; + } + + SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", + pe_ctl->events[i].event_fd, + (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret); + SUBDBG("read: %lld %lld %lld\n",papi_pe_buffer[0], + papi_pe_buffer[1],papi_pe_buffer[2]); + + tot_time_enabled = papi_pe_buffer[1]; + tot_time_running = papi_pe_buffer[2]; + + SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * " + "tot_time_enabled %lld) / tot_time_running %lld\n", + i, 0,papi_pe_buffer[0], + tot_time_enabled,tot_time_running); + + if (tot_time_running == tot_time_enabled) { + /* No scaling needed */ + pe_ctl->counts[i] = papi_pe_buffer[0]; + } else if (tot_time_running && tot_time_enabled) { + /* Scale factor of 100 to avoid overflows when computing */ + /*enabled/running */ + + scale = (tot_time_enabled * 100LL) / tot_time_running; + scale = scale * papi_pe_buffer[0]; + scale = scale / 100LL; + pe_ctl->counts[i] = scale; + } else { + /* This should not happen, but Phil reports it sometime does. */ + SUBDBG("perf_event kernel bug(?) count, enabled, " + "running: %lld, %lld, %lld\n", + papi_pe_buffer[0],tot_time_enabled, + tot_time_running); + + pe_ctl->counts[i] = papi_pe_buffer[0]; + } + } + } + + /* Handle cases where we cannot use FORMAT GROUP */ + else if (pe_ctl->inherit) { + + /* we must read each counter individually */ + for ( i = 0; i < pe_ctl->num_events; i++ ) { + + ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer, + sizeof ( papi_pe_buffer ) ); + if ( ret == -1 ) { + PAPIERROR("read returned an error: ", strerror( errno )); + SUBDBG("EXIT: PAPI_ESYS\n"); + return PAPI_ESYS; + } + + /* we should read one 64-bit value from each counter */ + if (ret!=sizeof(long long)) { + PAPIERROR("Error! 
short read!\n"); + PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", + pe_ctl->events[i].event_fd, + (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret); + SUBDBG("EXIT: PAPI_ESYS\n"); + return PAPI_ESYS; + } + + SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", + pe_ctl->events[i].event_fd, (long)pe_ctl->tid, + pe_ctl->events[i].cpu, ret); + SUBDBG("read: %lld\n",papi_pe_buffer[0]); + + pe_ctl->counts[i] = papi_pe_buffer[0]; + } + } + + + /* Handle cases where we are using FORMAT_GROUP */ + /* We assume only one group leader, in position 0 */ + + else { + if (pe_ctl->events[0].group_leader_fd!=-1) { + PAPIERROR("Was expecting group leader!\n"); + } + + ret = read( pe_ctl->events[0].event_fd, papi_pe_buffer, + sizeof ( papi_pe_buffer ) ); + + if ( ret == -1 ) { + PAPIERROR("read returned an error: ", strerror( errno )); + SUBDBG("EXIT: PAPI_ESYS\n"); + return PAPI_ESYS; + } + + /* we read 1 64-bit value (number of events) then */ + /* num_events more 64-bit values that hold the counts */ + if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) { + PAPIERROR("Error! short read!\n"); + SUBDBG("EXIT: PAPI_ESYS\n"); + return PAPI_ESYS; + } + + SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", + pe_ctl->events[0].event_fd, + (long)pe_ctl->tid, pe_ctl->events[0].cpu, ret); + { + int j; + for(j=0;jnum_events) { + PAPIERROR("Error! Wrong number of events!\n"); + SUBDBG("EXIT: PAPI_ESYS\n"); + return PAPI_ESYS; + } + + /* put the count values in their proper location */ + for(i=0;inum_events;i++) { + pe_ctl->counts[i] = papi_pe_buffer[1+i]; + } + } + + /* point PAPI to the values we read */ + *events = pe_ctl->counts; + + SUBDBG("EXIT: PAPI_OK\n"); + return PAPI_OK; +} + +/* Start counting events */ +static int +_peu_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + int ret; + int i; + int did_something = 0; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + /* Reset the counters first. 
Is this necessary? */ + ret = _peu_reset( pe_ctx, pe_ctl ); + if ( ret ) { + return ret; + } + + /* Enable all of the group leaders */ + /* All group leaders have a group_leader_fd of -1 */ + for( i = 0; i < pe_ctl->num_events; i++ ) { + if (pe_ctl->events[i].group_leader_fd == -1) { + SUBDBG("ioctl(enable): fd: %d\n", pe_ctl->events[i].event_fd); + ret=ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_ENABLE, NULL) ; + + /* ioctls always return -1 on failure */ + if (ret == -1) { + PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed.\n"); + return PAPI_ESYS; + } + + did_something++; + } + } + + if (!did_something) { + PAPIERROR("Did not enable any counters.\n"); + return PAPI_EBUG; + } + + pe_ctx->state |= PERF_EVENTS_RUNNING; + + return PAPI_OK; + +} + +/* Stop all of the counters */ +static int +_peu_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + + int ret; + int i; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = ( pe_control_t *) ctl; + + /* Just disable the group leaders */ + for ( i = 0; i < pe_ctl->num_events; i++ ) { + if ( pe_ctl->events[i].group_leader_fd == -1 ) { + ret=ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_DISABLE, NULL); + if ( ret == -1 ) { + PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) " + "returned error, Linux says: %s", + pe_ctl->events[i].event_fd, strerror( errno ) ); + return PAPI_EBUG; + } + } + } + + pe_ctx->state &= ~PERF_EVENTS_RUNNING; + + return PAPI_OK; +} + +/* Set various options on a control state */ +static int +_peu_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option ) +{ + int ret; + pe_context_t *pe_ctx = ( pe_context_t *) ctx; + pe_control_t *pe_ctl = NULL; + + switch ( code ) { + case PAPI_MULTIPLEX: + pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state ); + + pe_ctl->multiplexed = 1; + ret = _peu_update_control_state( pe_ctl, NULL, + pe_ctl->num_events, pe_ctx ); + if (ret != PAPI_OK) { + pe_ctl->multiplexed = 0; + } + return ret; + + case PAPI_ATTACH: + 
pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state ); + + pe_ctl->tid = option->attach.tid; + + /* If events have been already been added, something may */ + /* have been done to the kernel, so update */ + ret =_peu_update_control_state( pe_ctl, NULL, + pe_ctl->num_events, pe_ctx); + + return ret; + + case PAPI_DETACH: + pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state ); + + pe_ctl->tid = 0; + return PAPI_OK; + + case PAPI_CPU_ATTACH: + pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state ); + + /* this tells the kernel not to count for a thread */ + /* should we warn if we try to set both? perf_event */ + /* will reject it. */ + pe_ctl->tid = -1; + + pe_ctl->cpu = option->cpu.cpu_num; + + return PAPI_OK; + + case PAPI_DOMAIN: + pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state ); + + /* looks like we are allowed, so set event set level counting domains */ + pe_ctl->domain = option->domain.domain; + return PAPI_OK; + + case PAPI_GRANUL: + pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state ); + + /* FIXME: we really don't support this yet */ + + switch ( option->granularity.granularity ) { + case PAPI_GRN_PROCG: + case PAPI_GRN_SYS_CPU: + case PAPI_GRN_PROC: + return PAPI_ECMP; + + /* Currently we only support thread and CPU granularity */ + case PAPI_GRN_SYS: + pe_ctl->granularity=PAPI_GRN_SYS; + break; + + case PAPI_GRN_THR: + pe_ctl->granularity=PAPI_GRN_THR; + break; + + + default: + return PAPI_EINVAL; + } + return PAPI_OK; + + case PAPI_INHERIT: + pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state ); + + if (option->inherit.inherit) { + /* children will inherit counters */ + pe_ctl->inherit = 1; + } else { + /* children won't inherit counters */ + pe_ctl->inherit = 0; + } + return PAPI_OK; + + case PAPI_DATA_ADDRESS: + return PAPI_ENOSUPP; + + case PAPI_INSTR_ADDRESS: + return PAPI_ENOSUPP; + + case PAPI_DEF_ITIMER: + return PAPI_ENOSUPP; + + case PAPI_DEF_MPX_NS: + return PAPI_ENOSUPP; + + case 
PAPI_DEF_ITIMER_NS: + return PAPI_ENOSUPP; + + default: + return PAPI_ENOSUPP; + } +} + + +static int +_peu_ntv_enum_events( unsigned int *PapiEventCode, int modifier ) +{ + + if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT; + + + return _pe_libpfm4_ntv_enum_events(PapiEventCode, modifier, our_cidx, + &uncore_native_event_table); +} + +static int +_peu_ntv_name_to_code( const char *name, unsigned int *event_code) { + + if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT; + + return _pe_libpfm4_ntv_name_to_code(name,event_code, our_cidx, + &uncore_native_event_table); +} + +static int +_peu_ntv_code_to_name(unsigned int EventCode, + char *ntv_name, int len) { + + if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT; + + return _pe_libpfm4_ntv_code_to_name(EventCode, + ntv_name, len, + &uncore_native_event_table); +} + +static int +_peu_ntv_code_to_descr( unsigned int EventCode, + char *ntv_descr, int len) { + + if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT; + + return _pe_libpfm4_ntv_code_to_descr(EventCode,ntv_descr,len, + &uncore_native_event_table); +} + +static int +_peu_ntv_code_to_info(unsigned int EventCode, + PAPI_event_info_t *info) { + + if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT; + + return _pe_libpfm4_ntv_code_to_info(EventCode, info, + &uncore_native_event_table); +} + +/* Our component vector */ + +papi_vector_t _perf_event_uncore_vector = { + .cmp_info = { + /* component information (unspecified values initialized to 0) */ + .name = "perf_event_uncore", + .short_name = "peu", + .version = "5.0", + .description = "Linux perf_event CPU uncore and northbridge", + + .default_domain = PAPI_DOM_ALL, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + + .num_mpx_cntrs = PERF_EVENT_MAX_MPX_COUNTERS, + + /* component specific cmp_info initializations 
*/ + .fast_virtual_timer = 0, + .attach = 1, + .attach_must_ptrace = 1, + .cpu = 1, + .inherit = 1, + .cntr_umasks = 1, + + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( pe_context_t ), + .control_state = sizeof ( pe_control_t ), + .reg_value = sizeof ( int ), + .reg_alloc = sizeof ( int ), + }, + + /* function pointers in this component */ + .init_component = _peu_init_component, + .shutdown_component = _peu_shutdown_component, + .init_thread = _peu_init_thread, + .init_control_state = _peu_init_control_state, + .start = _peu_start, + .stop = _peu_stop, + .read = _peu_read, + .shutdown_thread = _peu_shutdown_thread, + .ctl = _peu_ctl, + .update_control_state = _peu_update_control_state, + .set_domain = _peu_set_domain, + .reset = _peu_reset, + .write = _peu_write, + + /* from counter name mapper */ + .ntv_enum_events = _peu_ntv_enum_events, + .ntv_name_to_code = _peu_ntv_name_to_code, + .ntv_code_to_name = _peu_ntv_code_to_name, + .ntv_code_to_descr = _peu_ntv_code_to_descr, + .ntv_code_to_info = _peu_ntv_code_to_info, +}; + + diff --git a/src/components/perf_event_uncore/tests/Makefile b/src/components/perf_event_uncore/tests/Makefile new file mode 100644 index 0000000..3ee8fc2 --- /dev/null +++ b/src/components/perf_event_uncore/tests/Makefile @@ -0,0 +1,38 @@ +NAME=perf_event_uncore +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = perf_event_uncore perf_event_uncore_attach perf_event_uncore_multiple \ + perf_event_amd_northbridge perf_event_uncore_cbox + +DOLOOPS= $(testlibdir)/do_loops.o + +perf_event_uncore_tests: $(TESTS) + + +perf_event_uncore_lib.o: perf_event_uncore_lib.c perf_event_uncore_lib.h + $(CC) $(CFLAGS) $(INCLUDE) -c perf_event_uncore_lib.c + + +perf_event_amd_northbridge: perf_event_amd_northbridge.o $(DOLOOPS) $(UTILOBJS) $(PAPILIB) $(DOLOOPS) + $(CC) $(LFLAGS) -o perf_event_amd_northbridge perf_event_amd_northbridge.o 
$(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) + +perf_event_uncore: perf_event_uncore.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) perf_event_uncore_lib.o + $(CC) $(LFLAGS) -o perf_event_uncore perf_event_uncore.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) + +perf_event_uncore_attach: perf_event_uncore_attach.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) perf_event_uncore_lib.o + $(CC) $(LFLAGS) -o perf_event_uncore_attach perf_event_uncore_attach.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) + +perf_event_uncore_multiple: perf_event_uncore_multiple.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) + $(CC) $(LFLAGS) $(INCLUDE) -o perf_event_uncore_multiple perf_event_uncore_multiple.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) + +perf_event_uncore_cbox: perf_event_uncore_cbox.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) + $(CC) $(LFLAGS) $(INCLUDE) -o perf_event_uncore_cbox perf_event_uncore_cbox.o perf_event_uncore_lib.o $(UTILOBJS) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) + + + +clean: + rm -f $(TESTS) *.o *~ + diff --git a/src/components/perf_event_uncore/tests/perf_event_amd_northbridge.c b/src/components/perf_event_uncore/tests/perf_event_amd_northbridge.c new file mode 100644 index 0000000..8af4ad7 --- /dev/null +++ b/src/components/perf_event_uncore/tests/perf_event_amd_northbridge.c @@ -0,0 +1,180 @@ +/* + * This file tests uncore events on AMD fam15h Northbridge machines + * The Linux perf_event developers introduced fam15h Northbridge + * support in Linux 3.9 with an interfae similar to fam10h + * where the events were part of the core CPU + * They broke the ABI with Linux 3.10 and made fam15h NB a separate + * PMU, like the Intel uncore support. 
+ */ + +#include <stdio.h> +#include <string.h> + +#include <sys/utsname.h> + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int main( int argc, char **argv ) { + + int retval; + int EventSet = PAPI_NULL; + long long values[1]; + char event_name[BUFSIZ]; + int uncore_cidx=-1; + const PAPI_hw_info_t *hwinfo; + int quiet; + struct utsname uname_info; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + uname(&uname_info); + if (!quiet) printf("Found Linux %s\n",uname_info.release); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Check for AMD machine */ + hwinfo = PAPI_get_hardware_info(); + if ( hwinfo == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 0 ); + } + + if (hwinfo->vendor != PAPI_VENDOR_AMD) { + if (!quiet) printf("Test only for AMD machines\n"); + test_skip(__FILE__,__LINE__,"Test only for AMD processor",0); + } + + if ( hwinfo->cpuid_family != 21) { + if (!quiet) printf("Test only for fam15h AMD machines\n"); + test_skip(__FILE__,__LINE__,"Test only for fam15h AMD processor",0); + } + + if (!strncmp(uname_info.release,"3.9",3) && (uname_info.release[3]<'0' || uname_info.release[3]>'9')) { /* release usually carries a suffix (e.g. "3.9.4-200"), so match the version prefix instead of the whole string */ + + if (!quiet) printf("Detected 3.9 kernel, using perf_event\n"); + + /* For kernel 3.9 use regular CPU component */ + + /* Find the uncore PMU */ + uncore_cidx=PAPI_get_component_index("perf_event"); + if (uncore_cidx<0) { + test_skip(__FILE__,__LINE__,"perf_event component not found",0); + } + + /* Get a relevant event name */ + strncpy(event_name,"DRAM_ACCESSES:ALL", BUFSIZ); + } + else { + + /* 3.10 and later */ + + if (!quiet) { + printf("Detected > 3.9 kernel, using perf_event_uncore\n"); + } + + /* Find the uncore PMU */ + uncore_cidx=PAPI_get_component_index("perf_event_uncore"); + if (uncore_cidx<0) { + test_skip(__FILE__,__LINE__,"perf_event_uncore component not found",0); + } + + /* Get a relevant event name */ + /* This might change once
libpfm4 gets new fam15h NB support */ + strncpy(event_name,"DRAM_ACCESSES:ALL", BUFSIZ); + } + + /* Create an eventset */ + retval = PAPI_create_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + /* Set a component for the EventSet */ + retval = PAPI_assign_eventset_component(EventSet, uncore_cidx); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_assign_eventset_component",retval); } /* the result used to be overwritten unchecked */ + + /* we need to set to a certain cpu for uncore to work */ + + PAPI_cpu_option_t cpu_opt; + + cpu_opt.eventset=EventSet; + cpu_opt.cpu_num=0; + + retval = PAPI_set_opt(PAPI_CPU_ATTACH,(PAPI_option_t*)&cpu_opt); + if (retval != PAPI_OK) { + test_skip( __FILE__, __LINE__, + "this test; trying to PAPI_CPU_ATTACH; need to run as root", + retval); + } + + /* we need to set the granularity to system-wide for uncore to work */ + + PAPI_granularity_option_t gran_opt; + + gran_opt.def_cidx=0; + gran_opt.eventset=EventSet; + gran_opt.granularity=PAPI_GRN_SYS; + + retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_GRN_SYS", + retval); + } + + /* we need to set domain to be as inclusive as possible */ + + PAPI_domain_option_t domain_opt; + + domain_opt.def_cidx=0; + domain_opt.eventset=EventSet; + domain_opt.domain=PAPI_DOM_ALL; + + retval = PAPI_set_opt(PAPI_DOMAIN,(PAPI_option_t*)&domain_opt); + if (retval != PAPI_OK) { + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_DOM_ALL; need to run as root", + retval); + } + + /* Add our uncore event */ + retval = PAPI_add_named_event(EventSet, event_name); + if (retval != PAPI_OK) { + if ( !quiet ) { + fprintf(stderr,"Error trying to use event %s\n", event_name); + } + test_fail(__FILE__, __LINE__, "adding uncore event",retval); + } + + + /* Start PAPI */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + /* our work code */ + do_flops( NUM_FLOPS ); + + /* Stop PAPI */ +
retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("AMD fam15h Northbridge test:\n"); + printf("Using event %s\n",event_name); + printf("\t%s: %lld\n",event_name,values[0]); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/components/perf_event_uncore/tests/perf_event_uncore.c b/src/components/perf_event_uncore/tests/perf_event_uncore.c new file mode 100644 index 0000000..b055b8f --- /dev/null +++ b/src/components/perf_event_uncore/tests/perf_event_uncore.c @@ -0,0 +1,110 @@ +/* + * This file tests uncore events on perf_event kernels + * + * In this test we use the :cpu=0 way of attaching to the CPU + * rather than the legacy PAPI way. + */ + +#include <stdio.h> +#include <string.h> + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#include "perf_event_uncore_lib.h" + +int main( int argc, char **argv ) { + + int retval,quiet; + int EventSet = PAPI_NULL; + long long values[1]; + char *uncore_event=NULL; + char event_name[BUFSIZ]; + int uncore_cidx=-1; + const PAPI_component_info_t *info; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if (!quiet) { + printf("Testing the :cpu=0 way of attaching an uncore event to a core\n"); + } + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Find the uncore PMU */ + uncore_cidx=PAPI_get_component_index("perf_event_uncore"); + if (uncore_cidx<0) { + if (!quiet) { + printf("perf_event_uncore component not found\n"); + } + test_skip(__FILE__,__LINE__,"perf_event_uncore component not found",0); + } + + /* Check if component disabled */ + info=PAPI_get_component_info(uncore_cidx); + if (info->disabled) { + if (!quiet) { + printf("perf_event_uncore component is disabled\n"); + } + test_skip(__FILE__,__LINE__,"uncore component disabled",0); + } + + /* Get
a relevant event name */ + uncore_event=get_uncore_event(event_name, BUFSIZ); + if (uncore_event==NULL) { + if (!quiet) { + printf("uncore event name not available\n"); + } + test_skip( __FILE__, __LINE__, + "PAPI does not support uncore on this processor", + PAPI_ENOSUPP ); + } + + snprintf(uncore_event+strlen(uncore_event),BUFSIZ-strlen(uncore_event),":cpu=0"); /* append in place: sprintf() with the same buffer as source and destination is undefined; assumes uncore_event is the start of event_name - TODO confirm */ + + /* Create an eventset */ + retval = PAPI_create_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + /* Add our uncore event */ + retval = PAPI_add_named_event(EventSet, uncore_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + printf("Error trying to use event %s\n", uncore_event); + } + test_fail(__FILE__, __LINE__, "adding uncore event",retval); + } + + /* Start PAPI */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + /* our work code */ + do_flops( NUM_FLOPS ); + + /* Stop PAPI */ + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\tUsing event %s\n",uncore_event); + printf("\t%s: %lld\n",uncore_event,values[0]); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/components/perf_event_uncore/tests/perf_event_uncore_attach.c b/src/components/perf_event_uncore/tests/perf_event_uncore_attach.c new file mode 100644 index 0000000..51999bf --- /dev/null +++ b/src/components/perf_event_uncore/tests/perf_event_uncore_attach.c @@ -0,0 +1,160 @@ +/* + * This file tests uncore events on perf_event kernels + * + * It uses the older PAPI_set_opt() way of specifying the CPU/granularity + * rather than the new :cpu=0 method + */ + +#include <stdio.h> + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#include "perf_event_uncore_lib.h" + +int main( int argc, char **argv ) { + + int retval,quiet; + int EventSet = PAPI_NULL; + long long values[1]; + char
*uncore_event=NULL; + char event_name[BUFSIZ]; + int uncore_cidx=-1; + const PAPI_component_info_t *info; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if (!quiet) { + printf("Testing creating an uncore event using PAPI_set_opt() to specify CPU\n"); + } + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Find the uncore PMU */ + uncore_cidx=PAPI_get_component_index("perf_event_uncore"); + if (uncore_cidx<0) { + if (!quiet) { + printf("perf_event_uncore component not found\n"); + } + test_skip(__FILE__,__LINE__,"perf_event_uncore component not found",0); + } + + /* Check if component disabled */ + info=PAPI_get_component_info(uncore_cidx); + if (info->disabled) { + if (!quiet) { + printf("perf_event_uncore component is disabled\n"); + } + test_skip(__FILE__,__LINE__,"uncore component disabled",0); + } + + /* Get a relevant event name */ + uncore_event=get_uncore_event(event_name, BUFSIZ); + if (uncore_event==NULL) { + if (!quiet) { + printf("uncore event name not available\n"); + } + test_skip( __FILE__, __LINE__, + "PAPI does not support uncore on this processor", + PAPI_ENOSUPP ); + } + + /* Create an eventset */ + retval = PAPI_create_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + /* Set a component for the EventSet */ + retval = PAPI_assign_eventset_component(EventSet, uncore_cidx); if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_assign_eventset_component",retval); } /* the result used to be overwritten unchecked */ + + /* we need to set to a certain cpu for uncore to work */ + + PAPI_cpu_option_t cpu_opt; + + cpu_opt.eventset=EventSet; + cpu_opt.cpu_num=0; + + retval = PAPI_set_opt(PAPI_CPU_ATTACH,(PAPI_option_t*)&cpu_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Could not cpu attach\n"); + } + test_skip( __FILE__, __LINE__, + "this test; trying to PAPI_CPU_ATTACH; need to run as root", + retval); + } + + /* we need to set the granularity
to system-wide for uncore to work */ + + PAPI_granularity_option_t gran_opt; + + gran_opt.def_cidx=0; + gran_opt.eventset=EventSet; + gran_opt.granularity=PAPI_GRN_SYS; + + retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_GRN_SYS", + retval); + } + + /* we need to set domain to be as inclusive as possible */ + + PAPI_domain_option_t domain_opt; + + domain_opt.def_cidx=0; + domain_opt.eventset=EventSet; + domain_opt.domain=PAPI_DOM_ALL; + + retval = PAPI_set_opt(PAPI_DOMAIN,(PAPI_option_t*)&domain_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("could not set PAPI_DOM_ALL\n"); + } + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_DOM_ALL; need to run as root", + retval); + } + + /* Add our uncore event */ + retval = PAPI_add_named_event(EventSet, uncore_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + printf("Error trying to use event %s\n", uncore_event); + } + test_fail(__FILE__, __LINE__, "adding uncore event",retval); + } + + /* Start PAPI */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + /* our work code */ + do_flops( NUM_FLOPS ); + + /* Stop PAPI */ + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("\tUsing event %s\n",uncore_event); + printf("\t%s: %lld\n",uncore_event,values[0]); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c b/src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c new file mode 100644 index 0000000..9544edc --- /dev/null +++ b/src/components/perf_event_uncore/tests/perf_event_uncore_cbox.c @@ -0,0 +1,189 @@ +/* + * This file tests cbox uncore events on Intel Processors + */ + +#include <stdio.h> +#include <string.h> + +#include "papi.h" +#include
"papi_test.h" + +#include "do_loops.h" + +#include "perf_event_uncore_lib.h" + +#define EVENTS_TO_TRY 16 +#define MAX_PACKAGES 4 + +int main( int argc, char **argv ) { + + int retval,i,j,quiet; + int EventSet[EVENTS_TO_TRY][MAX_PACKAGES]; + long long values[EVENTS_TO_TRY][MAX_PACKAGES]; + char event_name[BUFSIZ]; + char uncore_base[BUFSIZ]; + char uncore_event[BUFSIZ]; + int uncore_cidx=-1; + int max_cbox=0; + int core_to_use=0; + char *result; + + const PAPI_hw_info_t *hwinfo; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Find the uncore PMU */ + uncore_cidx=PAPI_get_component_index("perf_event_uncore"); + if (uncore_cidx<0) { + if (!quiet) { + printf("perf_event_uncore component not found\n"); + } + test_skip(__FILE__,__LINE__,"perf_event_uncore component not found",0); + } + + /* Get hardware info */ + hwinfo = PAPI_get_hardware_info(); + if ( hwinfo == NULL ) { + test_fail(__FILE__,__LINE__,"PAPI_get_hardware_info()",retval); + } + + /* Get event to use */ + if (hwinfo->vendor == PAPI_VENDOR_INTEL) { + result=get_uncore_cbox_event(event_name,uncore_base,BUFSIZ); + + if (result==NULL) { + if (!quiet) { + printf("No event available\n"); + } + test_skip( __FILE__, __LINE__, + "No event available", PAPI_ENOSUPP ); + } + } + else { + if (!quiet) { + printf("We only support Intel for now\n"); + } + test_skip( __FILE__, __LINE__, + "This test only supported Intel chips", PAPI_ENOSUPP ); + } + + if (!quiet) { + printf("Trying for %d sockets\n",hwinfo->sockets); + printf("threads %d cores %d ncpus %d\n", hwinfo->threads,hwinfo->cores, + hwinfo->ncpu); + } + + for(i=0;i < hwinfo->sockets; i++) { + + /* perf_event provides which to use in "cpumask" */ + /* but libpfm4 doesn't report this back to us (yet) */ + core_to_use=i*hwinfo->threads*hwinfo->cores; + 
if (!quiet) { + printf("Using core %d for socket %d\n",core_to_use,i); + } + + for(j=0;jsockets; i++) { + + for(j=0;jsockets; i++) { + for(j=0;jsockets; i++) { + printf("Socket %d\n",i); + for(j=0;j +#include +#include + +#include "papi.h" + +char *get_uncore_event(char *event, int size) { + + const PAPI_hw_info_t *hwinfo; + + hwinfo = PAPI_get_hardware_info(); + if ( hwinfo == NULL ) { + return NULL; + } + + if (hwinfo->vendor == PAPI_VENDOR_INTEL) { + + if ( hwinfo->cpuid_family == 6) { + + switch(hwinfo->cpuid_model) { + + case 26: + case 30: + case 31: /* Nehalem */ + case 46: /* Nehalem EX */ + strncpy(event,"nhm_unc::UNC_CLK_UNHALTED",size); + return event; + break; + + case 37: + case 44: /* Westmere */ + case 47: /* Westmere EX */ + strncpy(event,"wsm_unc::UNC_CLK_UNHALTED",size); + return event; + break; + + case 42: /* SandyBridge */ + strncpy(event,"snb_unc_cbo0::UNC_CLOCKTICKS",size); + return event; + break; + + case 58: /* IvyBridge */ + strncpy(event,"ivb_unc_cbo0::UNC_CLOCKTICKS",size); + return event; + break; + + case 62: /* Ivy Trail */ + case 45: /* SandyBridge EP */ + strncpy(event,"snbep_unc_imc0::UNC_M_CLOCKTICKS",size); + return event; + break; + + case 60: + case 70: + case 69: /* Haswell: note libpfm4 has no haswell unc support */ + return NULL; + break; + + case 63: /*haswell EP*/ + strncpy(event,"hswep_unc_cbo0::UNC_C_CLOCKTICKS",size); + return event; + break; + + case 61: + case 71: + case 86: /* Broadwell: note libpfm4 has no broadwell unc support */ + return NULL; + break; + + case 79: /* Broadwell-EP */ + strncpy(event,"bdx_unc_cbo0::UNC_C_CLOCKTICKS",size); + return event; + break; + + case 78: + case 94: /* Skylake: note libpfm4 has no skylake unc support */ + return NULL; + break; + + case 85: /* Skylake-X */ + /* note libpfm4 has no skylake-x unc support */ + return NULL; + break; + + case 87: /*Knights Landing*/ + strncpy(event,"knl_unc_imc0::UNC_M_D_CLOCKTICKS",size); + return event; + break; + } + } + return NULL; + } + else 
if (hwinfo->vendor == PAPI_VENDOR_AMD) { + if ( hwinfo->cpuid_family == 21) { + /* For kernel 3.9 at least */ + strncpy(event,"DRAM_ACCESSES:ALL",size); + return event; + } + return NULL; + } + + return NULL; +} + +/* Choose an uncore event for the running CPU: copies the event name into event_name and the PMU base name into uncore_base (at most size bytes each, via strncpy) and returns event_name. Returns NULL when PAPI hardware info is unavailable, the vendor is not Intel, or the family/model has no usable entry below. strncpy does not NUL-terminate on truncation, so size must exceed the longest string copied. */ +char *get_uncore_cbox_event(char *event_name, char *uncore_base, int size) { + + const PAPI_hw_info_t *hwinfo; + + hwinfo = PAPI_get_hardware_info(); + if ( hwinfo == NULL ) { + return NULL; + } + + if (hwinfo->vendor == PAPI_VENDOR_INTEL) { + + if ( hwinfo->cpuid_family == 6) { + + switch(hwinfo->cpuid_model) { + + case 26: + case 30: + case 31: /* Nehalem */ + case 46: /* Nehalem EX */ + /* No CBOX event? */ + return NULL; + break; + + case 37: + case 44: /* Westmere */ + case 47: /* Westmere EX */ + /* No CBOX event? */ + return NULL; + break; + + case 42: /* SandyBridge */ + strncpy(event_name,"UNC_CBO_CACHE_LOOKUP:STATE_I:ANY_FILTER",size); + strncpy(uncore_base,"snb_unc_cbo",size); + return event_name; + break; + + case 58: /* IvyBridge */ + strncpy(event_name,"UNC_CBO_CACHE_LOOKUP:STATE_I:ANY_FILTER",size); + strncpy(uncore_base,"ivb_unc_cbo",size); + return event_name; + break; + + case 62: /* Ivy Trail */ + case 45: /* SandyBridge EP */ + strncpy(event_name,"UNC_C_TOR_OCCUPANCY:ALL",size); + strncpy(uncore_base,"snbep_unc_cbo",size); + return event_name; + break; + + case 60: + case 70: + case 69: /* Haswell: note libpfm4 has no haswell unc support */ + return NULL; + break; + + case 63: /*haswell EP*/ + strncpy(event_name,"UNC_C_COUNTER0_OCCUPANCY",size); + strncpy(uncore_base,"hswep_unc_cbo",size); + return event_name; + break; + + case 61: + case 71: + case 86: /* Broadwell: note libpfm4 has no broadwell unc support */ + return NULL; + break; + + case 79: /* Broadwell-EP */ + strncpy(event_name,"UNC_C_COUNTER0_OCCUPANCY",size); + strncpy(uncore_base,"bdx_unc_cbo",size); + return event_name; + break; + + case 78: + case 94: /* Skylake: note libpfm4 has no skylake unc support */ + return NULL; + break; + + case 85: /* Skylake-X */ + /* note 
libpfm4 has no skylake-x unc support */ + return NULL; + break; + + case 87: /*Knights Landing*/ + strncpy(event_name,"UNC_M_D_CLOCKTICKS",size); + strncpy(uncore_base,"knl_unc_imc",size); + return event_name; + break; + } + } + return NULL; + } + + return NULL; +} diff --git a/src/components/perf_event_uncore/tests/perf_event_uncore_lib.h b/src/components/perf_event_uncore/tests/perf_event_uncore_lib.h new file mode 100644 index 0000000..a7f136e --- /dev/null +++ b/src/components/perf_event_uncore/tests/perf_event_uncore_lib.h @@ -0,0 +1,2 @@ +char *get_uncore_event(char *event, int size); +char *get_uncore_cbox_event(char *event_name, char *uncore_base, int size); diff --git a/src/components/perf_event_uncore/tests/perf_event_uncore_multiple.c b/src/components/perf_event_uncore/tests/perf_event_uncore_multiple.c new file mode 100644 index 0000000..d38e579 --- /dev/null +++ b/src/components/perf_event_uncore/tests/perf_event_uncore_multiple.c @@ -0,0 +1,192 @@ +/* + * This file tests measuring uncore and non-uncore events at the same time + * + * Despite perf_event supporting this, PAPI had to do this with + * separate event sets on separate components. + * + * PAPI does not allow two eventsets to be running simultaneously + * on the same component, nor does it allow events in the same + * event set to have different domains/granularities. 
+ */ + +#include <stdio.h> + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#include "perf_event_uncore_lib.h" +/* Measures one uncore event (EventSet: perf_event_uncore component, attached to CPU 0, system-wide granularity, all domains) together with PAPI_TOT_CYC (EventSet2) around do_flops(); skips when the component, a suitable event, or one of the required options is unavailable. */ +int main( int argc, char **argv ) { + + int retval,quiet; + int EventSet = PAPI_NULL; + int EventSet2 = PAPI_NULL; + long long values[1],values2[1]; + char *uncore_event=NULL; + char event_name[BUFSIZ]; + int uncore_cidx=-1; + const PAPI_component_info_t *info; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Find the uncore PMU */ + uncore_cidx=PAPI_get_component_index("perf_event_uncore"); + if (uncore_cidx<0) { + if (!quiet) { + printf("perf_event_uncore component not found\n"); + } + test_skip(__FILE__,__LINE__,"perf_event_uncore component not found",0); + } + + /* Check if component disabled */ + info=PAPI_get_component_info(uncore_cidx); + if (info->disabled) { + if (!quiet) { + printf("perf_event_uncore component disabled\n"); + } + test_skip(__FILE__,__LINE__,"uncore component disabled",0); + } + + /* Get a relevant event name */ + uncore_event=get_uncore_event(event_name, BUFSIZ); + if (uncore_event==NULL) { + if (!quiet) { + printf("Could not find an uncore event for this processor\n"); + } + test_skip( __FILE__, __LINE__, + "PAPI does not support uncore on this processor", + PAPI_ENOSUPP ); + } + + /* Create an eventset */ + retval = PAPI_create_eventset(&EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + /* Create another eventset */ + retval = PAPI_create_eventset(&EventSet2); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset",retval); + } + + /* Set a component for the EventSet */ + retval = PAPI_assign_eventset_component(EventSet, uncore_cidx); + if (retval != PAPI_OK) { test_fail(__FILE__, __LINE__, "PAPI_assign_eventset_component",retval); } + /* we need to set to a certain cpu for uncore to work */ + + 
PAPI_cpu_option_t cpu_opt; + + cpu_opt.eventset=EventSet; + cpu_opt.cpu_num=0; + + retval = PAPI_set_opt(PAPI_CPU_ATTACH,(PAPI_option_t*)&cpu_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Could not PAPI_CPU_ATTACH\n"); + } + test_skip( __FILE__, __LINE__, + "this test; trying to PAPI_CPU_ATTACH; need to run as root", + retval); + } + + /* we need to set the granularity to system-wide for uncore to work */ + + PAPI_granularity_option_t gran_opt; + + gran_opt.def_cidx=0; + gran_opt.eventset=EventSet; + gran_opt.granularity=PAPI_GRN_SYS; + + retval = PAPI_set_opt(PAPI_GRANUL,(PAPI_option_t*)&gran_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Could not set PAPI_GRN_SYS\n"); + } + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_GRN_SYS", + retval); + } + + /* we need to set domain to be as inclusive as possible */ + + PAPI_domain_option_t domain_opt; + + domain_opt.def_cidx=0; + domain_opt.eventset=EventSet; + domain_opt.domain=PAPI_DOM_ALL; + + retval = PAPI_set_opt(PAPI_DOMAIN,(PAPI_option_t*)&domain_opt); + if (retval != PAPI_OK) { + if (!quiet) { + printf("Could not set PAPI_DOM_ALL\n"); + } + test_skip( __FILE__, __LINE__, + "this test; trying to set PAPI_DOM_ALL; need to run as root", + retval); + } + + /* Add our uncore event */ + retval = PAPI_add_named_event(EventSet, uncore_event); + if (retval != PAPI_OK) { + if ( !quiet ) { + printf("Error trying to use event %s\n", uncore_event); + } + test_fail(__FILE__, __LINE__, "adding uncore event ",retval); + } + + /* Add PAPI_TOT_CYC */ + retval = PAPI_add_named_event(EventSet2, "PAPI_TOT_CYC"); + if (retval != PAPI_OK) { + if ( !quiet ) { + printf("Error trying to add PAPI_TOT_CYC\n"); + } + test_fail(__FILE__, __LINE__, "adding PAPI_TOT_CYC ",retval); + } + + + /* Start PAPI */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + retval = PAPI_start( EventSet2 ); + if ( retval != PAPI_OK ) { + 
test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + /* our work code */ + do_flops( NUM_FLOPS ); + + /* Stop PAPI */ + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + retval = PAPI_stop( EventSet2, values2 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf("Uncore and regular event test:\n"); + printf("Using uncore event %s\n",uncore_event); + printf("\t%s: %lld\n",uncore_event,values[0]); + printf("\t%s: %lld\n","PAPI_TOT_CYC",values2[0]); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/components/perfctr/Rules.perfctr b/src/components/perfctr/Rules.perfctr new file mode 100644 index 0000000..debc4a1 --- /dev/null +++ b/src/components/perfctr/Rules.perfctr @@ -0,0 +1,9 @@ + +COMPSRCS += components/perfctr/perfctr.c components/perfctr/perfctr-x86.c +COMPOBJS += perfctr.o perfctr-x86.o + +perfctr.o: components/perfctr/perfctr.c components/perfctr/perfctr-x86.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perfctr/perfctr.c -o perfctr.o + +perfctr-x86.o: components/perfctr/perfctr-x86.c components/perfctr/perfctr-x86.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perfctr/perfctr-x86.c -o perfctr-x86.o diff --git a/src/components/perfctr/perfctr-x86.c b/src/components/perfctr/perfctr-x86.c new file mode 100644 index 0000000..9089560 --- /dev/null +++ b/src/components/perfctr/perfctr-x86.c @@ -0,0 +1,1219 @@ +/* +* File: perfctr-x86.c +* Author: Brian Sheely +* bsheely@eecs.utk.edu +* Mods: +* +*/ + +#include +#include + +#include "papi.h" +#include "papi_memory.h" +#include "papi_internal.h" +#include "perfctr-x86.h" +#include "perfmon/pfmlib.h" +#include "extras.h" +#include "papi_vector.h" +#include "papi_libpfm_events.h" + +#include "papi_preset.h" +#include "linux-memory.h" + +/* Contains source for the Modified Bipartite Allocation scheme */ +#include "papi_bipartite.h" + +/* 
Prototypes for entry points found in perfctr.c */ +extern int _perfctr_init_component( int ); +extern int _perfctr_ctl( hwd_context_t * ctx, int code, + _papi_int_option_t * option ); +extern void _perfctr_dispatch_timer( int signal, hwd_siginfo_t * si, + void *context ); + +extern int _perfctr_init_thread( hwd_context_t * ctx ); +extern int _perfctr_shutdown_thread( hwd_context_t * ctx ); + +#include "linux-common.h" +#include "linux-timer.h" + +extern papi_mdi_t _papi_hwi_system_info; + +extern papi_vector_t _perfctr_vector; + +#if defined(PERFCTR26) +#define evntsel_aux p4.escr +#endif + +#if defined(PAPI_PENTIUM4_VEC_MMX) +#define P4_VEC "MMX" +#else +#define P4_VEC "SSE" +#endif + +#if defined(PAPI_PENTIUM4_FP_X87) +#define P4_FPU " X87" +#elif defined(PAPI_PENTIUM4_FP_X87_SSE_SP) +#define P4_FPU " X87 SSE_SP" +#elif defined(PAPI_PENTIUM4_FP_SSE_SP_DP) +#define P4_FPU " SSE_SP SSE_DP" +#else +#define P4_FPU " X87 SSE_DP" +#endif + +/* CODE TO SUPPORT CUSTOMIZABLE FP COUNTS ON OPTERON */ +#if defined(PAPI_OPTERON_FP_RETIRED) +#define AMD_FPU "RETIRED" +#elif defined(PAPI_OPTERON_FP_SSE_SP) +#define AMD_FPU "SSE_SP" +#elif defined(PAPI_OPTERON_FP_SSE_DP) +#define AMD_FPU "SSE_DP" +#else +#define AMD_FPU "SPECULATIVE" +#endif + +static inline int is_pentium4(void) { + if ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) && + ( _papi_hwi_system_info.hw_info.cpuid_family == 15 )) { + return 1; + } + + return 0; + +} + +#ifdef DEBUG +static void +print_alloc( X86_reg_alloc_t * a ) +{ + SUBDBG( "X86_reg_alloc:\n" ); + SUBDBG( " selector: %#x\n", a->ra_selector ); + SUBDBG( " rank: %#x\n", a->ra_rank ); + SUBDBG( " escr: %#x %#x\n", a->ra_escr[0], a->ra_escr[1] ); +} + +void +print_control( const struct perfctr_cpu_control *control ) +{ + unsigned int i; + SUBDBG( "Control used:\n" ); + SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on ); + SUBDBG( "nractrs\t\t\t%u\n", control->nractrs ); + SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs ); + + for ( i = 0; i < 
( control->nractrs + control->nrictrs ); ++i ) { + if ( control->pmc_map[i] >= 18 ) { + SUBDBG( "pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i] ); + } else { + SUBDBG( "pmc_map[%u]\t\t%u\n", i, control->pmc_map[i] ); + } + SUBDBG( "evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i] ); + if ( control->ireset[i] ) { + SUBDBG( "ireset[%u]\t%d\n", i, control->ireset[i] ); + } + } +} +#endif + +static int +_x86_init_control_state( hwd_control_state_t *ptr ) +{ + int i, def_mode = 0; + + if ( is_pentium4() ) { + if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER ) + def_mode |= ESCR_T0_USR; + if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL ) + def_mode |= ESCR_T0_OS; + + for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) { + ptr->control.cpu_control.evntsel_aux[i] |= def_mode; + } + ptr->control.cpu_control.tsc_on = 1; + ptr->control.cpu_control.nractrs = 0; + ptr->control.cpu_control.nrictrs = 0; + +#ifdef VPERFCTR_CONTROL_CLOEXEC + ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC; + SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags ); +#endif + } else { + + if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER ) + def_mode |= PERF_USR; + if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL ) + def_mode |= PERF_OS; + + ptr->allocated_registers.selector = 0; + switch ( _papi_hwi_system_info.hw_info.model ) { + case PERFCTR_X86_GENERIC: + case PERFCTR_X86_WINCHIP_C6: + case PERFCTR_X86_WINCHIP_2: + case PERFCTR_X86_VIA_C3: + case PERFCTR_X86_INTEL_P5: + case PERFCTR_X86_INTEL_P5MMX: + case PERFCTR_X86_INTEL_PII: + case PERFCTR_X86_INTEL_P6: + case PERFCTR_X86_INTEL_PIII: +#ifdef PERFCTR_X86_INTEL_CORE + case PERFCTR_X86_INTEL_CORE: +#endif +#ifdef PERFCTR_X86_INTEL_PENTM + case PERFCTR_X86_INTEL_PENTM: +#endif + ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE; + for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) { + ptr->control.cpu_control.evntsel[i] |= def_mode; + ptr->control.cpu_control.pmc_map[i] = ( 
unsigned int ) i; + } + break; +#ifdef PERFCTR_X86_INTEL_CORE2 + case PERFCTR_X86_INTEL_CORE2: +#endif +#ifdef PERFCTR_X86_INTEL_ATOM + case PERFCTR_X86_INTEL_ATOM: +#endif +#ifdef PERFCTR_X86_INTEL_NHLM + case PERFCTR_X86_INTEL_NHLM: +#endif +#ifdef PERFCTR_X86_INTEL_WSTMR + case PERFCTR_X86_INTEL_WSTMR: +#endif +#ifdef PERFCTR_X86_AMD_K8 + case PERFCTR_X86_AMD_K8: +#endif +#ifdef PERFCTR_X86_AMD_K8C + case PERFCTR_X86_AMD_K8C: +#endif +#ifdef PERFCTR_X86_AMD_FAM10H /* this is defined in perfctr 2.6.29 */ + case PERFCTR_X86_AMD_FAM10H: +#endif + case PERFCTR_X86_AMD_K7: + for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) { + ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode; + ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i; + } + break; + } +#ifdef VPERFCTR_CONTROL_CLOEXEC + ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC; + SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags ); +#endif + + /* Make sure the TSC is always on */ + ptr->control.cpu_control.tsc_on = 1; + } + return ( PAPI_OK ); +} + +int +_x86_set_domain( hwd_control_state_t * cntrl, int domain ) +{ + int i, did = 0; + int num_cntrs = _perfctr_vector.cmp_info.num_cntrs; + + /* Clear the current domain set for this event set */ + /* We don't touch the Enable bit in this code */ + if ( is_pentium4() ) { + for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) { + cntrl->control.cpu_control.evntsel_aux[i] &= + ~( ESCR_T0_OS | ESCR_T0_USR ); + } + + if ( domain & PAPI_DOM_USER ) { + did = 1; + for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) { + cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_USR; + } + } + + if ( domain & PAPI_DOM_KERNEL ) { + did = 1; + for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) { + cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_OS; + } + } + } else { + for ( i = 0; i < num_cntrs; i++ ) { + cntrl->control.cpu_control.evntsel[i] &= ~( PERF_OS | PERF_USR ); + } + + if ( domain & PAPI_DOM_USER ) { + did = 1; + for ( i 
= 0; i < num_cntrs; i++ ) { + cntrl->control.cpu_control.evntsel[i] |= PERF_USR; + } + } + + if ( domain & PAPI_DOM_KERNEL ) { + did = 1; + for ( i = 0; i < num_cntrs; i++ ) { + cntrl->control.cpu_control.evntsel[i] |= PERF_OS; + } + } + } + + if ( !did ) + return ( PAPI_EINVAL ); + else + return ( PAPI_OK ); +} + +/* This function examines the event to determine + if it can be mapped to counter ctr. + Returns nonzero (the selector bit for ctr) if it can, 0 if it can't. */ +static int +_bpt_map_avail( hwd_reg_alloc_t * dst, int ctr ) +{ + return ( int ) ( dst->ra_selector & ( 1 << ctr ) ); +} + +/* This function forces the event to + be mapped to only counter ctr (selector becomes the single bit for ctr, rank becomes 1). + Returns nothing. */ +static void +_bpt_map_set( hwd_reg_alloc_t * dst, int ctr ) +{ + dst->ra_selector = ( unsigned int ) ( 1 << ctr ); + dst->ra_rank = 1; + + if ( is_pentium4() ) { + /* Pentium 4 requires that both an escr and a counter are selected. + Find which counter mask contains this counter. + Set the opposite escr to empty (-1) */ + if ( dst->ra_bits.counter[0] & dst->ra_selector ) + dst->ra_escr[1] = -1; + else + dst->ra_escr[0] = -1; + } +} + +/* This function examines the event to determine + if it has a single exclusive mapping (rank of exactly 1). + Returns true if exclusive, false if non-exclusive. */ +static int +_bpt_map_exclusive( hwd_reg_alloc_t * dst ) +{ + return ( dst->ra_rank == 1 ); +} + +/* This function compares the dst and src events + to determine if any resources are shared. Typically the src event + is exclusive, so this detects a conflict if true. + Returns true if conflict, false if no conflict. 
*/ +static int +_bpt_map_shared( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src ) +{ + if ( is_pentium4() ) { + int retval1, retval2; + /* Pentium 4 needs to check for conflict of both counters and esc registers */ + /* selectors must share bits */ + retval1 = ( ( dst->ra_selector & src->ra_selector ) || + /* or escrs must equal each other and not be set to -1 */ + ( ( dst->ra_escr[0] == src->ra_escr[0] ) && + ( ( int ) dst->ra_escr[0] != -1 ) ) || + ( ( dst->ra_escr[1] == src->ra_escr[1] ) && + ( ( int ) dst->ra_escr[1] != -1 ) ) ); + /* Pentium 4 also needs to check for conflict on pebs registers */ + /* pebs enables must both be non-zero */ + retval2 = + ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) && + /* and not equal to each other */ + ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) || + /* same for pebs_matrix_vert */ + ( ( dst->ra_bits.pebs_matrix_vert && + src->ra_bits.pebs_matrix_vert ) && + ( dst->ra_bits.pebs_matrix_vert != + src->ra_bits.pebs_matrix_vert ) ) ); + if ( retval2 ) { + SUBDBG( "pebs conflict!\n" ); + } + return ( retval1 | retval2 ); + } + + return ( int ) ( dst->ra_selector & src->ra_selector ); +} + +/* This function removes shared resources available to the src event + from the resources available to the dst event, + and reduces the rank of the dst event accordingly. Typically, + the src event will be exclusive, but the code shouldn't assume it. + Returns nothing. 
*/ +static void +_bpt_map_preempt( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src ) +{ + int i; + unsigned shared; + + if ( is_pentium4() ) { +#ifdef DEBUG + SUBDBG( "src, dst\n" ); + print_alloc( src ); + print_alloc( dst ); +#endif + + /* check for a pebs conflict */ + /* pebs enables must both be non-zero */ + i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) && + /* and not equal to each other */ + ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) || + /* same for pebs_matrix_vert */ + ( ( dst->ra_bits.pebs_matrix_vert && + src->ra_bits.pebs_matrix_vert ) + && ( dst->ra_bits.pebs_matrix_vert != + src->ra_bits.pebs_matrix_vert ) ) ); + if ( i ) { + SUBDBG( "pebs conflict! clearing selector\n" ); + dst->ra_selector = 0; + return; + } else { + /* remove counters referenced by any shared escrs */ + if ( ( dst->ra_escr[0] == src->ra_escr[0] ) && + ( ( int ) dst->ra_escr[0] != -1 ) ) { + dst->ra_selector &= ~dst->ra_bits.counter[0]; + dst->ra_escr[0] = -1; + } + if ( ( dst->ra_escr[1] == src->ra_escr[1] ) && + ( ( int ) dst->ra_escr[1] != -1 ) ) { + dst->ra_selector &= ~dst->ra_bits.counter[1]; + dst->ra_escr[1] = -1; + } + + /* remove any remaining shared counters */ + shared = ( dst->ra_selector & src->ra_selector ); + if ( shared ) + dst->ra_selector ^= shared; + } + /* recompute rank */ + for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ ) + if ( dst->ra_selector & ( 1 << i ) ) + dst->ra_rank++; +#ifdef DEBUG + SUBDBG( "new dst\n" ); + print_alloc( dst ); +#endif + } else { + shared = dst->ra_selector & src->ra_selector; + if ( shared ) + dst->ra_selector ^= shared; + for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ ) + if ( dst->ra_selector & ( 1 << i ) ) + dst->ra_rank++; + } +} + +static void +_bpt_map_update( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src ) +{ + dst->ra_selector = src->ra_selector; + + if ( is_pentium4() ) { + dst->ra_escr[0] = src->ra_escr[0]; + dst->ra_escr[1] = src->ra_escr[1]; + } +} + +/* Register allocation */ 
+static int +_x86_allocate_registers( EventSetInfo_t * ESI ) +{ + int i, j, natNum; + hwd_reg_alloc_t event_list[MAX_COUNTERS]; + hwd_register_t *ptr; + + /* Initialize the local structure needed + for counter allocation and optimization. */ + natNum = ESI->NativeCount; + + if ( is_pentium4() ) { + SUBDBG( "native event count: %d\n", natNum ); + } + + for ( i = 0; i < natNum; i++ ) { + /* retrieve the mapping information about this native event */ + _papi_libpfm_ntv_code_to_bits_perfctr( ( unsigned int ) ESI->NativeInfoArray[i]. + ni_event, &event_list[i].ra_bits ); + + if ( is_pentium4() ) { + /* combine counter bit masks for both esc registers into selector */ + event_list[i].ra_selector = + event_list[i].ra_bits.counter[0] | event_list[i].ra_bits. + counter[1]; + } else { + /* make sure register allocator only looks at legal registers */ + event_list[i].ra_selector = + event_list[i].ra_bits.selector & ALLCNTRS; +#ifdef PERFCTR_X86_INTEL_CORE2 + if ( _papi_hwi_system_info.hw_info.model == + PERFCTR_X86_INTEL_CORE2 ) + event_list[i].ra_selector |= + ( ( event_list[i].ra_bits. + selector >> 16 ) << 2 ) & ALLCNTRS; +#endif + } + /* calculate native event rank, which is no. 
of counters it can live on */ + event_list[i].ra_rank = 0; + for ( j = 0; j < MAX_COUNTERS; j++ ) { + if ( event_list[i].ra_selector & ( 1 << j ) ) { + event_list[i].ra_rank++; + } + } + + if ( is_pentium4() ) { + event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0]; + event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1]; +#ifdef DEBUG + SUBDBG( "i: %d\n", i ); + print_alloc( &event_list[i] ); +#endif + } + } + if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) { /* successfully mapped */ + for ( i = 0; i < natNum; i++ ) { +#ifdef PERFCTR_X86_INTEL_CORE2 + if ( _papi_hwi_system_info.hw_info.model == + PERFCTR_X86_INTEL_CORE2 ) + event_list[i].ra_bits.selector = event_list[i].ra_selector; +#endif +#ifdef DEBUG + if ( is_pentium4() ) { + SUBDBG( "i: %d\n", i ); + print_alloc( &event_list[i] ); + } +#endif + /* Copy all info about this native event to the NativeInfo struct */ + ptr = ESI->NativeInfoArray[i].ni_bits; + *ptr = event_list[i].ra_bits; + + if ( is_pentium4() ) { + /* The selector contains the counter bit position. Turn it into a number + and store it in the first counter value, zeroing the second. */ + ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1; + ptr->counter[1] = 0; + } + + /* Array order on perfctr is event ADD order, not counter #... */ + ESI->NativeInfoArray[i].ni_position = i; + } + return PAPI_OK; + } else + return PAPI_ECNFLCT; +} + +static void +clear_cs_events( hwd_control_state_t * this_state ) +{ + unsigned int i, j; + + /* total counters is sum of accumulating (nractrs) and interrupting (nrictrs) */ + j = this_state->control.cpu_control.nractrs + + this_state->control.cpu_control.nrictrs; + + /* Remove all counter control command values from eventset. 
*/ + for ( i = 0; i < j; i++ ) { + SUBDBG( "Clearing pmc event entry %d\n", i ); + if ( is_pentium4() ) { + this_state->control.cpu_control.pmc_map[i] = 0; + this_state->control.cpu_control.evntsel[i] = 0; + this_state->control.cpu_control.evntsel_aux[i] = + this_state->control.cpu_control. + evntsel_aux[i] & ( ESCR_T0_OS | ESCR_T0_USR ); + } else { + this_state->control.cpu_control.pmc_map[i] = i; + this_state->control.cpu_control.evntsel[i] + = this_state->control.cpu_control. + evntsel[i] & ( PERF_ENABLE | PERF_OS | PERF_USR ); + } + this_state->control.cpu_control.ireset[i] = 0; + } + + if ( is_pentium4() ) { + /* Clear pebs stuff */ + this_state->control.cpu_control.p4.pebs_enable = 0; + this_state->control.cpu_control.p4.pebs_matrix_vert = 0; + } + + /* clear both a and i counter counts */ + this_state->control.cpu_control.nractrs = 0; + this_state->control.cpu_control.nrictrs = 0; + +#ifdef DEBUG + if ( is_pentium4() ) + print_control( &this_state->control.cpu_control ); +#endif +} + +/* This function clears the current contents of the control structure and + updates it with whatever resources are allocated for all the native events + in the native info structure array. 
*/ +static int +_x86_update_control_state( hwd_control_state_t * this_state, + NativeInfo_t * native, int count, + hwd_context_t * ctx ) +{ + ( void ) ctx; /*unused */ + unsigned int i, k, retval = PAPI_OK; + hwd_register_t *bits,*bits2; + struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control; + + /* clear out the events from the control state */ + clear_cs_events( this_state ); + + if ( is_pentium4() ) { + /* fill the counters we're using */ + for ( i = 0; i < ( unsigned int ) count; i++ ) { + /* dereference the mapping information about this native event */ + bits = native[i].ni_bits; + + /* Add counter control command values to eventset */ + cpu_control->pmc_map[i] = bits->counter[0]; + cpu_control->evntsel[i] = bits->cccr; + cpu_control->ireset[i] = bits->ireset; + cpu_control->pmc_map[i] |= FAST_RDPMC; + cpu_control->evntsel_aux[i] |= bits->event; + + /* pebs_enable and pebs_matrix_vert are shared registers used for replay_events. + Replay_events count L1 and L2 cache events. There is only one of each for + the entire eventset. Therefore, there can be only one unique replay_event + per eventset. This means L1 and L2 can't be counted together. Which stinks. + This conflict should be trapped in the allocation scheme, but we'll test for it + here too, just in case. */ + if ( bits->pebs_enable ) { + /* if pebs_enable isn't set, just copy */ + if ( cpu_control->p4.pebs_enable == 0 ) { + cpu_control->p4.pebs_enable = bits->pebs_enable; + /* if pebs_enable conflicts, flag an error */ + } else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) { + SUBDBG + ( "WARNING: P4_update_control_state -- pebs_enable conflict!" 
); + retval = PAPI_ECNFLCT; + } + /* if pebs_enable == bits->pebs_enable, do nothing */ + } + if ( bits->pebs_matrix_vert ) { + /* if pebs_matrix_vert isn't set, just copy */ + if ( cpu_control->p4.pebs_matrix_vert == 0 ) { + cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert; + /* if pebs_matrix_vert conflicts, flag an error */ + } else if ( cpu_control->p4.pebs_matrix_vert != + bits->pebs_matrix_vert ) { + SUBDBG + ( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" ); + retval = PAPI_ECNFLCT; + } + /* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */ + } + } + this_state->control.cpu_control.nractrs = count; + + /* Make sure the TSC is always on */ + this_state->control.cpu_control.tsc_on = 1; + +#ifdef DEBUG + print_control( &this_state->control.cpu_control ); +#endif + } else { + switch ( _papi_hwi_system_info.hw_info.model ) { +#ifdef PERFCTR_X86_INTEL_CORE2 + case PERFCTR_X86_INTEL_CORE2: + /* fill the counters we're using */ + for ( i = 0; i < ( unsigned int ) count; i++ ) { + bits2 = native[i].ni_bits; + for ( k = 0; k < MAX_COUNTERS; k++ ) + if ( bits2->selector & ( 1 << k ) ) { + break; + } + if ( k > 1 ) + this_state->control.cpu_control.pmc_map[i] = + ( k - 2 ) | 0x40000000; + else + this_state->control.cpu_control.pmc_map[i] = k; + + /* Add counter control command values to eventset */ + this_state->control.cpu_control.evntsel[i] |= + bits2->counter_cmd; + } + break; +#endif + default: + /* fill the counters we're using */ + for ( i = 0; i < ( unsigned int ) count; i++ ) { + /* Add counter control command values to eventset */ + bits2 = native[i].ni_bits; + this_state->control.cpu_control.evntsel[i] |= + bits2->counter_cmd; + } + } + this_state->control.cpu_control.nractrs = ( unsigned int ) count; + } + return retval; +} + +static int +_x86_start( hwd_context_t * ctx, hwd_control_state_t * state ) +{ + int error; +#ifdef DEBUG + print_control( &state->control.cpu_control ); +#endif + + if ( state->rvperfctr != NULL ) 
{ + if ( ( error = + rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) { + SUBDBG( "rvperfctr_control returns: %d\n", error ); + PAPIERROR( RCNTRL_ERROR ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); + } + + if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) { + SUBDBG( "vperfctr_control returns: %d\n", error ); + PAPIERROR( VCNTRL_ERROR ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); +} + +static int +_x86_stop( hwd_context_t * ctx, hwd_control_state_t * state ) +{ + int error; + + if ( state->rvperfctr != NULL ) { + if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) { + PAPIERROR( RCNTRL_ERROR ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); + } + + error = vperfctr_stop( ctx->perfctr ); + if ( error < 0 ) { + SUBDBG( "vperfctr_stop returns: %d\n", error ); + PAPIERROR( VCNTRL_ERROR ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); +} + +static int +_x86_read( hwd_context_t * ctx, hwd_control_state_t * spc, long long **dp, + int flags ) +{ + if ( flags & PAPI_PAUSED ) { + vperfctr_read_state( ctx->perfctr, &spc->state, NULL ); + if ( !is_pentium4() ) { + unsigned int i = 0; + for ( i = 0; + i < + spc->control.cpu_control.nractrs + + spc->control.cpu_control.nrictrs; i++ ) { + SUBDBG( "vperfctr_read_state: counter %d = %lld\n", i, + spc->state.pmc[i] ); + } + } + } else { + SUBDBG( "vperfctr_read_ctrs\n" ); + if ( spc->rvperfctr != NULL ) { + rvperfctr_read_ctrs( spc->rvperfctr, &spc->state ); + } else { + vperfctr_read_ctrs( ctx->perfctr, &spc->state ); + } + } + *dp = ( long long * ) spc->state.pmc; +#ifdef DEBUG + { + if ( ISLEVEL( DEBUG_SUBSTRATE ) ) { + unsigned int i; + if ( is_pentium4() ) { + for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) { + SUBDBG( "raw val hardware index %d is %lld\n", i, + ( long long ) spc->state.pmc[i] ); + } + } else { + for ( i = 0; + i < + spc->control.cpu_control.nractrs + + spc->control.cpu_control.nrictrs; i++ ) { + SUBDBG( "raw val hardware index %d 
is %lld\n", i, + ( long long ) spc->state.pmc[i] ); + } + } + } + } +#endif + return ( PAPI_OK ); +} + +static int +_x86_reset( hwd_context_t * ctx, hwd_control_state_t * cntrl ) +{ + return ( _x86_start( ctx, cntrl ) ); +} + +/* Perfctr requires that interrupting counters appear at the end of the pmc list + In the case a user wants to interrupt on a counter in an evntset that is not + among the last events, we need to move the perfctr virtual events around to + make it last. This function swaps two perfctr events, and then adjust the + position entries in both the NativeInfoArray and the EventInfoArray to keep + everything consistent. */ +static void +swap_events( EventSetInfo_t * ESI, struct hwd_pmc_control *contr, int cntr1, + int cntr2 ) +{ + unsigned int ui; + int si, i, j; + + for ( i = 0; i < ESI->NativeCount; i++ ) { + if ( ESI->NativeInfoArray[i].ni_position == cntr1 ) + ESI->NativeInfoArray[i].ni_position = cntr2; + else if ( ESI->NativeInfoArray[i].ni_position == cntr2 ) + ESI->NativeInfoArray[i].ni_position = cntr1; + } + + for ( i = 0; i < ESI->NumberOfEvents; i++ ) { + for ( j = 0; ESI->EventInfoArray[i].pos[j] >= 0; j++ ) { + if ( ESI->EventInfoArray[i].pos[j] == cntr1 ) + ESI->EventInfoArray[i].pos[j] = cntr2; + else if ( ESI->EventInfoArray[i].pos[j] == cntr2 ) + ESI->EventInfoArray[i].pos[j] = cntr1; + } + } + + ui = contr->cpu_control.pmc_map[cntr1]; + contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2]; + contr->cpu_control.pmc_map[cntr2] = ui; + + ui = contr->cpu_control.evntsel[cntr1]; + contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2]; + contr->cpu_control.evntsel[cntr2] = ui; + + if ( is_pentium4() ) { + ui = contr->cpu_control.evntsel_aux[cntr1]; + contr->cpu_control.evntsel_aux[cntr1] = + contr->cpu_control.evntsel_aux[cntr2]; + contr->cpu_control.evntsel_aux[cntr2] = ui; + } + + si = contr->cpu_control.ireset[cntr1]; + contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2]; + 
contr->cpu_control.ireset[cntr2] = si; +} + +static int +_x86_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold ) +{ + hwd_control_state_t *ctl = ( hwd_control_state_t * ) ( ESI->ctl_state ); + struct hwd_pmc_control *contr = &(ctl->control); + int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0; + OVFDBG( "EventIndex=%d\n", EventIndex ); + +#ifdef DEBUG + if ( is_pentium4() ) + print_control( &(contr->cpu_control) ); +#endif + + /* The correct event to overflow is EventIndex */ + ncntrs = _perfctr_vector.cmp_info.num_cntrs; + i = ESI->EventInfoArray[EventIndex].pos[0]; + + if ( i >= ncntrs ) { + PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs ); + return PAPI_EINVAL; + } + + if ( threshold != 0 ) { /* Set an overflow threshold */ + retval = _papi_hwi_start_signal( _perfctr_vector.cmp_info.hardware_intr_sig, + NEED_CONTEXT, + _perfctr_vector.cmp_info.CmpIdx ); + if ( retval != PAPI_OK ) + return ( retval ); + + /* overflow interrupt occurs on the NEXT event after overflow occurs + thus we subtract 1 from the threshold. 
*/ + contr->cpu_control.ireset[i] = ( -threshold + 1 ); + + if ( is_pentium4() ) + contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0; + else + contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE; + + contr->cpu_control.nrictrs++; + contr->cpu_control.nractrs--; + nricntrs = ( int ) contr->cpu_control.nrictrs; + nracntrs = ( int ) contr->cpu_control.nractrs; + contr->si_signo = _perfctr_vector.cmp_info.hardware_intr_sig; + + /* move this event to the bottom part of the list if needed */ + if ( i < nracntrs ) + swap_events( ESI, contr, i, nracntrs ); + OVFDBG( "Modified event set\n" ); + } else { + if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) { + contr->cpu_control.ireset[i] = 0; + contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 ); + contr->cpu_control.nrictrs--; + contr->cpu_control.nractrs++; + } else if ( !is_pentium4() && + contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) { + contr->cpu_control.ireset[i] = 0; + contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE ); + contr->cpu_control.nrictrs--; + contr->cpu_control.nractrs++; + } + + nricntrs = ( int ) contr->cpu_control.nrictrs; + nracntrs = ( int ) contr->cpu_control.nractrs; + + /* move this event to the top part of the list if needed */ + if ( i >= nracntrs ) + swap_events( ESI, contr, i, nracntrs - 1 ); + + if ( !nricntrs ) + contr->si_signo = 0; + + OVFDBG( "Modified event set\n" ); + + retval = _papi_hwi_stop_signal( _perfctr_vector.cmp_info.hardware_intr_sig ); + } + +#ifdef DEBUG + if ( is_pentium4() ) + print_control( &(contr->cpu_control) ); +#endif + OVFDBG( "End of call. 
Exit code: %d\n", retval ); + return ( retval ); +} + +static int +_x86_stop_profiling( ThreadInfo_t * master, EventSetInfo_t * ESI ) +{ + ( void ) master; /*unused */ + ( void ) ESI; /*unused */ + return ( PAPI_OK ); +} + + + +/* these define cccr and escr register bits, and the p4 event structure */ +#include "perfmon/pfmlib_pentium4.h" +#include "../lib/pfmlib_pentium4_priv.h" + +#define P4_REPLAY_REAL_MASK 0x00000003 + +extern pentium4_escr_reg_t pentium4_escrs[]; +extern pentium4_cccr_reg_t pentium4_cccrs[]; +extern pentium4_event_t pentium4_events[]; + + +static pentium4_replay_regs_t p4_replay_regs[] = { + /* 0 */ {.enb = 0, + /* dummy */ + .mat_vert = 0, + }, + /* 1 */ {.enb = 0, + /* dummy */ + .mat_vert = 0, + }, + /* 2 */ {.enb = 0x01000001, + /* 1stL_cache_load_miss_retired */ + .mat_vert = 0x00000001, + }, + /* 3 */ {.enb = 0x01000002, + /* 2ndL_cache_load_miss_retired */ + .mat_vert = 0x00000001, + }, + /* 4 */ {.enb = 0x01000004, + /* DTLB_load_miss_retired */ + .mat_vert = 0x00000001, + }, + /* 5 */ {.enb = 0x01000004, + /* DTLB_store_miss_retired */ + .mat_vert = 0x00000002, + }, + /* 6 */ {.enb = 0x01000004, + /* DTLB_all_miss_retired */ + .mat_vert = 0x00000003, + }, + /* 7 */ {.enb = 0x01018001, + /* Tagged_mispred_branch */ + .mat_vert = 0x00000010, + }, + /* 8 */ {.enb = 0x01000200, + /* MOB_load_replay_retired */ + .mat_vert = 0x00000001, + }, + /* 9 */ {.enb = 0x01000400, + /* split_load_retired */ + .mat_vert = 0x00000001, + }, + /* 10 */ {.enb = 0x01000400, + /* split_store_retired */ + .mat_vert = 0x00000002, + }, +}; + +/* this maps the arbitrary pmd index in libpfm/pentium4_events.h to the intel documentation */ +static int pfm2intel[] = + { 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 }; + + + + +/* This call is broken. Selector can be much bigger than 32 bits. It should be a pfmlib_regmask_t - pjm */ +/* Also, libpfm assumes events can live on different counters with different codes. 
This call only returns + the first occurence found. */ +/* Right now its only called by ntv_code_to_bits in perfctr-p3, so we're ok. But for it to be + generally useful it should be fixed. - dkt */ +static int +_pfm_get_counter_info( unsigned int event, unsigned int *selector, int *code ) +{ + pfmlib_regmask_t cnt, impl; + unsigned int num; + unsigned int i, first = 1; + int ret; + + if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) { + PAPIERROR( "pfm_get_event_counters(%d,%p): %s", event, &cnt, + pfm_strerror( ret ) ); + return PAPI_ESYS; + } + if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) { + PAPIERROR( "pfm_get_num_counters(%p): %s", num, pfm_strerror( ret ) ); + return PAPI_ESYS; + } + if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) { + PAPIERROR( "pfm_get_impl_counters(%p): %s", &impl, + pfm_strerror( ret ) ); + return PAPI_ESYS; + } + + *selector = 0; + for ( i = 0; num; i++ ) { + if ( pfm_regmask_isset( &impl, i ) ) + num--; + if ( pfm_regmask_isset( &cnt, i ) ) { + if ( first ) { + if ( ( ret = + pfm_get_event_code_counter( event, i, + code ) ) != + PFMLIB_SUCCESS ) { + PAPIERROR( "pfm_get_event_code_counter(%d, %d, %p): %s", + event, i, code, pfm_strerror( ret ) ); + return PAPI_ESYS; + } + first = 0; + } + *selector |= 1 << i; + } + } + return PAPI_OK; +} + +int +_papi_libpfm_ntv_code_to_bits_perfctr( unsigned int EventCode, + hwd_register_t *newbits ) +{ + unsigned int event, umask; + + X86_register_t *bits = (X86_register_t *)newbits; + + if ( is_pentium4() ) { + pentium4_escr_value_t escr_value; + pentium4_cccr_value_t cccr_value; + unsigned int num_masks, replay_mask, unit_masks[12]; + unsigned int event_mask; + unsigned int tag_value, tag_enable; + unsigned int i; + int j, escr, cccr, pmd; + + if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) + return PAPI_ENOEVNT; + + /* for each allowed escr (1 or 2) find the allowed cccrs. 
+ for each allowed cccr find the pmd index + convert to an intel counter number; or it into bits->counter */ + for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) { + bits->counter[i] = 0; + escr = pentium4_events[event].allowed_escrs[i]; + if ( escr < 0 ) { + continue; + } + + bits->escr[i] = escr; + + for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) { + cccr = pentium4_escrs[escr].allowed_cccrs[j]; + if ( cccr < 0 ) { + continue; + } + + pmd = pentium4_cccrs[cccr].pmd; + bits->counter[i] |= ( 1 << pfm2intel[pmd] ); + } + } + + /* if there's only one valid escr, copy the values */ + if ( escr < 0 ) { + bits->escr[1] = bits->escr[0]; + bits->counter[1] = bits->counter[0]; + } + + /* Calculate the event-mask value. Invalid masks + * specified by the caller are ignored. */ + tag_value = 0; + tag_enable = 0; + event_mask = _pfm_convert_umask( event, umask ); + + if ( event_mask & 0xF0000 ) { + tag_enable = 1; + tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS ); + } + + event_mask &= 0x0FFFF; /* mask off possible tag bits */ + + /* Set up the ESCR and CCCR register values. */ + escr_value.val = 0; + escr_value.bits.t1_usr = 0; /* controlled by kernel */ + escr_value.bits.t1_os = 0; /* controlled by kernel */ +// escr_value.bits.t0_usr = (plm & PFM_PLM3) ? 1 : 0; +// escr_value.bits.t0_os = (plm & PFM_PLM0) ? 1 : 0; + escr_value.bits.tag_enable = tag_enable; + escr_value.bits.tag_value = tag_value; + escr_value.bits.event_mask = event_mask; + escr_value.bits.event_select = pentium4_events[event].event_select; + escr_value.bits.reserved = 0; + + /* initialize the proper bits in the cccr register */ + cccr_value.val = 0; + cccr_value.bits.reserved1 = 0; + cccr_value.bits.enable = 1; + cccr_value.bits.escr_select = pentium4_events[event].escr_select; + cccr_value.bits.active_thread = 3; + /* FIXME: This is set to count when either logical + * CPU is active. Need a way to distinguish + * between logical CPUs when HT is enabled. 
+ * the docs say these bits should always + * be set. */ + cccr_value.bits.compare = 0; + /* FIXME: What do we do with "threshold" settings? */ + cccr_value.bits.complement = 0; + /* FIXME: What do we do with "threshold" settings? */ + cccr_value.bits.threshold = 0; + /* FIXME: What do we do with "threshold" settings? */ + cccr_value.bits.force_ovf = 0; + /* FIXME: Do we want to allow "forcing" overflow + * interrupts on all counter increments? */ + cccr_value.bits.ovf_pmi_t0 = 0; + cccr_value.bits.ovf_pmi_t1 = 0; + /* PMI taken care of by kernel typically */ + cccr_value.bits.reserved2 = 0; + cccr_value.bits.cascade = 0; + /* FIXME: How do we handle "cascading" counters? */ + cccr_value.bits.overflow = 0; + + /* these flags are always zero, from what I can tell... */ + bits->pebs_enable = 0; /* flag for PEBS counting */ + bits->pebs_matrix_vert = 0; + /* flag for PEBS_MATRIX_VERT, whatever that is */ + + /* ...unless the event is replay_event */ + if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) { + escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK; + num_masks = prepare_umask( umask, unit_masks ); + for ( i = 0; i < num_masks; i++ ) { + replay_mask = unit_masks[i]; + if ( replay_mask > 1 && replay_mask < 11 ) { + /* process each valid mask we find */ + bits->pebs_enable |= p4_replay_regs[replay_mask].enb; + bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert; + } + } + } + + /* store the escr and cccr values */ + bits->event = escr_value.val; + bits->cccr = cccr_value.val; + bits->ireset = 0; /* I don't really know what this does */ + SUBDBG( "escr: 0x%lx; cccr: 0x%lx\n", escr_value.val, cccr_value.val ); + } else { + + int ret, code; + + if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) + return PAPI_ENOEVNT; + + if ( ( ret = _pfm_get_counter_info( event, &bits->selector, + &code ) ) != PAPI_OK ) + return ret; + + bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) ); + + SUBDBG( 
"selector: %#x\n", bits->selector ); + SUBDBG( "event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event, + umask, code, ( ( hwd_register_t * ) bits )->counter_cmd ); + } + + return PAPI_OK; +} + + + +papi_vector_t _perfctr_vector = { + .cmp_info = { + /* default component information (unspecified values are initialized to 0) */ + .name = "perfctr", + .description = "Linux perfctr CPU counters", + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 1, + .fast_virtual_timer = 1, + .attach = 1, + .attach_must_ptrace = 1, + .cntr_umasks = 1, + } + , + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( X86_perfctr_context_t ), + .control_state = sizeof ( X86_perfctr_control_t ), + .reg_value = sizeof ( X86_register_t ), + .reg_alloc = sizeof ( X86_reg_alloc_t ), + } + , + + /* function pointers in this component */ + .init_control_state = _x86_init_control_state, + .start = _x86_start, + .stop = _x86_stop, + .read = _x86_read, + .allocate_registers = _x86_allocate_registers, + .update_control_state = _x86_update_control_state, + .set_domain = _x86_set_domain, + .reset = _x86_reset, + .set_overflow = _x86_set_overflow, + .stop_profiling = _x86_stop_profiling, + + .init_component = _perfctr_init_component, + .ctl = _perfctr_ctl, + .dispatch_timer = _perfctr_dispatch_timer, + .init_thread = _perfctr_init_thread, + .shutdown_thread = _perfctr_shutdown_thread, + + /* from libpfm */ + .ntv_enum_events = _papi_libpfm_ntv_enum_events, + .ntv_name_to_code = _papi_libpfm_ntv_name_to_code, + .ntv_code_to_name = _papi_libpfm_ntv_code_to_name, + .ntv_code_to_descr = _papi_libpfm_ntv_code_to_descr, + .ntv_code_to_bits = _papi_libpfm_ntv_code_to_bits_perfctr, + +}; + + diff --git 
a/src/components/perfctr/perfctr-x86.h b/src/components/perfctr/perfctr-x86.h new file mode 100644 index 0000000..ff72eca --- /dev/null +++ b/src/components/perfctr/perfctr-x86.h @@ -0,0 +1,142 @@ +#ifndef _PERFCTR_X86_H +#define _PERFCTR_X86_H + +#include "perfmon/pfmlib.h" +#include "libperfctr.h" +#include "papi_lock.h" + +#define MAX_COUNTERS 18 +#define MAX_COUNTER_TERMS 8 +#define HW_OVERFLOW 1 +#define hwd_pmc_control vperfctr_control + +#include "linux-context.h" + +/* bit fields unique to P4 */ +#define ESCR_T0_OS (1 << 3) +#define ESCR_T0_USR (1 << 2) +#define CCCR_OVF_PMI_T0 (1 << 26) +#define FAST_RDPMC (1 << 31) + +#ifndef CONFIG_SMP +/* Assert that CONFIG_SMP is set before including asm/atomic.h to + * get bus-locking atomic_* operations when building on UP kernels */ +#define CONFIG_SMP +#endif + + +/* Used in resources.selector to determine on which counters an event can live. */ +#define CNTR1 0x1 +#define CNTR2 0x2 +#define CNTR3 0x4 +#define CNTR4 0x8 +#define CNTR5 0x10 +#define CNTRS12 (CNTR1|CNTR2) +#define ALLCNTRS (CNTR1|CNTR2|CNTR3|CNTR4|CNTR5) + +#define HAS_MESI 0x0100 // indicates this event supports MESI modifiers +#define HAS_MOESI 0x0200 // indicates this event supports MOESI modifiers +#define HAS_UMASK 0x0400 // indicates this event has defined unit mask bits +#define MOESI_M 0x1000 // modified bit +#define MOESI_O 0x0800 // owner bit +#define MOESI_E 0x0400 // exclusive bit +#define MOESI_S 0x0200 // shared bit +#define MOESI_I 0x0100 // invalid bit +#define MOESI_M_INTEL MOESI_O // modified bit on Intel processors +#define MOESI_ALL 0x1F00 // mask for MOESI bits in event code or counter_cmd +#define UNIT_MASK_ALL 0xFF00 // mask for unit mask bits in event code or counter_cmd + +/* Masks to craft an eventcode to perfctr's liking */ +#define PERF_CTR_MASK 0xFF000000 +#define PERF_INV_CTR_MASK 0x00800000 +#define PERF_ENABLE 0x00400000 +#define PERF_INT_ENABLE 0x00100000 +#define PERF_PIN_CONTROL 0x00080000 +#define PERF_EDGE_DETECT 
0x00040000 +#define PERF_OS 0x00020000 +#define PERF_USR 0x00010000 +#define PERF_UNIT_MASK 0x0000FF00 +#define PERF_EVNT_MASK 0x000000FF + +#define AI_ERROR "No support for a-mode counters after adding an i-mode counter" +#define VOPEN_ERROR "vperfctr_open() returned NULL, please run perfex -i to verify your perfctr installation" +#define GOPEN_ERROR "gperfctr_open() returned NULL" +#define VINFO_ERROR "vperfctr_info() returned < 0" +#define VCNTRL_ERROR "vperfctr_control() returned < 0" +#define RCNTRL_ERROR "rvperfctr_control() returned < 0" +#define GCNTRL_ERROR "gperfctr_control() returned < 0" +#define FOPEN_ERROR "fopen(%s) returned NULL" +#define STATE_MAL_ERROR "Error allocating perfctr structures" +#define MODEL_ERROR "This is not a supported cpu." + +typedef struct X86_register +{ + unsigned int selector; // mask for which counters in use + int counter_cmd; // event code + /****************** P4 elements *******************/ + unsigned counter[2]; // bitmap of valid counters for each escr + unsigned escr[2]; // bit offset for each of 2 valid escrs + unsigned cccr; // value to be loaded into cccr register + unsigned event; // value defining event to be loaded into escr register + unsigned pebs_enable; // flag for PEBS counting + unsigned pebs_matrix_vert; // flag for PEBS_MATRIX_VERT + unsigned ireset; +} X86_register_t; + +typedef struct X86_reg_alloc +{ + X86_register_t ra_bits; // info about this native event mapping + unsigned ra_selector; // bit mask showing which counters can carry this metric + unsigned ra_rank; // how many counters can carry this metric + /*************** P4 specific element ****************/ + unsigned ra_escr[2]; // bit field array showing which esc registers can carry this metric +} X86_reg_alloc_t; + +typedef struct hwd_native +{ + int index; // index in the native table, required + unsigned int selector; // which counters + unsigned char rank; // rank determines how many counters carry each metric + int position; // which 
counter this native event stays + int mod; + int link; +} hwd_native_t; + +typedef struct X86_perfctr_control +{ + hwd_native_t native[MAX_COUNTERS]; + int native_idx; + unsigned char master_selector; + X86_register_t allocated_registers; + struct vperfctr_control control; + struct perfctr_sum_ctrs state; + struct rvperfctr *rvperfctr; // Allow attach to be per-eventset +} X86_perfctr_control_t; + +typedef struct X86_perfctr_context +{ + struct vperfctr *perfctr; + int stat_fd; +} X86_perfctr_context_t; + +/* Override void* definitions from PAPI framework layer + with typedefs to conform to PAPI component layer code. */ +#undef hwd_reg_alloc_t +typedef X86_reg_alloc_t hwd_reg_alloc_t; +#undef hwd_register_t +typedef X86_register_t hwd_register_t; +#undef hwd_control_state_t +typedef X86_perfctr_control_t hwd_control_state_t; +#undef hwd_context_t +typedef X86_perfctr_context_t hwd_context_t; + +typedef struct native_event_entry +{ + char name[PAPI_MAX_STR_LEN]; // name of this event + char *description; // description of this event + X86_register_t resources; // resources required by this native event +} native_event_entry_t; + +typedef pfmlib_event_t pfm_register_t; + +#endif diff --git a/src/components/perfctr/perfctr.c b/src/components/perfctr/perfctr.c new file mode 100644 index 0000000..12a2dbb --- /dev/null +++ b/src/components/perfctr/perfctr.c @@ -0,0 +1,441 @@ +/* +* File: perfctr.c +* Author: Philip Mucci +* mucci at cs.utk.edu +* Mods: Kevin London +* london at cs.utk.edu +* Mods: Maynard Johnson +* maynardj at us.ibm.com +* Mods: Brian Sheely +* bsheely at eecs.utk.edu +*/ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_internal.h" + +#ifdef PPC64 +#include "perfctr-ppc64.h" +#else +#include "perfctr-x86.h" +#include "papi_libpfm_events.h" +#endif + +#include "papi_vector.h" + +#include "papi_memory.h" +#include "extras.h" + +#include "linux-common.h" +#include "linux-context.h" + +extern papi_vector_t _perfctr_vector; + 
+#ifdef PPC64 +extern int setup_ppc64_presets( int cputype, int cidx ); +#endif + +/* This should be in a linux.h header file maybe. */ +#define FOPEN_ERROR "fopen(%s) returned NULL" + +#if defined(PERFCTR26) +#define PERFCTR_CPU_NAME(pi) perfctr_info_cpu_name(pi) +#define PERFCTR_CPU_NRCTRS(pi) perfctr_info_nrctrs(pi) +#else +#define PERFCTR_CPU_NAME perfctr_cpu_name +#define PERFCTR_CPU_NRCTRS perfctr_cpu_nrctrs +#endif + +#if !defined(PPC64) +static inline int +xlate_cpu_type_to_vendor( unsigned perfctr_cpu_type ) +{ + switch ( perfctr_cpu_type ) { + case PERFCTR_X86_INTEL_P5: + case PERFCTR_X86_INTEL_P5MMX: + case PERFCTR_X86_INTEL_P6: + case PERFCTR_X86_INTEL_PII: + case PERFCTR_X86_INTEL_PIII: + case PERFCTR_X86_INTEL_P4: + case PERFCTR_X86_INTEL_P4M2: +#ifdef PERFCTR_X86_INTEL_P4M3 + case PERFCTR_X86_INTEL_P4M3: +#endif +#ifdef PERFCTR_X86_INTEL_PENTM + case PERFCTR_X86_INTEL_PENTM: +#endif +#ifdef PERFCTR_X86_INTEL_CORE + case PERFCTR_X86_INTEL_CORE: +#endif +#ifdef PERFCTR_X86_INTEL_CORE2 + case PERFCTR_X86_INTEL_CORE2: +#endif +#ifdef PERFCTR_X86_INTEL_ATOM /* family 6 model 28 */ + case PERFCTR_X86_INTEL_ATOM: +#endif +#ifdef PERFCTR_X86_INTEL_NHLM /* family 6 model 26 */ + case PERFCTR_X86_INTEL_NHLM: +#endif +#ifdef PERFCTR_X86_INTEL_WSTMR + case PERFCTR_X86_INTEL_WSTMR: +#endif + return ( PAPI_VENDOR_INTEL ); +#ifdef PERFCTR_X86_AMD_K8 + case PERFCTR_X86_AMD_K8: +#endif +#ifdef PERFCTR_X86_AMD_K8C + case PERFCTR_X86_AMD_K8C: +#endif +#ifdef PERFCTR_X86_AMD_FAM10 /* this is defined in perfctr 2.6.29 */ + case PERFCTR_X86_AMD_FAM10: +#endif + case PERFCTR_X86_AMD_K7: + return ( PAPI_VENDOR_AMD ); + default: + return ( PAPI_VENDOR_UNKNOWN ); + } +} +#endif + +long long tb_scale_factor = ( long long ) 1; /* needed to scale get_cycles on PPC series */ + +int +_perfctr_init_component( int cidx ) +{ + int retval; + struct perfctr_info info; + char abiv[PAPI_MIN_STR_LEN]; + +#if defined(PERFCTR26) + int fd; +#else + struct vperfctr *dev; +#endif + +#if 
defined(PERFCTR26) + /* Get info from the kernel */ + /* Use lower level calls per Mikael to get the perfctr info + without actually creating a new kernel-side state. + Also, close the fd immediately after retrieving the info. + This is much lighter weight and doesn't reserve the counter + resources. Also compatible with perfctr 2.6.14. + */ + fd = _vperfctr_open( 0 ); + if ( fd < 0 ) { + strncpy(_perfctr_vector.cmp_info.disabled_reason, + VOPEN_ERROR,PAPI_MAX_STR_LEN); + return PAPI_ESYS; + } + retval = perfctr_info( fd, &info ); + close( fd ); + if ( retval < 0 ) { + strncpy(_perfctr_vector.cmp_info.disabled_reason, + VINFO_ERROR,PAPI_MAX_STR_LEN); + return PAPI_ESYS; + } + + /* copy tsc multiplier to local variable */ + /* this field appears in perfctr 2.6 and higher */ + tb_scale_factor = ( long long ) info.tsc_to_cpu_mult; +#else + /* Opened once for all threads. */ + if ( ( dev = vperfctr_open( ) ) == NULL ) { + strncpy(_perfctr_vector.cmp_info.disabled_reason, + VOPEN_ERROR,PAPI_MAX_STR_LEN); + return PAPI_ESYS; + } + SUBDBG( "_perfctr_init_component vperfctr_open = %p\n", dev ); + + /* Get info from the kernel */ + retval = vperfctr_info( dev, &info ); + if ( retval < 0 ) { + strncpy(_perfctr_vector.cmp_info.disabled_reason, + VINFO_ERROR,PAPI_MAX_STR_LEN); + return ( PAPI_ESYS ); + } + vperfctr_close( dev ); +#endif + + /* Fill in what we can of the papi_system_info. 
*/ + retval = _papi_os_vector.get_system_info( &_papi_hwi_system_info ); + if ( retval != PAPI_OK ) + return ( retval ); + + /* Setup memory info */ + retval = _papi_os_vector.get_memory_info( &_papi_hwi_system_info.hw_info, + ( int ) info.cpu_type ); + if ( retval ) + return ( retval ); + + strcpy( _perfctr_vector.cmp_info.name,"perfctr.c" ); + strcpy( _perfctr_vector.cmp_info.version, "$Revision$" ); + sprintf( abiv, "0x%08X", info.abi_version ); + strcpy( _perfctr_vector.cmp_info.support_version, abiv ); + strcpy( _perfctr_vector.cmp_info.kernel_version, info.driver_version ); + _perfctr_vector.cmp_info.CmpIdx = cidx; + _perfctr_vector.cmp_info.num_cntrs = ( int ) PERFCTR_CPU_NRCTRS( &info ); + _perfctr_vector.cmp_info.num_mpx_cntrs=_perfctr_vector.cmp_info.num_cntrs; + if ( info.cpu_features & PERFCTR_FEATURE_RDPMC ) + _perfctr_vector.cmp_info.fast_counter_read = 1; + else + _perfctr_vector.cmp_info.fast_counter_read = 0; + _perfctr_vector.cmp_info.fast_real_timer = 1; + _perfctr_vector.cmp_info.fast_virtual_timer = 1; + _perfctr_vector.cmp_info.attach = 1; + _perfctr_vector.cmp_info.attach_must_ptrace = 1; + _perfctr_vector.cmp_info.default_domain = PAPI_DOM_USER; +#if !defined(PPC64) + /* AMD and Intel ia386 processors all support unit mask bits */ + _perfctr_vector.cmp_info.cntr_umasks = 1; +#endif +#if defined(PPC64) + _perfctr_vector.cmp_info.available_domains = + PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR; +#else + _perfctr_vector.cmp_info.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL; +#endif + _perfctr_vector.cmp_info.default_granularity = PAPI_GRN_THR; + _perfctr_vector.cmp_info.available_granularities = PAPI_GRN_THR; + if ( info.cpu_features & PERFCTR_FEATURE_PCINT ) + _perfctr_vector.cmp_info.hardware_intr = 1; + else + _perfctr_vector.cmp_info.hardware_intr = 0; + SUBDBG( "Hardware/OS %s support counter generated interrupts\n", + _perfctr_vector.cmp_info.hardware_intr ? 
"does" : "does not" ); + + strcpy( _papi_hwi_system_info.hw_info.model_string, + PERFCTR_CPU_NAME( &info ) ); + _papi_hwi_system_info.hw_info.model = ( int ) info.cpu_type; +#if defined(PPC64) + _papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_IBM; + if ( strlen( _papi_hwi_system_info.hw_info.vendor_string ) == 0 ) + strcpy( _papi_hwi_system_info.hw_info.vendor_string, "IBM" ); +#else + _papi_hwi_system_info.hw_info.vendor = + xlate_cpu_type_to_vendor( info.cpu_type ); +#endif + + /* Setup presets last. Some platforms depend on earlier info */ +#if !defined(PPC64) +// retval = setup_p3_vector_table(vtable); + if ( !retval ) + retval = _papi_libpfm_init(&_perfctr_vector, cidx ); +#else + /* Setup native and preset events */ +// retval = ppc64_setup_vector_table(vtable); + if ( !retval ) + retval = perfctr_ppc64_setup_native_table( ); + if ( !retval ) + retval = setup_ppc64_presets( info.cpu_type, cidx ); +#endif + if ( retval ) + return ( retval ); + + return ( PAPI_OK ); +} + +static int +attach( hwd_control_state_t * ctl, unsigned long tid ) +{ + struct vperfctr_control tmp; + + ctl->rvperfctr = rvperfctr_open( ( int ) tid ); + if ( ctl->rvperfctr == NULL ) { + PAPIERROR( VOPEN_ERROR ); + return ( PAPI_ESYS ); + } + SUBDBG( "_papi_hwd_ctl rvperfctr_open() = %p\n", ctl->rvperfctr ); + + /* Initialize the per thread/process virtualized TSC */ + memset( &tmp, 0x0, sizeof ( tmp ) ); + tmp.cpu_control.tsc_on = 1; + +#ifdef VPERFCTR_CONTROL_CLOEXEC + tmp.flags = VPERFCTR_CONTROL_CLOEXEC; /* must follow the memset above, which would otherwise clear it */ +#endif + + /* Start the per thread/process virtualized TSC */ + if ( rvperfctr_control( ctl->rvperfctr, &tmp ) < 0 ) { + PAPIERROR( RCNTRL_ERROR ); + return ( PAPI_ESYS ); + } + + return ( PAPI_OK ); +} /* end attach() */ + +static int +detach( hwd_control_state_t * ctl ) +{ + rvperfctr_close( ctl->rvperfctr ); ctl->rvperfctr = NULL; /* forget the closed handle so start/stop/read fall back to the thread's own counters */ + return ( PAPI_OK ); +} /* end detach() */ + +static inline int +round_requested_ns( int ns ) +{ + if ( ns < _papi_os_info.itimer_res_ns ) { + return 
_papi_os_info.itimer_res_ns; + } else { + int leftover_ns = ns % _papi_os_info.itimer_res_ns; + return leftover_ns ? ns + ( _papi_os_info.itimer_res_ns - leftover_ns ) : ns; /* round up to the next whole multiple of the timer resolution; aligned values pass through */ + } +} + +int +_perfctr_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ + ( void ) ctx; /*unused */ + switch ( code ) { + case PAPI_DOMAIN: + case PAPI_DEFDOM: +#if defined(PPC64) + return ( _perfctr_vector. + set_domain( option->domain.ESI, option->domain.domain ) ); +#else + return ( _perfctr_vector. + set_domain( option->domain.ESI->ctl_state, + option->domain.domain ) ); +#endif + case PAPI_GRANUL: + case PAPI_DEFGRN: + return PAPI_ECMP; + case PAPI_ATTACH: + return ( attach( option->attach.ESI->ctl_state, option->attach.tid ) ); + case PAPI_DETACH: + return ( detach( option->attach.ESI->ctl_state ) ); + case PAPI_DEF_ITIMER: + { + /* flags are currently ignored, eventually the flags will be able + to specify whether or not we use POSIX itimers (clock_gettimer) */ + if ( ( option->itimer.itimer_num == ITIMER_REAL ) && + ( option->itimer.itimer_sig != SIGALRM ) ) + return PAPI_EINVAL; + if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) && + ( option->itimer.itimer_sig != SIGVTALRM ) ) + return PAPI_EINVAL; + if ( ( option->itimer.itimer_num == ITIMER_PROF ) && + ( option->itimer.itimer_sig != SIGPROF ) ) + return PAPI_EINVAL; + if ( option->itimer.ns > 0 ) + option->itimer.ns = round_requested_ns( option->itimer.ns ); + /* At this point, we assume the user knows what he or + she is doing, they maybe doing something arch specific */ + return PAPI_OK; + } + case PAPI_DEF_MPX_NS: + { + option->multiplex.ns = + ( unsigned long ) round_requested_ns( ( int ) option->multiplex. 
+ ns ); + return ( PAPI_OK ); + } + case PAPI_DEF_ITIMER_NS: + { + option->itimer.ns = round_requested_ns( option->itimer.ns ); + return ( PAPI_OK ); + } + default: + return ( PAPI_ENOSUPP ); + } +} + +void +_perfctr_dispatch_timer( int signal, siginfo_t * si, void *context ) +{ + ( void ) signal; /*unused */ + _papi_hwi_context_t ctx; + ThreadInfo_t *master = NULL; + int isHardware = 0; + caddr_t address; + int cidx = _perfctr_vector.cmp_info.CmpIdx; + hwd_context_t *our_context; + + ctx.si = si; + ctx.ucontext = ( ucontext_t * ) context; + +#define OVERFLOW_MASK si->si_pmc_ovf_mask +#define GEN_OVERFLOW 0 + + address = ( caddr_t ) GET_OVERFLOW_ADDRESS( ( ctx ) ); + _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, &isHardware, + OVERFLOW_MASK, GEN_OVERFLOW, &master, + _perfctr_vector.cmp_info.CmpIdx ); + + /* We are done, resume interrupting counters */ + if ( isHardware ) { + our_context=(hwd_context_t *) master->context[cidx]; + errno = vperfctr_iresume( our_context->perfctr ); + if ( errno < 0 ) { + PAPIERROR( "vperfctr_iresume errno %d", errno ); + } + } +} + + +int +_perfctr_init_thread( hwd_context_t * ctx ) +{ + struct vperfctr_control tmp; + int error; + + /* Initialize our thread/process pointer. */ + if ( ( ctx->perfctr = vperfctr_open( ) ) == NULL ) { +#ifdef VPERFCTR_OPEN_CREAT_EXCL + /* New versions of perfctr have this, which allows us to + get a previously created context, i.e. 
one created after + a fork and now we're inside a new process that has been exec'd */ + if ( errno ) { + if ( ( ctx->perfctr = vperfctr_open_mode( 0 ) ) == NULL ) { + return PAPI_ESYS; + } + } else { + return PAPI_ESYS; + } +#else + return PAPI_ESYS; +#endif + } + SUBDBG( "_papi_hwd_init vperfctr_open() = %p\n", ctx->perfctr ); + + /* Initialize the per thread/process virtualized TSC */ + memset( &tmp, 0x0, sizeof ( tmp ) ); + tmp.cpu_control.tsc_on = 1; + +#ifdef VPERFCTR_CONTROL_CLOEXEC + tmp.flags = VPERFCTR_CONTROL_CLOEXEC; + SUBDBG( "close on exec\t\t\t%u\n", tmp.flags ); +#endif + + /* Start the per thread/process virtualized TSC */ + error = vperfctr_control( ctx->perfctr, &tmp ); + if ( error < 0 ) { + SUBDBG( "starting virtualized TSC; vperfctr_control returns %d\n", + error ); + return PAPI_ESYS; + } + + return PAPI_OK; +} + +/* This routine is for shutting down threads, including the + master thread. */ + +int +_perfctr_shutdown_thread( hwd_context_t * ctx ) +{ +#ifdef DEBUG + int retval = vperfctr_unlink( ctx->perfctr ); + SUBDBG( "_papi_hwd_shutdown vperfctr_unlink(%p) = %d\n", ctx->perfctr, + retval ); +#else + vperfctr_unlink( ctx->perfctr ); +#endif + vperfctr_close( ctx->perfctr ); + SUBDBG( "_perfctr_shutdown vperfctr_close(%p)\n", ctx->perfctr ); + memset( ctx, 0x0, sizeof ( hwd_context_t ) ); + return ( PAPI_OK ); +} diff --git a/src/components/perfctr_ppc/Rules.perfctr_ppc b/src/components/perfctr_ppc/Rules.perfctr_ppc new file mode 100644 index 0000000..151018e --- /dev/null +++ b/src/components/perfctr_ppc/Rules.perfctr_ppc @@ -0,0 +1,9 @@ + +COMPSRCS += components/perfctr/perfctr.c components/perfctr_ppc/perfctr-ppc64.c +COMPOBJS += perfctr.o perfctr-ppc64.o + +perfctr.o: components/perfctr/perfctr.c + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perfctr/perfctr.c -o perfctr.o + +perfctr-ppc64.o: components/perfctr_ppc/perfctr-ppc64.c components/perfctr_ppc/perfctr-ppc64.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c 
components/perfctr-ppc/perfct_-ppc64.c -o perfctr-ppc64.o diff --git a/src/components/perfctr_ppc/linux-ppc64.h b/src/components/perfctr_ppc/linux-ppc64.h new file mode 100644 index 0000000..127356d --- /dev/null +++ b/src/components/perfctr_ppc/linux-ppc64.h @@ -0,0 +1,47 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: linux-ppc64.h +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +*/ + +#ifndef _LINUX_PPC64_H /* _LINUX_PPC64_H */ +#define _LINUX_PPC64_H + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define POWER_MAX_COUNTERS MAX_COUNTERS +#define MAX_COUNTER_TERMS MAX_COUNTERS + +#include "linux-context.h" + + +#endif /* _LINUX_PPC64_H */ diff --git a/src/components/perfctr_ppc/perfctr-ppc64.c b/src/components/perfctr_ppc/perfctr-ppc64.c new file mode 100644 index 0000000..c088e4c --- /dev/null +++ b/src/components/perfctr_ppc/perfctr-ppc64.c @@ -0,0 +1,758 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: perfctr-ppc64.c +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +*/ + +/* PAPI stuff */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include SUBSTRATE + +#ifdef PERFCTR26 +#define PERFCTR_CPU_NAME perfctr_info_cpu_name + +#define PERFCTR_CPU_NRCTRS perfctr_info_nrctrs +#else +#define PERFCTR_CPU_NAME perfctr_cpu_name +#define PERFCTR_CPU_NRCTRS perfctr_cpu_nrctrs +#endif + +static hwi_search_t preset_name_map_PPC64[PAPI_MAX_PRESET_EVENTS] = { +#if defined(_POWER5) || defined(_POWER5p) + {PAPI_L1_DCM, {DERIVED_ADD, {PNE_PM_LD_MISS_L1, PNE_PM_ST_MISS_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 data cache misses */ + {PAPI_L1_DCA, {DERIVED_ADD, {PNE_PM_LD_REF_L1, 
PNE_PM_ST_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 data cache access */ + /* can't count level 1 data cache hits due to hardware limitations. */ + {PAPI_L1_LDM, {0, {PNE_PM_LD_MISS_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 load misses */ + {PAPI_L1_STM, {0, {PNE_PM_ST_MISS_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 store misses */ + {PAPI_L1_DCW, {0, {PNE_PM_ST_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 D cache write */ + {PAPI_L1_DCR, {0, {PNE_PM_LD_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 D cache read */ + /* can't count level 2 data cache reads due to hardware limitations. */ + /* can't count level 2 data cache hits due to hardware limitations. */ + {PAPI_L2_DCM, {0, {PNE_PM_DATA_FROM_L2MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 2 data cache misses */ + {PAPI_L2_LDM, {0, {PNE_PM_DATA_FROM_L2MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 2 cache read misses */ + {PAPI_L3_DCR, {0, {PNE_PM_DATA_FROM_L2MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 3 data cache reads */ + /* can't count level 3 data cache hits due to hardware limitations. */ + {PAPI_L3_DCM, {DERIVED_ADD, {PNE_PM_DATA_FROM_LMEM, PNE_PM_DATA_FROM_RMEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 3 data cache misses (reads & writes) */ + {PAPI_L3_LDM, {DERIVED_ADD, {PNE_PM_DATA_FROM_LMEM, PNE_PM_DATA_FROM_RMEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 3 data cache read misses */ + /* can't count level 1 instruction cache accesses due to hardware limitations. */ + {PAPI_L1_ICH, {0, {PNE_PM_INST_FROM_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 1 inst cache hits */ + /* can't count level 1 instruction cache misses due to hardware limitations. 
*/ + /* can't count level 2 instruction cache accesses due to hardware limitations. */ + /* can't count level 2 instruction cache hits due to hardware limitations. */ + {PAPI_L2_ICM, {0, {PNE_PM_INST_FROM_L2MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 2 inst cache misses */ + {PAPI_L3_ICA, {0, {PNE_PM_INST_FROM_L2MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 3 inst cache accesses */ + /* can't count level 3 instruction cache hits due to hardware limitations. */ + {PAPI_L3_ICM, {DERIVED_ADD, {PNE_PM_DATA_FROM_LMEM, PNE_PM_DATA_FROM_RMEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 3 instruction cache misses (reads & writes) */ + {PAPI_FMA_INS, {0, {PNE_PM_FPU_FMA, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*FMA instructions completed */ + {PAPI_TOT_IIS, {0, {PNE_PM_INST_DISP, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Total instructions issued */ + {PAPI_TOT_INS, {0, {PNE_PM_INST_CMPL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Total instructions executed */ + {PAPI_INT_INS, {0, {PNE_PM_FXU_FIN, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Integer instructions executed */ + {PAPI_FP_OPS, {DERIVED_ADD, {PNE_PM_FPU_1FLOP, PNE_PM_FPU_FMA, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Floating point instructions executed */ + {PAPI_FP_INS, {0, {PNE_PM_FPU_FIN, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Floating point instructions executed */ + {PAPI_TOT_CYC, {0, {PNE_PM_RUN_CYC, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Processor cycles gated by the run latch */ + {PAPI_FDV_INS, {0, {PNE_PM_FPU_FDIV, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*FD ins */ + {PAPI_FSQ_INS, {0, {PNE_PM_FPU_FSQRT, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*FSq ins */ + {PAPI_TLB_DM, {0, {PNE_PM_DTLB_MISS, PAPI_NULL, PAPI_NULL, 
PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Data translation lookaside buffer misses */ + {PAPI_TLB_IM, {0, {PNE_PM_ITLB_MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Instr translation lookaside buffer misses */ + {PAPI_TLB_TL, {DERIVED_ADD, {PNE_PM_DTLB_MISS, PNE_PM_ITLB_MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Total translation lookaside buffer misses */ + {PAPI_HW_INT, {0, {PNE_PM_EXT_INT, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Hardware interrupts */ + {PAPI_STL_ICY, {0, {PNE_PM_0INST_FETCH, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Cycles with No Instruction Issue */ + {PAPI_LD_INS, {0, {PNE_PM_LD_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Load instructions */ + {PAPI_SR_INS, {0, {PNE_PM_ST_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Store instructions */ + {PAPI_LST_INS, {DERIVED_ADD, {PNE_PM_ST_REF_L1, PNE_PM_LD_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Load and Store instructions */ + {PAPI_BR_INS, {0, {PNE_PM_BR_ISSUED, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Branch instructions */ + {PAPI_BR_MSP, {DERIVED_ADD, {PNE_PM_BR_MPRED_CR, PNE_PM_BR_MPRED_TA, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Branch mispredictions */ + {PAPI_FXU_IDL, {0, {PNE_PM_FXU_IDLE, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Cycles integer units are idle */ + {0, {0, {PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}} /* end of list */ +#else +#ifdef _PPC970 + {PAPI_L2_DCM, {0, {PNE_PM_DATA_FROM_MEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 2 data cache misses */ + {PAPI_L2_DCR, {DERIVED_ADD, {PNE_PM_DATA_FROM_L2, PNE_PM_DATA_FROM_L25_MOD, PNE_PM_DATA_FROM_L25_SHR, PNE_PM_DATA_FROM_MEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 2 data cache read attempts */ + 
{PAPI_L2_DCH, {DERIVED_ADD, {PNE_PM_DATA_FROM_L2, PNE_PM_DATA_FROM_L25_MOD, PNE_PM_DATA_FROM_L25_SHR, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 2 data cache hits */ + {PAPI_L2_LDM, {0, {PNE_PM_DATA_FROM_MEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 2 data cache read misses */ + /* no PAPI_L1_ICA since PM_INST_FROM_L1 and PM_INST_FROM_L2 cannot be counted simultaneously. */ + {PAPI_L1_ICM, {DERIVED_ADD, {PNE_PM_INST_FROM_L2, PNE_PM_INST_FROM_L25_SHR, PNE_PM_INST_FROM_L25_MOD, PNE_PM_INST_FROM_MEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 1 inst cache misses */ + {PAPI_L2_ICA, {DERIVED_ADD, {PNE_PM_INST_FROM_L2, PNE_PM_INST_FROM_L25_SHR, PNE_PM_INST_FROM_L25_MOD, PNE_PM_INST_FROM_MEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 2 inst cache accesses */ + {PAPI_L2_ICH, {DERIVED_ADD, {PNE_PM_INST_FROM_L2, PNE_PM_INST_FROM_L25_SHR, PNE_PM_INST_FROM_L25_MOD, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 2 inst cache hits */ + {PAPI_L2_ICM, {0, {PNE_PM_INST_FROM_MEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 2 inst cache misses */ +#endif +/* Common preset events for PPC970 */ + {PAPI_L1_DCM, {DERIVED_ADD, {PNE_PM_LD_MISS_L1, PNE_PM_ST_MISS_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 data cache misses */ + {PAPI_L1_DCA, {DERIVED_ADD, {PNE_PM_LD_REF_L1, PNE_PM_ST_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 data cache access */ + {PAPI_FXU_IDL, {0, {PNE_PM_FXU_IDLE, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Cycles integer units are idle */ + {PAPI_L1_LDM, {0, {PNE_PM_LD_MISS_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 load misses */ + {PAPI_L1_STM, {0, {PNE_PM_ST_MISS_L1, PAPI_NULL, 
PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 store misses */ + {PAPI_L1_DCW, {0, {PNE_PM_ST_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 D cache write */ + {PAPI_L1_DCR, {0, {PNE_PM_LD_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Level 1 D cache read */ + {PAPI_FMA_INS, {0, {PNE_PM_FPU_FMA, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*FMA instructions completed */ + {PAPI_TOT_IIS, {0, {PNE_PM_INST_DISP, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Total instructions issued */ + {PAPI_TOT_INS, {0, {PNE_PM_INST_CMPL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Total instructions executed */ + {PAPI_INT_INS, {0, {PNE_PM_FXU_FIN, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Integer instructions executed */ + {PAPI_FP_OPS, {DERIVED_POSTFIX, {PNE_PM_FPU0_FIN, PNE_PM_FPU1_FIN, PNE_PM_FPU_FMA, PNE_PM_FPU_STF, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, "N0|N1|+|N2|+|N3|-|"}}, /*Floating point instructions executed */ + {PAPI_FP_INS, {0, {PNE_PM_FPU_FIN, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Floating point instructions executed */ + {PAPI_TOT_CYC, {0, {PNE_PM_CYC, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Total cycles */ + {PAPI_FDV_INS, {0, {PNE_PM_FPU_FDIV, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*FD ins */ + {PAPI_FSQ_INS, {0, {PNE_PM_FPU_FSQRT, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*FSq ins */ + {PAPI_TLB_DM, {0, {PNE_PM_DTLB_MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Data translation lookaside buffer misses */ + 
{PAPI_TLB_IM, {0, {PNE_PM_ITLB_MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Instr translation lookaside buffer misses */ + {PAPI_TLB_TL, {DERIVED_ADD, {PNE_PM_DTLB_MISS, PNE_PM_ITLB_MISS, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Total translation lookaside buffer misses */ + {PAPI_HW_INT, {0, {PNE_PM_EXT_INT, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Hardware interrupts */ + {PAPI_STL_ICY, {0, {PNE_PM_0INST_FETCH, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Cycles with No Instruction Issue */ + {PAPI_LD_INS, {0, {PNE_PM_LD_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Load instructions */ + {PAPI_SR_INS, {0, {PNE_PM_ST_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Store instructions */ + {PAPI_LST_INS, {DERIVED_ADD, {PNE_PM_ST_REF_L1, PNE_PM_LD_REF_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /*Load and Store instructions */ + {PAPI_BR_INS, {0, {PNE_PM_BR_ISSUED, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Branch instructions */ + {PAPI_BR_MSP, {DERIVED_ADD, {PNE_PM_BR_MPRED_CR, PNE_PM_BR_MPRED_TA, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Branch mispredictions */ + {PAPI_L1_DCH, {DERIVED_POSTFIX, {PNE_PM_LD_REF_L1, PNE_PM_LD_MISS_L1, PNE_PM_ST_REF_L1, PNE_PM_ST_MISS_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, "N0|N1|-|N2|+|N3|-|"}}, /* Level 1 data cache hits */ + /* no PAPI_L2_STM, PAPI_L2_DCW nor PAPI_L2_DCA since stores/writes to L2 aren't countable */ + {PAPI_L3_DCM, {0, {PNE_PM_DATA_FROM_MEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 3 data cache misses (reads & writes) */ + {PAPI_L3_LDM, {0, {PNE_PM_DATA_FROM_MEM, PAPI_NULL, 
PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 3 data cache read misses */ + {PAPI_L1_ICH, {0, {PNE_PM_INST_FROM_L1, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 1 inst cache hits */ + {PAPI_L3_ICM, {0, {PNE_PM_INST_FROM_MEM, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}}, /* Level 3 inst cache misses */ + {0, {0, {PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL, PAPI_NULL}, {0}}} /* end of list */ +#endif +}; +hwi_search_t *preset_search_map; + +#if defined(_POWER5) || defined(_POWER5p) +unsigned long long pmc_sel_mask[NUM_COUNTER_MASKS] = { + PMC1_SEL_MASK, + PMC2_SEL_MASK, + PMC3_SEL_MASK, + PMC4_SEL_MASK +}; +#else +unsigned long long pmc_sel_mask[NUM_COUNTER_MASKS] = { + PMC1_SEL_MASK, + PMC2_SEL_MASK, + PMC3_SEL_MASK, + PMC4_SEL_MASK, + PMC5_SEL_MASK, + PMC6_SEL_MASK, + PMC7_SEL_MASK, + PMC8_SEL_MASK, + PMC8a_SEL_MASK +}; +#endif + +static void +clear_unused_pmcsel_bits( hwd_control_state_t * cntrl ) +{ + struct perfctr_cpu_control *cpu_ctl = &cntrl->control.cpu_control; + int i; + int num_used_counters = cpu_ctl->nractrs + cpu_ctl->nrictrs; + unsigned int used_counters = 0x0; + for ( i = 0; i < num_used_counters; i++ ) { + used_counters |= 1 << cpu_ctl->pmc_map[i]; + } +#if defined(_POWER5) || defined(_POWER5p) + int freeze_pmc5_pmc6 = 0; /* for Power5 use only */ +#endif + + for ( i = 0; i < MAX_COUNTERS; i++ ) { + unsigned int active_counter = ( ( 1 << i ) & used_counters ); + if ( !active_counter ) { +#if defined(_POWER5) || defined(_POWER5p) + if ( i > 3 ) + freeze_pmc5_pmc6++; + else + cpu_ctl->ppc64.mmcr1 &= pmc_sel_mask[i]; +#else + if ( i < 2 ) { + cpu_ctl->ppc64.mmcr0 &= pmc_sel_mask[i]; + } else { + cpu_ctl->ppc64.mmcr1 &= pmc_sel_mask[i]; + if ( i == ( MAX_COUNTERS - 1 ) ) + cpu_ctl->ppc64.mmcra &= pmc_sel_mask[NUM_COUNTER_MASKS - 1]; + } +#endif + } + } +#if defined(_POWER5) || defined(_POWER5p) + 
if ( freeze_pmc5_pmc6 == 2 ) + cpu_ctl->ppc64.mmcr0 |= PMC5_PMC6_FREEZE; +#endif +} +static int +set_domain( hwd_control_state_t * cntrl, unsigned int domain ) +{ + int did = 0; + + /* A bit setting of '0' indicates "count this context". + * Start off by turning off counting for all contexts; + * then, selectively re-enable. + */ + cntrl->control.cpu_control.ppc64.mmcr0 |= + PERF_USER | PERF_KERNEL | PERF_HYPERVISOR; + if ( domain & PAPI_DOM_USER ) { + cntrl->control.cpu_control.ppc64.mmcr0 |= PERF_USER; + cntrl->control.cpu_control.ppc64.mmcr0 ^= PERF_USER; + did = 1; + } + if ( domain & PAPI_DOM_KERNEL ) { + cntrl->control.cpu_control.ppc64.mmcr0 |= PERF_KERNEL; + cntrl->control.cpu_control.ppc64.mmcr0 ^= PERF_KERNEL; + did = 1; + } + if ( domain & PAPI_DOM_SUPERVISOR ) { + cntrl->control.cpu_control.ppc64.mmcr0 |= PERF_HYPERVISOR; + cntrl->control.cpu_control.ppc64.mmcr0 ^= PERF_HYPERVISOR; + did = 1; + } + + if ( did ) { + return ( PAPI_OK ); + } else { + return ( PAPI_EINVAL ); + } + +} + + +//extern native_event_entry_t *native_table; +//extern hwi_search_t _papi_hwd_preset_map[]; +extern papi_mdi_t _papi_hwi_system_info; + +#ifdef DEBUG +void +print_control( const struct perfctr_cpu_control *control ) +{ + unsigned int i; + + SUBDBG( "Control used:\n" ); + SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on ); + SUBDBG( "nractrs\t\t\t%u\n", control->nractrs ); + SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs ); + SUBDBG( "mmcr0\t\t\t0x%X\n", control->ppc64.mmcr0 ); + SUBDBG( "mmcr1\t\t\t0x%llX\n", + ( unsigned long long ) control->ppc64.mmcr1 ); + SUBDBG( "mmcra\t\t\t0x%X\n", control->ppc64.mmcra ); + + for ( i = 0; i < ( control->nractrs + control->nrictrs ); ++i ) { + SUBDBG( "pmc_map[%u]\t\t%u\n", i, control->pmc_map[i] ); + if ( control->ireset[i] ) { + SUBDBG( "ireset[%d]\t%X\n", i, control->ireset[i] ); + } + } + +} +#endif + + +/* Assign the global native and preset table pointers, find the native + table's size in memory and then call the preset setup 
routine. */ +int +setup_ppc64_presets( int cputype ) +{ + preset_search_map = preset_name_map_PPC64; + return ( _papi_hwi_setup_all_presets( preset_search_map, NULL ) ); +} + +/*called when an EventSet is allocated */ +int +_papi_hwd_init_control_state( hwd_control_state_t * ptr ) +{ + int i = 0; + for ( i = 0; i < _papi_hwi_system_info.sub_info.num_cntrs; i++ ) { + ptr->control.cpu_control.pmc_map[i] = i; + } + ptr->control.cpu_control.tsc_on = 1; + set_domain( ptr, _papi_hwi_system_info.sub_info.default_domain ); + return ( PAPI_OK ); +} + +/* At init time, the higher level library should always allocate and + reserve EventSet zero. */ + + +/* Called once per process. */ +/* No longer needed if not implemented +int _papi_hwd_shutdown_global(void) { + return (PAPI_OK); +} */ + + +/* this function recusively does Modified Bipartite Graph counter allocation + success return 1 + fail return 0 +*/ +static int +do_counter_allocation( ppc64_reg_alloc_t * event_list, int size ) +{ + int i, j, group = -1; + unsigned int map[GROUP_INTS]; + + for ( i = 0; i < GROUP_INTS; i++ ) { + map[i] = event_list[0].ra_group[i]; + } + + for ( i = 1; i < size; i++ ) { + for ( j = 0; j < GROUP_INTS; j++ ) + map[j] &= event_list[i].ra_group[j]; + } + + for ( i = 0; i < GROUP_INTS; i++ ) { + if ( map[i] ) { + group = ffs( map[i] ) - 1 + i * 32; + break; + } + } + + if ( group < 0 ) + return group; /* allocation fail */ + else { + for ( i = 0; i < size; i++ ) { + for ( j = 0; j < MAX_COUNTERS; j++ ) { + if ( event_list[i].ra_counter_cmd[j] >= 0 + && event_list[i].ra_counter_cmd[j] == + group_map[group].counter_cmd[j] ) + event_list[i].ra_position = j; + } + } + return group; + } +} + + +/* Register allocation */ +int +_papi_hwd_allocate_registers( EventSetInfo_t * ESI ) +{ + hwd_control_state_t *this_state = &ESI->machdep; + int i, j, natNum, index; + ppc64_reg_alloc_t event_list[MAX_COUNTERS]; + int group; + + /* not yet successfully mapped, but have enough slots for events */ + + /* 
Initialize the local structure needed + for counter allocation and optimization. */ + natNum = ESI->NativeCount; + for ( i = 0; i < natNum; i++ ) { + event_list[i].ra_position = -1; + for ( j = 0; j < MAX_COUNTERS; j++ ) { + if ( ( index = + native_name_map[ESI->NativeInfoArray[i]. + ni_event & PAPI_NATIVE_AND_MASK].index ) < + 0 ) + return PAPI_ECNFLCT; + event_list[i].ra_counter_cmd[j] = + native_table[index].resources.counter_cmd[j]; + } + for ( j = 0; j < GROUP_INTS; j++ ) { + if ( ( index = + native_name_map[ESI->NativeInfoArray[i]. + ni_event & PAPI_NATIVE_AND_MASK].index ) < + 0 ) + return PAPI_ECNFLCT; + event_list[i].ra_group[j] = native_table[index].resources.group[j]; + } + } + if ( ( group = do_counter_allocation( event_list, natNum ) ) >= 0 ) { /* successfully mapped */ + /* copy counter allocations info back into NativeInfoArray */ + this_state->group_id = group; + for ( i = 0; i < natNum; i++ ) { +// ESI->NativeInfoArray[i].ni_position = event_list[i].ra_position; + this_state->control.cpu_control.pmc_map[i] = + event_list[i].ra_position; + ESI->NativeInfoArray[i].ni_position = i; + } + /* update the control structure based on the NativeInfoArray */ + SUBDBG( "Group ID: %d\n", group ); + + return PAPI_OK; + } else { + return PAPI_ECNFLCT; + } +} + +/* This function clears the current contents of the control structure and + updates it with whatever resources are allocated for all the native events + in the native info structure array. 
*/ +int +_papi_hwd_update_control_state( hwd_control_state_t * this_state, + NativeInfo_t * native, int count, + hwd_context_t * context ) +{ + + + this_state->control.cpu_control.nractrs = + count - this_state->control.cpu_control.nrictrs; + // save control state + unsigned int save_mmcr0_ctlbits = + PERF_CONTROL_MASK & this_state->control.cpu_control.ppc64.mmcr0; + + this_state->control.cpu_control.ppc64.mmcr0 = + group_map[this_state->group_id].mmcr0 | save_mmcr0_ctlbits; + + unsigned long long mmcr1 = + ( ( unsigned long long ) group_map[this_state->group_id].mmcr1U ) << 32; + mmcr1 += group_map[this_state->group_id].mmcr1L; + this_state->control.cpu_control.ppc64.mmcr1 = mmcr1; + + this_state->control.cpu_control.ppc64.mmcra = + group_map[this_state->group_id].mmcra; + + clear_unused_pmcsel_bits( this_state ); + return PAPI_OK; +} + + +int +_papi_hwd_start( hwd_context_t * ctx, hwd_control_state_t * state ) +{ + int error; +/* clear_unused_pmcsel_bits(this_state); moved to update_control_state */ +#ifdef DEBUG + print_control( &state->control.cpu_control ); +#endif + if ( state->rvperfctr != NULL ) { + if ( ( error = + rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) { + SUBDBG( "rvperfctr_control returns: %d\n", error ); + PAPIERROR( RCNTRL_ERROR ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); + } + if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) { + SUBDBG( "vperfctr_control returns: %d\n", error ); + PAPIERROR( VCNTRL_ERROR ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); +} + +int +_papi_hwd_stop( hwd_context_t * ctx, hwd_control_state_t * state ) +{ + if ( state->rvperfctr != NULL ) { + if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) { + PAPIERROR( RCNTRL_ERROR ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); + } + if ( vperfctr_stop( ctx->perfctr ) < 0 ) { + PAPIERROR( VCNTRL_ERROR ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); +} + +int +_papi_hwd_read( hwd_context_t * ctx, 
hwd_control_state_t * spc, long long **dp, + int flags ) +{ + if ( flags & PAPI_PAUSED ) { + vperfctr_read_state( ctx->perfctr, &spc->state, NULL ); + } else { + SUBDBG( "vperfctr_read_ctrs\n" ); + if ( spc->rvperfctr != NULL ) { + rvperfctr_read_ctrs( spc->rvperfctr, &spc->state ); + } else { + vperfctr_read_ctrs( ctx->perfctr, &spc->state ); + } + } + + *dp = ( long long * ) spc->state.pmc; +#ifdef DEBUG + { + if ( ISLEVEL( DEBUG_SUBSTRATE ) ) { + int i; + for ( i = 0; + i < + spc->control.cpu_control.nractrs + + spc->control.cpu_control.nrictrs; i++ ) { + SUBDBG( "raw val hardware index %d is %lld\n", i, + ( long long ) spc->state.pmc[i] ); + } + } + } +#endif + return ( PAPI_OK ); +} + + +int +_papi_hwd_reset( hwd_context_t * ctx, hwd_control_state_t * cntrl ) +{ + return ( _papi_hwd_start( ctx, cntrl ) ); +} + + +/* This routine is for shutting down threads, including the + master thread. */ +int +_papi_hwd_shutdown( hwd_context_t * ctx ) +{ + int retval = vperfctr_unlink( ctx->perfctr ); + SUBDBG( "_papi_hwd_shutdown vperfctr_unlink(%p) = %d\n", ctx->perfctr, + retval ); + vperfctr_close( ctx->perfctr ); + SUBDBG( "_papi_hwd_shutdown vperfctr_close(%p)\n", ctx->perfctr ); + memset( ctx, 0x0, sizeof ( hwd_context_t ) ); + + if ( retval ) + return ( PAPI_ESYS ); + return ( PAPI_OK ); +} + + +/* Perfctr requires that interrupting counters appear at the end of the pmc list + In the case a user wants to interrupt on a counter in an evntset that is not + among the last events, we need to move the perfctr virtual events around to + make it last. This function swaps two perfctr events, and then adjust the + position entries in both the NativeInfoArray and the EventInfoArray to keep + everything consistent. 
+*/ +static void +swap_events( EventSetInfo_t * ESI, struct hwd_pmc_control *contr, int cntr1, + int cntr2 ) +{ + unsigned int ui; + int si, i, j; + + for ( i = 0; i < ESI->NativeCount; i++ ) { + if ( ESI->NativeInfoArray[i].ni_position == cntr1 ) + ESI->NativeInfoArray[i].ni_position = cntr2; + else if ( ESI->NativeInfoArray[i].ni_position == cntr2 ) + ESI->NativeInfoArray[i].ni_position = cntr1; + } + for ( i = 0; i < ESI->NumberOfEvents; i++ ) { + for ( j = 0; ESI->EventInfoArray[i].pos[j] >= 0; j++ ) { + if ( ESI->EventInfoArray[i].pos[j] == cntr1 ) + ESI->EventInfoArray[i].pos[j] = cntr2; + else if ( ESI->EventInfoArray[i].pos[j] == cntr2 ) + ESI->EventInfoArray[i].pos[j] = cntr1; + } + } + ui = contr->cpu_control.pmc_map[cntr1]; + contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2]; + contr->cpu_control.pmc_map[cntr2] = ui; + + si = contr->cpu_control.ireset[cntr1]; + contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2]; + contr->cpu_control.ireset[cntr2] = si; +} + + +int +_papi_hwd_set_overflow( EventSetInfo_t * ESI, int EventIndex, int threshold ) +{ + hwd_control_state_t *this_state = &ESI->machdep; + struct hwd_pmc_control *contr = &this_state->control; + int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0; + + OVFDBG( "EventIndex=%d, threshold = %d\n", EventIndex, threshold ); + + /* The correct event to overflow is EventIndex */ + ncntrs = _papi_hwi_system_info.sub_info.num_cntrs; + i = ESI->EventInfoArray[EventIndex].pos[0]; + if ( i >= ncntrs ) { + OVFDBG( "Selector id (%d) larger than ncntrs (%d)\n", i, ncntrs ); + return PAPI_EINVAL; + } + if ( threshold != 0 ) { /* Set an overflow threshold */ + if ( ESI->EventInfoArray[EventIndex].derived ) { + OVFDBG( "Can't overflow on a derived event.\n" ); + return PAPI_EINVAL; + } + + if ( ( retval = + _papi_hwi_start_signal( _papi_hwi_system_info.sub_info. 
+ hardware_intr_sig, + NEED_CONTEXT ) ) != PAPI_OK ) + return ( retval ); + + contr->cpu_control.ireset[i] = PMC_OVFL - threshold; + nricntrs = ++contr->cpu_control.nrictrs; + nracntrs = --contr->cpu_control.nractrs; + contr->si_signo = _papi_hwi_system_info.sub_info.hardware_intr_sig; + contr->cpu_control.ppc64.mmcr0 |= PERF_INT_ENABLE; + + /* move this event to the bottom part of the list if needed */ + if ( i < nracntrs ) + swap_events( ESI, contr, i, nracntrs ); + + OVFDBG( "Modified event set\n" ); + } else { + if ( contr->cpu_control.ppc64.mmcr0 & PERF_INT_ENABLE ) { + contr->cpu_control.ireset[i] = 0; + nricntrs = --contr->cpu_control.nrictrs; + nracntrs = ++contr->cpu_control.nractrs; + if ( !nricntrs ) + contr->cpu_control.ppc64.mmcr0 &= ( ~PERF_INT_ENABLE ); + } + /* move this event to the top part of the list if needed */ + if ( i >= nracntrs ) + swap_events( ESI, contr, i, nracntrs - 1 ); + if ( !nricntrs ) + contr->si_signo = 0; + + OVFDBG( "Modified event set\n" ); + + retval = + _papi_hwi_stop_signal( _papi_hwi_system_info.sub_info. + hardware_intr_sig ); + } +#ifdef DEBUG + print_control( &contr->cpu_control ); +#endif + OVFDBG( "%s:%d: Hardware overflow is still experimental.\n", __FILE__, + __LINE__ ); + OVFDBG( "End of call. Exit code: %d\n", retval ); + + return ( retval ); +} + + + +int +_papi_hwd_set_profile( EventSetInfo_t * ESI, int EventIndex, int threshold ) +{ + /* This function is not used and shouldn't be called. 
*/ + return PAPI_ECMP; +} + + +int +_papi_hwd_stop_profiling( ThreadInfo_t * master, EventSetInfo_t * ESI ) +{ + ESI->profile.overflowcount = 0; + return PAPI_OK; +} + +int +_papi_hwd_set_domain( hwd_control_state_t * cntrl, int domain ) +{ + return set_domain( cntrl, domain ); +} + +/* Routines to support an opaque native event table */ +char * +_papi_hwd_ntv_code_to_name( unsigned int EventCode ) +{ + if ( ( EventCode & PAPI_NATIVE_AND_MASK ) >= + _papi_hwi_system_info.sub_info.num_native_events ) + return ( '\0' ); // return a null string for invalid events + return ( native_name_map[EventCode & PAPI_NATIVE_AND_MASK].name ); +} + +int +_papi_hwd_ntv_code_to_bits( unsigned int EventCode, hwd_register_t * bits ) +{ + if ( ( EventCode & PAPI_NATIVE_AND_MASK ) >= + _papi_hwi_system_info.sub_info.num_native_events ) { + return ( PAPI_ENOEVNT ); + } + + memcpy( bits, + &native_table[native_name_map[EventCode & PAPI_NATIVE_AND_MASK]. + index].resources, sizeof ( hwd_register_t ) ); + return ( PAPI_OK ); +} + +static void +copy_value( unsigned int val, char *nam, char *names, unsigned int *values, + int len ) +{ + *values = val; + strncpy( names, nam, len ); + names[len - 1] = 0; +} + + +char * +_papi_hwd_ntv_code_to_descr( unsigned int EventCode ) +{ + if ( ( EventCode & PAPI_NATIVE_AND_MASK ) >= + _papi_hwi_system_info.sub_info.num_native_events ) { + return "\0"; + } + return ( native_table + [native_name_map[EventCode & PAPI_NATIVE_AND_MASK].index]. + description ); +} + +int +_papi_hwd_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + if ( modifier == PAPI_ENUM_EVENTS ) { + int index = *EventCode & PAPI_NATIVE_AND_MASK; + if ( index + 1 == MAX_NATNAME_MAP_INDEX ) { + return ( PAPI_ENOEVNT ); + } else { + *EventCode = *EventCode + 1; + return ( PAPI_OK ); + } + } else if ( modifier == PAPI_PWR4_ENUM_GROUPS ) { +/* Use this modifier for all supported PPC64 processors. 
*/ + unsigned int group = ( *EventCode & 0x00FF0000 ) >> 16; + int index = *EventCode & 0x000001FF; + int i; + unsigned int tmpg; + + *EventCode = *EventCode & 0xFF00FFFF; + for ( i = 0; i < GROUP_INTS; i++ ) { + tmpg = native_table[index].resources.group[i]; + if ( group != 0 ) { + while ( ( ffs( tmpg ) + i * 32 ) <= group && tmpg != 0 ) + tmpg = tmpg ^ ( 1 << ( ffs( tmpg ) - 1 ) ); + } + if ( tmpg != 0 ) { + group = ffs( tmpg ) + i * 32; + *EventCode = *EventCode | ( group << 16 ); + return ( PAPI_OK ); + } + } + if ( index + 1 == MAX_NATNAME_MAP_INDEX ) { + return ( PAPI_ENOEVNT ); + } + *EventCode = *EventCode + 1; + return ( PAPI_OK ); + } else + return ( PAPI_EINVAL ); +} + +papi_svector_t _ppc64_vector_table[] = { + {( void ( * )( ) ) _papi_hwd_init_control_state, + VEC_PAPI_HWD_INIT_CONTROL_STATE}, + {( void ( * )( ) ) _papi_hwd_allocate_registers, + VEC_PAPI_HWD_ALLOCATE_REGISTERS}, + {( void ( * )( ) ) _papi_hwd_update_control_state, + VEC_PAPI_HWD_UPDATE_CONTROL_STATE}, + {( void ( * )( ) ) _papi_hwd_start, VEC_PAPI_HWD_START}, + {( void ( * )( ) ) _papi_hwd_stop, VEC_PAPI_HWD_STOP}, + {( void ( * )( ) ) _papi_hwd_read, VEC_PAPI_HWD_READ}, + {( void ( * )( ) ) _papi_hwd_reset, VEC_PAPI_HWD_RESET}, + {( void ( * )( ) ) _papi_hwd_shutdown, VEC_PAPI_HWD_SHUTDOWN}, + {( void ( * )( ) ) _papi_hwd_set_overflow, VEC_PAPI_HWD_SET_OVERFLOW}, + {( void ( * )( ) ) _papi_hwd_set_profile, VEC_PAPI_HWD_SET_PROFILE}, + {( void ( * )( ) ) _papi_hwd_stop_profiling, VEC_PAPI_HWD_STOP_PROFILING}, + {( void ( * )( ) ) _papi_hwd_set_domain, VEC_PAPI_HWD_SET_DOMAIN}, + {( void ( * )( ) ) *_papi_hwd_ntv_code_to_name, + VEC_PAPI_HWD_NTV_CODE_TO_NAME}, + {( void ( * )( ) ) _papi_hwd_ntv_code_to_bits, + VEC_PAPI_HWD_NTV_CODE_TO_BITS}, + {( void ( * )( ) ) *_papi_hwd_ntv_code_to_descr, + VEC_PAPI_HWD_NTV_CODE_TO_DESCR}, + {( void ( * )( ) ) *_papi_hwd_ntv_enum_events, + VEC_PAPI_HWD_NTV_ENUM_EVENTS}, + {NULL, VEC_PAPI_END} +}; + +int +ppc64_setup_vector_table( papi_vectors_t * 
vtable ) +{ /* Installs _ppc64_vector_table into vtable through _papi_hwi_setup_vector_table() and returns that call's result. */ + int retval = _papi_hwi_setup_vector_table( vtable, _ppc64_vector_table ); + return retval; +} diff --git a/src/components/perfctr_ppc/perfctr-ppc64.h b/src/components/perfctr_ppc/perfctr-ppc64.h new file mode 100644 index 0000000..90d3592 --- /dev/null +++ b/src/components/perfctr_ppc/perfctr-ppc64.h @@ -0,0 +1,201 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: perfctr-ppc64.h +* CVS: $Id$ +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +*/ + +#ifndef _PAPI_PERFCTR_PPC64 /* _PAPI_PERFCTR_PPC64 */ +#define _PAPI_PERFCTR_PPC64 + +#if defined(_POWER5) || defined(_POWER5p) +#define MAX_COUNTERS 6 +#define NUM_COUNTER_MASKS 4 +/* masks for PMC1-4 should be AND'ed into MMCR1 */ +#define PMC1_SEL_MASK 0xFFFFFFFF00FFFFFFULL +#define PMC2_SEL_MASK 0xFFFFFFFFFF00FFFFULL +#define PMC3_SEL_MASK 0xFFFFFFFFFFFF00FFULL +#define PMC4_SEL_MASK 0xFFFFFFFFFFFFFF00ULL +/* PMC5_PMC6_FREEZE should be OR'ed into MMCR0 */ +#define PMC5_PMC6_FREEZE 0x00000010UL +#else +#define MAX_COUNTERS 8 +#define NUM_COUNTER_MASKS (MAX_COUNTERS+1) /* parenthesized so the macro expands safely inside larger expressions */ +/* assume ppc970 for now */ +#define PMC1_SEL_MASK 0xFFFFF0FFUL +#define PMC2_SEL_MASK 0xFFFFFFE1UL +#define PMC3_SEL_MASK 0xFFFFFFFF87FFFFFFULL +#define PMC4_SEL_MASK 0xFFFFFFFFFC3FFFFFULL +#define PMC5_SEL_MASK 0xFFFFFFFFFFE1FFFFULL +#define PMC6_SEL_MASK 0xFFFFFFFFFFFF0FFFULL +#define PMC7_SEL_MASK 0xFFFFFFFFFFFFF87FULL +#define PMC8_SEL_MASK 0xFFFFFFFFFFFFFFC3ULL +#define PMC8a_SEL_MASK 0xFFFDFFFFUL +#endif + + + +#include "papi.h" +#include "ppc64_events.h" +#include "linux-ppc64.h" +#include "papi_preset.h" +#include "libperfctr.h" + +#define HW_OVERFLOW 1 +//#define PAPI_MAX_STR_LEN 129 + +// control bits MMCR0 +#define PERF_INT_ENABLE 0x0000C000 // enables interrupts on PMC1 as well as PMC2-PMCj (2<=j<=MAX_COUNTERS) +#define PMC_OVFL 0x80000000 +#define PERF_KERNEL 0x40000000 +#define PERF_USER 0x20000000 +#define PERF_HYPERVISOR 0x00000001 +#define 
PERF_CONTROL_MASK 0xFFFFE001 + +/* Error message strings; FOPEN_ERROR is a format string that takes one %s argument. */ +#define AI_ERROR "No support for a-mode counters after adding an i-mode counter" +#define VOPEN_ERROR "vperfctr_open() returned NULL, please run perfex -i to verify your perfctr installation" +#define GOPEN_ERROR "gperfctr_open() returned NULL" +#define VINFO_ERROR "vperfctr_info() returned < 0" +#define VCNTRL_ERROR "vperfctr_control() returned < 0" +#define RCNTRL_ERROR "rvperfctr_control() returned < 0" +#define GCNTRL_ERROR "gperfctr_control() returned < 0" +#define FOPEN_ERROR "fopen(%s) returned NULL" +#define STATE_MAL_ERROR "Error allocating perfctr structures" +#define MODEL_ERROR "This is not a PowerPC" +#define EVENT_INFO_FILE_ERROR "Event info file error" +/* The two values a lock[] word takes in the lock primitives below. */ +#define MUTEX_LOCKED 1 +#define MUTEX_OPEN 0 +/* Lock words indexed by lock number; only declared in this header. */ +extern volatile unsigned int lock[]; +/* NOTE(review): the header name after #include is missing in this copy -- restore it from the upstream file. */ +#include +/* Makes one attempt to take the lock word at the given address. The asm loads the word with lwarx; if it differs from MUTEX_OPEN it branches to label 2 without storing, otherwise it tries to store MUTEX_LOCKED with stwcx. (going back to the lwarx if that store fails) and then executes isync. The loaded value is returned, so a result of MUTEX_OPEN means this call took the lock. */ +// similar to __arch_compare_and_exchange_val_32_acq() from libc's atomic.h +static inline unsigned long +_papi_hwd_trylock( unsigned int *lock ) +{ + unsigned long tmp, tmp2; + __asm__ volatile ( " li %1,%3\n" + "1: lwarx %0,0,%2\n" + " cmpwi 0,%0,%4\n" + " bne- 2f\n" + " stwcx. %1,0,%2\n" + " bne- 1b\n" + " isync\n" "2:":"=&r" ( tmp ), "=&r"( tmp2 ) + :"b"( lock ), "i"( MUTEX_LOCKED ), "i"( MUTEX_OPEN ) + :"cr0", "memory" ); + return tmp; +} +/* Spins, calling _papi_hwd_trylock() on lock[locknum] until it returns MUTEX_OPEN. */ +#define _papi_hwd_lock(locknum) \ + do { } while (_papi_hwd_trylock((unsigned int *)(&(lock[(locknum)]))) != MUTEX_OPEN) +/* Executes lwsync, then writes MUTEX_OPEN into lock[locknum]. */ +#define _papi_hwd_unlock(locknum) \ + do { \ + __asm__ volatile("lwsync": : :"memory"); \ + lock[(locknum)] = MUTEX_OPEN; \ + } while(0) + + +// prototypes +int setup_ppc64_native_table( void ); +/* Per-event record; ppc64_perfctr_control holds MAX_COUNTERS of these in its native[] array. */ +typedef struct hwd_native +{ + /* index in the native table, required */ + int index; + /* Which counters can be used? 
*/ + unsigned int selector; + /* Rank determines how many counters carry each metric */ + unsigned char rank; + /* which counter this native event stays in */ + int position; + int mod; + int link; +} hwd_native_t; +/* Register-allocation record: ra_group has GROUP_INTS words and ra_counter_cmd has one entry per counter. NOTE(review): the meaning of ra_position is not shown in this header -- confirm against the allocator. */ +typedef struct ppc64_reg_alloc +{ + int ra_position; + unsigned int ra_group[GROUP_INTS]; + int ra_counter_cmd[MAX_COUNTERS]; +} ppc64_reg_alloc_t; + + +/* typedefs to conform to hardware independent PAPI code. */ +typedef ppc64_reg_alloc_t hwd_reg_alloc_t; +/* Control state record; this header later aliases it as hwd_control_state_t. */ +typedef struct ppc64_perfctr_control +{ + /* Buffer to pass to the kernel to control the counters */ + int group_id; /* NOTE(review): the comment above does not match this int field; it may be left over from other code -- confirm */ + /* Interrupt interval */ + int timer_ms; + +// the members below are from perfctr-p3.h + hwd_native_t native[MAX_COUNTERS]; + int native_idx; + unsigned char master_selector; + hwd_register_t allocated_registers; + struct vperfctr_control control; + struct perfctr_sum_ctrs state; + /* Allow attach to be per-eventset. */ + struct rvperfctr *rvperfctr; +} ppc64_perfctr_control_t; +/* Context record holding a single vperfctr pointer; this header later aliases it as hwd_context_t. */ +typedef struct ppc64_perfctr_context +{ + struct vperfctr *perfctr; +} ppc64_perfctr_context_t; + +/* typedefs to conform to hardware independent PAPI code. 
*/ +typedef ppc64_perfctr_control_t hwd_control_state_t; +typedef ppc64_perfctr_context_t hwd_context_t; +#define hwd_pmc_control vperfctr_control +/* One native event: symbol name, event number, and short and long description strings. */ +typedef struct ntv_event +{ + char symbol[PAPI_MAX_STR_LEN]; + unsigned int event_num; + char *short_description; + char *description; +} ntv_event_t; +/* Native event tables with one slot per counter. NOTE(review): presumably maxevents[i] is the length of wev[i] and maxpmcs the number of counters in use -- confirm against the code that fills this in. */ +typedef struct ntv_event_info +{ + int maxevents[MAX_COUNTERS]; + int maxpmcs; + ntv_event_t *wev[MAX_COUNTERS]; +} ntv_event_info_t; + +/* One event group: its id, four mmcr* words and one event number per counter. NOTE(review): presumably the mmcr* fields are the register values that select the group -- confirm. */ +typedef struct event_group +{ + int group_id; + unsigned int mmcr0; + unsigned int mmcr1L; + unsigned int mmcr1U; + unsigned int mmcra; + unsigned int events[MAX_COUNTERS]; +} event_group_t; +/* Holds up to MAX_GROUPS event_group_t pointers together with a maxgroups count. */ +typedef struct ntv_event_group_info +{ + int maxgroups; + event_group_t *event_groups[MAX_GROUPS]; +} ntv_event_group_info_t; + + +// prototypes +ntv_event_info_t *perfctr_get_native_evt_info( void ); +ntv_event_group_info_t *perfctr_get_native_group_info( void ); + +#endif /* _PAPI_PERFCTR_PPC64 */ diff --git a/src/components/perfctr_ppc/power5+_events.h b/src/components/perfctr_ppc/power5+_events.h new file mode 100644 index 0000000..8979c3d --- /dev/null +++ b/src/components/perfctr_ppc/power5+_events.h @@ -0,0 +1,518 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +#ifndef _PAPI_POWER5p_EVENTS_H +#define _PAPI_POWER5p_EVENTS_H + +/* +* File: power5+_events.h +* CVS: +* Author: Corey Ashford +* cjashfor@us.ibm.com +* Mods: +* +* +* (C) Copyright IBM Corporation, 2006, 2007. All Rights Reserved. +* Contributed by Corey Ashford +* +* This file MUST be kept synchronised with the events file. 
+* +*/ +#include "papiStdEventDefs.h" +/* Sizing constants: MAX_GROUPS is GROUP_INTS * 32, and MAX_NATNAME_MAP_INDEX is used as the length of native_name_map[]. */ +#define GROUP_INTS 6 +#define PAPI_MAX_NATIVE_EVENTS 512 +#define MAX_GROUPS (GROUP_INTS * 32) +#define MAX_NATNAME_MAP_INDEX 483 + +/* Native event codes: the first enumerator equals PAPI_NATIVE_MASK and the rest count up from it; NATNAME_GUARD closes the list. */ +enum native_name +{ + PNE_PM_0INST_CLB_CYC = PAPI_NATIVE_MASK, + PNE_PM_1INST_CLB_CYC, + PNE_PM_1PLUS_PPC_CMPL, + PNE_PM_2INST_CLB_CYC, + PNE_PM_3INST_CLB_CYC, + PNE_PM_4INST_CLB_CYC, + PNE_PM_5INST_CLB_CYC, + PNE_PM_6INST_CLB_CYC, + PNE_PM_BRQ_FULL_CYC, + PNE_PM_BR_ISSUED, + PNE_PM_BR_MPRED_CR, + PNE_PM_BR_MPRED_TA, + PNE_PM_BR_UNCOND, + PNE_PM_CLB_EMPTY_CYC, + PNE_PM_CLB_FULL_CYC, + PNE_PM_CRQ_FULL_CYC, + PNE_PM_CR_MAP_FULL_CYC, + PNE_PM_CYC, + PNE_PM_DATA_FROM_L2, + PNE_PM_DATA_FROM_L25_SHR, + PNE_PM_DATA_FROM_L275_MOD, + PNE_PM_DATA_FROM_L3, + PNE_PM_DATA_FROM_L35_SHR, + PNE_PM_DATA_FROM_L375_MOD, + PNE_PM_DATA_FROM_RMEM, + PNE_PM_DATA_TABLEWALK_CYC, + PNE_PM_DC_INV_L2, + PNE_PM_DC_PREF_OUT_OF_STREAMS, + PNE_PM_DC_PREF_DST, + PNE_PM_DC_PREF_STREAM_ALLOC, + PNE_PM_DSLB_MISS, + PNE_PM_DTLB_MISS, + PNE_PM_DTLB_MISS_4K, + PNE_PM_DTLB_REF, + PNE_PM_DTLB_REF_4K, + PNE_PM_EE_OFF, + PNE_PM_EE_OFF_EXT_INT, + PNE_PM_FAB_CMD_ISSUED, + PNE_PM_FAB_CMD_RETRIED, + PNE_PM_FAB_DCLAIM_ISSUED, + PNE_PM_FAB_DCLAIM_RETRIED, + PNE_PM_FAB_HOLDtoNN_EMPTY, + PNE_PM_FAB_HOLDtoVN_EMPTY, + PNE_PM_FAB_M1toP1_SIDECAR_EMPTY, + PNE_PM_FAB_M1toVNorNN_SIDECAR_EMPTY, + PNE_PM_FAB_P1toM1_SIDECAR_EMPTY, + PNE_PM_FAB_P1toVNorNN_SIDECAR_EMPTY, + PNE_PM_FAB_PNtoNN_DIRECT, + PNE_PM_FAB_PNtoNN_SIDECAR, + PNE_PM_FAB_PNtoVN_DIRECT, + PNE_PM_FAB_PNtoVN_SIDECAR, + PNE_PM_FAB_VBYPASS_EMPTY, + PNE_PM_FLUSH, + PNE_PM_FLUSH_BR_MPRED, + PNE_PM_FLUSH_IMBAL, + PNE_PM_FLUSH_SB, + PNE_PM_FLUSH_SYNC, + PNE_PM_FPR_MAP_FULL_CYC, + PNE_PM_FPU0_1FLOP, + PNE_PM_FPU0_DENORM, + PNE_PM_FPU0_FDIV, + PNE_PM_FPU0_FEST, + PNE_PM_FPU0_FIN, + PNE_PM_FPU0_FMA, + PNE_PM_FPU0_FMOV_FEST, + PNE_PM_FPU0_FPSCR, + PNE_PM_FPU0_FRSP_FCONV, + PNE_PM_FPU0_FSQRT, + PNE_PM_FPU0_FULL_CYC, + PNE_PM_FPU0_SINGLE, + PNE_PM_FPU0_STALL3, + PNE_PM_FPU0_STF, + PNE_PM_FPU1_1FLOP, 
+ PNE_PM_FPU1_DENORM, + PNE_PM_FPU1_FDIV, + PNE_PM_FPU1_FEST, + PNE_PM_FPU1_FIN, + PNE_PM_FPU1_FMA, + PNE_PM_FPU1_FMOV_FEST, + PNE_PM_FPU1_FRSP_FCONV, + PNE_PM_FPU1_FSQRT, + PNE_PM_FPU1_FULL_CYC, + PNE_PM_FPU1_SINGLE, + PNE_PM_FPU1_STALL3, + PNE_PM_FPU1_STF, + PNE_PM_FPU_1FLOP, + PNE_PM_FPU_DENORM, + PNE_PM_FPU_FDIV, + PNE_PM_FPU_FEST, + PNE_PM_FPU_FULL_CYC, + PNE_PM_FPU_SINGLE, + PNE_PM_FXLS0_FULL_CYC, + PNE_PM_FXLS1_FULL_CYC, + PNE_PM_FXLS_FULL_CYC, + PNE_PM_FXU0_FIN, + PNE_PM_FXU1_FIN, + PNE_PM_FXU_IDLE, + PNE_PM_GCT_FULL_CYC, + PNE_PM_GCT_NOSLOT_CYC, + PNE_PM_GCT_USAGE_00to59_CYC, + PNE_PM_GPR_MAP_FULL_CYC, + PNE_PM_GRP_BR_REDIR, + PNE_PM_GRP_BR_REDIR_NONSPEC, + PNE_PM_GRP_DISP_BLK_SB_CYC, + PNE_PM_GRP_DISP_REJECT, + PNE_PM_GRP_DISP_VALID, + PNE_PM_GRP_IC_MISS, + PNE_PM_GRP_IC_MISS_BR_REDIR_NONSPEC, + PNE_PM_GRP_IC_MISS_NONSPEC, + PNE_PM_GRP_MRK, + PNE_PM_IC_DEMAND_L2_BHT_REDIRECT, + PNE_PM_IC_DEMAND_L2_BR_REDIRECT, + PNE_PM_IC_PREF_REQ, + PNE_PM_IERAT_XLATE_WR, + PNE_PM_IERAT_XLATE_WR_LP, + PNE_PM_IOPS_CMPL, + PNE_PM_INST_DISP_ATTEMPT, + PNE_PM_INST_FETCH_CYC, + PNE_PM_INST_FROM_L2, + PNE_PM_INST_FROM_L25_SHR, + PNE_PM_INST_FROM_L2MISS, + PNE_PM_INST_FROM_L3, + PNE_PM_INST_FROM_L35_SHR, + PNE_PM_ISLB_MISS, + PNE_PM_ITLB_MISS, + PNE_PM_L1_DCACHE_RELOAD_VALID, + PNE_PM_L1_PREF, + PNE_PM_L1_WRITE_CYC, + PNE_PM_L2SA_MOD_INV, + PNE_PM_L2SA_MOD_TAG, + PNE_PM_L2SA_RCLD_DISP, + PNE_PM_L2SA_RCLD_DISP_FAIL_ADDR, + PNE_PM_L2SA_RCLD_DISP_FAIL_OTHER, + PNE_PM_L2SA_RCLD_DISP_FAIL_RC_FULL, + PNE_PM_L2SA_RCST_DISP, + PNE_PM_L2SA_RCST_DISP_FAIL_ADDR, + PNE_PM_L2SA_RCST_DISP_FAIL_OTHER, + PNE_PM_L2SA_RCST_DISP_FAIL_RC_FULL, + PNE_PM_L2SA_RC_DISP_FAIL_CO_BUSY, + PNE_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL, + PNE_PM_L2SA_SHR_INV, + PNE_PM_L2SA_SHR_MOD, + PNE_PM_L2SA_ST_HIT, + PNE_PM_L2SA_ST_REQ, + PNE_PM_L2SB_MOD_INV, + PNE_PM_L2SB_MOD_TAG, + PNE_PM_L2SB_RCLD_DISP, + PNE_PM_L2SB_RCLD_DISP_FAIL_ADDR, + PNE_PM_L2SB_RCLD_DISP_FAIL_OTHER, + PNE_PM_L2SB_RCLD_DISP_FAIL_RC_FULL, + 
PNE_PM_L2SB_RCST_DISP, + PNE_PM_L2SB_RCST_DISP_FAIL_ADDR, + PNE_PM_L2SB_RCST_DISP_FAIL_OTHER, + PNE_PM_L2SB_RCST_DISP_FAIL_RC_FULL, + PNE_PM_L2SB_RC_DISP_FAIL_CO_BUSY, + PNE_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL, + PNE_PM_L2SB_SHR_INV, + PNE_PM_L2SB_SHR_MOD, + PNE_PM_L2SB_ST_HIT, + PNE_PM_L2SB_ST_REQ, + PNE_PM_L2SC_MOD_INV, + PNE_PM_L2SC_MOD_TAG, + PNE_PM_L2SC_RCLD_DISP, + PNE_PM_L2SC_RCLD_DISP_FAIL_ADDR, + PNE_PM_L2SC_RCLD_DISP_FAIL_OTHER, + PNE_PM_L2SC_RCLD_DISP_FAIL_RC_FULL, + PNE_PM_L2SC_RCST_DISP, + PNE_PM_L2SC_RCST_DISP_FAIL_ADDR, + PNE_PM_L2SC_RCST_DISP_FAIL_OTHER, + PNE_PM_L2SC_RCST_DISP_FAIL_RC_FULL, + PNE_PM_L2SC_RC_DISP_FAIL_CO_BUSY, + PNE_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL, + PNE_PM_L2SC_SHR_INV, + PNE_PM_L2SC_SHR_MOD, + PNE_PM_L2SC_ST_HIT, + PNE_PM_L2SC_ST_REQ, + PNE_PM_L2_PREF, + PNE_PM_L3SA_ALL_BUSY, + PNE_PM_L3SA_HIT, + PNE_PM_L3SA_MOD_INV, + PNE_PM_L3SA_MOD_TAG, + PNE_PM_L3SA_REF, + PNE_PM_L3SA_SHR_INV, + PNE_PM_L3SA_SNOOP_RETRY, + PNE_PM_L3SB_ALL_BUSY, + PNE_PM_L3SB_HIT, + PNE_PM_L3SB_MOD_INV, + PNE_PM_L3SB_MOD_TAG, + PNE_PM_L3SB_REF, + PNE_PM_L3SB_SHR_INV, + PNE_PM_L3SB_SNOOP_RETRY, + PNE_PM_L3SC_ALL_BUSY, + PNE_PM_L3SC_HIT, + PNE_PM_L3SC_MOD_INV, + PNE_PM_L3SC_MOD_TAG, + PNE_PM_L3SC_REF, + PNE_PM_L3SC_SHR_INV, + PNE_PM_L3SC_SNOOP_RETRY, + PNE_PM_LARX_LSU0, + PNE_PM_LD_MISS_L1_LSU0, + PNE_PM_LD_MISS_L1_LSU1, + PNE_PM_LD_REF_L1, + PNE_PM_LD_REF_L1_LSU0, + PNE_PM_BR_PRED_TA, + PNE_PM_LR_CTR_MAP_FULL_CYC, + PNE_PM_LSU0_BUSY_REJECT, + PNE_PM_LSU0_DERAT_MISS, + PNE_PM_LSU0_FLUSH_LRQ, + PNE_PM_LSU0_FLUSH_SRQ, + PNE_PM_LSU0_FLUSH_ULD, + PNE_PM_LSU0_FLUSH_UST, + PNE_PM_LSU0_LDF, + PNE_PM_LSU0_NCLD, + PNE_PM_LSU0_REJECT_ERAT_MISS, + PNE_PM_LSU0_REJECT_LMQ_FULL, + PNE_PM_LSU0_REJECT_RELOAD_CDF, + PNE_PM_LSU0_REJECT_SRQ, + PNE_PM_LSU0_SRQ_STFWD, + PNE_PM_LSU1_BUSY_REJECT, + PNE_PM_LSU1_DERAT_MISS, + PNE_PM_LSU1_FLUSH_LRQ, + PNE_PM_LSU1_FLUSH_SRQ, + PNE_PM_LSU1_FLUSH_ULD, + PNE_PM_LSU1_FLUSH_UST, + PNE_PM_LSU1_LDF, + PNE_PM_LSU1_NCLD, + 
PNE_PM_LSU1_REJECT_ERAT_MISS, + PNE_PM_LSU1_REJECT_LMQ_FULL, + PNE_PM_LSU1_REJECT_RELOAD_CDF, + PNE_PM_LSU1_REJECT_SRQ, + PNE_PM_LSU1_SRQ_STFWD, + PNE_PM_LSU_FLUSH, + PNE_PM_LSU_FLUSH_LRQ_FULL, + PNE_PM_LSU_FLUSH_SRQ, + PNE_PM_LSU_FLUSH_SRQ_FULL, + PNE_PM_LSU_FLUSH_ULD, + PNE_PM_LSU_LDF, + PNE_PM_LSU_LMQ_FULL_CYC, + PNE_PM_LSU_LMQ_LHR_MERGE, + PNE_PM_LSU_LMQ_S0_ALLOC, + PNE_PM_LSU_LMQ_S0_VALID, + PNE_PM_LSU_LRQ_FULL_CYC, + PNE_PM_LSU_LRQ_S0_ALLOC, + PNE_PM_LSU_LRQ_S0_VALID, + PNE_PM_LSU_REJECT_ERAT_MISS, + PNE_PM_LSU_REJECT_SRQ, + PNE_PM_LSU_SRQ_FULL_CYC, + PNE_PM_LSU_SRQ_S0_ALLOC, + PNE_PM_LSU_SRQ_S0_VALID, + PNE_PM_LSU_SRQ_SYNC_CYC, + PNE_PM_LWSYNC_HELD, + PNE_PM_MEM_FAST_PATH_RD_DISP, + PNE_PM_IC_PREF_INSTALL, + PNE_PM_MEM_HI_PRIO_WR_CMPL, + PNE_PM_MEM_NONSPEC_RD_CANCEL, + PNE_PM_MEM_LO_PRIO_WR_CMPL, + PNE_PM_MEM_PWQ_DISP, + PNE_PM_MEM_PWQ_DISP_Q2or3, + PNE_PM_MEM_PW_CMPL, + PNE_PM_MEM_PW_GATH, + PNE_PM_MEM_RQ_DISP_Q0to3, + PNE_PM_MEM_RQ_DISP, + PNE_PM_MEM_RQ_DISP_Q4to7, + PNE_PM_MEM_RQ_DISP_Q8to11, + PNE_PM_MEM_SPEC_RD_CANCEL, + PNE_PM_MEM_WQ_DISP_Q0to7, + PNE_PM_MEM_WQ_DISP_Q8to15, + PNE_PM_MEM_WQ_DISP_DCLAIM, + PNE_PM_MEM_WQ_DISP_WRITE, + PNE_PM_MRK_DATA_FROM_L2, + PNE_PM_MRK_DATA_FROM_L25_SHR, + PNE_PM_MRK_DATA_FROM_L275_MOD, + PNE_PM_MRK_DATA_FROM_L3, + PNE_PM_MRK_DATA_FROM_L35_SHR, + PNE_PM_MRK_DATA_FROM_L375_MOD, + PNE_PM_MRK_DATA_FROM_RMEM, + PNE_PM_MRK_DSLB_MISS, + PNE_PM_MRK_DTLB_MISS, + PNE_PM_MRK_DTLB_MISS_4K, + PNE_PM_MRK_DTLB_REF, + PNE_PM_MRK_DTLB_REF_4K, + PNE_PM_MRK_GRP_DISP, + PNE_PM_MRK_GRP_ISSUED, + PNE_PM_MRK_IMR_RELOAD, + PNE_PM_MRK_L1_RELOAD_VALID, + PNE_PM_MRK_LD_MISS_L1, + PNE_PM_MRK_LD_MISS_L1_LSU0, + PNE_PM_MRK_LD_MISS_L1_LSU1, + PNE_PM_MRK_LSU0_FLUSH_LRQ, + PNE_PM_MRK_LSU0_FLUSH_SRQ, + PNE_PM_MRK_LSU0_FLUSH_ULD, + PNE_PM_MRK_LSU0_FLUSH_UST, + PNE_PM_MRK_LSU1_FLUSH_LRQ, + PNE_PM_MRK_LSU1_FLUSH_SRQ, + PNE_PM_MRK_LSU1_FLUSH_ULD, + PNE_PM_MRK_LSU1_FLUSH_UST, + PNE_PM_MRK_LSU_FLUSH_ULD, + PNE_PM_MRK_LSU_SRQ_INST_VALID, + 
PNE_PM_MRK_STCX_FAIL, + PNE_PM_MRK_ST_CMPL, + PNE_PM_MRK_ST_MISS_L1, + PNE_PM_PMC4_OVERFLOW, + PNE_PM_PMC5_OVERFLOW, + PNE_PM_INST_CMPL, + PNE_PM_PTEG_FROM_L2, + PNE_PM_PTEG_FROM_L25_SHR, + PNE_PM_PTEG_FROM_L275_MOD, + PNE_PM_PTEG_FROM_L3, + PNE_PM_PTEG_FROM_L35_SHR, + PNE_PM_PTEG_FROM_L375_MOD, + PNE_PM_PTEG_FROM_RMEM, + PNE_PM_PTEG_RELOAD_VALID, + PNE_PM_RUN_CYC, + PNE_PM_SNOOP_DCLAIM_RETRY_QFULL, + PNE_PM_SNOOP_PARTIAL_RTRY_QFULL, + PNE_PM_SNOOP_PW_RETRY_RQ, + PNE_PM_SNOOP_PW_RETRY_WQ_PWQ, + PNE_PM_SNOOP_RD_RETRY_QFULL, + PNE_PM_SNOOP_RD_RETRY_RQ, + PNE_PM_SNOOP_RD_RETRY_WQ, + PNE_PM_SNOOP_RETRY_1AHEAD, + PNE_PM_SNOOP_TLBIE, + PNE_PM_SNOOP_WR_RETRY_QFULL, + PNE_PM_SNOOP_WR_RETRY_RQ, + PNE_PM_SNOOP_WR_RETRY_WQ, + PNE_PM_STCX_FAIL, + PNE_PM_STCX_PASS, + PNE_PM_ST_MISS_L1, + PNE_PM_ST_REF_L1_LSU0, + PNE_PM_ST_REF_L1_LSU1, + PNE_PM_SUSPENDED, + PNE_PM_TB_BIT_TRANS, + PNE_PM_THRD_L2MISS_BOTH_CYC, + PNE_PM_THRD_ONE_RUN_CYC, + PNE_PM_THRD_PRIO_1_CYC, + PNE_PM_THRD_PRIO_2_CYC, + PNE_PM_THRD_PRIO_3_CYC, + PNE_PM_THRD_PRIO_4_CYC, + PNE_PM_THRD_PRIO_5_CYC, + PNE_PM_THRD_PRIO_6_CYC, + PNE_PM_THRD_PRIO_7_CYC, + PNE_PM_THRD_PRIO_DIFF_0_CYC, + PNE_PM_THRD_PRIO_DIFF_1or2_CYC, + PNE_PM_THRD_PRIO_DIFF_3or4_CYC, + PNE_PM_THRD_PRIO_DIFF_5or6_CYC, + PNE_PM_THRD_PRIO_DIFF_minus1or2_CYC, + PNE_PM_THRD_PRIO_DIFF_minus3or4_CYC, + PNE_PM_THRD_PRIO_DIFF_minus5or6_CYC, + PNE_PM_THRD_SEL_OVER_CLB_EMPTY, + PNE_PM_THRD_SEL_OVER_GCT_IMBAL, + PNE_PM_THRD_SEL_OVER_ISU_HOLD, + PNE_PM_THRD_SEL_OVER_L2MISS, + PNE_PM_THRD_SEL_T0, + PNE_PM_THRD_SEL_T1, + PNE_PM_THRD_SMT_HANG, + PNE_PM_TLBIE_HELD, + PNE_PM_TLB_MISS, + PNE_PM_XER_MAP_FULL_CYC, + PNE_PM_BR_PRED_CR, + PNE_PM_MEM_RQ_DISP_Q12to15, + PNE_PM_MEM_RQ_DISP_Q16to19, + PNE_PM_SNOOP_RETRY_AB_COLLISION, + PNE_PM_CMPLU_STALL_DCACHE_MISS, + PNE_PM_CMPLU_STALL_FDIV, + PNE_PM_CMPLU_STALL_FXU, + PNE_PM_CMPLU_STALL_LSU, + PNE_PM_DATA_FROM_L25_MOD, + PNE_PM_DATA_FROM_L35_MOD, + PNE_PM_DATA_FROM_LMEM, + PNE_PM_DTLB_MISS_64K, + PNE_PM_DTLB_REF_64K, + 
PNE_PM_FPU_FMA, + PNE_PM_FPU_FRSP_FCONV, + PNE_PM_FPU_FSQRT, + PNE_PM_FPU_STALL3, + PNE_PM_FPU_STF, + PNE_PM_FXU_BUSY, + PNE_PM_MRK_FXU_FIN, + PNE_PM_GCT_EMPTY_CYC, + PNE_PM_GCT_NOSLOT_IC_MISS, + PNE_PM_GCT_USAGE_60to79_CYC, + PNE_PM_GRP_DISP, + PNE_PM_HV_CYC, + PNE_PM_INST_FROM_L1, + PNE_PM_INST_FROM_L25_MOD, + PNE_PM_INST_FROM_L35_MOD, + PNE_PM_INST_FROM_LMEM, + PNE_PM_LSU_BUSY_REJECT, + PNE_PM_LSU_DERAT_MISS, + PNE_PM_LSU_FLUSH_LRQ, + PNE_PM_LSU_FLUSH_UST, + PNE_PM_LSU_LMQ_SRQ_EMPTY_CYC, + PNE_PM_LSU_REJECT_LMQ_FULL, + PNE_PM_LSU_REJECT_RELOAD_CDF, + PNE_PM_LSU_SRQ_STFWD, + PNE_PM_MRK_BRU_FIN, + PNE_PM_MRK_DATA_FROM_L25_MOD, + PNE_PM_MRK_DATA_FROM_L25_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L275_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L2_CYC, + PNE_PM_MRK_DATA_FROM_L35_MOD, + PNE_PM_MRK_DATA_FROM_L35_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L375_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L3_CYC, + PNE_PM_MRK_DATA_FROM_LMEM, + PNE_PM_MRK_DTLB_MISS_64K, + PNE_PM_MRK_DTLB_REF_64K, + PNE_PM_MRK_GRP_BR_REDIR, + PNE_PM_MRK_LSU_FLUSH_UST, + PNE_PM_MRK_ST_GPS, + PNE_PM_PMC1_OVERFLOW, + PNE_PM_PTEG_FROM_L25_MOD, + PNE_PM_PTEG_FROM_L35_MOD, + PNE_PM_PTEG_FROM_LMEM, + PNE_PM_SLB_MISS, + PNE_PM_ST_REF_L1, + PNE_PM_THRD_GRP_CMPL_BOTH_CYC, + PNE_PM_DATA_FROM_L275_SHR, + PNE_PM_DATA_FROM_L2MISS, + PNE_PM_DATA_FROM_L375_SHR, + PNE_PM_DTLB_MISS_16M, + PNE_PM_DTLB_REF_16M, + PNE_PM_FPU_FMOV_FEST, + PNE_PM_FXU0_BUSY_FXU1_IDLE, + PNE_PM_FXU_FIN, + PNE_PM_GCT_NOSLOT_SRQ_FULL, + PNE_PM_GCT_USAGE_80to99_CYC, + PNE_PM_GRP_CMPL, + PNE_PM_GRP_DISP_SUCCESS, + PNE_PM_INST_DISP, + PNE_PM_INST_FROM_L275_SHR, + PNE_PM_INST_FROM_L375_SHR, + PNE_PM_INST_FROM_PREF, + PNE_PM_LD_MISS_L1, + PNE_PM_MRK_DATA_FROM_L275_SHR, + PNE_PM_MRK_DATA_FROM_L2MISS, + PNE_PM_MRK_DATA_FROM_L375_SHR, + PNE_PM_MRK_DTLB_MISS_16M, + PNE_PM_MRK_DTLB_REF_16M, + PNE_PM_MRK_FPU_FIN, + PNE_PM_MRK_INST_FIN, + PNE_PM_MRK_LSU_FLUSH_LRQ, + PNE_PM_MRK_ST_CMPL_INT, + PNE_PM_PMC2_OVERFLOW, + PNE_PM_PMC6_OVERFLOW, + PNE_PM_PTEG_FROM_L275_SHR, + 
PNE_PM_PTEG_FROM_L2MISS, + PNE_PM_PTEG_FROM_L375_SHR, + PNE_PM_STOP_COMPLETION, + PNE_PM_THRESH_TIMEO, + PNE_PM_0INST_FETCH, + PNE_PM_BR_PRED_CR_TA, + PNE_PM_CMPLU_STALL_DIV, + PNE_PM_CMPLU_STALL_ERAT_MISS, + PNE_PM_CMPLU_STALL_FPU, + PNE_PM_CMPLU_STALL_REJECT, + PNE_PM_DTLB_MISS_16G, + PNE_PM_DTLB_REF_16G, + PNE_PM_EXT_INT, + PNE_PM_FPU_FIN, + PNE_PM_FXU1_BUSY_FXU0_IDLE, + PNE_PM_GCT_NOSLOT_BR_MPRED, + PNE_PM_INST_FROM_L275_MOD, + PNE_PM_INST_FROM_L375_MOD, + PNE_PM_INST_FROM_RMEM, + PNE_PM_LSU_SRQ_EMPTY_CYC, + PNE_PM_MRK_CRU_FIN, + PNE_PM_MRK_DATA_FROM_L25_MOD_CYC, + PNE_PM_MRK_DATA_FROM_L275_MOD_CYC, + PNE_PM_MRK_DATA_FROM_L35_MOD_CYC, + PNE_PM_MRK_DATA_FROM_L375_MOD_CYC, + PNE_PM_MRK_DATA_FROM_LMEM_CYC, + PNE_PM_MRK_DATA_FROM_RMEM_CYC, + PNE_PM_MRK_DTLB_MISS_16G, + PNE_PM_MRK_DTLB_REF_16G, + PNE_PM_MRK_GRP_CMPL, + PNE_PM_MRK_GRP_IC_MISS, + PNE_PM_MRK_GRP_TIMEO, + PNE_PM_MRK_LSU_FIN, + PNE_PM_MRK_LSU_FLUSH_SRQ, + PNE_PM_PMC3_OVERFLOW, + PNE_PM_WORK_HELD, + PNE_PM_RUN_INST_CMPL, + NATNAME_GUARD, +}; + +#endif diff --git a/src/components/perfctr_ppc/power5+_events_map.c b/src/components/perfctr_ppc/power5+_events_map.c new file mode 100644 index 0000000..9fcb6b5 --- /dev/null +++ b/src/components/perfctr_ppc/power5+_events_map.c @@ -0,0 +1,986 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: power5+_events_map.c +* Author: Eric Kjeldergaard +* kjelderg@linux.ibm.com +* Mods: +* +* +* Copyright (c) International Business Machines, 2006. +* Contributed by Eric Kjeldergaard +* +* This file MUST be kept synchronised with the events file. 
+* +*/ +#include "perfctr-ppc64.h" + +PPC64_native_map_t native_name_map[MAX_NATNAME_MAP_INDEX] = { + {"PM_0INST_CLB_CYC", -1} + , + {"PM_1INST_CLB_CYC", -1} + , + {"PM_1PLUS_PPC_CMPL", -1} + , + {"PM_2INST_CLB_CYC", -1} + , + {"PM_3INST_CLB_CYC", -1} + , + {"PM_4INST_CLB_CYC", -1} + , + {"PM_5INST_CLB_CYC", -1} + , + {"PM_6INST_CLB_CYC", -1} + , + {"PM_BRQ_FULL_CYC", -1} + , + {"PM_BR_ISSUED", -1} + , + {"PM_BR_MPRED_CR", -1} + , + {"PM_BR_MPRED_TA", -1} + , + {"PM_BR_UNCOND", -1} + , + {"PM_CLB_EMPTY_CYC", -1} + , + {"PM_CLB_FULL_CYC", -1} + , + {"PM_CRQ_FULL_CYC", -1} + , + {"PM_CR_MAP_FULL_CYC", -1} + , + {"PM_CYC", -1} + , + {"PM_DATA_FROM_L2", -1} + , + {"PM_DATA_FROM_L25_SHR", -1} + , + {"PM_DATA_FROM_L275_MOD", -1} + , + {"PM_DATA_FROM_L3", -1} + , + {"PM_DATA_FROM_L35_SHR", -1} + , + {"PM_DATA_FROM_L375_MOD", -1} + , + {"PM_DATA_FROM_RMEM", -1} + , + {"PM_DATA_TABLEWALK_CYC", -1} + , + {"PM_DC_INV_L2", -1} + , + {"PM_DC_PREF_OUT_OF_STREAMS", -1} + , + {"PM_DC_PREF_DST", -1} + , + {"PM_DC_PREF_STREAM_ALLOC", -1} + , + {"PM_DSLB_MISS", -1} + , + {"PM_DTLB_MISS", -1} + , + {"PM_DTLB_MISS_4K", -1} + , + {"PM_DTLB_REF", -1} + , + {"PM_DTLB_REF_4K", -1} + , + {"PM_EE_OFF", -1} + , + {"PM_EE_OFF_EXT_INT", -1} + , + {"PM_FAB_CMD_ISSUED", -1} + , + {"PM_FAB_CMD_RETRIED", -1} + , + {"PM_FAB_DCLAIM_ISSUED", -1} + , + {"PM_FAB_DCLAIM_RETRIED", -1} + , + {"PM_FAB_HOLDtoNN_EMPTY", -1} + , + {"PM_FAB_HOLDtoVN_EMPTY", -1} + , + {"PM_FAB_M1toP1_SIDECAR_EMPTY", -1} + , + {"PM_FAB_M1toVNorNN_SIDECAR_EMPTY", -1} + , + {"PM_FAB_P1toM1_SIDECAR_EMPTY", -1} + , + {"PM_FAB_P1toVNorNN_SIDECAR_EMPTY", -1} + , + {"PM_FAB_PNtoNN_DIRECT", -1} + , + {"PM_FAB_PNtoNN_SIDECAR", -1} + , + {"PM_FAB_PNtoVN_DIRECT", -1} + , + {"PM_FAB_PNtoVN_SIDECAR", -1} + , + {"PM_FAB_VBYPASS_EMPTY", -1} + , + {"PM_FLUSH", -1} + , + {"PM_FLUSH_BR_MPRED", -1} + , + {"PM_FLUSH_IMBAL", -1} + , + {"PM_FLUSH_SB", -1} + , + {"PM_FLUSH_SYNC", -1} + , + {"PM_FPR_MAP_FULL_CYC", -1} + , + {"PM_FPU0_1FLOP", -1} + , + 
{"PM_FPU0_DENORM", -1} + , + {"PM_FPU0_FDIV", -1} + , + {"PM_FPU0_FEST", -1} + , + {"PM_FPU0_FIN", -1} + , + {"PM_FPU0_FMA", -1} + , + {"PM_FPU0_FMOV_FEST", -1} + , + {"PM_FPU0_FPSCR", -1} + , + {"PM_FPU0_FRSP_FCONV", -1} + , + {"PM_FPU0_FSQRT", -1} + , + {"PM_FPU0_FULL_CYC", -1} + , + {"PM_FPU0_SINGLE", -1} + , + {"PM_FPU0_STALL3", -1} + , + {"PM_FPU0_STF", -1} + , + {"PM_FPU1_1FLOP", -1} + , + {"PM_FPU1_DENORM", -1} + , + {"PM_FPU1_FDIV", -1} + , + {"PM_FPU1_FEST", -1} + , + {"PM_FPU1_FIN", -1} + , + {"PM_FPU1_FMA", -1} + , + {"PM_FPU1_FMOV_FEST", -1} + , + {"PM_FPU1_FRSP_FCONV", -1} + , + {"PM_FPU1_FSQRT", -1} + , + {"PM_FPU1_FULL_CYC", -1} + , + {"PM_FPU1_SINGLE", -1} + , + {"PM_FPU1_STALL3", -1} + , + {"PM_FPU1_STF", -1} + , + {"PM_FPU_1FLOP", -1} + , + {"PM_FPU_DENORM", -1} + , + {"PM_FPU_FDIV", -1} + , + {"PM_FPU_FEST", -1} + , + {"PM_FPU_FULL_CYC", -1} + , + {"PM_FPU_SINGLE", -1} + , + {"PM_FXLS0_FULL_CYC", -1} + , + {"PM_FXLS1_FULL_CYC", -1} + , + {"PM_FXLS_FULL_CYC", -1} + , + {"PM_FXU0_FIN", -1} + , + {"PM_FXU1_FIN", -1} + , + {"PM_FXU_IDLE", -1} + , + {"PM_GCT_FULL_CYC", -1} + , + {"PM_GCT_NOSLOT_CYC", -1} + , + {"PM_GCT_USAGE_00to59_CYC", -1} + , + {"PM_GPR_MAP_FULL_CYC", -1} + , + {"PM_GRP_BR_REDIR", -1} + , + {"PM_GRP_BR_REDIR_NONSPEC", -1} + , + {"PM_GRP_DISP_BLK_SB_CYC", -1} + , + {"PM_GRP_DISP_REJECT", -1} + , + {"PM_GRP_DISP_VALID", -1} + , + {"PM_GRP_IC_MISS", -1} + , + {"PM_GRP_IC_MISS_BR_REDIR_NONSPEC", -1} + , + {"PM_GRP_IC_MISS_NONSPEC", -1} + , + {"PM_GRP_MRK", -1} + , + {"PM_IC_DEMAND_L2_BHT_REDIRECT", -1} + , + {"PM_IC_DEMAND_L2_BR_REDIRECT", -1} + , + {"PM_IC_PREF_REQ", -1} + , + {"PM_IERAT_XLATE_WR", -1} + , + {"PM_IERAT_XLATE_WR_LP", -1} + , + {"PM_IOPS_CMPL", -1} + , + {"PM_INST_DISP_ATTEMPT", -1} + , + {"PM_INST_FETCH_CYC", -1} + , + {"PM_INST_FROM_L2", -1} + , + {"PM_INST_FROM_L25_SHR", -1} + , + {"PM_INST_FROM_L2MISS", -1} + , + {"PM_INST_FROM_L3", -1} + , + {"PM_INST_FROM_L35_SHR", -1} + , + {"PM_ISLB_MISS", -1} + , + 
{"PM_ITLB_MISS", -1} + , + {"PM_L1_DCACHE_RELOAD_VALID", -1} + , + {"PM_L1_PREF", -1} + , + {"PM_L1_WRITE_CYC", -1} + , + {"PM_L2SA_MOD_INV", -1} + , + {"PM_L2SA_MOD_TAG", -1} + , + {"PM_L2SA_RCLD_DISP", -1} + , + {"PM_L2SA_RCLD_DISP_FAIL_ADDR", -1} + , + {"PM_L2SA_RCLD_DISP_FAIL_OTHER", -1} + , + {"PM_L2SA_RCLD_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SA_RCST_DISP", -1} + , + {"PM_L2SA_RCST_DISP_FAIL_ADDR", -1} + , + {"PM_L2SA_RCST_DISP_FAIL_OTHER", -1} + , + {"PM_L2SA_RCST_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SA_RC_DISP_FAIL_CO_BUSY", -1} + , + {"PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL", -1} + , + {"PM_L2SA_SHR_INV", -1} + , + {"PM_L2SA_SHR_MOD", -1} + , + {"PM_L2SA_ST_HIT", -1} + , + {"PM_L2SA_ST_REQ", -1} + , + {"PM_L2SB_MOD_INV", -1} + , + {"PM_L2SB_MOD_TAG", -1} + , + {"PM_L2SB_RCLD_DISP", -1} + , + {"PM_L2SB_RCLD_DISP_FAIL_ADDR", -1} + , + {"PM_L2SB_RCLD_DISP_FAIL_OTHER", -1} + , + {"PM_L2SB_RCLD_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SB_RCST_DISP", -1} + , + {"PM_L2SB_RCST_DISP_FAIL_ADDR", -1} + , + {"PM_L2SB_RCST_DISP_FAIL_OTHER", -1} + , + {"PM_L2SB_RCST_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SB_RC_DISP_FAIL_CO_BUSY", -1} + , + {"PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL", -1} + , + {"PM_L2SB_SHR_INV", -1} + , + {"PM_L2SB_SHR_MOD", -1} + , + {"PM_L2SB_ST_HIT", -1} + , + {"PM_L2SB_ST_REQ", -1} + , + {"PM_L2SC_MOD_INV", -1} + , + {"PM_L2SC_MOD_TAG", -1} + , + {"PM_L2SC_RCLD_DISP", -1} + , + {"PM_L2SC_RCLD_DISP_FAIL_ADDR", -1} + , + {"PM_L2SC_RCLD_DISP_FAIL_OTHER", -1} + , + {"PM_L2SC_RCLD_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SC_RCST_DISP", -1} + , + {"PM_L2SC_RCST_DISP_FAIL_ADDR", -1} + , + {"PM_L2SC_RCST_DISP_FAIL_OTHER", -1} + , + {"PM_L2SC_RCST_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SC_RC_DISP_FAIL_CO_BUSY", -1} + , + {"PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL", -1} + , + {"PM_L2SC_SHR_INV", -1} + , + {"PM_L2SC_SHR_MOD", -1} + , + {"PM_L2SC_ST_HIT", -1} + , + {"PM_L2SC_ST_REQ", -1} + , + {"PM_L2_PREF", -1} + , + {"PM_L3SA_ALL_BUSY", -1} + , + {"PM_L3SA_HIT", -1} + , + {"PM_L3SA_MOD_INV", 
-1} + , + {"PM_L3SA_MOD_TAG", -1} + , + {"PM_L3SA_REF", -1} + , + {"PM_L3SA_SHR_INV", -1} + , + {"PM_L3SA_SNOOP_RETRY", -1} + , + {"PM_L3SB_ALL_BUSY", -1} + , + {"PM_L3SB_HIT", -1} + , + {"PM_L3SB_MOD_INV", -1} + , + {"PM_L3SB_MOD_TAG", -1} + , + {"PM_L3SB_REF", -1} + , + {"PM_L3SB_SHR_INV", -1} + , + {"PM_L3SB_SNOOP_RETRY", -1} + , + {"PM_L3SC_ALL_BUSY", -1} + , + {"PM_L3SC_HIT", -1} + , + {"PM_L3SC_MOD_INV", -1} + , + {"PM_L3SC_MOD_TAG", -1} + , + {"PM_L3SC_REF", -1} + , + {"PM_L3SC_SHR_INV", -1} + , + {"PM_L3SC_SNOOP_RETRY", -1} + , + {"PM_LARX_LSU0", -1} + , + {"PM_LD_MISS_L1_LSU0", -1} + , + {"PM_LD_MISS_L1_LSU1", -1} + , + {"PM_LD_REF_L1", -1} + , + {"PM_LD_REF_L1_LSU0", -1} + , + {"PM_BR_PRED_TA", -1} + , + {"PM_LR_CTR_MAP_FULL_CYC", -1} + , + {"PM_LSU0_BUSY_REJECT", -1} + , + {"PM_LSU0_DERAT_MISS", -1} + , + {"PM_LSU0_FLUSH_LRQ", -1} + , + {"PM_LSU0_FLUSH_SRQ", -1} + , + {"PM_LSU0_FLUSH_ULD", -1} + , + {"PM_LSU0_FLUSH_UST", -1} + , + {"PM_LSU0_LDF", -1} + , + {"PM_LSU0_NCLD", -1} + , + {"PM_LSU0_REJECT_ERAT_MISS", -1} + , + {"PM_LSU0_REJECT_LMQ_FULL", -1} + , + {"PM_LSU0_REJECT_RELOAD_CDF", -1} + , + {"PM_LSU0_REJECT_SRQ", -1} + , + {"PM_LSU0_SRQ_STFWD", -1} + , + {"PM_LSU1_BUSY_REJECT", -1} + , + {"PM_LSU1_DERAT_MISS", -1} + , + {"PM_LSU1_FLUSH_LRQ", -1} + , + {"PM_LSU1_FLUSH_SRQ", -1} + , + {"PM_LSU1_FLUSH_ULD", -1} + , + {"PM_LSU1_FLUSH_UST", -1} + , + {"PM_LSU1_LDF", -1} + , + {"PM_LSU1_NCLD", -1} + , + {"PM_LSU1_REJECT_ERAT_MISS", -1} + , + {"PM_LSU1_REJECT_LMQ_FULL", -1} + , + {"PM_LSU1_REJECT_RELOAD_CDF", -1} + , + {"PM_LSU1_REJECT_SRQ", -1} + , + {"PM_LSU1_SRQ_STFWD", -1} + , + {"PM_LSU_FLUSH", -1} + , + {"PM_LSU_FLUSH_LRQ_FULL", -1} + , + {"PM_LSU_FLUSH_SRQ", -1} + , + {"PM_LSU_FLUSH_SRQ_FULL", -1} + , + {"PM_LSU_FLUSH_ULD", -1} + , + {"PM_LSU_LDF", -1} + , + {"PM_LSU_LMQ_FULL_CYC", -1} + , + {"PM_LSU_LMQ_LHR_MERGE", -1} + , + {"PM_LSU_LMQ_S0_ALLOC", -1} + , + {"PM_LSU_LMQ_S0_VALID", -1} + , + {"PM_LSU_LRQ_FULL_CYC", -1} + , + 
{"PM_LSU_LRQ_S0_ALLOC", -1} + , + {"PM_LSU_LRQ_S0_VALID", -1} + , + {"PM_LSU_REJECT_ERAT_MISS", -1} + , + {"PM_LSU_REJECT_SRQ", -1} + , + {"PM_LSU_SRQ_FULL_CYC", -1} + , + {"PM_LSU_SRQ_S0_ALLOC", -1} + , + {"PM_LSU_SRQ_S0_VALID", -1} + , + {"PM_LSU_SRQ_SYNC_CYC", -1} + , + {"PM_LWSYNC_HELD", -1} + , + {"PM_MEM_FAST_PATH_RD_DISP", -1} + , + {"PM_IC_PREF_INSTALL", -1} + , + {"PM_MEM_HI_PRIO_WR_CMPL", -1} + , + {"PM_MEM_NONSPEC_RD_CANCEL", -1} + , + {"PM_MEM_LO_PRIO_WR_CMPL", -1} + , + {"PM_MEM_PWQ_DISP", -1} + , + {"PM_MEM_PWQ_DISP_Q2or3", -1} + , + {"PM_MEM_PW_CMPL", -1} + , + {"PM_MEM_PW_GATH", -1} + , + {"PM_MEM_RQ_DISP_Q0to3", -1} + , + {"PM_MEM_RQ_DISP", -1} + , + {"PM_MEM_RQ_DISP_Q4to7", -1} + , + {"PM_MEM_RQ_DISP_Q8to11", -1} + , + {"PM_MEM_SPEC_RD_CANCEL", -1} + , + {"PM_MEM_WQ_DISP_Q0to7", -1} + , + {"PM_MEM_WQ_DISP_Q8to15", -1} + , + {"PM_MEM_WQ_DISP_DCLAIM", -1} + , + {"PM_MEM_WQ_DISP_WRITE", -1} + , + {"PM_MRK_DATA_FROM_L2", -1} + , + {"PM_MRK_DATA_FROM_L25_SHR", -1} + , + {"PM_MRK_DATA_FROM_L275_MOD", -1} + , + {"PM_MRK_DATA_FROM_L3", -1} + , + {"PM_MRK_DATA_FROM_L35_SHR", -1} + , + {"PM_MRK_DATA_FROM_L375_MOD", -1} + , + {"PM_MRK_DATA_FROM_RMEM", -1} + , + {"PM_MRK_DSLB_MISS", -1} + , + {"PM_MRK_DTLB_MISS", -1} + , + {"PM_MRK_DTLB_MISS_4K", -1} + , + {"PM_MRK_DTLB_REF", -1} + , + {"PM_MRK_DTLB_REF_4K", -1} + , + {"PM_MRK_GRP_DISP", -1} + , + {"PM_MRK_GRP_ISSUED", -1} + , + {"PM_MRK_IMR_RELOAD", -1} + , + {"PM_MRK_L1_RELOAD_VALID", -1} + , + {"PM_MRK_LD_MISS_L1", -1} + , + {"PM_MRK_LD_MISS_L1_LSU0", -1} + , + {"PM_MRK_LD_MISS_L1_LSU1", -1} + , + {"PM_MRK_LSU0_FLUSH_LRQ", -1} + , + {"PM_MRK_LSU0_FLUSH_SRQ", -1} + , + {"PM_MRK_LSU0_FLUSH_ULD", -1} + , + {"PM_MRK_LSU0_FLUSH_UST", -1} + , + {"PM_MRK_LSU1_FLUSH_LRQ", -1} + , + {"PM_MRK_LSU1_FLUSH_SRQ", -1} + , + {"PM_MRK_LSU1_FLUSH_ULD", -1} + , + {"PM_MRK_LSU1_FLUSH_UST", -1} + , + {"PM_MRK_LSU_FLUSH_ULD", -1} + , + {"PM_MRK_LSU_SRQ_INST_VALID", -1} + , + {"PM_MRK_STCX_FAIL", -1} + , + {"PM_MRK_ST_CMPL", -1} 
+ , + {"PM_MRK_ST_MISS_L1", -1} + , + {"PM_PMC4_OVERFLOW", -1} + , + {"PM_PMC5_OVERFLOW", -1} + , + {"PM_INST_CMPL", -1} + , + {"PM_PTEG_FROM_L2", -1} + , + {"PM_PTEG_FROM_L25_SHR", -1} + , + {"PM_PTEG_FROM_L275_MOD", -1} + , + {"PM_PTEG_FROM_L3", -1} + , + {"PM_PTEG_FROM_L35_SHR", -1} + , + {"PM_PTEG_FROM_L375_MOD", -1} + , + {"PM_PTEG_FROM_RMEM", -1} + , + {"PM_PTEG_RELOAD_VALID", -1} + , + {"PM_RUN_CYC", -1} + , + {"PM_SNOOP_DCLAIM_RETRY_QFULL", -1} + , + {"PM_SNOOP_PARTIAL_RTRY_QFULL", -1} + , + {"PM_SNOOP_PW_RETRY_RQ", -1} + , + {"PM_SNOOP_PW_RETRY_WQ_PWQ", -1} + , + {"PM_SNOOP_RD_RETRY_QFULL", -1} + , + {"PM_SNOOP_RD_RETRY_RQ", -1} + , + {"PM_SNOOP_RD_RETRY_WQ", -1} + , + {"PM_SNOOP_RETRY_1AHEAD", -1} + , + {"PM_SNOOP_TLBIE", -1} + , + {"PM_SNOOP_WR_RETRY_QFULL", -1} + , + {"PM_SNOOP_WR_RETRY_RQ", -1} + , + {"PM_SNOOP_WR_RETRY_WQ", -1} + , + {"PM_STCX_FAIL", -1} + , + {"PM_STCX_PASS", -1} + , + {"PM_ST_MISS_L1", -1} + , + {"PM_ST_REF_L1_LSU0", -1} + , + {"PM_ST_REF_L1_LSU1", -1} + , + {"PM_SUSPENDED", -1} + , + {"PM_TB_BIT_TRANS", -1} + , + {"PM_THRD_L2MISS_BOTH_CYC", -1} + , + {"PM_THRD_ONE_RUN_CYC", -1} + , + {"PM_THRD_PRIO_1_CYC", -1} + , + {"PM_THRD_PRIO_2_CYC", -1} + , + {"PM_THRD_PRIO_3_CYC", -1} + , + {"PM_THRD_PRIO_4_CYC", -1} + , + {"PM_THRD_PRIO_5_CYC", -1} + , + {"PM_THRD_PRIO_6_CYC", -1} + , + {"PM_THRD_PRIO_7_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_0_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_1or2_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_3or4_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_5or6_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_minus1or2_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_minus3or4_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_minus5or6_CYC", -1} + , + {"PM_THRD_SEL_OVER_CLB_EMPTY", -1} + , + {"PM_THRD_SEL_OVER_GCT_IMBAL", -1} + , + {"PM_THRD_SEL_OVER_ISU_HOLD", -1} + , + {"PM_THRD_SEL_OVER_L2MISS", -1} + , + {"PM_THRD_SEL_T0", -1} + , + {"PM_THRD_SEL_T1", -1} + , + {"PM_THRD_SMT_HANG", -1} + , + {"PM_TLBIE_HELD", -1} + , + {"PM_TLB_MISS", -1} + , + {"PM_XER_MAP_FULL_CYC", -1} + , 
+ {"PM_BR_PRED_CR", -1} + , + {"PM_MEM_RQ_DISP_Q12to15", -1} + , + {"PM_MEM_RQ_DISP_Q16to19", -1} + , + {"PM_SNOOP_RETRY_AB_COLLISION", -1} + , + {"PM_CMPLU_STALL_DCACHE_MISS", -1} + , + {"PM_CMPLU_STALL_FDIV", -1} + , + {"PM_CMPLU_STALL_FXU", -1} + , + {"PM_CMPLU_STALL_LSU", -1} + , + {"PM_DATA_FROM_L25_MOD", -1} + , + {"PM_DATA_FROM_L35_MOD", -1} + , + {"PM_DATA_FROM_LMEM", -1} + , + {"PM_DTLB_MISS_64K", -1} + , + {"PM_DTLB_REF_64K", -1} + , + {"PM_FPU_FMA", -1} + , + {"PM_FPU_FRSP_FCONV", -1} + , + {"PM_FPU_FSQRT", -1} + , + {"PM_FPU_STALL3", -1} + , + {"PM_FPU_STF", -1} + , + {"PM_FXU_BUSY", -1} + , + {"PM_MRK_FXU_FIN", -1} + , + {"PM_GCT_EMPTY_CYC", -1} + , + {"PM_GCT_NOSLOT_IC_MISS", -1} + , + {"PM_GCT_USAGE_60to79_CYC", -1} + , + {"PM_GRP_DISP", -1} + , + {"PM_HV_CYC", -1} + , + {"PM_INST_FROM_L1", -1} + , + {"PM_INST_FROM_L25_MOD", -1} + , + {"PM_INST_FROM_L35_MOD", -1} + , + {"PM_INST_FROM_LMEM", -1} + , + {"PM_LSU_BUSY_REJECT", -1} + , + {"PM_LSU_DERAT_MISS", -1} + , + {"PM_LSU_FLUSH_LRQ", -1} + , + {"PM_LSU_FLUSH_UST", -1} + , + {"PM_LSU_LMQ_SRQ_EMPTY_CYC", -1} + , + {"PM_LSU_REJECT_LMQ_FULL", -1} + , + {"PM_LSU_REJECT_RELOAD_CDF", -1} + , + {"PM_LSU_SRQ_STFWD", -1} + , + {"PM_MRK_BRU_FIN", -1} + , + {"PM_MRK_DATA_FROM_L25_MOD", -1} + , + {"PM_MRK_DATA_FROM_L25_SHR_CYC", -1} + , + {"PM_MRK_DATA_FROM_L275_SHR_CYC", -1} + , + {"PM_MRK_DATA_FROM_L2_CYC", -1} + , + {"PM_MRK_DATA_FROM_L35_MOD", -1} + , + {"PM_MRK_DATA_FROM_L35_SHR_CYC", -1} + , + {"PM_MRK_DATA_FROM_L375_SHR_CYC", -1} + , + {"PM_MRK_DATA_FROM_L3_CYC", -1} + , + {"PM_MRK_DATA_FROM_LMEM", -1} + , + {"PM_MRK_DTLB_MISS_64K", -1} + , + {"PM_MRK_DTLB_REF_64K", -1} + , + {"PM_MRK_GRP_BR_REDIR", -1} + , + {"PM_MRK_LSU_FLUSH_UST", -1} + , + {"PM_MRK_ST_GPS", -1} + , + {"PM_PMC1_OVERFLOW", -1} + , + {"PM_PTEG_FROM_L25_MOD", -1} + , + {"PM_PTEG_FROM_L35_MOD", -1} + , + {"PM_PTEG_FROM_LMEM", -1} + , + {"PM_SLB_MISS", -1} + , + {"PM_ST_REF_L1", -1} + , + {"PM_THRD_GRP_CMPL_BOTH_CYC", -1} + , + 
{"PM_DATA_FROM_L275_SHR", -1} + , + {"PM_DATA_FROM_L2MISS", -1} + , + {"PM_DATA_FROM_L375_SHR", -1} + , + {"PM_DTLB_MISS_16M", -1} + , + {"PM_DTLB_REF_16M", -1} + , + {"PM_FPU_FMOV_FEST", -1} + , + {"PM_FXU0_BUSY_FXU1_IDLE", -1} + , + {"PM_FXU_FIN", -1} + , + {"PM_GCT_NOSLOT_SRQ_FULL", -1} + , + {"PM_GCT_USAGE_80to99_CYC", -1} + , + {"PM_GRP_CMPL", -1} + , + {"PM_GRP_DISP_SUCCESS", -1} + , + {"PM_INST_DISP", -1} + , + {"PM_INST_FROM_L275_SHR", -1} + , + {"PM_INST_FROM_L375_SHR", -1} + , + {"PM_INST_FROM_PREF", -1} + , + {"PM_LD_MISS_L1", -1} + , + {"PM_MRK_DATA_FROM_L275_SHR", -1} + , + {"PM_MRK_DATA_FROM_L2MISS", -1} + , + {"PM_MRK_DATA_FROM_L375_SHR", -1} + , + {"PM_MRK_DTLB_MISS_16M", -1} + , + {"PM_MRK_DTLB_REF_16M", -1} + , + {"PM_MRK_FPU_FIN", -1} + , + {"PM_MRK_INST_FIN", -1} + , + {"PM_MRK_LSU_FLUSH_LRQ", -1} + , + {"PM_MRK_ST_CMPL_INT", -1} + , + {"PM_PMC2_OVERFLOW", -1} + , + {"PM_PMC6_OVERFLOW", -1} + , + {"PM_PTEG_FROM_L275_SHR", -1} + , + {"PM_PTEG_FROM_L2MISS", -1} + , + {"PM_PTEG_FROM_L375_SHR", -1} + , + {"PM_STOP_COMPLETION", -1} + , + {"PM_THRESH_TIMEO", -1} + , + {"PM_0INST_FETCH", -1} + , + {"PM_BR_PRED_CR_TA", -1} + , + {"PM_CMPLU_STALL_DIV", -1} + , + {"PM_CMPLU_STALL_ERAT_MISS", -1} + , + {"PM_CMPLU_STALL_FPU", -1} + , + {"PM_CMPLU_STALL_REJECT", -1} + , + {"PM_DTLB_MISS_16G", -1} + , + {"PM_DTLB_REF_16G", -1} + , + {"PM_EXT_INT", -1} + , + {"PM_FPU_FIN", -1} + , + {"PM_FXU1_BUSY_FXU0_IDLE", -1} + , + {"PM_GCT_NOSLOT_BR_MPRED", -1} + , + {"PM_INST_FROM_L275_MOD", -1} + , + {"PM_INST_FROM_L375_MOD", -1} + , + {"PM_INST_FROM_RMEM", -1} + , + {"PM_LSU_SRQ_EMPTY_CYC", -1} + , + {"PM_MRK_CRU_FIN", -1} + , + {"PM_MRK_DATA_FROM_L25_MOD_CYC", -1} + , + {"PM_MRK_DATA_FROM_L275_MOD_CYC", -1} + , + {"PM_MRK_DATA_FROM_L35_MOD_CYC", -1} + , + {"PM_MRK_DATA_FROM_L375_MOD_CYC", -1} + , + {"PM_MRK_DATA_FROM_LMEM_CYC", -1} + , + {"PM_MRK_DATA_FROM_RMEM_CYC", -1} + , + {"PM_MRK_DTLB_MISS_16G", -1} + , + {"PM_MRK_DTLB_REF_16G", -1} + , + {"PM_MRK_GRP_CMPL", -1} 
+ , + {"PM_MRK_GRP_IC_MISS", -1} + , + {"PM_MRK_GRP_TIMEO", -1} + , + {"PM_MRK_LSU_FIN", -1} + , + {"PM_MRK_LSU_FLUSH_SRQ", -1} + , + {"PM_PMC3_OVERFLOW", -1} + , + {"PM_WORK_HELD", -1} + , + {"PM_RUN_INST_CMPL", -1} +}; diff --git a/src/components/perfctr_ppc/power5_events.h b/src/components/perfctr_ppc/power5_events.h new file mode 100644 index 0000000..80f22db --- /dev/null +++ b/src/components/perfctr_ppc/power5_events.h @@ -0,0 +1,494 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +#ifndef _PAPI_POWER5_EVENTS_H +#define _PAPI_POWER5_EVENTS_H + +/* +* File: power5_events.h +* CVS: +* +* This file MUST be kept synchronised with the events file. +* +*/ +#include "papiStdEventDefs.h" + +#define GROUP_INTS 5 +#define PAPI_MAX_NATIVE_EVENTS 512 +#define MAX_GROUPS (GROUP_INTS * 32) +#define MAX_NATNAME_MAP_INDEX 466 + + +enum native_name +{ + PNE_PM_0INST_CLB_CYC = PAPI_NATIVE_MASK, + PNE_PM_1INST_CLB_CYC, + PNE_PM_1PLUS_PPC_CMPL, + PNE_PM_2INST_CLB_CYC, + PNE_PM_3INST_CLB_CYC, + PNE_PM_4INST_CLB_CYC, + PNE_PM_5INST_CLB_CYC, + PNE_PM_6INST_CLB_CYC, + PNE_PM_BRQ_FULL_CYC, + PNE_PM_BR_UNCOND, + PNE_PM_CLB_FULL_CYC, + PNE_PM_CR_MAP_FULL_CYC, + PNE_PM_CYC, + PNE_PM_DATA_FROM_L2, + PNE_PM_DATA_FROM_L25_SHR, + PNE_PM_DATA_FROM_L275_MOD, + PNE_PM_DATA_FROM_L3, + PNE_PM_DATA_FROM_L35_SHR, + PNE_PM_DATA_FROM_L375_MOD, + PNE_PM_DATA_FROM_RMEM, + PNE_PM_DATA_TABLEWALK_CYC, + PNE_PM_DSLB_MISS, + PNE_PM_DTLB_MISS, + PNE_PM_DTLB_MISS_16M, + PNE_PM_DTLB_MISS_4K, + PNE_PM_DTLB_REF_16M, + PNE_PM_DTLB_REF_4K, + PNE_PM_FAB_CMD_ISSUED, + PNE_PM_FAB_DCLAIM_ISSUED, + PNE_PM_FAB_HOLDtoNN_EMPTY, + PNE_PM_FAB_HOLDtoVN_EMPTY, + PNE_PM_FAB_M1toP1_SIDECAR_EMPTY, + PNE_PM_FAB_P1toM1_SIDECAR_EMPTY, + PNE_PM_FAB_PNtoNN_DIRECT, + PNE_PM_FAB_PNtoVN_DIRECT, + PNE_PM_FPR_MAP_FULL_CYC, + PNE_PM_FPU0_1FLOP, + PNE_PM_FPU0_DENORM, + PNE_PM_FPU0_FDIV, + PNE_PM_FPU0_FMA, + PNE_PM_FPU0_FSQRT, + PNE_PM_FPU0_FULL_CYC, + PNE_PM_FPU0_SINGLE, + 
PNE_PM_FPU0_STALL3, + PNE_PM_FPU0_STF, + PNE_PM_FPU1_1FLOP, + PNE_PM_FPU1_DENORM, + PNE_PM_FPU1_FDIV, + PNE_PM_FPU1_FMA, + PNE_PM_FPU1_FSQRT, + PNE_PM_FPU1_FULL_CYC, + PNE_PM_FPU1_SINGLE, + PNE_PM_FPU1_STALL3, + PNE_PM_FPU1_STF, + PNE_PM_FPU_DENORM, + PNE_PM_FPU_FDIV, + PNE_PM_FPU_1FLOP, + PNE_PM_FPU_FULL_CYC, + PNE_PM_FPU_SINGLE, + PNE_PM_FXU_IDLE, + PNE_PM_GCT_NOSLOT_CYC, + PNE_PM_GCT_FULL_CYC, + PNE_PM_GCT_USAGE_00to59_CYC, + PNE_PM_GRP_BR_REDIR, + PNE_PM_GRP_BR_REDIR_NONSPEC, + PNE_PM_GRP_DISP_REJECT, + PNE_PM_GRP_DISP_VALID, + PNE_PM_GRP_IC_MISS, + PNE_PM_GRP_IC_MISS_BR_REDIR_NONSPEC, + PNE_PM_GRP_IC_MISS_NONSPEC, + PNE_PM_GRP_MRK, + PNE_PM_IC_PREF_REQ, + PNE_PM_IERAT_XLATE_WR, + PNE_PM_INST_CMPL, + PNE_PM_INST_DISP, + PNE_PM_INST_FETCH_CYC, + PNE_PM_INST_FROM_L2, + PNE_PM_INST_FROM_L25_SHR, + PNE_PM_INST_FROM_L3, + PNE_PM_INST_FROM_L35_SHR, + PNE_PM_ISLB_MISS, + PNE_PM_ITLB_MISS, + PNE_PM_L2SA_MOD_TAG, + PNE_PM_L2SA_RCLD_DISP, + PNE_PM_L2SA_RCLD_DISP_FAIL_RC_FULL, + PNE_PM_L2SA_RCST_DISP, + PNE_PM_L2SA_RCST_DISP_FAIL_RC_FULL, + PNE_PM_L2SA_RC_DISP_FAIL_CO_BUSY, + PNE_PM_L2SA_SHR_MOD, + PNE_PM_L2SA_ST_REQ, + PNE_PM_L2SB_MOD_TAG, + PNE_PM_L2SB_RCLD_DISP, + PNE_PM_L2SB_RCLD_DISP_FAIL_RC_FULL, + PNE_PM_L2SB_RCST_DISP, + PNE_PM_L2SB_RCST_DISP_FAIL_RC_FULL, + PNE_PM_L2SB_RC_DISP_FAIL_CO_BUSY, + PNE_PM_L2SB_SHR_MOD, + PNE_PM_L2SB_ST_REQ, + PNE_PM_L2SC_MOD_TAG, + PNE_PM_L2SC_RCLD_DISP, + PNE_PM_L2SC_RCLD_DISP_FAIL_RC_FULL, + PNE_PM_L2SC_RCST_DISP, + PNE_PM_L2SC_RCST_DISP_FAIL_RC_FULL, + PNE_PM_L2SC_RC_DISP_FAIL_CO_BUSY, + PNE_PM_L2SC_SHR_MOD, + PNE_PM_L2SC_ST_REQ, + PNE_PM_L3SA_ALL_BUSY, + PNE_PM_L3SA_MOD_TAG, + PNE_PM_L3SA_REF, + PNE_PM_L3SB_ALL_BUSY, + PNE_PM_L3SB_MOD_TAG, + PNE_PM_L3SB_REF, + PNE_PM_L3SC_ALL_BUSY, + PNE_PM_L3SC_MOD_TAG, + PNE_PM_L3SC_REF, + PNE_PM_LARX_LSU0, + PNE_PM_LR_CTR_MAP_FULL_CYC, + PNE_PM_LSU0_BUSY_REJECT, + PNE_PM_LSU0_DERAT_MISS, + PNE_PM_LSU0_FLUSH_LRQ, + PNE_PM_LSU0_FLUSH_SRQ, + PNE_PM_LSU0_FLUSH_ULD, + PNE_PM_LSU0_FLUSH_UST, + 
PNE_PM_LSU0_REJECT_ERAT_MISS, + PNE_PM_LSU0_REJECT_LMQ_FULL, + PNE_PM_LSU0_REJECT_RELOAD_CDF, + PNE_PM_LSU0_REJECT_SRQ_LHS, + PNE_PM_LSU0_SRQ_STFWD, + PNE_PM_LSU1_BUSY_REJECT, + PNE_PM_LSU1_DERAT_MISS, + PNE_PM_LSU1_FLUSH_LRQ, + PNE_PM_LSU1_FLUSH_SRQ, + PNE_PM_LSU1_FLUSH_ULD, + PNE_PM_LSU1_FLUSH_UST, + PNE_PM_LSU1_REJECT_ERAT_MISS, + PNE_PM_LSU1_REJECT_LMQ_FULL, + PNE_PM_LSU1_REJECT_RELOAD_CDF, + PNE_PM_LSU1_REJECT_SRQ_LHS, + PNE_PM_LSU1_SRQ_STFWD, + PNE_PM_LSU_BUSY_REJECT, + PNE_PM_LSU_FLUSH_LRQ_FULL, + PNE_PM_LSU_FLUSH_SRQ, + PNE_PM_LSU_FLUSH_ULD, + PNE_PM_LSU_LRQ_S0_ALLOC, + PNE_PM_LSU_LRQ_S0_VALID, + PNE_PM_LSU_REJECT_ERAT_MISS, + PNE_PM_LSU_REJECT_SRQ_LHS, + PNE_PM_LSU_SRQ_S0_ALLOC, + PNE_PM_LSU_SRQ_S0_VALID, + PNE_PM_LSU_SRQ_STFWD, + PNE_PM_MEM_FAST_PATH_RD_CMPL, + PNE_PM_MEM_HI_PRIO_PW_CMPL, + PNE_PM_MEM_HI_PRIO_WR_CMPL, + PNE_PM_MEM_PWQ_DISP, + PNE_PM_MEM_PWQ_DISP_BUSY2or3, + PNE_PM_MEM_READ_CMPL, + PNE_PM_MEM_RQ_DISP, + PNE_PM_MEM_RQ_DISP_BUSY8to15, + PNE_PM_MEM_WQ_DISP_BUSY1to7, + PNE_PM_MEM_WQ_DISP_WRITE, + PNE_PM_MRK_DATA_FROM_L2, + PNE_PM_MRK_DATA_FROM_L25_SHR, + PNE_PM_MRK_DATA_FROM_L275_MOD, + PNE_PM_MRK_DATA_FROM_L3, + PNE_PM_MRK_DATA_FROM_L35_SHR, + PNE_PM_MRK_DATA_FROM_L375_MOD, + PNE_PM_MRK_DATA_FROM_RMEM, + PNE_PM_MRK_DTLB_MISS_16M, + PNE_PM_MRK_DTLB_MISS_4K, + PNE_PM_MRK_DTLB_REF_16M, + PNE_PM_MRK_DTLB_REF_4K, + PNE_PM_MRK_GRP_DISP, + PNE_PM_MRK_GRP_ISSUED, + PNE_PM_MRK_IMR_RELOAD, + PNE_PM_MRK_LD_MISS_L1, + PNE_PM_MRK_LD_MISS_L1_LSU0, + PNE_PM_MRK_LD_MISS_L1_LSU1, + PNE_PM_MRK_STCX_FAIL, + PNE_PM_MRK_ST_CMPL, + PNE_PM_MRK_ST_MISS_L1, + PNE_PM_PMC4_OVERFLOW, + PNE_PM_PMC5_OVERFLOW, + PNE_PM_PTEG_FROM_L2, + PNE_PM_PTEG_FROM_L25_SHR, + PNE_PM_PTEG_FROM_L275_MOD, + PNE_PM_PTEG_FROM_L3, + PNE_PM_PTEG_FROM_L35_SHR, + PNE_PM_PTEG_FROM_L375_MOD, + PNE_PM_PTEG_FROM_RMEM, + PNE_PM_RUN_CYC, + PNE_PM_SNOOP_DCLAIM_RETRY_QFULL, + PNE_PM_SNOOP_PW_RETRY_RQ, + PNE_PM_SNOOP_RD_RETRY_QFULL, + PNE_PM_SNOOP_RD_RETRY_RQ, + PNE_PM_SNOOP_RETRY_1AHEAD, + 
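PNE_PM_SNOOP_TLBIE, + PNE_PM_SNOOP_WR_RETRY_RQ, + PNE_PM_STCX_FAIL, + PNE_PM_STCX_PASS, + PNE_PM_SUSPENDED, + PNE_PM_TB_BIT_TRANS, + PNE_PM_THRD_ONE_RUN_CYC, + PNE_PM_THRD_PRIO_1_CYC, + PNE_PM_THRD_PRIO_2_CYC, + PNE_PM_THRD_PRIO_3_CYC, + PNE_PM_THRD_PRIO_4_CYC, + PNE_PM_THRD_PRIO_5_CYC, + PNE_PM_THRD_PRIO_6_CYC, + PNE_PM_THRD_PRIO_7_CYC, + PNE_PM_TLB_MISS, + PNE_PM_XER_MAP_FULL_CYC, + PNE_PM_INST_FROM_L2MISS, + PNE_PM_BR_PRED_TA, + PNE_PM_CMPLU_STALL_DCACHE_MISS, + PNE_PM_CMPLU_STALL_FDIV, + PNE_PM_CMPLU_STALL_FXU, + PNE_PM_CMPLU_STALL_LSU, + PNE_PM_DATA_FROM_L25_MOD, + PNE_PM_DATA_FROM_L35_MOD, + PNE_PM_DATA_FROM_LMEM, + PNE_PM_FPU_FSQRT, + PNE_PM_FPU_FMA, + PNE_PM_FPU_STALL3, + PNE_PM_FPU_STF, + PNE_PM_FXU_BUSY, + PNE_PM_FXU_FIN, + PNE_PM_GCT_NOSLOT_IC_MISS, + PNE_PM_GCT_USAGE_60to79_CYC, + PNE_PM_GRP_DISP, + PNE_PM_HV_CYC, + PNE_PM_INST_FROM_L1, + PNE_PM_INST_FROM_L25_MOD, + PNE_PM_INST_FROM_L35_MOD, + PNE_PM_INST_FROM_LMEM, + PNE_PM_LSU_DERAT_MISS, + PNE_PM_LSU_FLUSH_LRQ, + PNE_PM_LSU_FLUSH_UST, + PNE_PM_LSU_LMQ_SRQ_EMPTY_CYC, + PNE_PM_LSU_REJECT_LMQ_FULL, + PNE_PM_LSU_REJECT_RELOAD_CDF, + PNE_PM_MRK_BRU_FIN, + PNE_PM_MRK_DATA_FROM_L25_MOD, + PNE_PM_MRK_DATA_FROM_L25_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L275_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L2_CYC, + PNE_PM_MRK_DATA_FROM_L35_MOD, + PNE_PM_MRK_DATA_FROM_L35_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L375_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L3_CYC, + PNE_PM_MRK_DATA_FROM_LMEM, + PNE_PM_MRK_GRP_BR_REDIR, + PNE_PM_MRK_ST_GPS, + PNE_PM_PMC1_OVERFLOW, + PNE_PM_PTEG_FROM_L25_MOD, + PNE_PM_PTEG_FROM_L35_MOD, + PNE_PM_PTEG_FROM_LMEM, + PNE_PM_SLB_MISS, + PNE_PM_GCT_EMPTY_CYC, + PNE_PM_THRD_GRP_CMPL_BOTH_CYC, + PNE_PM_BR_ISSUED, + PNE_PM_BR_MPRED_CR, + PNE_PM_BR_MPRED_TA, + PNE_PM_BR_PRED_CR, + PNE_PM_CRQ_FULL_CYC, + PNE_PM_DATA_FROM_L275_SHR, + PNE_PM_DATA_FROM_L375_SHR, + PNE_PM_DC_INV_L2, + PNE_PM_DC_PREF_DST, + PNE_PM_DC_PREF_STREAM_ALLOC, + PNE_PM_EE_OFF, + PNE_PM_EE_OFF_EXT_INT, + PNE_PM_FAB_CMD_RETRIED, + PNE_PM_FAB_DCLAIM_RETRIED, + PNE_PM_FAB_M1toVNorNN_SIDECAR_EMPTY, + PNE_PM_FAB_P1toVNorNN_SIDECAR_EMPTY, + PNE_PM_FAB_PNtoNN_SIDECAR, + PNE_PM_FAB_PNtoVN_SIDECAR, + PNE_PM_FAB_VBYPASS_EMPTY, + PNE_PM_FLUSH_BR_MPRED, + PNE_PM_FLUSH_IMBAL, + PNE_PM_FLUSH, + PNE_PM_FLUSH_SB, + PNE_PM_FLUSH_SYNC, + PNE_PM_FPU0_FEST, + PNE_PM_FPU0_FIN, + PNE_PM_FPU0_FMOV_FEST, + PNE_PM_FPU0_FPSCR, + PNE_PM_FPU0_FRSP_FCONV, + PNE_PM_FPU1_FEST, + PNE_PM_FPU1_FIN, + PNE_PM_FPU1_FMOV_FEST, + PNE_PM_FPU1_FRSP_FCONV, + PNE_PM_FPU_FMOV_FEST, + PNE_PM_FPU_FRSP_FCONV, + PNE_PM_FXLS0_FULL_CYC, + PNE_PM_FXLS1_FULL_CYC, + PNE_PM_FXU0_BUSY_FXU1_IDLE, + PNE_PM_FXU0_FIN, + PNE_PM_FXU1_FIN, + PNE_PM_GCT_NOSLOT_SRQ_FULL, + PNE_PM_GCT_USAGE_80to99_CYC, + PNE_PM_GPR_MAP_FULL_CYC, + PNE_PM_GRP_CMPL, + PNE_PM_GRP_DISP_BLK_SB_CYC, + PNE_PM_GRP_DISP_SUCCESS, + PNE_PM_IC_DEMAND_L2_BHT_REDIRECT, + PNE_PM_IC_DEMAND_L2_BR_REDIRECT, + PNE_PM_IC_PREF_INSTALL, + PNE_PM_INST_FROM_L275_SHR, + PNE_PM_INST_FROM_L375_SHR, + PNE_PM_INST_FROM_PREF, + PNE_PM_L1_DCACHE_RELOAD_VALID, + PNE_PM_L1_PREF, + PNE_PM_L1_WRITE_CYC, + PNE_PM_L2SA_MOD_INV, + PNE_PM_L2SA_RCLD_DISP_FAIL_ADDR, + PNE_PM_L2SA_RCLD_DISP_FAIL_OTHER, + PNE_PM_L2SA_RCST_DISP_FAIL_ADDR, + PNE_PM_L2SA_RCST_DISP_FAIL_OTHER, + 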
PNE_PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL, + PNE_PM_L2SA_SHR_INV, + PNE_PM_L2SA_ST_HIT, + PNE_PM_L2SB_MOD_INV, + PNE_PM_L2SB_RCLD_DISP_FAIL_ADDR, + PNE_PM_L2SB_RCLD_DISP_FAIL_OTHER, + PNE_PM_L2SB_RCST_DISP_FAIL_ADDR, + PNE_PM_L2SB_RCST_DISP_FAIL_OTHER, + PNE_PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL, + PNE_PM_L2SB_SHR_INV, + PNE_PM_L2SB_ST_HIT, + PNE_PM_L2SC_MOD_INV, + PNE_PM_L2SC_RCLD_DISP_FAIL_ADDR, + PNE_PM_L2SC_RCLD_DISP_FAIL_OTHER, + PNE_PM_L2SC_RCST_DISP_FAIL_ADDR, + PNE_PM_L2SC_RCST_DISP_FAIL_OTHER, + PNE_PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL, + PNE_PM_L2SC_SHR_INV, + PNE_PM_L2SC_ST_HIT, + PNE_PM_L2_PREF, + PNE_PM_L3SA_HIT, + PNE_PM_L3SA_MOD_INV, + PNE_PM_L3SA_SHR_INV, + PNE_PM_L3SA_SNOOP_RETRY, + PNE_PM_L3SB_HIT, + PNE_PM_L3SB_MOD_INV, + PNE_PM_L3SB_SHR_INV, + PNE_PM_L3SB_SNOOP_RETRY, + PNE_PM_L3SC_HIT, + PNE_PM_L3SC_MOD_INV, + PNE_PM_L3SC_SHR_INV, + PNE_PM_L3SC_SNOOP_RETRY, + PNE_PM_LD_MISS_L1, + PNE_PM_LD_MISS_L1_LSU0, + PNE_PM_LD_MISS_L1_LSU1, + PNE_PM_LD_REF_L1_LSU0, + PNE_PM_LD_REF_L1_LSU1, + PNE_PM_LSU0_LDF, + PNE_PM_LSU0_NCLD, + PNE_PM_LSU1_LDF, + PNE_PM_LSU1_NCLD, + PNE_PM_LSU_FLUSH, + PNE_PM_LSU_FLUSH_SRQ_FULL, + PNE_PM_LSU_LMQ_FULL_CYC, + PNE_PM_LSU_LMQ_LHR_MERGE, + PNE_PM_LSU_LMQ_S0_ALLOC, + PNE_PM_LSU_LMQ_S0_VALID, + PNE_PM_LSU_LRQ_FULL_CYC, + PNE_PM_DC_PREF_STREAM_ALLOC_BLK, + PNE_PM_LSU_SRQ_FULL_CYC, + PNE_PM_LSU_SRQ_SYNC_CYC, + PNE_PM_LWSYNC_HELD, + PNE_PM_MEM_LO_PRIO_PW_CMPL, + PNE_PM_MEM_LO_PRIO_WR_CMPL, + PNE_PM_MEM_PW_CMPL, + PNE_PM_MEM_PW_GATH, + PNE_PM_MEM_RQ_DISP_BUSY1to7, + PNE_PM_MEM_SPEC_RD_CANCEL, + PNE_PM_MEM_WQ_DISP_BUSY8to15, + PNE_PM_MEM_WQ_DISP_DCLAIM, + PNE_PM_MRK_DATA_FROM_L275_SHR, + PNE_PM_MRK_DATA_FROM_L375_SHR, + PNE_PM_MRK_DSLB_MISS, + PNE_PM_MRK_DTLB_MISS, + PNE_PM_MRK_FPU_FIN, + PNE_PM_MRK_INST_FIN, + PNE_PM_MRK_L1_RELOAD_VALID, + PNE_PM_MRK_LSU0_FLUSH_LRQ, + PNE_PM_MRK_LSU0_FLUSH_SRQ, + PNE_PM_MRK_LSU0_FLUSH_UST, + PNE_PM_MRK_LSU0_FLUSH_ULD, + PNE_PM_MRK_LSU1_FLUSH_LRQ, + PNE_PM_MRK_LSU1_FLUSH_SRQ, + PNE_PM_MRK_LSU1_FLUSH_ULD, + 
PNE_PM_MRK_LSU1_FLUSH_UST, + PNE_PM_MRK_LSU_FLUSH_LRQ, + PNE_PM_MRK_LSU_FLUSH_UST, + PNE_PM_MRK_LSU_SRQ_INST_VALID, + PNE_PM_MRK_ST_CMPL_INT, + PNE_PM_PMC2_OVERFLOW, + PNE_PM_PMC6_OVERFLOW, + PNE_PM_PTEG_FROM_L275_SHR, + PNE_PM_PTEG_FROM_L375_SHR, + PNE_PM_SNOOP_PARTIAL_RTRY_QFULL, + PNE_PM_SNOOP_PW_RETRY_WQ_PWQ, + PNE_PM_SNOOP_RD_RETRY_WQ, + PNE_PM_SNOOP_WR_RETRY_QFULL, + PNE_PM_SNOOP_WR_RETRY_WQ, + PNE_PM_STOP_COMPLETION, + PNE_PM_ST_MISS_L1, + PNE_PM_ST_REF_L1, + PNE_PM_ST_REF_L1_LSU0, + PNE_PM_ST_REF_L1_LSU1, + PNE_PM_CLB_EMPTY_CYC, + PNE_PM_THRD_L2MISS_BOTH_CYC, + PNE_PM_THRD_PRIO_DIFF_0_CYC, + PNE_PM_THRD_PRIO_DIFF_1or2_CYC, + PNE_PM_THRD_PRIO_DIFF_3or4_CYC, + PNE_PM_THRD_PRIO_DIFF_5or6_CYC, + PNE_PM_THRD_PRIO_DIFF_minus1or2_CYC, + PNE_PM_THRD_PRIO_DIFF_minus3or4_CYC, + PNE_PM_THRD_PRIO_DIFF_minus5or6_CYC, + PNE_PM_THRD_SEL_OVER_CLB_EMPTY, + PNE_PM_THRD_SEL_OVER_GCT_IMBAL, + PNE_PM_THRD_SEL_OVER_ISU_HOLD, + PNE_PM_THRD_SEL_OVER_L2MISS, + PNE_PM_THRD_SEL_T0, + PNE_PM_THRD_SEL_T1, + PNE_PM_THRD_SMT_HANG, + PNE_PM_THRESH_TIMEO, + PNE_PM_TLBIE_HELD, + PNE_PM_DATA_FROM_L2MISS, + PNE_PM_MRK_DATA_FROM_L2MISS, + PNE_PM_PTEG_FROM_L2MISS, + PNE_PM_0INST_FETCH, + PNE_PM_BR_PRED_CR_TA, + PNE_PM_CMPLU_STALL_DIV, + PNE_PM_CMPLU_STALL_ERAT_MISS, + PNE_PM_CMPLU_STALL_FPU, + PNE_PM_CMPLU_STALL_REJECT, + PNE_PM_EXT_INT, + PNE_PM_FPU_FEST, + PNE_PM_FPU_FIN, + PNE_PM_FXLS_FULL_CYC, + PNE_PM_FXU1_BUSY_FXU0_IDLE, + PNE_PM_GCT_NOSLOT_BR_MPRED, + PNE_PM_INST_FROM_L275_MOD, + PNE_PM_INST_FROM_L375_MOD, + PNE_PM_INST_FROM_RMEM, + PNE_PM_LD_REF_L1, + PNE_PM_LSU_LDF, + PNE_PM_LSU_SRQ_EMPTY_CYC, + PNE_PM_MRK_CRU_FIN, + PNE_PM_MRK_DATA_FROM_L25_MOD_CYC, + PNE_PM_MRK_DATA_FROM_L275_MOD_CYC, + PNE_PM_MRK_DATA_FROM_L35_MOD_CYC, + PNE_PM_MRK_DATA_FROM_L375_MOD_CYC, + PNE_PM_MRK_DATA_FROM_LMEM_CYC, + PNE_PM_MRK_DATA_FROM_RMEM_CYC, + PNE_PM_MRK_GRP_CMPL, + PNE_PM_MRK_GRP_IC_MISS, + PNE_PM_MRK_GRP_TIMEO, + PNE_PM_MRK_LSU_FIN, + PNE_PM_MRK_LSU_FLUSH_SRQ, + PNE_PM_MRK_LSU_FLUSH_ULD, + 
PNE_PM_PMC3_OVERFLOW, + PNE_PM_WORK_HELD, + NATNAME_GUARD, +}; + +#endif diff --git a/src/components/perfctr_ppc/power5_events_map.c b/src/components/perfctr_ppc/power5_events_map.c new file mode 100644 index 0000000..d2dc926 --- /dev/null +++ b/src/components/perfctr_ppc/power5_events_map.c @@ -0,0 +1,949 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: power5_events_map.c +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +* +* This file MUST be kept synchronised with the events file. +* +*/ +#include "perfctr-ppc64.h" + +PPC64_native_map_t native_name_map[MAX_NATNAME_MAP_INDEX] = { + {"PM_0INST_CLB_CYC", -1} + , + {"PM_1INST_CLB_CYC", -1} + , + {"PM_1PLUS_PPC_CMPL", -1} + , + {"PM_2INST_CLB_CYC", -1} + , + {"PM_3INST_CLB_CYC", -1} + , + {"PM_4INST_CLB_CYC", -1} + , + {"PM_5INST_CLB_CYC", -1} + , + {"PM_6INST_CLB_CYC", -1} + , + {"PM_BRQ_FULL_CYC", -1} + , + {"PM_BR_UNCOND", -1} + , + {"PM_CLB_FULL_CYC", -1} + , + {"PM_CR_MAP_FULL_CYC", -1} + , + {"PM_CYC", -1} + , + {"PM_DATA_FROM_L2", -1} + , + {"PM_DATA_FROM_L25_SHR", -1} + , + {"PM_DATA_FROM_L275_MOD", -1} + , + {"PM_DATA_FROM_L3", -1} + , + {"PM_DATA_FROM_L35_SHR", -1} + , + {"PM_DATA_FROM_L375_MOD", -1} + , + {"PM_DATA_FROM_RMEM", -1} + , + {"PM_DATA_TABLEWALK_CYC", -1} + , + {"PM_DSLB_MISS", -1} + , + {"PM_DTLB_MISS", -1} + , + {"PM_DTLB_MISS_16M", -1} + , + {"PM_DTLB_MISS_4K", -1} + , + {"PM_DTLB_REF_16M", -1} + , + {"PM_DTLB_REF_4K", -1} + , + {"PM_FAB_CMD_ISSUED", -1} + , + {"PM_FAB_DCLAIM_ISSUED", -1} + , + {"PM_FAB_HOLDtoNN_EMPTY", -1} + , + {"PM_FAB_HOLDtoVN_EMPTY", -1} + , + {"PM_FAB_M1toP1_SIDECAR_EMPTY", -1} + , + {"PM_FAB_P1toM1_SIDECAR_EMPTY", -1} + , + {"PM_FAB_PNtoNN_DIRECT", -1} + , + {"PM_FAB_PNtoVN_DIRECT", -1} + , + {"PM_FPR_MAP_FULL_CYC", -1} + , + {"PM_FPU0_1FLOP", -1} + , + {"PM_FPU0_DENORM", -1} + , + {"PM_FPU0_FDIV", -1} + , + {"PM_FPU0_FMA", -1} + , + {"PM_FPU0_FSQRT", -1} + , + {"PM_FPU0_FULL_CYC", -1} + , + 
{"PM_FPU0_SINGLE", -1} + , + {"PM_FPU0_STALL3", -1} + , + {"PM_FPU0_STF", -1} + , + {"PM_FPU1_1FLOP", -1} + , + {"PM_FPU1_DENORM", -1} + , + {"PM_FPU1_FDIV", -1} + , + {"PM_FPU1_FMA", -1} + , + {"PM_FPU1_FSQRT", -1} + , + {"PM_FPU1_FULL_CYC", -1} + , + {"PM_FPU1_SINGLE", -1} + , + {"PM_FPU1_STALL3", -1} + , + {"PM_FPU1_STF", -1} + , + {"PM_FPU_DENORM", -1} + , + {"PM_FPU_FDIV", -1} + , + {"PM_FPU_1FLOP", -1} + , + {"PM_FPU_FULL_CYC", -1} + , + {"PM_FPU_SINGLE", -1} + , + {"PM_FXU_IDLE", -1} + , + {"PM_GCT_NOSLOT_CYC", -1} + , + {"PM_GCT_FULL_CYC", -1} + , + {"PM_GCT_USAGE_00to59_CYC", -1} + , + {"PM_GRP_BR_REDIR", -1} + , + {"PM_GRP_BR_REDIR_NONSPEC", -1} + , + {"PM_GRP_DISP_REJECT", -1} + , + {"PM_GRP_DISP_VALID", -1} + , + {"PM_GRP_IC_MISS", -1} + , + {"PM_GRP_IC_MISS_BR_REDIR_NONSPEC", -1} + , + {"PM_GRP_IC_MISS_NONSPEC", -1} + , + {"PM_GRP_MRK", -1} + , + {"PM_IC_PREF_REQ", -1} + , + {"PM_IERAT_XLATE_WR", -1} + , + {"PM_INST_CMPL", -1} + , + {"PM_INST_DISP", -1} + , + {"PM_INST_FETCH_CYC", -1} + , + {"PM_INST_FROM_L2", -1} + , + {"PM_INST_FROM_L25_SHR", -1} + , + {"PM_INST_FROM_L3", -1} + , + {"PM_INST_FROM_L35_SHR", -1} + , + {"PM_ISLB_MISS", -1} + , + {"PM_ITLB_MISS", -1} + , + {"PM_L2SA_MOD_TAG", -1} + , + {"PM_L2SA_RCLD_DISP", -1} + , + {"PM_L2SA_RCLD_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SA_RCST_DISP", -1} + , + {"PM_L2SA_RCST_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SA_RC_DISP_FAIL_CO_BUSY", -1} + , + {"PM_L2SA_SHR_MOD", -1} + , + {"PM_L2SA_ST_REQ", -1} + , + {"PM_L2SB_MOD_TAG", -1} + , + {"PM_L2SB_RCLD_DISP", -1} + , + {"PM_L2SB_RCLD_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SB_RCST_DISP", -1} + , + {"PM_L2SB_RCST_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SB_RC_DISP_FAIL_CO_BUSY", -1} + , + {"PM_L2SB_SHR_MOD", -1} + , + {"PM_L2SB_ST_REQ", -1} + , + {"PM_L2SC_MOD_TAG", -1} + , + {"PM_L2SC_RCLD_DISP", -1} + , + {"PM_L2SC_RCLD_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SC_RCST_DISP", -1} + , + {"PM_L2SC_RCST_DISP_FAIL_RC_FULL", -1} + , + {"PM_L2SC_RC_DISP_FAIL_CO_BUSY", -1} + , 
+ {"PM_L2SC_SHR_MOD", -1} + , + {"PM_L2SC_ST_REQ", -1} + , + {"PM_L3SA_ALL_BUSY", -1} + , + {"PM_L3SA_MOD_TAG", -1} + , + {"PM_L3SA_REF", -1} + , + {"PM_L3SB_ALL_BUSY", -1} + , + {"PM_L3SB_MOD_TAG", -1} + , + {"PM_L3SB_REF", -1} + , + {"PM_L3SC_ALL_BUSY", -1} + , + {"PM_L3SC_MOD_TAG", -1} + , + {"PM_L3SC_REF", -1} + , + {"PM_LARX_LSU0", -1} + , + {"PM_LR_CTR_MAP_FULL_CYC", -1} + , + {"PM_LSU0_BUSY_REJECT", -1} + , + {"PM_LSU0_DERAT_MISS", -1} + , + {"PM_LSU0_FLUSH_LRQ", -1} + , + {"PM_LSU0_FLUSH_SRQ", -1} + , + {"PM_LSU0_FLUSH_ULD", -1} + , + {"PM_LSU0_FLUSH_UST", -1} + , + {"PM_LSU0_REJECT_ERAT_MISS", -1} + , + {"PM_LSU0_REJECT_LMQ_FULL", -1} + , + {"PM_LSU0_REJECT_RELOAD_CDF", -1} + , + {"PM_LSU0_REJECT_SRQ_LHS", -1} + , + {"PM_LSU0_SRQ_STFWD", -1} + , + {"PM_LSU1_BUSY_REJECT", -1} + , + {"PM_LSU1_DERAT_MISS", -1} + , + {"PM_LSU1_FLUSH_LRQ", -1} + , + {"PM_LSU1_FLUSH_SRQ", -1} + , + {"PM_LSU1_FLUSH_ULD", -1} + , + {"PM_LSU1_FLUSH_UST", -1} + , + {"PM_LSU1_REJECT_ERAT_MISS", -1} + , + {"PM_LSU1_REJECT_LMQ_FULL", -1} + , + {"PM_LSU1_REJECT_RELOAD_CDF", -1} + , + {"PM_LSU1_REJECT_SRQ_LHS", -1} + , + {"PM_LSU1_SRQ_STFWD", -1} + , + {"PM_LSU_BUSY_REJECT", -1} + , + {"PM_LSU_FLUSH_LRQ_FULL", -1} + , + {"PM_LSU_FLUSH_SRQ", -1} + , + {"PM_LSU_FLUSH_ULD", -1} + , + {"PM_LSU_LRQ_S0_ALLOC", -1} + , + {"PM_LSU_LRQ_S0_VALID", -1} + , + {"PM_LSU_REJECT_ERAT_MISS", -1} + , + {"PM_LSU_REJECT_SRQ_LHS", -1} + , + {"PM_LSU_SRQ_S0_ALLOC", -1} + , + {"PM_LSU_SRQ_S0_VALID", -1} + , + {"PM_LSU_SRQ_STFWD", -1} + , + {"PM_MEM_FAST_PATH_RD_CMPL", -1} + , + {"PM_MEM_HI_PRIO_PW_CMPL", -1} + , + {"PM_MEM_HI_PRIO_WR_CMPL", -1} + , + {"PM_MEM_PWQ_DISP", -1} + , + {"PM_MEM_PWQ_DISP_BUSY2or3", -1} + , + {"PM_MEM_READ_CMPL", -1} + , + {"PM_MEM_RQ_DISP", -1} + , + {"PM_MEM_RQ_DISP_BUSY8to15", -1} + , + {"PM_MEM_WQ_DISP_BUSY1to7", -1} + , + {"PM_MEM_WQ_DISP_WRITE", -1} + , + {"PM_MRK_DATA_FROM_L2", -1} + , + {"PM_MRK_DATA_FROM_L25_SHR", -1} + , + {"PM_MRK_DATA_FROM_L275_MOD", -1} + , + 
{"PM_MRK_DATA_FROM_L3", -1} + , + {"PM_MRK_DATA_FROM_L35_SHR", -1} + , + {"PM_MRK_DATA_FROM_L375_MOD", -1} + , + {"PM_MRK_DATA_FROM_RMEM", -1} + , + {"PM_MRK_DTLB_MISS_16M", -1} + , + {"PM_MRK_DTLB_MISS_4K", -1} + , + {"PM_MRK_DTLB_REF_16M", -1} + , + {"PM_MRK_DTLB_REF_4K", -1} + , + {"PM_MRK_GRP_DISP", -1} + , + {"PM_MRK_GRP_ISSUED", -1} + , + {"PM_MRK_IMR_RELOAD", -1} + , + {"PM_MRK_LD_MISS_L1", -1} + , + {"PM_MRK_LD_MISS_L1_LSU0", -1} + , + {"PM_MRK_LD_MISS_L1_LSU1", -1} + , + {"PM_MRK_STCX_FAIL", -1} + , + {"PM_MRK_ST_CMPL", -1} + , + {"PM_MRK_ST_MISS_L1", -1} + , + {"PM_PMC4_OVERFLOW", -1} + , + {"PM_PMC5_OVERFLOW", -1} + , + {"PM_PTEG_FROM_L2", -1} + , + {"PM_PTEG_FROM_L25_SHR", -1} + , + {"PM_PTEG_FROM_L275_MOD", -1} + , + {"PM_PTEG_FROM_L3", -1} + , + {"PM_PTEG_FROM_L35_SHR", -1} + , + {"PM_PTEG_FROM_L375_MOD", -1} + , + {"PM_PTEG_FROM_RMEM", -1} + , + {"PM_RUN_CYC", -1} + , + {"PM_SNOOP_DCLAIM_RETRY_QFULL", -1} + , + {"PM_SNOOP_PW_RETRY_RQ", -1} + , + {"PM_SNOOP_RD_RETRY_QFULL", -1} + , + {"PM_SNOOP_RD_RETRY_RQ", -1} + , + {"PM_SNOOP_RETRY_1AHEAD", -1} + , + {"PM_SNOOP_TLBIE", -1} + , + {"PM_SNOOP_WR_RETRY_RQ", -1} + , + {"PM_STCX_FAIL", -1} + , + {"PM_STCX_PASS", -1} + , + {"PM_SUSPENDED", -1} + , + {"PM_TB_BIT_TRANS", -1} + , + {"PM_THRD_ONE_RUN_CYC", -1} + , + {"PM_THRD_PRIO_1_CYC", -1} + , + {"PM_THRD_PRIO_2_CYC", -1} + , + {"PM_THRD_PRIO_3_CYC", -1} + , + {"PM_THRD_PRIO_4_CYC", -1} + , + {"PM_THRD_PRIO_5_CYC", -1} + , + {"PM_THRD_PRIO_6_CYC", -1} + , + {"PM_THRD_PRIO_7_CYC", -1} + , + {"PM_TLB_MISS", -1} + , + {"PM_XER_MAP_FULL_CYC", -1} + , + {"PM_INST_FROM_L2MISS", -1} + , + {"PM_BR_PRED_TA", -1} + , + {"PM_CMPLU_STALL_DCACHE_MISS", -1} + , + {"PM_CMPLU_STALL_FDIV", -1} + , + {"PM_CMPLU_STALL_FXU", -1} + , + {"PM_CMPLU_STALL_LSU", -1} + , + {"PM_DATA_FROM_L25_MOD", -1} + , + {"PM_DATA_FROM_L35_MOD", -1} + , + {"PM_DATA_FROM_LMEM", -1} + , + {"PM_FPU_FSQRT", -1} + , + {"PM_FPU_FMA", -1} + , + {"PM_FPU_STALL3", -1} + , + {"PM_FPU_STF", -1} + , + 
{"PM_FXU_BUSY", -1} + , + {"PM_FXU_FIN", -1} + , + {"PM_GCT_NOSLOT_IC_MISS", -1} + , + {"PM_GCT_USAGE_60to79_CYC", -1} + , + {"PM_GRP_DISP", -1} + , + {"PM_HV_CYC", -1} + , + {"PM_INST_FROM_L1", -1} + , + {"PM_INST_FROM_L25_MOD", -1} + , + {"PM_INST_FROM_L35_MOD", -1} + , + {"PM_INST_FROM_LMEM", -1} + , + {"PM_LSU_DERAT_MISS", -1} + , + {"PM_LSU_FLUSH_LRQ", -1} + , + {"PM_LSU_FLUSH_UST", -1} + , + {"PM_LSU_LMQ_SRQ_EMPTY_CYC", -1} + , + {"PM_LSU_REJECT_LMQ_FULL", -1} + , + {"PM_LSU_REJECT_RELOAD_CDF", -1} + , + {"PM_MRK_BRU_FIN", -1} + , + {"PM_MRK_DATA_FROM_L25_MOD", -1} + , + {"PM_MRK_DATA_FROM_L25_SHR_CYC", -1} + , + {"PM_MRK_DATA_FROM_L275_SHR_CYC", -1} + , + {"PM_MRK_DATA_FROM_L2_CYC", -1} + , + {"PM_MRK_DATA_FROM_L35_MOD", -1} + , + {"PM_MRK_DATA_FROM_L35_SHR_CYC", -1} + , + {"PM_MRK_DATA_FROM_L375_SHR_CYC", -1} + , + {"PM_MRK_DATA_FROM_L3_CYC", -1} + , + {"PM_MRK_DATA_FROM_LMEM", -1} + , + {"PM_MRK_GRP_BR_REDIR", -1} + , + {"PM_MRK_ST_GPS", -1} + , + {"PM_PMC1_OVERFLOW", -1} + , + {"PM_PTEG_FROM_L25_MOD", -1} + , + {"PM_PTEG_FROM_L35_MOD", -1} + , + {"PM_PTEG_FROM_LMEM", -1} + , + {"PM_SLB_MISS", -1} + , + {"PM_GCT_EMPTY_CYC", -1} + , + {"PM_THRD_GRP_CMPL_BOTH_CYC", -1} + , + {"PM_BR_ISSUED", -1} + , + {"PM_BR_MPRED_CR", -1} + , + {"PM_BR_MPRED_TA", -1} + , + {"PM_BR_PRED_CR", -1} + , + {"PM_CRQ_FULL_CYC", -1} + , + {"PM_DATA_FROM_L275_SHR", -1} + , + {"PM_DATA_FROM_L375_SHR", -1} + , + {"PM_DC_INV_L2", -1} + , + {"PM_DC_PREF_DST", -1} + , + {"PM_DC_PREF_STREAM_ALLOC", -1} + , + {"PM_EE_OFF", -1} + , + {"PM_EE_OFF_EXT_INT", -1} + , + {"PM_FAB_CMD_RETRIED", -1} + , + {"PM_FAB_DCLAIM_RETRIED", -1} + , + {"PM_FAB_M1toVNorNN_SIDECAR_EMPTY", -1} + , + {"PM_FAB_P1toVNorNN_SIDECAR_EMPTY", -1} + , + {"PM_FAB_PNtoNN_SIDECAR", -1} + , + {"PM_FAB_PNtoVN_SIDECAR", -1} + , + {"PM_FAB_VBYPASS_EMPTY", -1} + , + {"PM_FLUSH_BR_MPRED", -1} + , + {"PM_FLUSH_IMBAL", -1} + , + {"PM_FLUSH", -1} + , + {"PM_FLUSH_SB", -1} + , + {"PM_FLUSH_SYNC", -1} + , + {"PM_FPU0_FEST", -1} + , + 
{"PM_FPU0_FIN", -1} + , + {"PM_FPU0_FMOV_FEST", -1} + , + {"PM_FPU0_FPSCR", -1} + , + {"PM_FPU0_FRSP_FCONV", -1} + , + {"PM_FPU1_FEST", -1} + , + {"PM_FPU1_FIN", -1} + , + {"PM_FPU1_FMOV_FEST", -1} + , + {"PM_FPU1_FRSP_FCONV", -1} + , + {"PM_FPU_FMOV_FEST", -1} + , + {"PM_FPU_FRSP_FCONV", -1} + , + {"PM_FXLS0_FULL_CYC", -1} + , + {"PM_FXLS1_FULL_CYC", -1} + , + {"PM_FXU0_BUSY_FXU1_IDLE", -1} + , + {"PM_FXU0_FIN", -1} + , + {"PM_FXU1_FIN", -1} + , + {"PM_GCT_NOSLOT_SRQ_FULL", -1} + , + {"PM_GCT_USAGE_80to99_CYC", -1} + , + {"PM_GPR_MAP_FULL_CYC", -1} + , + {"PM_GRP_CMPL", -1} + , + {"PM_GRP_DISP_BLK_SB_CYC", -1} + , + {"PM_GRP_DISP_SUCCESS", -1} + , + {"PM_IC_DEMAND_L2_BHT_REDIRECT", -1} + , + {"PM_IC_DEMAND_L2_BR_REDIRECT", -1} + , + {"PM_IC_PREF_INSTALL", -1} + , + {"PM_INST_FROM_L275_SHR", -1} + , + {"PM_INST_FROM_L375_SHR", -1} + , + {"PM_INST_FROM_PREF", -1} + , + {"PM_L1_DCACHE_RELOAD_VALID", -1} + , + {"PM_L1_PREF", -1} + , + {"PM_L1_WRITE_CYC", -1} + , + {"PM_L2SA_MOD_INV", -1} + , + {"PM_L2SA_RCLD_DISP_FAIL_ADDR", -1} + , + {"PM_L2SA_RCLD_DISP_FAIL_OTHER", -1} + , + {"PM_L2SA_RCST_DISP_FAIL_ADDR", -1} + , + {"PM_L2SA_RCST_DISP_FAIL_OTHER", -1} + , + {"PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL", -1} + , + {"PM_L2SA_SHR_INV", -1} + , + {"PM_L2SA_ST_HIT", -1} + , + {"PM_L2SB_MOD_INV", -1} + , + {"PM_L2SB_RCLD_DISP_FAIL_ADDR", -1} + , + {"PM_L2SB_RCLD_DISP_FAIL_OTHER", -1} + , + {"PM_L2SB_RCST_DISP_FAIL_ADDR", -1} + , + {"PM_L2SB_RCST_DISP_FAIL_OTHER", -1} + , + {"PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL", -1} + , + {"PM_L2SB_SHR_INV", -1} + , + {"PM_L2SB_ST_HIT", -1} + , + {"PM_L2SC_MOD_INV", -1} + , + {"PM_L2SC_RCLD_DISP_FAIL_ADDR", -1} + , + {"PM_L2SC_RCLD_DISP_FAIL_OTHER", -1} + , + {"PM_L2SC_RCST_DISP_FAIL_ADDR", -1} + , + {"PM_L2SC_RCST_DISP_FAIL_OTHER", -1} + , + {"PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL", -1} + , + {"PM_L2SC_SHR_INV", -1} + , + {"PM_L2SC_ST_HIT", -1} + , + {"PM_L2_PREF", -1} + , + {"PM_L3SA_HIT", -1} + , + {"PM_L3SA_MOD_INV", -1} + , + {"PM_L3SA_SHR_INV", -1} 
+ , + {"PM_L3SA_SNOOP_RETRY", -1} + , + {"PM_L3SB_HIT", -1} + , + {"PM_L3SB_MOD_INV", -1} + , + {"PM_L3SB_SHR_INV", -1} + , + {"PM_L3SB_SNOOP_RETRY", -1} + , + {"PM_L3SC_HIT", -1} + , + {"PM_L3SC_MOD_INV", -1} + , + {"PM_L3SC_SHR_INV", -1} + , + {"PM_L3SC_SNOOP_RETRY", -1} + , + {"PM_LD_MISS_L1", -1} + , + {"PM_LD_MISS_L1_LSU0", -1} + , + {"PM_LD_MISS_L1_LSU1", -1} + , + {"PM_LD_REF_L1_LSU0", -1} + , + {"PM_LD_REF_L1_LSU1", -1} + , + {"PM_LSU0_LDF", -1} + , + {"PM_LSU0_NCLD", -1} + , + {"PM_LSU1_LDF", -1} + , + {"PM_LSU1_NCLD", -1} + , + {"PM_LSU_FLUSH", -1} + , + {"PM_LSU_FLUSH_SRQ_FULL", -1} + , + {"PM_LSU_LMQ_FULL_CYC", -1} + , + {"PM_LSU_LMQ_LHR_MERGE", -1} + , + {"PM_LSU_LMQ_S0_ALLOC", -1} + , + {"PM_LSU_LMQ_S0_VALID", -1} + , + {"PM_LSU_LRQ_FULL_CYC", -1} + , + {"PM_DC_PREF_STREAM_ALLOC_BLK", -1} + , + {"PM_LSU_SRQ_FULL_CYC", -1} + , + {"PM_LSU_SRQ_SYNC_CYC", -1} + , + {"PM_LWSYNC_HELD", -1} + , + {"PM_MEM_LO_PRIO_PW_CMPL", -1} + , + {"PM_MEM_LO_PRIO_WR_CMPL", -1} + , + {"PM_MEM_PW_CMPL", -1} + , + {"PM_MEM_PW_GATH", -1} + , + {"PM_MEM_RQ_DISP_BUSY1to7", -1} + , + {"PM_MEM_SPEC_RD_CANCEL", -1} + , + {"PM_MEM_WQ_DISP_BUSY8to15", -1} + , + {"PM_MEM_WQ_DISP_DCLAIM", -1} + , + {"PM_MRK_DATA_FROM_L275_SHR", -1} + , + {"PM_MRK_DATA_FROM_L375_SHR", -1} + , + {"PM_MRK_DSLB_MISS", -1} + , + {"PM_MRK_DTLB_MISS", -1} + , + {"PM_MRK_FPU_FIN", -1} + , + {"PM_MRK_INST_FIN", -1} + , + {"PM_MRK_L1_RELOAD_VALID", -1} + , + {"PM_MRK_LSU0_FLUSH_LRQ", -1} + , + {"PM_MRK_LSU0_FLUSH_SRQ", -1} + , + {"PM_MRK_LSU0_FLUSH_UST", -1} + , + {"PM_MRK_LSU0_FLUSH_ULD", -1} + , + {"PM_MRK_LSU1_FLUSH_LRQ", -1} + , + {"PM_MRK_LSU1_FLUSH_SRQ", -1} + , + {"PM_MRK_LSU1_FLUSH_ULD", -1} + , + {"PM_MRK_LSU1_FLUSH_UST", -1} + , + {"PM_MRK_LSU_FLUSH_LRQ", -1} + , + {"PM_MRK_LSU_FLUSH_UST", -1} + , + {"PM_MRK_LSU_SRQ_INST_VALID", -1} + , + {"PM_MRK_ST_CMPL_INT", -1} + , + {"PM_PMC2_OVERFLOW", -1} + , + {"PM_PMC6_OVERFLOW", -1} + , + {"PM_PTEG_FROM_L275_SHR", -1} + , + {"PM_PTEG_FROM_L375_SHR", -1} + , 
+ {"PM_SNOOP_PARTIAL_RTRY_QFULL", -1} + , + {"PM_SNOOP_PW_RETRY_WQ_PWQ", -1} + , + {"PM_SNOOP_RD_RETRY_WQ", -1} + , + {"PM_SNOOP_WR_RETRY_QFULL", -1} + , + {"PM_SNOOP_WR_RETRY_WQ", -1} + , + {"PM_STOP_COMPLETION", -1} + , + {"PM_ST_MISS_L1", -1} + , + {"PM_ST_REF_L1", -1} + , + {"PM_ST_REF_L1_LSU0", -1} + , + {"PM_ST_REF_L1_LSU1", -1} + , + {"PM_CLB_EMPTY_CYC", -1} + , + {"PM_THRD_L2MISS_BOTH_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_0_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_1or2_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_3or4_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_5or6_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_minus1or2_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_minus3or4_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_minus5or6_CYC", -1} + , + {"PM_THRD_SEL_OVER_CLB_EMPTY", -1} + , + {"PM_THRD_SEL_OVER_GCT_IMBAL", -1} + , + {"PM_THRD_SEL_OVER_ISU_HOLD", -1} + , + {"PM_THRD_SEL_OVER_L2MISS", -1} + , + {"PM_THRD_SEL_T0", -1} + , + {"PM_THRD_SEL_T1", -1} + , + {"PM_THRD_SMT_HANG", -1} + , + {"PM_THRESH_TIMEO", -1} + , + {"PM_TLBIE_HELD", -1} + , + {"PM_DATA_FROM_L2MISS", -1} + , + {"PM_MRK_DATA_FROM_L2MISS", -1} + , + {"PM_PTEG_FROM_L2MISS", -1} + , + {"PM_0INST_FETCH", -1} + , + {"PM_BR_PRED_CR_TA", -1} + , + {"PM_CMPLU_STALL_DIV", -1} + , + {"PM_CMPLU_STALL_ERAT_MISS", -1} + , + {"PM_CMPLU_STALL_FPU", -1} + , + {"PM_CMPLU_STALL_REJECT", -1} + , + {"PM_EXT_INT", -1} + , + {"PM_FPU_FEST", -1} + , + {"PM_FPU_FIN", -1} + , + {"PM_FXLS_FULL_CYC", -1} + , + {"PM_FXU1_BUSY_FXU0_IDLE", -1} + , + {"PM_GCT_NOSLOT_BR_MPRED", -1} + , + {"PM_INST_FROM_L275_MOD", -1} + , + {"PM_INST_FROM_L375_MOD", -1} + , + {"PM_INST_FROM_RMEM", -1} + , + {"PM_LD_REF_L1", -1} + , + {"PM_LSU_LDF", -1} + , + {"PM_LSU_SRQ_EMPTY_CYC", -1} + , + {"PM_MRK_CRU_FIN", -1} + , + {"PM_MRK_DATA_FROM_L25_MOD_CYC", -1} + , + {"PM_MRK_DATA_FROM_L275_MOD_CYC", -1} + , + {"PM_MRK_DATA_FROM_L35_MOD_CYC", -1} + , + {"PM_MRK_DATA_FROM_L375_MOD_CYC", -1} + , + {"PM_MRK_DATA_FROM_LMEM_CYC", -1} + , + {"PM_MRK_DATA_FROM_RMEM_CYC", -1} + , + {"PM_MRK_GRP_CMPL", 
-1} + , + {"PM_MRK_GRP_IC_MISS", -1} + , + {"PM_MRK_GRP_TIMEO", -1} + , + {"PM_MRK_LSU_FIN", -1} + , + {"PM_MRK_LSU_FLUSH_SRQ", -1} + , + {"PM_MRK_LSU_FLUSH_ULD", -1} + , + {"PM_PMC3_OVERFLOW", -1} + , + {"PM_WORK_HELD", -1} +}; diff --git a/src/components/perfctr_ppc/power6_events.h b/src/components/perfctr_ppc/power6_events.h new file mode 100644 index 0000000..4afbbc3 --- /dev/null +++ b/src/components/perfctr_ppc/power6_events.h @@ -0,0 +1,588 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +#ifndef _PAPI_POWER6_EVENTS_H +#define _PAPI_POWER6_EVENTS_H + +/* +* File: power6_events.h +* CVS: +* Author: Corey Ashford +* cjashfor@us.ibm.com +* Mods: +* +* +* (C) Copyright IBM Corporation, 2007. All Rights Reserved. +* Contributed by Corey Ashford +* +* This file MUST be kept synchronised with the events file. +* +*/ +#include "papiStdEventDefs.h" + +#define GROUP_INTS 7 +#define PAPI_MAX_NATIVE_EVENTS 1024 +#define MAX_GROUPS (GROUP_INTS * 32) +#define MAX_NATNAME_MAP_INDEX 553 + + +enum native_name +{ + PNE_PM_0INST_FETCH = PAPI_NATIVE_MASK, + PNE_PM_1PLUS_PPC_CMPL, + PNE_PM_1PLUS_PPC_DISP, + PNE_PM_BRU_FIN, + PNE_PM_BR_MPRED_CCACHE, + PNE_PM_BR_MPRED_COUNT, + PNE_PM_BR_MPRED_CR, + PNE_PM_BR_MPRED_TA, + PNE_PM_BR_PRED, + PNE_PM_BR_PRED_CCACHE, + PNE_PM_BR_PRED_CR, + PNE_PM_BR_PRED_LSTACK, + PNE_PM_CYC, + PNE_PM_DATA_FROM_L2, + PNE_PM_DATA_FROM_L35_MOD, + PNE_PM_DATA_FROM_MEM_DP, + PNE_PM_DATA_FROM_RL2L3_MOD, + PNE_PM_DATA_PTEG_1ST_HALF, + PNE_PM_DATA_PTEG_2ND_HALF, + PNE_PM_DATA_PTEG_SECONDARY, + PNE_PM_DC_INV_L2, + PNE_PM_DC_PREF_OUT_OF_STREAMS, + PNE_PM_DC_PREF_STREAM_ALLOC, + PNE_PM_DFU_ADD, + PNE_PM_DFU_ADD_SHIFTED_BOTH, + PNE_PM_DFU_BACK2BACK, + PNE_PM_DFU_CONV, + PNE_PM_DFU_ENC_BCD_DPD, + PNE_PM_DFU_EXP_EQ, + PNE_PM_DFU_FIN, + PNE_PM_DFU_SUBNORM, + PNE_PM_DPU_HELD_COMPLETION, + PNE_PM_DPU_HELD_CR_LOGICAL, + PNE_PM_DPU_HELD_CW, + PNE_PM_DPU_HELD_FPQ, + PNE_PM_DPU_HELD_FPU_CR, + PNE_PM_DPU_HELD_FP_FX_MULT, + 
PNE_PM_DPU_HELD_FXU_MULTI, + PNE_PM_DPU_HELD_FXU_SOPS, + PNE_PM_DPU_HELD_GPR, + PNE_PM_DPU_HELD_INT, + PNE_PM_DPU_HELD_ISYNC, + PNE_PM_DPU_HELD_ITLB_ISLB, + PNE_PM_DPU_HELD_LLA_END, + PNE_PM_DPU_HELD_LSU, + PNE_PM_DPU_HELD_LSU_SOPS, + PNE_PM_DPU_HELD_MULT_GPR, + PNE_PM_DPU_HELD_RESTART, + PNE_PM_DPU_HELD_RU_WQ, + PNE_PM_DPU_HELD_SMT, + PNE_PM_DPU_HELD_SPR, + PNE_PM_DPU_HELD_STCX_CR, + PNE_PM_DPU_HELD_THERMAL, + PNE_PM_DPU_HELD_THRD_PRIO, + PNE_PM_DPU_HELD_XER, + PNE_PM_DPU_HELD_XTHRD, + PNE_PM_DSLB_MISS, + PNE_PM_EE_OFF_EXT_INT, + PNE_PM_FAB_ADDR_COLLISION, + PNE_PM_FAB_CMD_ISSUED, + PNE_PM_FAB_DCLAIM, + PNE_PM_FAB_DMA, + PNE_PM_FAB_MMIO, + PNE_PM_FAB_NODE_PUMP, + PNE_PM_FAB_RETRY_NODE_PUMP, + PNE_PM_FAB_RETRY_SYS_PUMP, + PNE_PM_FAB_SYS_PUMP, + PNE_PM_FLUSH, + PNE_PM_FLUSH_ASYNC, + PNE_PM_FLUSH_FPU, + PNE_PM_FLUSH_FXU, + PNE_PM_FPU0_1FLOP, + PNE_PM_FPU0_DENORM, + PNE_PM_FPU0_FCONV, + PNE_PM_FPU0_FEST, + PNE_PM_FPU0_FIN, + PNE_PM_FPU0_FLOP, + PNE_PM_FPU0_FMA, + PNE_PM_FPU0_FPSCR, + PNE_PM_FPU0_FRSP, + PNE_PM_FPU0_FSQRT_FDIV, + PNE_PM_FPU0_FXDIV, + PNE_PM_FPU0_FXMULT, + PNE_PM_FPU0_SINGLE, + PNE_PM_FPU0_STF, + PNE_PM_FPU0_ST_FOLDED, + PNE_PM_FPU1_1FLOP, + PNE_PM_FPU1_DENORM, + PNE_PM_FPU1_FCONV, + PNE_PM_FPU1_FEST, + PNE_PM_FPU1_FIN, + PNE_PM_FPU1_FLOP, + PNE_PM_FPU1_FMA, + PNE_PM_FPU1_FPSCR, + PNE_PM_FPU1_FRSP, + PNE_PM_FPU1_FSQRT_FDIV, + PNE_PM_FPU1_FXDIV, + PNE_PM_FPU1_FXMULT, + PNE_PM_FPU1_SINGLE, + PNE_PM_FPU1_STF, + PNE_PM_FPU1_ST_FOLDED, + PNE_PM_FPU_1FLOP, + PNE_PM_FPU_FCONV, + PNE_PM_FPU_FIN, + PNE_PM_FPU_FLOP, + PNE_PM_FPU_FXDIV, + PNE_PM_FPU_FXMULT, + PNE_PM_FPU_ISSUE_0, + PNE_PM_FPU_ISSUE_1, + PNE_PM_FPU_ISSUE_2, + PNE_PM_FPU_ISSUE_DIV_SQRT_OVERLAP, + PNE_PM_FPU_ISSUE_OOO, + PNE_PM_FPU_ISSUE_STALL_FPR, + PNE_PM_FPU_ISSUE_STALL_ST, + PNE_PM_FPU_ISSUE_STALL_THRD, + PNE_PM_FPU_ISSUE_STEERING, + PNE_PM_FPU_ISSUE_ST_FOLDED, + PNE_PM_FXU_IDLE, + PNE_PM_FXU_PIPELINED_MULT_DIV, + PNE_PM_GCT_EMPTY_CYC, + PNE_PM_GCT_FULL_CYC, + PNE_PM_GCT_NOSLOT_CYC, + 
PNE_PM_GXI_ADDR_CYC_BUSY, + PNE_PM_GXI_CYC_BUSY, + PNE_PM_GXI_DATA_CYC_BUSY, + PNE_PM_GXO_ADDR_CYC_BUSY, + PNE_PM_GXO_CYC_BUSY, + PNE_PM_GXO_DATA_CYC_BUSY, + PNE_PM_GX_DMA_READ, + PNE_PM_GX_DMA_WRITE, + PNE_PM_IBUF_FULL_CYC, + PNE_PM_IC_DEMAND_L2_BHT_REDIRECT, + PNE_PM_IC_DEMAND_L2_BR_REDIRECT, + PNE_PM_IC_PREF_REQ, + PNE_PM_IC_PREF_WRITE, + PNE_PM_IC_RELOAD_SHR, + PNE_PM_IC_REQ, + PNE_PM_IERAT_MISS, + PNE_PM_IFU_FIN, + PNE_PM_INST_CMPL, + PNE_PM_INST_DISP_LLA, + PNE_PM_INST_FETCH_CYC, + PNE_PM_INST_FROM_L1, + PNE_PM_INST_FROM_L2, + PNE_PM_INST_FROM_L35_MOD, + PNE_PM_INST_FROM_MEM_DP, + PNE_PM_INST_FROM_RL2L3_MOD, + PNE_PM_INST_IMC_MATCH_CMPL, + PNE_PM_INST_PTEG_1ST_HALF, + PNE_PM_INST_PTEG_2ND_HALF, + PNE_PM_INST_PTEG_SECONDARY, + PNE_PM_INST_TABLEWALK_CYC, + PNE_PM_ISLB_MISS, + PNE_PM_ITLB_REF, + PNE_PM_L1_ICACHE_MISS, + PNE_PM_L1_PREF, + PNE_PM_L1_WRITE_CYC, + PNE_PM_L2SA_CASTOUT_MOD, + PNE_PM_L2SA_CASTOUT_SHR, + PNE_PM_L2SA_DC_INV, + PNE_PM_L2SA_IC_INV, + PNE_PM_L2SA_LD_HIT, + PNE_PM_L2SA_LD_MISS_DATA, + PNE_PM_L2SA_LD_MISS_INST, + PNE_PM_L2SA_LD_REQ, + PNE_PM_L2SA_LD_REQ_DATA, + PNE_PM_L2SA_LD_REQ_INST, + PNE_PM_L2SA_MISS, + PNE_PM_L2SA_ST_HIT, + PNE_PM_L2SA_ST_MISS, + PNE_PM_L2SA_ST_REQ, + PNE_PM_L2SB_CASTOUT_MOD, + PNE_PM_L2SB_CASTOUT_SHR, + PNE_PM_L2SB_DC_INV, + PNE_PM_L2SB_IC_INV, + PNE_PM_L2SB_LD_HIT, + PNE_PM_L2SB_LD_MISS_DATA, + PNE_PM_L2SB_LD_MISS_INST, + PNE_PM_L2SB_LD_REQ, + PNE_PM_L2SB_LD_REQ_DATA, + PNE_PM_L2SB_LD_REQ_INST, + PNE_PM_L2SB_MISS, + PNE_PM_L2SB_ST_HIT, + PNE_PM_L2SB_ST_MISS, + PNE_PM_L2SB_ST_REQ, + PNE_PM_L2_CASTOUT_MOD, + PNE_PM_L2_LD_REQ_DATA, + PNE_PM_L2_LD_REQ_INST, + PNE_PM_L2_PREF_LD, + PNE_PM_L2_PREF_ST, + PNE_PM_L2_ST_MISS_DATA, + PNE_PM_L3SA_HIT, + PNE_PM_L3SA_MISS, + PNE_PM_L3SA_REF, + PNE_PM_L3SB_HIT, + PNE_PM_L3SB_MISS, + PNE_PM_L3SB_REF, + PNE_PM_LARX, + PNE_PM_LARX_L1HIT, + PNE_PM_LD_MISS_L1, + PNE_PM_LD_MISS_L1_CYC, + PNE_PM_LD_REF_L1, + PNE_PM_LD_REF_L1_BOTH, + PNE_PM_LD_REQ_L2, + PNE_PM_LSU0_DERAT_MISS, + 
PNE_PM_LSU0_LDF, + PNE_PM_LSU0_NCLD, + PNE_PM_LSU0_NCST, + PNE_PM_LSU0_REJECT, + PNE_PM_LSU0_REJECT_DERAT_MPRED, + PNE_PM_LSU0_REJECT_EXTERN, + PNE_PM_LSU0_REJECT_L2MISS, + PNE_PM_LSU0_REJECT_L2_CORR, + PNE_PM_LSU0_REJECT_LHS, + PNE_PM_LSU0_REJECT_NO_SCRATCH, + PNE_PM_LSU0_REJECT_PARTIAL_SECTOR, + PNE_PM_LSU0_REJECT_SET_MPRED, + PNE_PM_LSU0_REJECT_STQ_FULL, + PNE_PM_LSU0_REJECT_ULD, + PNE_PM_LSU0_REJECT_UST, + PNE_PM_LSU1_DERAT_MISS, + PNE_PM_LSU1_LDF, + PNE_PM_LSU1_REJECT, + PNE_PM_LSU1_REJECT_DERAT_MPRED, + PNE_PM_LSU1_REJECT_EXTERN, + PNE_PM_LSU1_REJECT_L2_CORR, + PNE_PM_LSU1_REJECT_LHS, + PNE_PM_LSU1_REJECT_NO_SCRATCH, + PNE_PM_LSU1_REJECT_PARTIAL_SECTOR, + PNE_PM_LSU1_REJECT_SET_MPRED, + PNE_PM_LSU1_REJECT_STQ_FULL, + PNE_PM_LSU1_REJECT_ULD, + PNE_PM_LSU1_REJECT_UST, + PNE_PM_LSU_BOTH_BUS, + PNE_PM_LSU_DERAT_MISS_CYC, + PNE_PM_LSU_FLUSH_ALIGN, + PNE_PM_LSU_FLUSH_DSI, + PNE_PM_LSU_LDF_BOTH, + PNE_PM_LSU_LMQ_FULL_CYC, + PNE_PM_LSU_REJECT_L2_CORR, + PNE_PM_LSU_REJECT_LHS, + PNE_PM_LSU_REJECT_PARTIAL_SECTOR, + PNE_PM_LSU_REJECT_STEAL, + PNE_PM_LSU_REJECT_STQ_FULL, + PNE_PM_LSU_REJECT_ULD, + PNE_PM_LSU_REJECT_UST_BOTH, + PNE_PM_LSU_ST_CHAINED, + PNE_PM_LWSYNC, + PNE_PM_MEM0_DP_CL_WR_GLOB, + PNE_PM_MEM0_DP_CL_WR_LOC, + PNE_PM_MEM0_DP_RQ_GLOB_LOC, + PNE_PM_MEM0_DP_RQ_LOC_GLOB, + PNE_PM_MEM1_DP_CL_WR_GLOB, + PNE_PM_MEM1_DP_CL_WR_LOC, + PNE_PM_MEM1_DP_RQ_GLOB_LOC, + PNE_PM_MEM1_DP_RQ_LOC_GLOB, + PNE_PM_MEM_DP_CL_WR_LOC, + PNE_PM_MEM_DP_RQ_GLOB_LOC, + PNE_PM_MRK_BR_TAKEN, + PNE_PM_MRK_DATA_FROM_L2, + PNE_PM_MRK_DATA_FROM_L2MISS, + PNE_PM_MRK_DATA_FROM_L35_MOD, + PNE_PM_MRK_DATA_FROM_MEM_DP, + PNE_PM_MRK_DATA_FROM_RL2L3_MOD, + PNE_PM_MRK_DTLB_REF, + PNE_PM_MRK_FPU0_FIN, + PNE_PM_MRK_FPU1_FIN, + PNE_PM_MRK_INST_DISP, + PNE_PM_MRK_INST_ISSUED, + PNE_PM_MRK_LSU0_REJECT_L2MISS, + PNE_PM_MRK_LSU0_REJECT_LHS, + PNE_PM_MRK_LSU0_REJECT_ULD, + PNE_PM_MRK_LSU0_REJECT_UST, + PNE_PM_MRK_LSU1_REJECT_LHS, + PNE_PM_MRK_LSU1_REJECT_ULD, + PNE_PM_MRK_LSU1_REJECT_UST, + 
PNE_PM_MRK_LSU_REJECT_ULD, + PNE_PM_MRK_PTEG_FROM_L2, + PNE_PM_MRK_PTEG_FROM_L35_MOD, + PNE_PM_MRK_PTEG_FROM_MEM_DP, + PNE_PM_MRK_PTEG_FROM_RL2L3_MOD, + PNE_PM_MRK_STCX_FAIL, + PNE_PM_MRK_ST_CMPL, + PNE_PM_MRK_VMX0_LD_WRBACK, + PNE_PM_MRK_VMX1_LD_WRBACK, + PNE_PM_MRK_VMX_COMPLEX_ISSUED, + PNE_PM_MRK_VMX_FLOAT_ISSUED, + PNE_PM_MRK_VMX_PERMUTE_ISSUED, + PNE_PM_MRK_VMX_SIMPLE_ISSUED, + PNE_PM_MRK_VMX_ST_ISSUED, + PNE_PM_NO_ITAG_CYC, + PNE_PM_PMC2_SAVED, + PNE_PM_PMC4_OVERFLOW, + PNE_PM_PMC4_REWIND, + PNE_PM_PMC5_OVERFLOW, + PNE_PM_PTEG_FROM_L2, + PNE_PM_PTEG_FROM_L2MISS, + PNE_PM_PTEG_FROM_L35_MOD, + PNE_PM_PTEG_FROM_MEM_DP, + PNE_PM_PTEG_FROM_RL2L3_MOD, + PNE_PM_PTEG_RELOAD_VALID, + PNE_PM_PURR, + PNE_PM_RUN_CYC, + PNE_PM_SLB_MISS, + PNE_PM_STCX, + PNE_PM_STCX_CANCEL, + PNE_PM_STCX_FAIL, + PNE_PM_ST_FIN, + PNE_PM_ST_HIT_L2, + PNE_PM_ST_MISS_L1, + PNE_PM_ST_REF_L1, + PNE_PM_SUSPENDED, + PNE_PM_SYNC_CYC, + PNE_PM_TB_BIT_TRANS, + PNE_PM_THRD_L2MISS, + PNE_PM_THRD_ONE_RUN_CYC, + PNE_PM_THRD_PRIO_0_CYC, + PNE_PM_THRD_PRIO_7_CYC, + PNE_PM_THRD_PRIO_DIFF_0_CYC, + PNE_PM_THRD_SEL_T0, + PNE_PM_TLB_REF, + PNE_PM_VMX0_INST_ISSUED, + PNE_PM_VMX0_LD_ISSUED, + PNE_PM_VMX0_LD_WRBACK, + PNE_PM_VMX0_STALL, + PNE_PM_VMX1_INST_ISSUED, + PNE_PM_VMX1_LD_ISSUED, + PNE_PM_VMX1_LD_WRBACK, + PNE_PM_VMX1_STALL, + PNE_PM_VMX_COMPLEX_ISUED, + PNE_PM_VMX_FLOAT_ISSUED, + PNE_PM_VMX_FLOAT_MULTICYCLE, + PNE_PM_VMX_PERMUTE_ISSUED, + PNE_PM_VMX_RESULT_SAT_0_1, + PNE_PM_VMX_RESULT_SAT_1, + PNE_PM_VMX_SIMPLE_ISSUED, + PNE_PM_VMX_ST_ISSUED, + PNE_PM_0INST_FETCH_COUNT, + PNE_PM_IBUF_FULL_COUNT, + PNE_PM_GCT_FULL_COUNT, + PNE_PM_NO_ITAG_COUNT, + PNE_PM_INST_TABLEWALK_COUNT, + PNE_PM_SYNC_COUNT, + PNE_PM_RUN_COUNT, + PNE_PM_THRD_ONE_RUN_COUNT, + PNE_PM_LLA_CYC, + PNE_PM_NOT_LLA_CYC, + PNE_PM_LLA_COUNT, + PNE_PM_DPU_HELD_THERMAL_COUNT, + PNE_PM_GCT_NOSLOT_COUNT, + PNE_PM_DERAT_REF_4K, + PNE_PM_DERAT_MISS_4K, + PNE_PM_IERAT_MISS_16G, + PNE_PM_MRK_DERAT_REF_64K, + PNE_PM_MRK_DERAT_MISS_64K, + PNE_PM_BR_TAKEN, 
+ PNE_PM_DATA_FROM_DL2L3_SHR_CYC, + PNE_PM_DATA_FROM_DMEM, + PNE_PM_DATA_FROM_DMEM_CYC, + PNE_PM_DATA_FROM_L21, + PNE_PM_DATA_FROM_L25_SHR_CYC, + PNE_PM_DATA_FROM_L2MISS, + PNE_PM_DATA_FROM_L2_CYC, + PNE_PM_DATA_FROM_L35_SHR, + PNE_PM_DATA_FROM_L35_SHR_CYC, + PNE_PM_DATA_FROM_L3_CYC, + PNE_PM_DATA_FROM_LMEM_CYC, + PNE_PM_DATA_FROM_RL2L3_SHR, + PNE_PM_DATA_FROM_RL2L3_SHR_CYC, + PNE_PM_DPU_HELD, + PNE_PM_DPU_HELD_POWER, + PNE_PM_DPU_WT_IC_MISS, + PNE_PM_EXT_INT, + PNE_PM_FAB_CMD_RETRIED, + PNE_PM_FPU_DENORM, + PNE_PM_FPU_FMA, + PNE_PM_FPU_FPSCR, + PNE_PM_FPU_FRSP, + PNE_PM_FPU_FSQRT_FDIV, + PNE_PM_FXU_BUSY, + PNE_PM_HV_CYC, + PNE_PM_IC_INV_L2, + PNE_PM_INST_DISP, + PNE_PM_INST_FROM_DMEM, + PNE_PM_INST_FROM_L21, + PNE_PM_INST_FROM_L35_SHR, + PNE_PM_INST_FROM_RL2L3_SHR, + PNE_PM_L2_CASTOUT_SHR, + PNE_PM_L2_LD_MISS_DATA, + PNE_PM_L2_LD_MISS_INST, + PNE_PM_L2_MISS, + PNE_PM_L2_ST_REQ_DATA, + PNE_PM_LD_HIT_L2, + PNE_PM_LSU_DERAT_MISS, + PNE_PM_LSU_LDF, + PNE_PM_LSU_LMQ_SRQ_EMPTY_CYC, + PNE_PM_LSU_REJECT_DERAT_MPRED, + PNE_PM_LSU_REJECT_LHS_BOTH, + PNE_PM_LSU_REJECT_NO_SCRATCH, + PNE_PM_LSU_REJECT_SET_MPRED, + PNE_PM_LSU_REJECT_SLOW, + PNE_PM_LSU_REJECT_ULD_BOTH, + PNE_PM_LSU_REJECT_UST, + PNE_PM_MEM_DP_CL_WR_GLOB, + PNE_PM_MEM_DP_RQ_LOC_GLOB, + PNE_PM_MRK_DATA_FROM_DMEM, + PNE_PM_MRK_DATA_FROM_L21, + PNE_PM_MRK_DATA_FROM_L35_SHR, + PNE_PM_MRK_DATA_FROM_RL2L3_SHR, + PNE_PM_MRK_FPU_FIN, + PNE_PM_MRK_FXU_FIN, + PNE_PM_MRK_IFU_FIN, + PNE_PM_MRK_LD_MISS_L1, + PNE_PM_MRK_LSU_REJECT_UST, + PNE_PM_MRK_PTEG_FROM_DMEM, + PNE_PM_MRK_PTEG_FROM_L21, + PNE_PM_MRK_PTEG_FROM_L35_SHR, + PNE_PM_MRK_PTEG_FROM_RL2L3_SHR, + PNE_PM_MRK_ST_GPS, + PNE_PM_PMC1_OVERFLOW, + PNE_PM_PTEG_FROM_DMEM, + PNE_PM_PTEG_FROM_L21, + PNE_PM_PTEG_FROM_L35_SHR, + PNE_PM_PTEG_FROM_RL2L3_SHR, + PNE_PM_ST_REF_L1_BOTH, + PNE_PM_ST_REQ_L2, + PNE_PM_THRD_GRP_CMPL_BOTH_CYC, + PNE_PM_THRD_PRIO_1_CYC, + PNE_PM_THRD_PRIO_6_CYC, + PNE_PM_THRD_PRIO_DIFF_1or2_CYC, + PNE_PM_THRD_PRIO_DIFF_minus1or2_CYC, + PNE_PM_HV_COUNT, + 
PNE_PM_DPU_HELD_COUNT, + PNE_PM_DPU_HELD_POWER_COUNT, + PNE_PM_DPU_WT_IC_MISS_COUNT, + PNE_PM_GCT_EMPTY_COUNT, + PNE_PM_LSU_LMQ_SRQ_EMPTY_COUNT, + PNE_PM_DERAT_REF_64K, + PNE_PM_DERAT_MISS_64K, + PNE_PM_IERAT_MISS_16M, + PNE_PM_MRK_DERAT_REF_4K, + PNE_PM_MRK_DERAT_MISS_4K, + PNE_PM_DATA_FROM_DL2L3_SHR, + PNE_PM_DATA_FROM_L25_MOD, + PNE_PM_DATA_FROM_L3, + PNE_PM_DATA_FROM_L3MISS, + PNE_PM_DATA_FROM_RMEM, + PNE_PM_DPU_WT, + PNE_PM_FPU_STF, + PNE_PM_FPU_ST_FOLDED, + PNE_PM_FREQ_DOWN, + PNE_PM_FXU0_BUSY_FXU1_IDLE, + PNE_PM_FXU0_FIN, + PNE_PM_INST_FROM_DL2L3_SHR, + PNE_PM_INST_FROM_L25_MOD, + PNE_PM_INST_FROM_L3, + PNE_PM_INST_FROM_L3MISS, + PNE_PM_INST_FROM_RMEM, + PNE_PM_L1_DCACHE_RELOAD_VALID, + PNE_PM_LSU_LMQ_SRQ_EMPTY_BOTH_CYC, + PNE_PM_LSU_REJECT_EXTERN, + PNE_PM_LSU_REJECT_FAST, + PNE_PM_MRK_BR_MPRED, + PNE_PM_MRK_DATA_FROM_DL2L3_SHR, + PNE_PM_MRK_DATA_FROM_L25_MOD, + PNE_PM_MRK_DATA_FROM_L3, + PNE_PM_MRK_DATA_FROM_L3MISS, + PNE_PM_MRK_DATA_FROM_RMEM, + PNE_PM_MRK_DFU_FIN, + PNE_PM_MRK_INST_FIN, + PNE_PM_MRK_PTEG_FROM_DL2L3_SHR, + PNE_PM_MRK_PTEG_FROM_L25_MOD, + PNE_PM_MRK_PTEG_FROM_L3, + PNE_PM_MRK_PTEG_FROM_L3MISS, + PNE_PM_MRK_PTEG_FROM_RMEM, + PNE_PM_MRK_ST_CMPL_INT, + PNE_PM_PMC2_OVERFLOW, + PNE_PM_PMC2_REWIND, + PNE_PM_PMC4_SAVED, + PNE_PM_PMC6_OVERFLOW, + PNE_PM_PTEG_FROM_DL2L3_SHR, + PNE_PM_PTEG_FROM_L25_MOD, + PNE_PM_PTEG_FROM_L3, + PNE_PM_PTEG_FROM_L3MISS, + PNE_PM_PTEG_FROM_RMEM, + PNE_PM_THERMAL_MAX, + PNE_PM_THRD_CONC_RUN_INST, + PNE_PM_THRD_PRIO_2_CYC, + PNE_PM_THRD_PRIO_5_CYC, + PNE_PM_THRD_PRIO_DIFF_3or4_CYC, + PNE_PM_THRD_PRIO_DIFF_minus3or4_CYC, + PNE_PM_THRESH_TIMEO, + PNE_PM_DPU_WT_COUNT, + PNE_PM_LSU_LMQ_SRQ_EMPTY_BOTH_COUNT, + PNE_PM_DERAT_REF_16M, + PNE_PM_DERAT_MISS_16M, + PNE_PM_IERAT_MISS_64K, + PNE_PM_MRK_DERAT_REF_16M, + PNE_PM_MRK_DERAT_MISS_16M, + PNE_PM_BR_MPRED, + PNE_PM_DATA_FROM_DL2L3_MOD, + PNE_PM_DATA_FROM_DL2L3_MOD_CYC, + PNE_PM_DATA_FROM_L21_CYC, + PNE_PM_DATA_FROM_L25_SHR, + PNE_PM_DATA_FROM_L25_MOD_CYC, + 
PNE_PM_DATA_FROM_L35_MOD_CYC, + PNE_PM_DATA_FROM_LMEM, + PNE_PM_DATA_FROM_MEM_DP_CYC, + PNE_PM_DATA_FROM_RL2L3_MOD_CYC, + PNE_PM_DATA_FROM_RMEM_CYC, + PNE_PM_DPU_WT_BR_MPRED, + PNE_PM_FPU_FEST, + PNE_PM_FPU_SINGLE, + PNE_PM_FREQ_UP, + PNE_PM_FXU1_BUSY_FXU0_IDLE, + PNE_PM_FXU1_FIN, + PNE_PM_INST_FROM_DL2L3_MOD, + PNE_PM_INST_FROM_L25_SHR, + PNE_PM_INST_FROM_L2MISS, + PNE_PM_INST_FROM_LMEM, + PNE_PM_LSU_REJECT, + PNE_PM_LSU_SRQ_EMPTY_CYC, + PNE_PM_MRK_DATA_FROM_DL2L3_MOD, + PNE_PM_MRK_DATA_FROM_L25_SHR, + PNE_PM_MRK_DATA_FROM_LMEM, + PNE_PM_MRK_INST_TIMEO, + PNE_PM_MRK_LSU_DERAT_MISS, + PNE_PM_MRK_LSU_FIN, + PNE_PM_MRK_LSU_REJECT_LHS, + PNE_PM_MRK_PTEG_FROM_DL2L3_MOD, + PNE_PM_MRK_PTEG_FROM_L25_SHR, + PNE_PM_MRK_PTEG_FROM_L2MISS, + PNE_PM_MRK_PTEG_FROM_LMEM, + PNE_PM_PMC3_OVERFLOW, + PNE_PM_PTEG_FROM_DL2L3_MOD, + PNE_PM_PTEG_FROM_L25_SHR, + PNE_PM_PTEG_FROM_LMEM, + PNE_PM_THRD_BOTH_RUN_CYC, + PNE_PM_THRD_LLA_BOTH_CYC, + PNE_PM_THRD_PRIO_3_CYC, + PNE_PM_THRD_PRIO_4_CYC, + PNE_PM_THRD_PRIO_DIFF_5or6_CYC, + PNE_PM_THRD_PRIO_DIFF_minus5or6_CYC, + PNE_PM_THRD_BOTH_RUN_COUNT, + PNE_PM_DPU_WT_BR_MPRED_COUNT, + PNE_PM_LSU_SRQ_EMPTY_COUNT, + PNE_PM_DERAT_REF_16G, + PNE_PM_DERAT_MISS_16G, + PNE_PM_IERAT_MISS_4K, + PNE_PM_MRK_DERAT_REF_16G, + PNE_PM_MRK_DERAT_MISS_16G, + PNE_PM_RUN_PURR, + PNE_PM_RUN_INST_CMPL, + NATNAME_GUARD, +}; + +#endif diff --git a/src/components/perfctr_ppc/power6_events_map.c b/src/components/perfctr_ppc/power6_events_map.c new file mode 100644 index 0000000..02ff5fd --- /dev/null +++ b/src/components/perfctr_ppc/power6_events_map.c @@ -0,0 +1,1126 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: power6_events_map.c +* Author: Corey Ashford +* cjashfor@us.ibm.com +* Mods: +* +* +* (C) Copyright IBM Corporation, 2007. All Rights Reserved. +* Contributed by Corey Ashford +* +* This file MUST be kept synchronised with the events file. 
+* +*/ +#include "perfctr-ppc64.h" + +PPC64_native_map_t native_name_map[PAPI_MAX_NATIVE_EVENTS] = { + {"PM_0INST_FETCH", -1} + , + {"PM_1PLUS_PPC_CMPL", -1} + , + {"PM_1PLUS_PPC_DISP", -1} + , + {"PM_BRU_FIN", -1} + , + {"PM_BR_MPRED_CCACHE", -1} + , + {"PM_BR_MPRED_COUNT", -1} + , + {"PM_BR_MPRED_CR", -1} + , + {"PM_BR_MPRED_TA", -1} + , + {"PM_BR_PRED", -1} + , + {"PM_BR_PRED_CCACHE", -1} + , + {"PM_BR_PRED_CR", -1} + , + {"PM_BR_PRED_LSTACK", -1} + , + {"PM_CYC", -1} + , + {"PM_DATA_FROM_L2", -1} + , + {"PM_DATA_FROM_L35_MOD", -1} + , + {"PM_DATA_FROM_MEM_DP", -1} + , + {"PM_DATA_FROM_RL2L3_MOD", -1} + , + {"PM_DATA_PTEG_1ST_HALF", -1} + , + {"PM_DATA_PTEG_2ND_HALF", -1} + , + {"PM_DATA_PTEG_SECONDARY", -1} + , + {"PM_DC_INV_L2", -1} + , + {"PM_DC_PREF_OUT_OF_STREAMS", -1} + , + {"PM_DC_PREF_STREAM_ALLOC", -1} + , + {"PM_DFU_ADD", -1} + , + {"PM_DFU_ADD_SHIFTED_BOTH", -1} + , + {"PM_DFU_BACK2BACK", -1} + , + {"PM_DFU_CONV", -1} + , + {"PM_DFU_ENC_BCD_DPD", -1} + , + {"PM_DFU_EXP_EQ", -1} + , + {"PM_DFU_FIN", -1} + , + {"PM_DFU_SUBNORM", -1} + , + {"PM_DPU_HELD_COMPLETION", -1} + , + {"PM_DPU_HELD_CR_LOGICAL", -1} + , + {"PM_DPU_HELD_CW", -1} + , + {"PM_DPU_HELD_FPQ", -1} + , + {"PM_DPU_HELD_FPU_CR", -1} + , + {"PM_DPU_HELD_FP_FX_MULT", -1} + , + {"PM_DPU_HELD_FXU_MULTI", -1} + , + {"PM_DPU_HELD_FXU_SOPS", -1} + , + {"PM_DPU_HELD_GPR", -1} + , + {"PM_DPU_HELD_INT", -1} + , + {"PM_DPU_HELD_ISYNC", -1} + , + {"PM_DPU_HELD_ITLB_ISLB", -1} + , + {"PM_DPU_HELD_LLA_END", -1} + , + {"PM_DPU_HELD_LSU", -1} + , + {"PM_DPU_HELD_LSU_SOPS", -1} + , + {"PM_DPU_HELD_MULT_GPR", -1} + , + {"PM_DPU_HELD_RESTART", -1} + , + {"PM_DPU_HELD_RU_WQ", -1} + , + {"PM_DPU_HELD_SMT", -1} + , + {"PM_DPU_HELD_SPR", -1} + , + {"PM_DPU_HELD_STCX_CR", -1} + , + {"PM_DPU_HELD_THERMAL", -1} + , + {"PM_DPU_HELD_THRD_PRIO", -1} + , + {"PM_DPU_HELD_XER", -1} + , + {"PM_DPU_HELD_XTHRD", -1} + , + {"PM_DSLB_MISS", -1} + , + {"PM_EE_OFF_EXT_INT", -1} + , + {"PM_FAB_ADDR_COLLISION", -1} + , + 
{"PM_FAB_CMD_ISSUED", -1} + , + {"PM_FAB_DCLAIM", -1} + , + {"PM_FAB_DMA", -1} + , + {"PM_FAB_MMIO", -1} + , + {"PM_FAB_NODE_PUMP", -1} + , + {"PM_FAB_RETRY_NODE_PUMP", -1} + , + {"PM_FAB_RETRY_SYS_PUMP", -1} + , + {"PM_FAB_SYS_PUMP", -1} + , + {"PM_FLUSH", -1} + , + {"PM_FLUSH_ASYNC", -1} + , + {"PM_FLUSH_FPU", -1} + , + {"PM_FLUSH_FXU", -1} + , + {"PM_FPU0_1FLOP", -1} + , + {"PM_FPU0_DENORM", -1} + , + {"PM_FPU0_FCONV", -1} + , + {"PM_FPU0_FEST", -1} + , + {"PM_FPU0_FIN", -1} + , + {"PM_FPU0_FLOP", -1} + , + {"PM_FPU0_FMA", -1} + , + {"PM_FPU0_FPSCR", -1} + , + {"PM_FPU0_FRSP", -1} + , + {"PM_FPU0_FSQRT_FDIV", -1} + , + {"PM_FPU0_FXDIV", -1} + , + {"PM_FPU0_FXMULT", -1} + , + {"PM_FPU0_SINGLE", -1} + , + {"PM_FPU0_STF", -1} + , + {"PM_FPU0_ST_FOLDED", -1} + , + {"PM_FPU1_1FLOP", -1} + , + {"PM_FPU1_DENORM", -1} + , + {"PM_FPU1_FCONV", -1} + , + {"PM_FPU1_FEST", -1} + , + {"PM_FPU1_FIN", -1} + , + {"PM_FPU1_FLOP", -1} + , + {"PM_FPU1_FMA", -1} + , + {"PM_FPU1_FPSCR", -1} + , + {"PM_FPU1_FRSP", -1} + , + {"PM_FPU1_FSQRT_FDIV", -1} + , + {"PM_FPU1_FXDIV", -1} + , + {"PM_FPU1_FXMULT", -1} + , + {"PM_FPU1_SINGLE", -1} + , + {"PM_FPU1_STF", -1} + , + {"PM_FPU1_ST_FOLDED", -1} + , + {"PM_FPU_1FLOP", -1} + , + {"PM_FPU_FCONV", -1} + , + {"PM_FPU_FIN", -1} + , + {"PM_FPU_FLOP", -1} + , + {"PM_FPU_FXDIV", -1} + , + {"PM_FPU_FXMULT", -1} + , + {"PM_FPU_ISSUE_0", -1} + , + {"PM_FPU_ISSUE_1", -1} + , + {"PM_FPU_ISSUE_2", -1} + , + {"PM_FPU_ISSUE_DIV_SQRT_OVERLAP", -1} + , + {"PM_FPU_ISSUE_OOO", -1} + , + {"PM_FPU_ISSUE_STALL_FPR", -1} + , + {"PM_FPU_ISSUE_STALL_ST", -1} + , + {"PM_FPU_ISSUE_STALL_THRD", -1} + , + {"PM_FPU_ISSUE_STEERING", -1} + , + {"PM_FPU_ISSUE_ST_FOLDED", -1} + , + {"PM_FXU_IDLE", -1} + , + {"PM_FXU_PIPELINED_MULT_DIV", -1} + , + {"PM_GCT_EMPTY_CYC", -1} + , + {"PM_GCT_FULL_CYC", -1} + , + {"PM_GCT_NOSLOT_CYC", -1} + , + {"PM_GXI_ADDR_CYC_BUSY", -1} + , + {"PM_GXI_CYC_BUSY", -1} + , + {"PM_GXI_DATA_CYC_BUSY", -1} + , + {"PM_GXO_ADDR_CYC_BUSY", -1} + , + 
{"PM_GXO_CYC_BUSY", -1} + , + {"PM_GXO_DATA_CYC_BUSY", -1} + , + {"PM_GX_DMA_READ", -1} + , + {"PM_GX_DMA_WRITE", -1} + , + {"PM_IBUF_FULL_CYC", -1} + , + {"PM_IC_DEMAND_L2_BHT_REDIRECT", -1} + , + {"PM_IC_DEMAND_L2_BR_REDIRECT", -1} + , + {"PM_IC_PREF_REQ", -1} + , + {"PM_IC_PREF_WRITE", -1} + , + {"PM_IC_RELOAD_SHR", -1} + , + {"PM_IC_REQ", -1} + , + {"PM_IERAT_MISS", -1} + , + {"PM_IFU_FIN", -1} + , + {"PM_INST_CMPL", -1} + , + {"PM_INST_DISP_LLA", -1} + , + {"PM_INST_FETCH_CYC", -1} + , + {"PM_INST_FROM_L1", -1} + , + {"PM_INST_FROM_L2", -1} + , + {"PM_INST_FROM_L35_MOD", -1} + , + {"PM_INST_FROM_MEM_DP", -1} + , + {"PM_INST_FROM_RL2L3_MOD", -1} + , + {"PM_INST_IMC_MATCH_CMPL", -1} + , + {"PM_INST_PTEG_1ST_HALF", -1} + , + {"PM_INST_PTEG_2ND_HALF", -1} + , + {"PM_INST_PTEG_SECONDARY", -1} + , + {"PM_INST_TABLEWALK_CYC", -1} + , + {"PM_ISLB_MISS", -1} + , + {"PM_ITLB_REF", -1} + , + {"PM_L1_ICACHE_MISS", -1} + , + {"PM_L1_PREF", -1} + , + {"PM_L1_WRITE_CYC", -1} + , + {"PM_L2SA_CASTOUT_MOD", -1} + , + {"PM_L2SA_CASTOUT_SHR", -1} + , + {"PM_L2SA_DC_INV", -1} + , + {"PM_L2SA_IC_INV", -1} + , + {"PM_L2SA_LD_HIT", -1} + , + {"PM_L2SA_LD_MISS_DATA", -1} + , + {"PM_L2SA_LD_MISS_INST", -1} + , + {"PM_L2SA_LD_REQ", -1} + , + {"PM_L2SA_LD_REQ_DATA", -1} + , + {"PM_L2SA_LD_REQ_INST", -1} + , + {"PM_L2SA_MISS", -1} + , + {"PM_L2SA_ST_HIT", -1} + , + {"PM_L2SA_ST_MISS", -1} + , + {"PM_L2SA_ST_REQ", -1} + , + {"PM_L2SB_CASTOUT_MOD", -1} + , + {"PM_L2SB_CASTOUT_SHR", -1} + , + {"PM_L2SB_DC_INV", -1} + , + {"PM_L2SB_IC_INV", -1} + , + {"PM_L2SB_LD_HIT", -1} + , + {"PM_L2SB_LD_MISS_DATA", -1} + , + {"PM_L2SB_LD_MISS_INST", -1} + , + {"PM_L2SB_LD_REQ", -1} + , + {"PM_L2SB_LD_REQ_DATA", -1} + , + {"PM_L2SB_LD_REQ_INST", -1} + , + {"PM_L2SB_MISS", -1} + , + {"PM_L2SB_ST_HIT", -1} + , + {"PM_L2SB_ST_MISS", -1} + , + {"PM_L2SB_ST_REQ", -1} + , + {"PM_L2_CASTOUT_MOD", -1} + , + {"PM_L2_LD_REQ_DATA", -1} + , + {"PM_L2_LD_REQ_INST", -1} + , + {"PM_L2_PREF_LD", -1} + , + 
{"PM_L2_PREF_ST", -1} + , + {"PM_L2_ST_MISS_DATA", -1} + , + {"PM_L3SA_HIT", -1} + , + {"PM_L3SA_MISS", -1} + , + {"PM_L3SA_REF", -1} + , + {"PM_L3SB_HIT", -1} + , + {"PM_L3SB_MISS", -1} + , + {"PM_L3SB_REF", -1} + , + {"PM_LARX", -1} + , + {"PM_LARX_L1HIT", -1} + , + {"PM_LD_MISS_L1", -1} + , + {"PM_LD_MISS_L1_CYC", -1} + , + {"PM_LD_REF_L1", -1} + , + {"PM_LD_REF_L1_BOTH", -1} + , + {"PM_LD_REQ_L2", -1} + , + {"PM_LSU0_DERAT_MISS", -1} + , + {"PM_LSU0_LDF", -1} + , + {"PM_LSU0_NCLD", -1} + , + {"PM_LSU0_NCST", -1} + , + {"PM_LSU0_REJECT", -1} + , + {"PM_LSU0_REJECT_DERAT_MPRED", -1} + , + {"PM_LSU0_REJECT_EXTERN", -1} + , + {"PM_LSU0_REJECT_L2MISS", -1} + , + {"PM_LSU0_REJECT_L2_CORR", -1} + , + {"PM_LSU0_REJECT_LHS", -1} + , + {"PM_LSU0_REJECT_NO_SCRATCH", -1} + , + {"PM_LSU0_REJECT_PARTIAL_SECTOR", -1} + , + {"PM_LSU0_REJECT_SET_MPRED", -1} + , + {"PM_LSU0_REJECT_STQ_FULL", -1} + , + {"PM_LSU0_REJECT_ULD", -1} + , + {"PM_LSU0_REJECT_UST", -1} + , + {"PM_LSU1_DERAT_MISS", -1} + , + {"PM_LSU1_LDF", -1} + , + {"PM_LSU1_REJECT", -1} + , + {"PM_LSU1_REJECT_DERAT_MPRED", -1} + , + {"PM_LSU1_REJECT_EXTERN", -1} + , + {"PM_LSU1_REJECT_L2_CORR", -1} + , + {"PM_LSU1_REJECT_LHS", -1} + , + {"PM_LSU1_REJECT_NO_SCRATCH", -1} + , + {"PM_LSU1_REJECT_PARTIAL_SECTOR", -1} + , + {"PM_LSU1_REJECT_SET_MPRED", -1} + , + {"PM_LSU1_REJECT_STQ_FULL", -1} + , + {"PM_LSU1_REJECT_ULD", -1} + , + {"PM_LSU1_REJECT_UST", -1} + , + {"PM_LSU_BOTH_BUS", -1} + , + {"PM_LSU_DERAT_MISS_CYC", -1} + , + {"PM_LSU_FLUSH_ALIGN", -1} + , + {"PM_LSU_FLUSH_DSI", -1} + , + {"PM_LSU_LDF_BOTH", -1} + , + {"PM_LSU_LMQ_FULL_CYC", -1} + , + {"PM_LSU_REJECT_L2_CORR", -1} + , + {"PM_LSU_REJECT_LHS", -1} + , + {"PM_LSU_REJECT_PARTIAL_SECTOR", -1} + , + {"PM_LSU_REJECT_STEAL", -1} + , + {"PM_LSU_REJECT_STQ_FULL", -1} + , + {"PM_LSU_REJECT_ULD", -1} + , + {"PM_LSU_REJECT_UST_BOTH", -1} + , + {"PM_LSU_ST_CHAINED", -1} + , + {"PM_LWSYNC", -1} + , + {"PM_MEM0_DP_CL_WR_GLOB", -1} + , + {"PM_MEM0_DP_CL_WR_LOC", -1} + , 
+ {"PM_MEM0_DP_RQ_GLOB_LOC", -1} + , + {"PM_MEM0_DP_RQ_LOC_GLOB", -1} + , + {"PM_MEM1_DP_CL_WR_GLOB", -1} + , + {"PM_MEM1_DP_CL_WR_LOC", -1} + , + {"PM_MEM1_DP_RQ_GLOB_LOC", -1} + , + {"PM_MEM1_DP_RQ_LOC_GLOB", -1} + , + {"PM_MEM_DP_CL_WR_LOC", -1} + , + {"PM_MEM_DP_RQ_GLOB_LOC", -1} + , + {"PM_MRK_BR_TAKEN", -1} + , + {"PM_MRK_DATA_FROM_L2", -1} + , + {"PM_MRK_DATA_FROM_L2MISS", -1} + , + {"PM_MRK_DATA_FROM_L35_MOD", -1} + , + {"PM_MRK_DATA_FROM_MEM_DP", -1} + , + {"PM_MRK_DATA_FROM_RL2L3_MOD", -1} + , + {"PM_MRK_DTLB_REF", -1} + , + {"PM_MRK_FPU0_FIN", -1} + , + {"PM_MRK_FPU1_FIN", -1} + , + {"PM_MRK_INST_DISP", -1} + , + {"PM_MRK_INST_ISSUED", -1} + , + {"PM_MRK_LSU0_REJECT_L2MISS", -1} + , + {"PM_MRK_LSU0_REJECT_LHS", -1} + , + {"PM_MRK_LSU0_REJECT_ULD", -1} + , + {"PM_MRK_LSU0_REJECT_UST", -1} + , + {"PM_MRK_LSU1_REJECT_LHS", -1} + , + {"PM_MRK_LSU1_REJECT_ULD", -1} + , + {"PM_MRK_LSU1_REJECT_UST", -1} + , + {"PM_MRK_LSU_REJECT_ULD", -1} + , + {"PM_MRK_PTEG_FROM_L2", -1} + , + {"PM_MRK_PTEG_FROM_L35_MOD", -1} + , + {"PM_MRK_PTEG_FROM_MEM_DP", -1} + , + {"PM_MRK_PTEG_FROM_RL2L3_MOD", -1} + , + {"PM_MRK_STCX_FAIL", -1} + , + {"PM_MRK_ST_CMPL", -1} + , + {"PM_MRK_VMX0_LD_WRBACK", -1} + , + {"PM_MRK_VMX1_LD_WRBACK", -1} + , + {"PM_MRK_VMX_COMPLEX_ISSUED", -1} + , + {"PM_MRK_VMX_FLOAT_ISSUED", -1} + , + {"PM_MRK_VMX_PERMUTE_ISSUED", -1} + , + {"PM_MRK_VMX_SIMPLE_ISSUED", -1} + , + {"PM_MRK_VMX_ST_ISSUED", -1} + , + {"PM_NO_ITAG_CYC", -1} + , + {"PM_PMC2_SAVED", -1} + , + {"PM_PMC4_OVERFLOW", -1} + , + {"PM_PMC4_REWIND", -1} + , + {"PM_PMC5_OVERFLOW", -1} + , + {"PM_PTEG_FROM_L2", -1} + , + {"PM_PTEG_FROM_L2MISS", -1} + , + {"PM_PTEG_FROM_L35_MOD", -1} + , + {"PM_PTEG_FROM_MEM_DP", -1} + , + {"PM_PTEG_FROM_RL2L3_MOD", -1} + , + {"PM_PTEG_RELOAD_VALID", -1} + , + {"PM_PURR", -1} + , + {"PM_RUN_CYC", -1} + , + {"PM_SLB_MISS", -1} + , + {"PM_STCX", -1} + , + {"PM_STCX_CANCEL", -1} + , + {"PM_STCX_FAIL", -1} + , + {"PM_ST_FIN", -1} + , + {"PM_ST_HIT_L2", -1} + , + 
{"PM_ST_MISS_L1", -1} + , + {"PM_ST_REF_L1", -1} + , + {"PM_SUSPENDED", -1} + , + {"PM_SYNC_CYC", -1} + , + {"PM_TB_BIT_TRANS", -1} + , + {"PM_THRD_L2MISS", -1} + , + {"PM_THRD_ONE_RUN_CYC", -1} + , + {"PM_THRD_PRIO_0_CYC", -1} + , + {"PM_THRD_PRIO_7_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_0_CYC", -1} + , + {"PM_THRD_SEL_T0", -1} + , + {"PM_TLB_REF", -1} + , + {"PM_VMX0_INST_ISSUED", -1} + , + {"PM_VMX0_LD_ISSUED", -1} + , + {"PM_VMX0_LD_WRBACK", -1} + , + {"PM_VMX0_STALL", -1} + , + {"PM_VMX1_INST_ISSUED", -1} + , + {"PM_VMX1_LD_ISSUED", -1} + , + {"PM_VMX1_LD_WRBACK", -1} + , + {"PM_VMX1_STALL", -1} + , + {"PM_VMX_COMPLEX_ISUED", -1} + , + {"PM_VMX_FLOAT_ISSUED", -1} + , + {"PM_VMX_FLOAT_MULTICYCLE", -1} + , + {"PM_VMX_PERMUTE_ISSUED", -1} + , + {"PM_VMX_RESULT_SAT_0_1", -1} + , + {"PM_VMX_RESULT_SAT_1", -1} + , + {"PM_VMX_SIMPLE_ISSUED", -1} + , + {"PM_VMX_ST_ISSUED", -1} + , + {"PM_0INST_FETCH_COUNT", -1} + , + {"PM_IBUF_FULL_COUNT", -1} + , + {"PM_GCT_FULL_COUNT", -1} + , + {"PM_NO_ITAG_COUNT", -1} + , + {"PM_INST_TABLEWALK_COUNT", -1} + , + {"PM_SYNC_COUNT", -1} + , + {"PM_RUN_COUNT", -1} + , + {"PM_THRD_ONE_RUN_COUNT", -1} + , + {"PM_LLA_CYC", -1} + , + {"PM_NOT_LLA_CYC", -1} + , + {"PM_LLA_COUNT", -1} + , + {"PM_DPU_HELD_THERMAL_COUNT", -1} + , + {"PM_GCT_NOSLOT_COUNT", -1} + , + {"PM_DERAT_REF_4K", -1} + , + {"PM_DERAT_MISS_4K", -1} + , + {"PM_IERAT_MISS_16G", -1} + , + {"PM_MRK_DERAT_REF_64K", -1} + , + {"PM_MRK_DERAT_MISS_64K", -1} + , + {"PM_BR_TAKEN", -1} + , + {"PM_DATA_FROM_DL2L3_SHR_CYC", -1} + , + {"PM_DATA_FROM_DMEM", -1} + , + {"PM_DATA_FROM_DMEM_CYC", -1} + , + {"PM_DATA_FROM_L21", -1} + , + {"PM_DATA_FROM_L25_SHR_CYC", -1} + , + {"PM_DATA_FROM_L2MISS", -1} + , + {"PM_DATA_FROM_L2_CYC", -1} + , + {"PM_DATA_FROM_L35_SHR", -1} + , + {"PM_DATA_FROM_L35_SHR_CYC", -1} + , + {"PM_DATA_FROM_L3_CYC", -1} + , + {"PM_DATA_FROM_LMEM_CYC", -1} + , + {"PM_DATA_FROM_RL2L3_SHR", -1} + , + {"PM_DATA_FROM_RL2L3_SHR_CYC", -1} + , + {"PM_DPU_HELD", -1} + , + 
{"PM_DPU_HELD_POWER", -1} + , + {"PM_DPU_WT_IC_MISS", -1} + , + {"PM_EXT_INT", -1} + , + {"PM_FAB_CMD_RETRIED", -1} + , + {"PM_FPU_DENORM", -1} + , + {"PM_FPU_FMA", -1} + , + {"PM_FPU_FPSCR", -1} + , + {"PM_FPU_FRSP", -1} + , + {"PM_FPU_FSQRT_FDIV", -1} + , + {"PM_FXU_BUSY", -1} + , + {"PM_HV_CYC", -1} + , + {"PM_IC_INV_L2", -1} + , + {"PM_INST_DISP", -1} + , + {"PM_INST_FROM_DMEM", -1} + , + {"PM_INST_FROM_L21", -1} + , + {"PM_INST_FROM_L35_SHR", -1} + , + {"PM_INST_FROM_RL2L3_SHR", -1} + , + {"PM_L2_CASTOUT_SHR", -1} + , + {"PM_L2_LD_MISS_DATA", -1} + , + {"PM_L2_LD_MISS_INST", -1} + , + {"PM_L2_MISS", -1} + , + {"PM_L2_ST_REQ_DATA", -1} + , + {"PM_LD_HIT_L2", -1} + , + {"PM_LSU_DERAT_MISS", -1} + , + {"PM_LSU_LDF", -1} + , + {"PM_LSU_LMQ_SRQ_EMPTY_CYC", -1} + , + {"PM_LSU_REJECT_DERAT_MPRED", -1} + , + {"PM_LSU_REJECT_LHS_BOTH", -1} + , + {"PM_LSU_REJECT_NO_SCRATCH", -1} + , + {"PM_LSU_REJECT_SET_MPRED", -1} + , + {"PM_LSU_REJECT_SLOW", -1} + , + {"PM_LSU_REJECT_ULD_BOTH", -1} + , + {"PM_LSU_REJECT_UST", -1} + , + {"PM_MEM_DP_CL_WR_GLOB", -1} + , + {"PM_MEM_DP_RQ_LOC_GLOB", -1} + , + {"PM_MRK_DATA_FROM_DMEM", -1} + , + {"PM_MRK_DATA_FROM_L21", -1} + , + {"PM_MRK_DATA_FROM_L35_SHR", -1} + , + {"PM_MRK_DATA_FROM_RL2L3_SHR", -1} + , + {"PM_MRK_FPU_FIN", -1} + , + {"PM_MRK_FXU_FIN", -1} + , + {"PM_MRK_IFU_FIN", -1} + , + {"PM_MRK_LD_MISS_L1", -1} + , + {"PM_MRK_LSU_REJECT_UST", -1} + , + {"PM_MRK_PTEG_FROM_DMEM", -1} + , + {"PM_MRK_PTEG_FROM_L21", -1} + , + {"PM_MRK_PTEG_FROM_L35_SHR", -1} + , + {"PM_MRK_PTEG_FROM_RL2L3_SHR", -1} + , + {"PM_MRK_ST_GPS", -1} + , + {"PM_PMC1_OVERFLOW", -1} + , + {"PM_PTEG_FROM_DMEM", -1} + , + {"PM_PTEG_FROM_L21", -1} + , + {"PM_PTEG_FROM_L35_SHR", -1} + , + {"PM_PTEG_FROM_RL2L3_SHR", -1} + , + {"PM_ST_REF_L1_BOTH", -1} + , + {"PM_ST_REQ_L2", -1} + , + {"PM_THRD_GRP_CMPL_BOTH_CYC", -1} + , + {"PM_THRD_PRIO_1_CYC", -1} + , + {"PM_THRD_PRIO_6_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_1or2_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_minus1or2_CYC", 
-1} + , + {"PM_HV_COUNT", -1} + , + {"PM_DPU_HELD_COUNT", -1} + , + {"PM_DPU_HELD_POWER_COUNT", -1} + , + {"PM_DPU_WT_IC_MISS_COUNT", -1} + , + {"PM_GCT_EMPTY_COUNT", -1} + , + {"PM_LSU_LMQ_SRQ_EMPTY_COUNT", -1} + , + {"PM_DERAT_REF_64K", -1} + , + {"PM_DERAT_MISS_64K", -1} + , + {"PM_IERAT_MISS_16M", -1} + , + {"PM_MRK_DERAT_REF_4K", -1} + , + {"PM_MRK_DERAT_MISS_4K", -1} + , + {"PM_DATA_FROM_DL2L3_SHR", -1} + , + {"PM_DATA_FROM_L25_MOD", -1} + , + {"PM_DATA_FROM_L3", -1} + , + {"PM_DATA_FROM_L3MISS", -1} + , + {"PM_DATA_FROM_RMEM", -1} + , + {"PM_DPU_WT", -1} + , + {"PM_FPU_STF", -1} + , + {"PM_FPU_ST_FOLDED", -1} + , + {"PM_FREQ_DOWN", -1} + , + {"PM_FXU0_BUSY_FXU1_IDLE", -1} + , + {"PM_FXU0_FIN", -1} + , + {"PM_INST_FROM_DL2L3_SHR", -1} + , + {"PM_INST_FROM_L25_MOD", -1} + , + {"PM_INST_FROM_L3", -1} + , + {"PM_INST_FROM_L3MISS", -1} + , + {"PM_INST_FROM_RMEM", -1} + , + {"PM_L1_DCACHE_RELOAD_VALID", -1} + , + {"PM_LSU_LMQ_SRQ_EMPTY_BOTH_CYC", -1} + , + {"PM_LSU_REJECT_EXTERN", -1} + , + {"PM_LSU_REJECT_FAST", -1} + , + {"PM_MRK_BR_MPRED", -1} + , + {"PM_MRK_DATA_FROM_DL2L3_SHR", -1} + , + {"PM_MRK_DATA_FROM_L25_MOD", -1} + , + {"PM_MRK_DATA_FROM_L3", -1} + , + {"PM_MRK_DATA_FROM_L3MISS", -1} + , + {"PM_MRK_DATA_FROM_RMEM", -1} + , + {"PM_MRK_DFU_FIN", -1} + , + {"PM_MRK_INST_FIN", -1} + , + {"PM_MRK_PTEG_FROM_DL2L3_SHR", -1} + , + {"PM_MRK_PTEG_FROM_L25_MOD", -1} + , + {"PM_MRK_PTEG_FROM_L3", -1} + , + {"PM_MRK_PTEG_FROM_L3MISS", -1} + , + {"PM_MRK_PTEG_FROM_RMEM", -1} + , + {"PM_MRK_ST_CMPL_INT", -1} + , + {"PM_PMC2_OVERFLOW", -1} + , + {"PM_PMC2_REWIND", -1} + , + {"PM_PMC4_SAVED", -1} + , + {"PM_PMC6_OVERFLOW", -1} + , + {"PM_PTEG_FROM_DL2L3_SHR", -1} + , + {"PM_PTEG_FROM_L25_MOD", -1} + , + {"PM_PTEG_FROM_L3", -1} + , + {"PM_PTEG_FROM_L3MISS", -1} + , + {"PM_PTEG_FROM_RMEM", -1} + , + {"PM_THERMAL_MAX", -1} + , + {"PM_THRD_CONC_RUN_INST", -1} + , + {"PM_THRD_PRIO_2_CYC", -1} + , + {"PM_THRD_PRIO_5_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_3or4_CYC", -1} + , + 
{"PM_THRD_PRIO_DIFF_minus3or4_CYC", -1} + , + {"PM_THRESH_TIMEO", -1} + , + {"PM_DPU_WT_COUNT", -1} + , + {"PM_LSU_LMQ_SRQ_EMPTY_BOTH_COUNT", -1} + , + {"PM_DERAT_REF_16M", -1} + , + {"PM_DERAT_MISS_16M", -1} + , + {"PM_IERAT_MISS_64K", -1} + , + {"PM_MRK_DERAT_REF_16M", -1} + , + {"PM_MRK_DERAT_MISS_16M", -1} + , + {"PM_BR_MPRED", -1} + , + {"PM_DATA_FROM_DL2L3_MOD", -1} + , + {"PM_DATA_FROM_DL2L3_MOD_CYC", -1} + , + {"PM_DATA_FROM_L21_CYC", -1} + , + {"PM_DATA_FROM_L25_SHR", -1} + , + {"PM_DATA_FROM_L25_MOD_CYC", -1} + , + {"PM_DATA_FROM_L35_MOD_CYC", -1} + , + {"PM_DATA_FROM_LMEM", -1} + , + {"PM_DATA_FROM_MEM_DP_CYC", -1} + , + {"PM_DATA_FROM_RL2L3_MOD_CYC", -1} + , + {"PM_DATA_FROM_RMEM_CYC", -1} + , + {"PM_DPU_WT_BR_MPRED", -1} + , + {"PM_FPU_FEST", -1} + , + {"PM_FPU_SINGLE", -1} + , + {"PM_FREQ_UP", -1} + , + {"PM_FXU1_BUSY_FXU0_IDLE", -1} + , + {"PM_FXU1_FIN", -1} + , + {"PM_INST_FROM_DL2L3_MOD", -1} + , + {"PM_INST_FROM_L25_SHR", -1} + , + {"PM_INST_FROM_L2MISS", -1} + , + {"PM_INST_FROM_LMEM", -1} + , + {"PM_LSU_REJECT", -1} + , + {"PM_LSU_SRQ_EMPTY_CYC", -1} + , + {"PM_MRK_DATA_FROM_DL2L3_MOD", -1} + , + {"PM_MRK_DATA_FROM_L25_SHR", -1} + , + {"PM_MRK_DATA_FROM_LMEM", -1} + , + {"PM_MRK_INST_TIMEO", -1} + , + {"PM_MRK_LSU_DERAT_MISS", -1} + , + {"PM_MRK_LSU_FIN", -1} + , + {"PM_MRK_LSU_REJECT_LHS", -1} + , + {"PM_MRK_PTEG_FROM_DL2L3_MOD", -1} + , + {"PM_MRK_PTEG_FROM_L25_SHR", -1} + , + {"PM_MRK_PTEG_FROM_L2MISS", -1} + , + {"PM_MRK_PTEG_FROM_LMEM", -1} + , + {"PM_PMC3_OVERFLOW", -1} + , + {"PM_PTEG_FROM_DL2L3_MOD", -1} + , + {"PM_PTEG_FROM_L25_SHR", -1} + , + {"PM_PTEG_FROM_LMEM", -1} + , + {"PM_THRD_BOTH_RUN_CYC", -1} + , + {"PM_THRD_LLA_BOTH_CYC", -1} + , + {"PM_THRD_PRIO_3_CYC", -1} + , + {"PM_THRD_PRIO_4_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_5or6_CYC", -1} + , + {"PM_THRD_PRIO_DIFF_minus5or6_CYC", -1} + , + {"PM_THRD_BOTH_RUN_COUNT", -1} + , + {"PM_DPU_WT_BR_MPRED_COUNT", -1} + , + {"PM_LSU_SRQ_EMPTY_COUNT", -1} + , + {"PM_DERAT_REF_16G", -1} + , + 
{"PM_DERAT_MISS_16G", -1} + , + {"PM_IERAT_MISS_4K", -1} + , + {"PM_MRK_DERAT_REF_16G", -1} + , + {"PM_MRK_DERAT_MISS_16G", -1} + , + {"PM_RUN_PURR", -1} + , + {"PM_RUN_INST_CMPL", -1} +}; diff --git a/src/components/perfctr_ppc/power7_events.h b/src/components/perfctr_ppc/power7_events.h new file mode 100644 index 0000000..e73263f --- /dev/null +++ b/src/components/perfctr_ppc/power7_events.h @@ -0,0 +1,578 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +#ifndef _PAPI_POWER7_EVENTS_H +#define _PAPI_POWER7_EVENTS_H + +/* +* File: power7_events.h +* CVS: +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +* +* (C) Copyright IBM Corporation, 2010. All Rights Reserved. +* Contributed by Maynard Johnson +* +* +*/ +#include "papiStdEventDefs.h" + +#define GROUP_INTS 9 +#define PAPI_MAX_NATIVE_EVENTS 1024 +#define MAX_GROUPS (GROUP_INTS * 32) +#define MAX_NATNAME_MAP_INDEX 545 + + +enum native_name { +PNE_PM_1PLUS_PPC_CMPL = PAPI_NATIVE_MASK, + PNE_PM_ANY_THRD_RUN_CYC, + PNE_PM_BC_PLUS_8_CONV, + PNE_PM_BC_PLUS_8_RSLV_TAKEN, + PNE_PM_BR_MPRED_CCACHE, + PNE_PM_BR_MPRED_CR, + PNE_PM_BR_MPRED_CR_TA, + PNE_PM_BR_MPRED_LSTACK, + PNE_PM_BR_MPRED_TA, + PNE_PM_BR_PRED, + PNE_PM_BR_PRED_CCACHE, + PNE_PM_BR_PRED_CR, + PNE_PM_BR_PRED_CR_TA, + PNE_PM_BR_PRED_LSTACK, + PNE_PM_BR_PRED_TA, + PNE_PM_BR_UNCOND, + PNE_PM_BRU_FIN, + PNE_PM_BTAC_HIT, + PNE_PM_BTAC_MISS, + PNE_PM_CMPLU_STALL_END_GCT_NOSLOT, + PNE_PM_CMPLU_STALL_THRD, + PNE_PM_CYC, + PNE_PM_DATA_FROM_DMEM, + PNE_PM_DATA_FROM_L2, + PNE_PM_DATA_FROM_L3, + PNE_PM_DATA_FROM_L31_MOD, + PNE_PM_DATA_FROM_L31_SHR, + PNE_PM_DATA_FROM_RL2L3_MOD, + PNE_PM_DATA_FROM_RL2L3_SHR, + PNE_PM_DC_PREF_DST, + PNE_PM_DERAT_MISS_4K, + PNE_PM_DISP_CLB_HELD, + PNE_PM_DISP_CLB_HELD_BAL, + PNE_PM_DISP_CLB_HELD_RES, + PNE_PM_DISP_CLB_HELD_SB, + PNE_PM_DISP_CLB_HELD_SYNC, + PNE_PM_DISP_CLB_HELD_TLBIE, + PNE_PM_DISP_HELD, + PNE_PM_DSEG, + PNE_PM_DSLB_MISS, + PNE_PM_DTLB_MISS_16G, + 
PNE_PM_EE_OFF_EXT_INT, + PNE_PM_FLOP, + PNE_PM_FLUSH_BR_MPRED, + PNE_PM_FLUSH_DISP, + PNE_PM_FLUSH_DISP_SB, + PNE_PM_FLUSH_DISP_SYNC, + PNE_PM_FLUSH_DISP_TLBIE, + PNE_PM_FLUSH_PARTIAL, + PNE_PM_FXU_IDLE, + PNE_PM_FXU0_FIN, + PNE_PM_GCT_FULL_CYC, + PNE_PM_GCT_NOSLOT_CYC, + PNE_PM_GCT_UTIL_1_TO_2_SLOTS, + PNE_PM_GCT_UTIL_11_PLUS_SLOTS, + PNE_PM_GCT_UTIL_3_TO_6_SLOTS, + PNE_PM_GCT_UTIL_7_TO_10_SLOTS, + PNE_PM_GRP_BR_MPRED_NONSPEC, + PNE_PM_GRP_IC_MISS_NONSPEC, + PNE_PM_GRP_MRK, + PNE_PM_GRP_MRK_CYC, + PNE_PM_IBUF_FULL_CYC, + PNE_PM_IC_BANK_CONFLICT, + PNE_PM_IC_DEMAND_CYC, + PNE_PM_IC_DEMAND_L2_BHT_REDIRECT, + PNE_PM_IC_DEMAND_L2_BR_ALL, + PNE_PM_IC_DEMAND_L2_BR_REDIRECT, + PNE_PM_IC_DEMAND_REQ, + PNE_PM_IC_PREF_CANCEL_ALL, + PNE_PM_IC_PREF_CANCEL_HIT, + PNE_PM_IC_PREF_CANCEL_L2, + PNE_PM_IC_PREF_CANCEL_PAGE, + PNE_PM_IC_PREF_REQ, + PNE_PM_IC_PREF_WRITE, + PNE_PM_IC_RELOAD_SHR, + PNE_PM_IC_REQ_ALL, + PNE_PM_IC_WRITE_ALL, + PNE_PM_IERAT_MISS, + PNE_PM_IERAT_WR_64K, + PNE_PM_IERAT_XLATE_WR_16MPLUS, + PNE_PM_INST_CMPL, + PNE_PM_INST_FROM_DMEM, + PNE_PM_INST_FROM_L1, + PNE_PM_INST_FROM_L2, + PNE_PM_INST_FROM_L3, + PNE_PM_INST_FROM_L31_MOD, + PNE_PM_INST_FROM_L31_SHR, + PNE_PM_INST_FROM_PREF, + PNE_PM_INST_FROM_RL2L3_MOD, + PNE_PM_INST_FROM_RL2L3_SHR, + PNE_PM_INST_IMC_MATCH_CMPL, + PNE_PM_INST_PTEG_FROM_L2, + PNE_PM_INST_PTEG_FROM_L31_MOD, + PNE_PM_INST_PTEG_FROM_RL2L3_MOD, + PNE_PM_IOPS_CMPL, + PNE_PM_ISEG, + PNE_PM_ISLB_MISS, + PNE_PM_L1_DEMAND_WRITE, + PNE_PM_L1_PREF, + PNE_PM_L2_CASTOUT_MOD, + PNE_PM_L2_CASTOUT_SHR, + PNE_PM_L2_CO_FAIL_BUSY, + PNE_PM_L2_GLOB_GUESS_CORRECT, + PNE_PM_L2_LD, + PNE_PM_L2_LDST, + PNE_PM_L2_LOC_GUESS_CORRECT, + PNE_PM_L2_RCLD_DISP, + PNE_PM_L2_RCLD_DISP_FAIL_ADDR, + PNE_PM_L2_ST, + PNE_PM_L3_HIT, + PNE_PM_L3_MISS, + PNE_PM_L3_PREF_LD, + PNE_PM_L3_PREF_LDST, + PNE_PM_L3_PREF_ST, + PNE_PM_LARX_LSU, + PNE_PM_LARX_LSU0, + PNE_PM_LARX_LSU1, + PNE_PM_LD_REF_L1, + PNE_PM_LD_REF_L1_LSU0, + PNE_PM_LD_REF_L1_LSU1, + PNE_PM_LSU_DC_PREF_STREAM_ALLOC, + 
PNE_PM_LSU_DC_PREF_STREAM_CONFIRM, + PNE_PM_LSU_DC_PREF_STRIDED_STREAM_CONFIRM, + PNE_PM_LSU_DCACHE_RELOAD_VALID, + PNE_PM_LSU_FLUSH, + PNE_PM_LSU_FLUSH_LRQ, + PNE_PM_LSU_FLUSH_SRQ, + PNE_PM_LSU_FLUSH_ULD, + PNE_PM_LSU_FLUSH_UST, + PNE_PM_LSU_FX_FIN, + PNE_PM_LSU_LDF, + PNE_PM_LSU_LDX, + PNE_PM_LSU_LMQ_FULL_CYC, + PNE_PM_LSU_LMQ_S0_ALLOC, + PNE_PM_LSU_LMQ_S0_VALID, + PNE_PM_LSU_LRQ_S0_ALLOC, + PNE_PM_LSU_LRQ_S0_VALID, + PNE_PM_LSU_NCLD, + PNE_PM_LSU_NCST, + PNE_PM_LSU_PARTIAL_CDF, + PNE_PM_LSU_REJECT, + PNE_PM_LSU_REJECT_LHS, + PNE_PM_LSU_REJECT_LMQ_FULL, + PNE_PM_LSU_REJECT_SET_MPRED, + PNE_PM_LSU_SET_MPRED, + PNE_PM_LSU_SRQ_FULL_CYC, + PNE_PM_LSU_SRQ_S0_ALLOC, + PNE_PM_LSU_SRQ_S0_VALID, + PNE_PM_LSU_SRQ_STFWD, + PNE_PM_LSU_SRQ_SYNC_COUNT, + PNE_PM_LSU_SRQ_SYNC_CYC, + PNE_PM_LSU_TWO_TABLEWALK_CYC, + PNE_PM_LSU0_DC_PREF_STREAM_ALLOC, + PNE_PM_LSU0_DC_PREF_STREAM_CONFIRM, + PNE_PM_LSU0_DC_PREF_STREAM_CONFIRM_STRIDE, + PNE_PM_LSU0_FLUSH_LRQ, + PNE_PM_LSU0_FLUSH_SRQ, + PNE_PM_LSU0_FLUSH_ULD, + PNE_PM_LSU0_FLUSH_UST, + PNE_PM_LSU0_L1_PREF, + PNE_PM_LSU0_L1_SW_PREF, + PNE_PM_LSU0_LDF, + PNE_PM_LSU0_LDX, + PNE_PM_LSU0_LMQ_LHR_MERGE, + PNE_PM_LSU0_NCLD, + PNE_PM_LSU0_REJECT_LHS, + PNE_PM_LSU0_REJECT_LMQ_FULL, + PNE_PM_LSU0_SRQ_STFWD, + PNE_PM_LSU1_DC_PREF_STREAM_ALLOC, + PNE_PM_LSU1_DC_PREF_STREAM_CONFIRM, + PNE_PM_LSU1_DC_PREF_STREAM_CONFIRM_STRIDE, + PNE_PM_LSU1_FLUSH_LRQ, + PNE_PM_LSU1_FLUSH_SRQ, + PNE_PM_LSU1_FLUSH_ULD, + PNE_PM_LSU1_FLUSH_UST, + PNE_PM_LSU1_L1_PREF, + PNE_PM_LSU1_L1_SW_PREF, + PNE_PM_LSU1_LDF, + PNE_PM_LSU1_LDX, + PNE_PM_LSU1_LMQ_LHR_MERGE, + PNE_PM_LSU1_NCLD, + PNE_PM_LSU1_REJECT_LHS, + PNE_PM_LSU1_REJECT_LMQ_FULL, + PNE_PM_LSU1_SRQ_STFWD, + PNE_PM_LWSYNC, + PNE_PM_LWSYNC_HELD, + PNE_PM_MRK_BR_TAKEN, + PNE_PM_MRK_DATA_FROM_DMEM, + PNE_PM_MRK_DATA_FROM_L2, + PNE_PM_MRK_DATA_FROM_L3, + PNE_PM_MRK_DATA_FROM_L31_MOD, + PNE_PM_MRK_DATA_FROM_L31_SHR, + PNE_PM_MRK_DATA_FROM_RL2L3_MOD, + PNE_PM_MRK_DATA_FROM_RL2L3_SHR, + PNE_PM_MRK_DERAT_MISS_4K, + 
PNE_PM_MRK_DTLB_MISS_16G, + PNE_PM_MRK_FIN_STALL_CYC, + PNE_PM_MRK_FIN_STALL_CYC_COUNT, + PNE_PM_MRK_INST_ISSUED, + PNE_PM_MRK_LD_MISS_EXPOSED_CYC, + PNE_PM_MRK_LD_MISS_EXPOSED_CYC_COUNT, + PNE_PM_MRK_LSU_FLUSH, + PNE_PM_MRK_LSU_FLUSH_LRQ, + PNE_PM_MRK_LSU_FLUSH_SRQ, + PNE_PM_MRK_LSU_FLUSH_ULD, + PNE_PM_MRK_LSU_FLUSH_UST, + PNE_PM_MRK_LSU_PARTIAL_CDF, + PNE_PM_MRK_LSU_REJECT_LHS, + PNE_PM_MRK_PTEG_FROM_L2, + PNE_PM_MRK_PTEG_FROM_L31_MOD, + PNE_PM_MRK_PTEG_FROM_RL2L3_MOD, + PNE_PM_MRK_ST_CMPL, + PNE_PM_MRK_STCX_FAIL, + PNE_PM_PB_NODE_PUMP, + PNE_PM_MEM0_RQ_DISP, + PNE_PM_NEST_PAIR0_ADD, + PNE_PM_NEST_PAIR0_AND, + PNE_PM_PMC2_SAVED, + PNE_PM_PMC4_OVERFLOW, + PNE_PM_PMC4_REWIND, + PNE_PM_PMC5_OVERFLOW, + PNE_PM_POWER_EVENT1, + PNE_PM_PTEG_FROM_L2, + PNE_PM_PTEG_FROM_L31_MOD, + PNE_PM_PTEG_FROM_RL2L3_MOD, + PNE_PM_RUN_SPURR, + PNE_PM_SEG_EXCEPTION, + PNE_PM_SHL_CREATED, + PNE_PM_SHL_DEALLOCATED, + PNE_PM_SHL_MATCH, + PNE_PM_SHL_MERGED, + PNE_PM_SLB_MISS, + PNE_PM_SNOOP_TLBIE, + PNE_PM_STCX_CMPL, + PNE_PM_STCX_FAIL, + PNE_PM_SUSPENDED, + PNE_PM_TABLEWALK_CYC, + PNE_PM_THERMAL_WARN, + PNE_PM_THRD_1_RUN_CYC, + PNE_PM_THRD_3_CONC_RUN_INST, + PNE_PM_THRD_GRP_CMPL_BOTH_CYC, + PNE_PM_THRD_PRIO_0_1_CYC, + PNE_PM_THRD_PRIO_2_3_CYC, + PNE_PM_THRD_PRIO_4_5_CYC, + PNE_PM_THRD_PRIO_6_7_CYC, + PNE_PM_THRESH_TIMEO, + PNE_PM_VMX_RESULT_SAT_1, + PNE_PM_VSU_1FLOP, + PNE_PM_VSU_2FLOP, + PNE_PM_VSU_2FLOP_DOUBLE, + PNE_PM_VSU_4FLOP, + PNE_PM_VSU_8FLOP, + PNE_PM_VSU_DENORM, + PNE_PM_VSU_FCONV, + PNE_PM_VSU_FEST, + PNE_PM_VSU_FIN, + PNE_PM_VSU_FMA, + PNE_PM_VSU_FMA_DOUBLE, + PNE_PM_VSU_FRSP, + PNE_PM_VSU_FSQRT_FDIV, + PNE_PM_VSU_FSQRT_FDIV_DOUBLE, + PNE_PM_VSU_SCALAR_DOUBLE_ISSUED, + PNE_PM_VSU_SCALAR_SINGLE_ISSUED, + PNE_PM_VSU_SIMPLE_ISSUED, + PNE_PM_VSU_SINGLE, + PNE_PM_VSU_STF, + PNE_PM_VSU_VECTOR_DOUBLE_ISSUED, + PNE_PM_VSU_VECTOR_SINGLE_ISSUED, + PNE_PM_VSU0_16FLOP, + PNE_PM_VSU0_1FLOP, + PNE_PM_VSU0_2FLOP, + PNE_PM_VSU0_2FLOP_DOUBLE, + PNE_PM_VSU0_4FLOP, + PNE_PM_VSU0_8FLOP, + 
PNE_PM_VSU0_COMPLEX_ISSUED, + PNE_PM_VSU0_DENORM, + PNE_PM_VSU0_FCONV, + PNE_PM_VSU0_FEST, + PNE_PM_VSU0_FIN, + PNE_PM_VSU0_FMA, + PNE_PM_VSU0_FMA_DOUBLE, + PNE_PM_VSU0_FPSCR, + PNE_PM_VSU0_FRSP, + PNE_PM_VSU0_FSQRT_FDIV, + PNE_PM_VSU0_FSQRT_FDIV_DOUBLE, + PNE_PM_VSU0_SCAL_DOUBLE_ISSUED, + PNE_PM_VSU0_SCAL_SINGLE_ISSUED, + PNE_PM_VSU0_SIMPLE_ISSUED, + PNE_PM_VSU0_SINGLE, + PNE_PM_VSU0_STF, + PNE_PM_VSU0_VECT_DOUBLE_ISSUED, + PNE_PM_VSU0_VECTOR_SP_ISSUED, + PNE_PM_VSU1_1FLOP, + PNE_PM_VSU1_2FLOP, + PNE_PM_VSU1_2FLOP_DOUBLE, + PNE_PM_VSU1_4FLOP, + PNE_PM_VSU1_8FLOP, + PNE_PM_VSU1_DD_ISSUED, + PNE_PM_VSU1_DENORM, + PNE_PM_VSU1_DQ_ISSUED, + PNE_PM_VSU1_FCONV, + PNE_PM_VSU1_FEST, + PNE_PM_VSU1_FIN, + PNE_PM_VSU1_FMA, + PNE_PM_VSU1_FMA_DOUBLE, + PNE_PM_VSU1_FRSP, + PNE_PM_VSU1_FSQRT_FDIV, + PNE_PM_VSU1_FSQRT_FDIV_DOUBLE, + PNE_PM_VSU1_PERMUTE_ISSUED, + PNE_PM_VSU1_SCAL_DOUBLE_ISSUED, + PNE_PM_VSU1_SCAL_SINGLE_ISSUED, + PNE_PM_VSU1_SINGLE, + PNE_PM_VSU1_SQ, + PNE_PM_VSU1_STF, + PNE_PM_VSU1_VECT_DOUBLE_ISSUED, + PNE_PM_BR_TAKEN, + PNE_PM_CMPLU_STALL_DCACHE_MISS, + PNE_PM_CMPLU_STALL_DFU, + PNE_PM_CMPLU_STALL_FXU, + PNE_PM_CMPLU_STALL_LSU, + PNE_PM_CMPLU_STALL_SCALAR_LONG, + PNE_PM_CMPLU_STALL_STORE, + PNE_PM_CMPLU_STALL_VECTOR, + PNE_PM_DATA_FROM_L2MISS, + PNE_PM_DATA_FROM_L3MISS, + PNE_PM_DERAT_MISS_64K, + PNE_PM_DPU_HELD_POWER, + PNE_PM_DTLB_MISS_4K, + PNE_PM_EXT_INT, + PNE_PM_FXU_BUSY, + PNE_PM_GCT_EMPTY_CYC, + PNE_PM_GCT_NOSLOT_IC_MISS, + PNE_PM_HV_CYC, + PNE_PM_INST_DISP, + PNE_PM_INST_FROM_L3MISS, + PNE_PM_INST_PTEG_FROM_DMEM, + PNE_PM_INST_PTEG_FROM_L3, + PNE_PM_INST_PTEG_FROM_L31_SHR, + PNE_PM_INST_PTEG_FROM_L3MISS, + PNE_PM_INST_PTEG_FROM_RL2L3_SHR, + PNE_PM_L1_ICACHE_MISS, + PNE_PM_L2_DC_INV, + PNE_PM_L2_GLOB_GUESS_WRONG, + PNE_PM_L2_IC_INV, + PNE_PM_L2_LD_MISS, + PNE_PM_L2_LDST_MISS, + PNE_PM_L2_LOC_GUESS_WRONG, + PNE_PM_L2_RCLD_DISP_FAIL_OTHER, + PNE_PM_L2_RCST_BUSY_RC_FULL, + PNE_PM_L2_ST_MISS, + PNE_PM_L3_LD_HIT, + PNE_PM_L3_LD_MISS, + PNE_PM_LSU_DERAT_MISS, 
+ PNE_PM_LSU_LMQ_SRQ_EMPTY_CYC, + PNE_PM_LSU_REJECT_ERAT_MISS, + PNE_PM_MRK_BRU_FIN, + PNE_PM_MRK_DATA_FROM_DL2L3_SHR_CYC, + PNE_PM_MRK_DATA_FROM_DMEM_CYC, + PNE_PM_MRK_DATA_FROM_L2_CYC, + PNE_PM_MRK_DATA_FROM_L21_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L31_SHR_CYC, + PNE_PM_MRK_DATA_FROM_L3MISS, + PNE_PM_MRK_DATA_FROM_LMEM_CYC, + PNE_PM_MRK_DATA_FROM_RL2L3_SHR_CYC, + PNE_PM_MRK_DERAT_MISS_64K, + PNE_PM_MRK_DFU_FIN, + PNE_PM_MRK_DTLB_MISS_4K, + PNE_PM_MRK_FXU_FIN, + PNE_PM_MRK_INST_DISP, + PNE_PM_MRK_LD_MISS_L1, + PNE_PM_MRK_PTEG_FROM_DMEM, + PNE_PM_MRK_PTEG_FROM_L3, + PNE_PM_MRK_PTEG_FROM_L31_SHR, + PNE_PM_MRK_PTEG_FROM_L3MISS, + PNE_PM_MRK_PTEG_FROM_RL2L3_SHR, + PNE_PM_MRK_ST_NEST, + PNE_PM_PB_SYS_PUMP, + PNE_PM_MEM0_PREFETCH_DISP, + PNE_PM_NEST_PAIR1_ADD, + PNE_PM_NEST_PAIR1_AND, + PNE_PM_PMC1_OVERFLOW, + PNE_PM_POWER_EVENT2, + PNE_PM_PTEG_FROM_DMEM, + PNE_PM_PTEG_FROM_L3, + PNE_PM_PTEG_FROM_L31_SHR, + PNE_PM_PTEG_FROM_L3MISS, + PNE_PM_PTEG_FROM_RL2L3_SHR, + PNE_PM_RUN_CYC, + PNE_PM_ST_FIN, + PNE_PM_THRD_2_RUN_CYC, + PNE_PM_THRD_4_CONC_RUN_INST, + PNE_PM_THRD_ALL_RUN_CYC, + PNE_PM_TLB_MISS, + PNE_PM_1THRD_CON_RUN_INSTR, + PNE_PM_DATA_FROM_DL2L3_MOD, + PNE_PM_DATA_FROM_DL2L3_SHR, + PNE_PM_DATA_FROM_L21_MOD, + PNE_PM_DATA_FROM_L21_SHR, + PNE_PM_DATA_FROM_LMEM, + PNE_PM_DATA_FROM_RMEM, + PNE_PM_DATA_TABLEWALK_CYC, + PNE_PM_DERAT_MISS_16M, + PNE_PM_DISP_HELD_THERMAL, + PNE_PM_DISP_WT, + PNE_PM_DTLB_MISS, + PNE_PM_DTLB_MISS_64K, + PNE_PM_FLUSH_COMPLETION, + PNE_PM_FREQ_DOWN, + PNE_PM_FXU0_BUSY_FXU1_IDLE, + PNE_PM_GRP_CMPL, + PNE_PM_GRP_DISP, + PNE_PM_INST_FROM_DL2L3_MOD, + PNE_PM_INST_FROM_DL2L3_SHR, + PNE_PM_INST_FROM_L21_MOD, + PNE_PM_INST_FROM_L21_SHR, + PNE_PM_INST_FROM_LMEM, + PNE_PM_INST_FROM_RMEM, + PNE_PM_INST_IMC_MATCH_DISP, + PNE_PM_INST_PTEG_FROM_L21_MOD, + PNE_PM_INST_PTEG_FROM_DL2L3_SHR, + PNE_PM_INST_PTEG_FROM_RMEM, + PNE_PM_IOPS_DISP, + PNE_PM_L1_DCACHE_RELOAD_VALID, + PNE_PM_L2_INST, + PNE_PM_L2_INST_MISS, + PNE_PM_L2_LD_DISP, + PNE_PM_L2_LD_HIT, + 
PNE_PM_L2_NODE_PUMP, + PNE_PM_L2_RC_ST_DONE, + PNE_PM_L2_RCST_DISP, + PNE_PM_L2_RCST_DISP_FAIL_ADDR, + PNE_PM_L2_SN_SX_I_DONE, + PNE_PM_L2_SYS_PUMP, + PNE_PM_L3_PREF_HIT, + PNE_PM_L3_PREF_MISS, + PNE_PM_LSU_FIN, + PNE_PM_LSU_LMQ_SRQ_EMPTY_ALL_CYC, + PNE_PM_MRK_BR_MPRED, + PNE_PM_MRK_DATA_FROM_DL2L3_MOD, + PNE_PM_MRK_DATA_FROM_DL2L3_SHR, + PNE_PM_MRK_DATA_FROM_L21_MOD, + PNE_PM_MRK_DATA_FROM_L21_SHR, + PNE_PM_MRK_DATA_FROM_LMEM, + PNE_PM_MRK_DATA_FROM_RMEM, + PNE_PM_MRK_DERAT_MISS_16M, + PNE_PM_MRK_DTLB_MISS_64K, + PNE_PM_MRK_IFU_FIN, + PNE_PM_MRK_INST_FIN, + PNE_PM_MRK_LSU_DERAT_MISS, + PNE_PM_MRK_LSU_REJECT_ERAT_MISS, + PNE_PM_MRK_PTEG_FROM_L21_MOD, + PNE_PM_MRK_PTEG_FROM_DL2L3_SHR, + PNE_PM_MRK_PTEG_FROM_RMEM, + PNE_PM_MRK_ST_CMPL_INT, + PNE_PM_MRK_STALL_CMPLU_CYC, + PNE_PM_MRK_STALL_CMPLU_CYC_COUNT, + PNE_PM_MRK_VSU_FIN, + PNE_PM_PB_RETRY_NODE_PUMP, + PNE_PM_MEM0_RD_CANCEL_TOTAL, + PNE_PM_NEST_PAIR2_ADD, + PNE_PM_NEST_PAIR2_AND, + PNE_PM_PMC2_OVERFLOW, + PNE_PM_PMC2_REWIND, + PNE_PM_PMC4_SAVED, + PNE_PM_PMC6_OVERFLOW, + PNE_PM_POWER_EVENT3, + PNE_PM_PTEG_FROM_L21_MOD, + PNE_PM_PTEG_FROM_DL2L3_SHR, + PNE_PM_PTEG_FROM_RMEM, + PNE_PM_ST_MISS_L1, + PNE_PM_TB_BIT_TRANS, + PNE_PM_THRD_3_RUN_CYC, + PNE_PM_THRD_CONC_RUN_INST, + PNE_PM_1PLUS_PPC_DISP, + PNE_PM_BR_MPRED, + PNE_PM_CMPLU_STALL, + PNE_PM_CMPLU_STALL_BRU, + PNE_PM_CMPLU_STALL_DIV, + PNE_PM_CMPLU_STALL_ERAT_MISS, + PNE_PM_CMPLU_STALL_IFU, + PNE_PM_CMPLU_STALL_REJECT, + PNE_PM_CMPLU_STALL_SCALAR, + PNE_PM_CMPLU_STALL_VECTOR_LONG, + PNE_PM_DERAT_MISS_16G, + PNE_PM_DTLB_MISS_16M, + PNE_PM_FLUSH, + PNE_PM_FREQ_UP, + PNE_PM_FXU1_BUSY_FXU0_IDLE, + PNE_PM_FXU1_FIN, + PNE_PM_GCT_NOSLOT_BR_MPRED, + PNE_PM_GCT_NOSLOT_BR_MPRED_IC_MISS, + PNE_PM_IFU_FIN, + PNE_PM_INST_FROM_L2MISS, + PNE_PM_INST_PTEG_FROM_DL2L3_MOD, + PNE_PM_INST_PTEG_FROM_L21_SHR, + PNE_PM_INST_PTEG_FROM_L2MISS, + PNE_PM_INST_PTEG_FROM_LMEM, + PNE_PM_ITLB_MISS, + PNE_PM_L2_DISP_ALL, + PNE_PM_L2_RCLD_BUSY_RC_FULL, + PNE_PM_L2_RCST_DISP_FAIL_OTHER, + 
PNE_PM_L2_SN_M_RD_DONE, + PNE_PM_L2_SN_M_WR_DONE, + PNE_PM_L2_ST_DISP, + PNE_PM_L2_ST_HIT, + PNE_PM_L3_CO_MEM, + PNE_PM_L3_CO_L31, + PNE_PM_L3_PREF_BUSY, + PNE_PM_L3_RD_BUSY, + PNE_PM_LD_MISS_L1, + PNE_PM_LSU_SRQ_EMPTY_CYC, + PNE_PM_MRK_DATA_FROM_DL2L3_MOD_CYC, + PNE_PM_MRK_DATA_FROM_L21_MOD_CYC, + PNE_PM_MRK_DATA_FROM_L2MISS, + PNE_PM_MRK_DATA_FROM_L3_CYC, + PNE_PM_MRK_DATA_FROM_L31_MOD_CYC, + PNE_PM_MRK_DATA_FROM_RL2L3_MOD_CYC, + PNE_PM_MRK_DATA_FROM_RMEM_CYC, + PNE_PM_MRK_DERAT_MISS_16G, + PNE_PM_MRK_DTLB_MISS_16M, + PNE_PM_MRK_GRP_CMPL, + PNE_PM_MRK_GRP_IC_MISS, + PNE_PM_MRK_INST_TIMEO, + PNE_PM_MRK_LD_MISS_L1_CYC, + PNE_PM_MRK_LSU_FIN, + PNE_PM_MRK_LSU_REJECT, + PNE_PM_MRK_PTEG_FROM_DL2L3_MOD, + PNE_PM_MRK_PTEG_FROM_L21_SHR, + PNE_PM_MRK_PTEG_FROM_L2MISS, + PNE_PM_MRK_PTEG_FROM_LMEM, + PNE_PM_PB_RETRY_SYS_PUMP, + PNE_PM_MEM0_WQ_DISP, + PNE_PM_NEST_PAIR3_ADD, + PNE_PM_NEST_PAIR3_AND, + PNE_PM_PMC3_OVERFLOW, + PNE_PM_POWER_EVENT4, + PNE_PM_PTEG_FROM_DL2L3_MOD, + PNE_PM_PTEG_FROM_L21_SHR, + PNE_PM_PTEG_FROM_L2MISS, + PNE_PM_PTEG_FROM_LMEM, + PNE_PM_RUN_INST_CMPL, + PNE_PM_RUN_PURR, + PNE_PM_THERMAL_MAX, + PNE_PM_THRD_2_CONC_RUN_INSTR, + PNE_PM_THRD_4_RUN_CYC, + NATNAME_GUARD, +}; + +#endif diff --git a/src/components/perfctr_ppc/ppc64_events.c b/src/components/perfctr_ppc/ppc64_events.c new file mode 100644 index 0000000..622df4b --- /dev/null +++ b/src/components/perfctr_ppc/ppc64_events.c @@ -0,0 +1,503 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: ppc64_events.c +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +*/ + +#include "papi_internal.h" +#include +#include "libperfctr.h" + +hwd_groups_t group_map[MAX_GROUPS] = { {0} +, {0} +, {0} +, {0} +, {0} +}; +native_event_entry_t native_table[PAPI_MAX_NATIVE_EVENTS]; + +/* to initialize the native_table */ +void +perfctr_initialize_native_table( ) +{ + int i, j; + memset( native_table, 0, + PAPI_MAX_NATIVE_EVENTS * sizeof ( 
native_event_entry_t ) ); + for ( i = 0; i < PAPI_MAX_NATIVE_EVENTS; i++ ) { + for ( j = 0; j < MAX_COUNTERS; j++ ) + native_table[i].resources.counter_cmd[j] = -1; + } +} + + +/* to setup native_table group value */ +void +perfctr_ppc64_setup_gps( int total, ntv_event_group_info_t * group_info ) +{ + int i, j, gnum; + + for ( i = 0; i < total; i++ ) { + for ( j = 0; j < MAX_COUNTERS; j++ ) { + if ( native_table[i].resources.selector & ( 1 << j ) ) { + for ( gnum = 0; gnum < group_info->maxgroups; gnum++ ) { + if ( native_table[i].resources.counter_cmd[j] == + group_info->event_groups[gnum]->events[j] ) { + native_table[i].resources.group[gnum / 32] |= + 1 << ( gnum % 32 ); + } + } + } + } + } + + for ( gnum = 0; gnum < group_info->maxgroups; gnum++ ) { + group_map[gnum].mmcr0 = group_info->event_groups[gnum]->mmcr0; + group_map[gnum].mmcr1L = group_info->event_groups[gnum]->mmcr1L; + group_map[gnum].mmcr1U = group_info->event_groups[gnum]->mmcr1U; + group_map[gnum].mmcra = group_info->event_groups[gnum]->mmcra; + for ( i = 0; i < MAX_COUNTERS; i++ ) + group_map[gnum].counter_cmd[i] = + group_info->event_groups[gnum]->events[i]; + } +} + + +/* to setup native_table values, and return number of entries */ +int +perfctr_ppc64_setup_native_table( ) +{ + int pmc, ev, i, j, index; + /* This is for initialisation-testing of consistency between + native_name_map and our events file */ + int itemCount = 0; + index = 0; + perfctr_initialize_native_table( ); + ntv_event_info_t *info = perfctr_get_native_evt_info( ); + if ( info == NULL ) { + PAPIERROR( EVENT_INFO_FILE_ERROR ); + return PAPI_ECMP; + } + ntv_event_t *wevp; + for ( pmc = 0; pmc < info->maxpmcs; pmc++ ) { + wevp = info->wev[pmc]; + for ( ev = 0; ev < info->maxevents[pmc]; ev++, wevp++ ) { + for ( i = 0; i < index; i++ ) { + if ( strcmp( wevp->symbol, native_table[i].name ) == 0 ) { + native_table[i].resources.selector |= 1 << pmc; + native_table[i].resources.counter_cmd[pmc] = + wevp->event_num; + break; + } + } 
+ if ( i == index ) { + //native_table[i].index=i; + native_table[i].resources.selector |= 1 << pmc; + native_table[i].resources.counter_cmd[pmc] = wevp->event_num; + native_table[i].name = + ( char * ) malloc( strlen( wevp->symbol ) + 1 ); + strcpy( native_table[i].name, wevp->symbol ); + native_table[i].description = wevp->description; + index++; + for ( j = 0; j < MAX_NATNAME_MAP_INDEX; j++ ) { + /* It appears that here, if I'm right, that the events + file entry matches the event from native_name_map, */ + /* This here check is to ensure that native_name_map in fact + has MAX_NATNAME_MAP_INDEX elements, or rather that it never + tries to access one that has not been initialised. */ + if ( native_name_map[j].name == NULL ) { + SUBDBG( "native_name_map has a NULL at position %i\n", + j ); + PAPIERROR + ( "Inconsistency between events_map file and events header." ); + return PAPI_EBUG; + } + if ( strcmp( native_table[i].name, native_name_map[j].name ) + == 0 ) { + native_name_map[j].index = i; + itemCount++; + break; + } + } + /* If we never set native_name_map[j], then there is an + inconsistency between native_name_map and native_table */ + if ( ( !( j < MAX_NATNAME_MAP_INDEX ) ) || + native_name_map[j].index != i ) { + SUBDBG + ( "No match found between native_name_map and native_table. " + "Values was %s at position %i in native_table.\n", + native_table[i].name, i ); + PAPIERROR + ( "Inconsistency between native_name_map and events file." ); + return PAPI_EBUG; + } + } + } + } + /* given the previous evidence that native_name_map is a superset of + native_table, ensuring this match in their cardinality shows them to + be equivalent. 
*/ + if ( itemCount != MAX_NATNAME_MAP_INDEX ) { + SUBDBG( "%i events found in native_table, but really should be %i\n", + itemCount, MAX_NATNAME_MAP_INDEX ); + PAPIERROR + ( "Inconsistent cardinality between native_name_map and events file", + itemCount, MAX_NATNAME_MAP_INDEX ); + return PAPI_EBUG; + } + + ntv_event_group_info_t *gp_info = perfctr_get_native_group_info( ); + if ( gp_info == NULL ) { + perfctr_initialize_native_table( ); + PAPIERROR( EVENT_INFO_FILE_ERROR ); + return PAPI_ECMP; + } + + perfctr_ppc64_setup_gps( index, gp_info ); + _papi_hwi_system_info.sub_info.num_native_events = index; + + return check_native_name( ); +} + +int +check_native_name( ) +{ + enum native_name foo; + int itemCount = 0; + int i; + + /* This should ensure that the cardinality of native_name is the same + as that of native_name_map which may be true iff native_name + expresses the same data as native_name_map and there is a 1:1 + mapping from one onto the other, though there is no guarantee of + order. */ + if ( ( NATNAME_GUARD - PAPI_NATIVE_MASK ) != MAX_NATNAME_MAP_INDEX ) { + SUBDBG( "%i is the number of elements apparently in native_name, " + "but really should be %i, according to native_name_map.\n", + ( NATNAME_GUARD - PAPI_NATIVE_MASK ), MAX_NATNAME_MAP_INDEX ); + PAPIERROR + ( "Inconsistent cardinality between native_name and native_name_map " + "detected in preliminary check\n" ); + return PAPI_EBUG; + } + + /* The following is sanity checking only. It attempts to verify some level + of consistency between native_name and native_name_map and native_table. + This should imply that native_name is a subset of native_name_map. */ + for ( foo = PAPI_NATIVE_MASK; foo < NATNAME_GUARD; foo++ ) { + for ( i = 0; i < MAX_NATNAME_MAP_INDEX; i++ ) { + /* Now, if the event we are on is the native event we seek... 
*/ + if ( ( native_name_map[i].index | PAPI_NATIVE_MASK ) == foo ) { + itemCount++; + break; + } + } + } + if ( itemCount != MAX_NATNAME_MAP_INDEX ) { + SUBDBG( "Inconsistency between native_name_map and native_name. " + "%i events matched, but really should be %i\n", itemCount, + MAX_NATNAME_MAP_INDEX ); + PAPIERROR + ( "Inconsistent cardinality between native_name and native_name_map\n" ); + return PAPI_EBUG; + } + + return PAPI_OK; +} + +static FILE * +open_file( const char *fname ) +{ + char *cpu; + char *dot = "."; + char *dot_dot = ".."; +#ifdef _POWER5p + cpu = "power5+"; +#elif defined(_POWER5) + cpu = "power5"; +#elif defined(_PPC970) + cpu = "ppc970"; +#else + cpu = ""; +#endif + char *dir = ( char * ) getenv( "PAPI_EVENTFILE_PATH" ); +#ifdef PAPI_DATADIR + if ( dir == NULL ) { + dir = PAPI_DATADIR; + } +#endif + /* If dir is still NULL, assume current dir holds event_data dir */ + if ( dir == NULL ) + dir = dot; + + char *relative_pathname = ( char * ) malloc( strlen( "/" ) + + strlen( "event_data" ) + + strlen( "/" ) + strlen( cpu ) + + strlen( "/" ) + + strlen( fname ) + 1 ); + strcpy( relative_pathname, "/" ); + strcat( relative_pathname, "event_data" ); + strcat( relative_pathname, "/" ); + strcat( relative_pathname, cpu ); + strcat( relative_pathname, "/" ); + strcat( relative_pathname, fname ); + /* Add a little extra space to the malloc for the case where dir = "." since + * we may be trying dir = ".." later on. */ + char *pathname = + ( char * ) malloc( strlen( dir ) + strlen( relative_pathname ) + 4 ); + int keep_trying; + if ( strcmp( dir, dot ) == 0 ) + keep_trying = 2; + else + keep_trying = 3; + FILE *file = NULL; + while ( file == NULL && keep_trying-- ) { + strcpy( pathname, dir ); + strcat( pathname, relative_pathname ); + file = fopen( pathname, "r" ); + if ( strcmp( dir, dot ) == 0 ) { + dir = dot_dot; + } else { + dir = dot; + } + SUBDBG( "Attempt to open event data file %s %s successful.\n", pathname, + ( file == NULL ) ? 
"was not" : "was" ); + memset( pathname, '\0', sizeof ( pathname ) ); + } + + free( pathname ); + free( relative_pathname ); + return ( file ); +} + +static ntv_event_t * +copy_buffer( ntv_event_t events[], int maxevents ) +{ + ntv_event_t *cur_wev, *start_wev; + start_wev = ( ntv_event_t * ) malloc( sizeof ( ntv_event_t ) * maxevents ); + cur_wev = start_wev; + + int cnt; + for ( cnt = 0; cnt < maxevents; cnt++, cur_wev++ ) { + cur_wev->event_num = events[cnt].event_num; + strcpy( cur_wev->symbol, events[cnt].symbol ); + cur_wev->short_description = + ( char * ) malloc( strlen( events[cnt].short_description ) ); + strcpy( cur_wev->short_description, events[cnt].short_description ); + cur_wev->description = + ( char * ) malloc( strlen( events[cnt].description ) ); + strcpy( cur_wev->description, events[cnt].description ); + } + return start_wev; +} + +static ntv_event_info_t * +parse_eventfile( FILE * evfile ) +{ + int counter = 0, num_events = 0; + int i, len, cc; + int event; + int line_counter_flag = 0; + char line_data[1024]; + ntv_event_t events[PAPI_MAX_NATIVE_EVENTS]; + ntv_event_info_t *ntv_evt_info = + ( ntv_event_info_t * ) malloc( sizeof ( ntv_event_info_t ) ); + ntv_evt_info->maxpmcs = 0; + char data[1024]; + while ( fgets( data, 1022, evfile ) ) { + if ( feof( evfile ) ) + continue; + if ( strlen( data ) < 2 ) + continue; + + if ( strncmp( data, "$$$$", 4 ) == 0 ) { + line_counter_flag = 0; + ntv_evt_info->maxevents[counter - 1] = num_events; + ntv_evt_info->wev[counter - 1] = copy_buffer( events, num_events ); + ntv_event_t *temp = ntv_evt_info->wev[counter - 1]; + temp++; + } + switch ( line_counter_flag ) { + case 0: + if ( sscanf( data, "{ counter %u", &counter ) == 1 ) { + line_counter_flag = 1; + num_events = 0; + ntv_evt_info->maxpmcs++; + } + break; + case 1: + if ( sscanf( data, "#%u", &event ) != 1 ) { + fprintf( stderr, "EVS file format error 1 (%s)\n", data ); + return NULL; + } + if ( event >= PAPI_MAX_NATIVE_EVENTS ) { + fprintf( stderr, 
"EVS file format error 1 (%s)\n", data ); + return NULL; + } + events[num_events].event_num = event; + len = strlen( data ); + int symb_found = 0; + for ( i = cc = 0; i < len; i++ ) { + if ( data[i] == ',' ) + cc++; + if ( cc == 5 && !symb_found ) { + strcpy( line_data, &data[i + 1] ); + int j = 0; + while ( line_data[j] != ',' ) + j++; + strncpy( events[num_events].symbol, line_data, j ); + events[num_events].symbol[j] = 0; + symb_found = 1; + i += j; + } else if ( cc == 6 ) { + len = strlen( &data[i + 1] ); + events[num_events].short_description = + ( char * ) malloc( len ); + strcpy( events[num_events].short_description, + &data[i + 1] ); + events[num_events].short_description[len - 1] = 0; + break; + } + } + line_counter_flag = 2; + break; + case 2: + line_counter_flag = 3; + break; + case 3: + len = strlen( data ); + events[num_events].description = ( char * ) malloc( len ); + strcpy( events[num_events].description, data ); + events[num_events].description[len - 1] = 0; + line_counter_flag = 1; + num_events++; + break; + } + } + if ( counter == 0 ) { + free( ntv_evt_info ); + ntv_evt_info = NULL; + } + if ( counter == MAX_COUNTERS ) { + ntv_evt_info->maxevents[counter - 1] = num_events; + ntv_evt_info->wev[counter - 1] = copy_buffer( events, num_events ); + } + fclose( evfile ); + return ntv_evt_info; +} + +static int +any_counter_invalid( int event_id[], int size ) +{ + int j; + for ( j = 0; j < size; j++ ) { + if ( event_id[j] >= PAPI_MAX_NATIVE_EVENTS ) + return 1; + } + return 0; +} + +static ntv_event_group_info_t * +parse_groupfile( FILE * grp_file ) +{ + char data[1024]; + unsigned int mmcr0, mmcr1H, mmcr1L, mmcra; + int g, state = 0; + ntv_event_group_info_t *group_info = + ( ntv_event_group_info_t * ) + malloc( sizeof ( ntv_event_group_info_t ) ); + group_info->maxgroups = 0; + int event_num[MAX_COUNTERS]; + while ( fgets( data, 1022, grp_file ) ) { + if ( feof( grp_file ) || ( strlen( data ) < 2 ) ) + continue; + + switch ( state ) { + case 0: +#if 
defined(_POWER5) || defined(_POWER5p) + if ( sscanf + ( data, "#%u,%u,%u,%u,%u,%u,%u", &g, &event_num[0], + &event_num[1], &event_num[2], &event_num[3], &event_num[4], + &event_num[5] ) == 7 ) { + state = 1; + if ( any_counter_invalid( event_num, 6 ) ) { + fprintf( stderr, "ERROR: Maximum events exceeded\n" ); + return NULL; + } + } +#else + if ( sscanf + ( data, "#%u,%u,%u,%u,%u,%u,%u,%u,%u", &g, &event_num[0], + &event_num[1], &event_num[2], &event_num[3], &event_num[4], + &event_num[5], &event_num[6], &event_num[7] ) == 9 ) { + state = 1; + if ( any_counter_invalid( event_num, 8 ) ) { + fprintf( stderr, "ERROR: Maximum events exceeded\n" ); + return NULL; + } + } +#endif + if ( state == 1 ) { + group_info->event_groups[group_info->maxgroups] = + ( event_group_t * ) malloc( sizeof ( event_group_t ) ); + group_info->event_groups[group_info->maxgroups]->group_id = g; + int j = 0; + for ( ; j < MAX_COUNTERS; j++ ) + group_info->event_groups[group_info->maxgroups]->events[j] = + event_num[j]; + } + break; + case 1: + // unused hex event codes + state = 2; + break; + case 2: + /* get mmcr values */ + if ( sscanf( data, "%#x,%#x,%#x,%#x", &mmcr0, &mmcr1H, &mmcr1L, &mmcra ) + != 4 ) { + fprintf( stderr, "GPS file format error 1 (%s)\n", data ); + return NULL; + } + state = 3; + group_info->event_groups[group_info->maxgroups]->mmcr0 = mmcr0; + group_info->event_groups[group_info->maxgroups]->mmcr1L = mmcr1L; + group_info->event_groups[group_info->maxgroups]->mmcr1U = mmcr1H; + group_info->event_groups[group_info->maxgroups]->mmcra = mmcra; + group_info->maxgroups++; + break; + case 3: + // unused group name + state = 0; + } + } + fclose( grp_file ); + return group_info; +} + +ntv_event_info_t * +perfctr_get_native_evt_info( void ) +{ + ntv_event_info_t *evt_info = NULL; + FILE *evt_file = open_file( "events" ); + if ( evt_file != NULL ) { + evt_info = parse_eventfile( evt_file ); + } + return evt_info; + +} + +ntv_event_group_info_t * +perfctr_get_native_group_info( void 
) +{ + ntv_event_group_info_t *groups = NULL; + FILE *grp_file = NULL; + if ( ( grp_file = open_file( "groups" ) ) != NULL ) { + groups = parse_groupfile( grp_file ); + } + return groups; +} diff --git a/src/components/perfctr_ppc/ppc64_events.h b/src/components/perfctr_ppc/ppc64_events.h new file mode 100644 index 0000000..b8e7b55 --- /dev/null +++ b/src/components/perfctr_ppc/ppc64_events.h @@ -0,0 +1,86 @@ +#ifndef PPC64_EVENTS_H_ +#define PPC64_EVENTS_H_ +/* +* File: ppc64_events.h +* CVS: +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +* +*/ + +#ifdef ARCH_EVTS +#include ARCH_EVTS +#else +#define GROUP_INTS 8 +#if defined(_POWER5) +#define PAPI_MAX_NATIVE_EVENTS 512 +#elif defined(_POWER6) +#define PAPI_MAX_NATIVE_EVENTS 1024 +#else +#define PAPI_MAX_NATIVE_EVENTS 1024 +#endif +#define MAX_GROUPS (GROUP_INTS * 32) +#endif + +typedef struct PPC64_register +{ + /* indicate which counters this event can live on */ + unsigned int selector; + /* Buffers containing counter cmds for each possible metric */ + int counter_cmd[MAX_COUNTERS]; + /* which group this event belongs */ + unsigned int group[GROUP_INTS]; +} PPC64_register_t; + +/* Override void* definitions from PAPI framework layer */ +/* with typedefs to conform to PAPI component layer code. 
*/ +#undef hwd_register_t +typedef PPC64_register_t hwd_register_t; + +typedef struct PPC64_groups +{ +#ifdef __perfctr__ + unsigned int mmcr0; + unsigned int mmcr1L; + unsigned int mmcr1U; + unsigned int mmcra; + unsigned int counter_cmd[MAX_COUNTERS]; +#else +/* Buffer containing counter cmds for this group */ + unsigned int counter_cmd[MAX_COUNTERS]; +#endif +} PPC64_groups_t; + +typedef PPC64_groups_t hwd_groups_t; + +typedef struct native_event_entry +{ + /* description of the resources required by this native event */ + hwd_register_t resources; + /* If it exists, then this is the name of this event */ + char *name; + /* If it exists, then this is the description of this event */ + char *description; +} native_event_entry_t; + +typedef struct PPC64_native_map +{ + /* native event name */ + char *name; + /* real index in the native table */ + int index; +} PPC64_native_map_t; + +extern native_event_entry_t native_table[PAPI_MAX_NATIVE_EVENTS]; +#ifndef __perfctr__ +extern hwd_pminfo_t pminfo; +extern pm_groups_info_t pmgroups; +#endif +extern PPC64_native_map_t native_name_map[PAPI_MAX_NATIVE_EVENTS]; +extern hwd_groups_t group_map[MAX_GROUPS]; + +int check_native_name( ); + +#endif /*PPC64_EVENTS_H_ */ diff --git a/src/components/perfctr_ppc/ppc970_events.h b/src/components/perfctr_ppc/ppc970_events.h new file mode 100644 index 0000000..b91949a --- /dev/null +++ b/src/components/perfctr_ppc/ppc970_events.h @@ -0,0 +1,246 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +#ifndef _PAPI_PPC970_EVENTS_H +#define _PAPI_PPC970_EVENTS_H + +/* +* File: ppc970_events.h +* CVS: +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +* +* This file MUST be kept synchronised with the events file. 
+* +*/ +#include "papiStdEventDefs.h" + +#define GROUP_INTS 2 +#define PAPI_MAX_NATIVE_EVENTS 256 +#define MAX_GROUPS (GROUP_INTS * 32) +#define MAX_NATNAME_MAP_INDEX 215 + + +enum native_name +{ + PNE_PM_BRQ_FULL_CYC = PAPI_NATIVE_MASK, + PNE_PM_CR_MAP_FULL_CYC, + PNE_PM_CYC, + PNE_PM_DATA_FROM_L2, + PNE_PM_DATA_TABLEWALK_CYC, + PNE_PM_DSLB_MISS, + PNE_PM_DTLB_MISS, + PNE_PM_FPR_MAP_FULL_CYC, + PNE_PM_FPU0_ALL, + PNE_PM_FPU0_DENORM, + PNE_PM_FPU0_FDIV, + PNE_PM_FPU0_FMA, + PNE_PM_FPU0_FSQRT, + PNE_PM_FPU0_FULL_CYC, + PNE_PM_FPU0_SINGLE, + PNE_PM_FPU0_STALL3, + PNE_PM_FPU0_STF, + PNE_PM_FPU1_ALL, + PNE_PM_FPU1_DENORM, + PNE_PM_FPU1_FDIV, + PNE_PM_FPU1_FMA, + PNE_PM_FPU1_FSQRT, + PNE_PM_FPU1_FULL_CYC, + PNE_PM_FPU1_SINGLE, + PNE_PM_FPU1_STALL3, + PNE_PM_FPU1_STF, + PNE_PM_FPU_DENORM, + PNE_PM_FPU_FDIV, + PNE_PM_GCT_EMPTY_CYC, + PNE_PM_GCT_FULL_CYC, + PNE_PM_GRP_BR_MPRED, + PNE_PM_GRP_BR_REDIR, + PNE_PM_GRP_DISP_REJECT, + PNE_PM_GRP_DISP_VALID, + PNE_PM_IC_PREF_INSTALL, + PNE_PM_IC_PREF_REQ, + PNE_PM_IERAT_XLATE_WR, + PNE_PM_INST_CMPL, + PNE_PM_INST_DISP, + PNE_PM_INST_FROM_L1, + PNE_PM_INST_FROM_L2, + PNE_PM_ISLB_MISS, + PNE_PM_ITLB_MISS, + PNE_PM_LARX_LSU0, + PNE_PM_LR_CTR_MAP_FULL_CYC, + PNE_PM_LSU0_DERAT_MISS, + PNE_PM_LSU0_FLUSH_LRQ, + PNE_PM_LSU0_FLUSH_SRQ, + PNE_PM_LSU0_FLUSH_ULD, + PNE_PM_LSU0_FLUSH_UST, + PNE_PM_LSU0_REJECT_ERAT_MISS, + PNE_PM_LSU0_REJECT_LMQ_FULL, + PNE_PM_LSU0_REJECT_RELOAD_CDF, + PNE_PM_LSU0_REJECT_SRQ, + PNE_PM_LSU0_SRQ_STFWD, + PNE_PM_LSU1_DERAT_MISS, + PNE_PM_LSU1_FLUSH_LRQ, + PNE_PM_LSU1_FLUSH_SRQ, + PNE_PM_LSU1_FLUSH_ULD, + PNE_PM_LSU1_FLUSH_UST, + PNE_PM_LSU1_REJECT_ERAT_MISS, + PNE_PM_LSU1_REJECT_LMQ_FULL, + PNE_PM_LSU1_REJECT_RELOAD_CDF, + PNE_PM_LSU1_REJECT_SRQ, + PNE_PM_LSU1_SRQ_STFWD, + PNE_PM_LSU_FLUSH_ULD, + PNE_PM_LSU_LRQ_S0_ALLOC, + PNE_PM_LSU_LRQ_S0_VALID, + PNE_PM_LSU_REJECT_SRQ, + PNE_PM_LSU_SRQ_S0_ALLOC, + PNE_PM_LSU_SRQ_S0_VALID, + PNE_PM_LSU_SRQ_STFWD, + PNE_PM_MRK_DATA_FROM_L2, + PNE_PM_MRK_GRP_DISP, + 
PNE_PM_MRK_IMR_RELOAD, + PNE_PM_MRK_LD_MISS_L1, + PNE_PM_MRK_LD_MISS_L1_LSU0, + PNE_PM_MRK_LD_MISS_L1_LSU1, + PNE_PM_MRK_STCX_FAIL, + PNE_PM_MRK_ST_CMPL, + PNE_PM_MRK_ST_MISS_L1, + PNE_PM_PMC8_OVERFLOW, + PNE_PM_RUN_CYC, + PNE_PM_SNOOP_TLBIE, + PNE_PM_STCX_FAIL, + PNE_PM_STCX_PASS, + PNE_PM_ST_MISS_L1, + PNE_PM_SUSPENDED, + PNE_PM_XER_MAP_FULL_CYC, + PNE_PM_FPU_FMA, + PNE_PM_FPU_STALL3, + PNE_PM_GCT_EMPTY_SRQ_FULL, + PNE_PM_GRP_DISP, + PNE_PM_INST_FROM_MEM, + PNE_PM_LSU_FLUSH_UST, + PNE_PM_LSU_LMQ_SRQ_EMPTY_CYC, + PNE_PM_LSU_REJECT_LMQ_FULL, + PNE_PM_MRK_BRU_FIN, + PNE_PM_PMC1_OVERFLOW, + PNE_PM_THRESH_TIMEO, + PNE_PM_WORK_HELD, + PNE_PM_BR_ISSUED, + PNE_PM_BR_MPRED_CR, + PNE_PM_BR_MPRED_TA, + PNE_PM_CRQ_FULL_CYC, + PNE_PM_DATA_FROM_MEM, + PNE_PM_DC_INV_L2, + PNE_PM_DC_PREF_OUT_OF_STREAMS, + PNE_PM_DC_PREF_STREAM_ALLOC, + PNE_PM_EE_OFF, + PNE_PM_EE_OFF_EXT_INT, + PNE_PM_FLUSH_BR_MPRED, + PNE_PM_FLUSH_LSU_BR_MPRED, + PNE_PM_FPU0_FEST, + PNE_PM_FPU0_FIN, + PNE_PM_FPU0_FMOV_FEST, + PNE_PM_FPU0_FPSCR, + PNE_PM_FPU0_FRSP_FCONV, + PNE_PM_FPU1_FEST, + PNE_PM_FPU1_FIN, + PNE_PM_FPU1_FMOV_FEST, + PNE_PM_FPU1_FRSP_FCONV, + PNE_PM_FPU_FEST, + PNE_PM_FXLS0_FULL_CYC, + PNE_PM_FXLS1_FULL_CYC, + PNE_PM_FXU0_FIN, + PNE_PM_FXU1_FIN, + PNE_PM_FXU_FIN, + PNE_PM_GPR_MAP_FULL_CYC, + PNE_PM_GRP_DISP_BLK_SB_CYC, + PNE_PM_HV_CYC, + PNE_PM_INST_FROM_PREF, + PNE_PM_L1_DCACHE_RELOAD_VALID, + PNE_PM_L1_PREF, + PNE_PM_L1_WRITE_CYC, + PNE_PM_L2_PREF, + PNE_PM_LD_MISS_L1, + PNE_PM_LD_MISS_L1_LSU0, + PNE_PM_LD_MISS_L1_LSU1, + PNE_PM_LD_REF_L1_LSU0, + PNE_PM_LD_REF_L1_LSU1, + PNE_PM_LSU0_LDF, + PNE_PM_LSU1_LDF, + PNE_PM_LSU_FLUSH, + PNE_PM_LSU_LMQ_FULL_CYC, + PNE_PM_LSU_LMQ_LHR_MERGE, + PNE_PM_LSU_LMQ_S0_ALLOC, + PNE_PM_LSU_LMQ_S0_VALID, + PNE_PM_LSU_LRQ_FULL_CYC, + PNE_PM_LSU_SRQ_FULL_CYC, + PNE_PM_LSU_SRQ_SYNC_CYC, + PNE_PM_MRK_DATA_FROM_MEM, + PNE_PM_MRK_L1_RELOAD_VALID, + PNE_PM_MRK_LSU0_FLUSH_LRQ, + PNE_PM_MRK_LSU0_FLUSH_SRQ, + PNE_PM_MRK_LSU0_FLUSH_ULD, + PNE_PM_MRK_LSU0_FLUSH_UST, + 
PNE_PM_MRK_LSU1_FLUSH_LRQ, + PNE_PM_MRK_LSU1_FLUSH_SRQ, + PNE_PM_MRK_LSU1_FLUSH_ULD, + PNE_PM_MRK_LSU1_FLUSH_UST, + PNE_PM_MRK_LSU_SRQ_INST_VALID, + PNE_PM_MRK_ST_CMPL_INT, + PNE_PM_MRK_VMX_FIN, + PNE_PM_PMC2_OVERFLOW, + PNE_PM_STOP_COMPLETION, + PNE_PM_ST_REF_L1_LSU0, + PNE_PM_ST_REF_L1_LSU1, + PNE_PM_0INST_FETCH, + PNE_PM_FPU_FIN, + PNE_PM_FXU1_BUSY_FXU0_IDLE, + PNE_PM_LSU_SRQ_EMPTY_CYC, + PNE_PM_MRK_CRU_FIN, + PNE_PM_MRK_GRP_CMPL, + PNE_PM_PMC3_OVERFLOW, + PNE_PM_1PLUS_PPC_CMPL, + PNE_PM_DATA_FROM_L25_SHR, + PNE_PM_FPU_ALL, + PNE_PM_FPU_SINGLE, + PNE_PM_FXU_IDLE, + PNE_PM_GRP_DISP_SUCCESS, + PNE_PM_GRP_MRK, + PNE_PM_INST_FROM_L25_SHR, + PNE_PM_LSU_FLUSH_SRQ, + PNE_PM_LSU_REJECT_ERAT_MISS, + PNE_PM_MRK_DATA_FROM_L25_SHR, + PNE_PM_MRK_GRP_TIMEO, + PNE_PM_PMC4_OVERFLOW, + PNE_PM_DATA_FROM_L25_MOD, + PNE_PM_FPU_FSQRT, + PNE_PM_FPU_STF, + PNE_PM_FXU_BUSY, + PNE_PM_INST_FROM_L25_MOD, + PNE_PM_LSU_DERAT_MISS, + PNE_PM_LSU_FLUSH_LRQ, + PNE_PM_LSU_REJECT_RELOAD_CDF, + PNE_PM_MRK_DATA_FROM_L25_MOD, + PNE_PM_MRK_FXU_FIN, + PNE_PM_MRK_GRP_ISSUED, + PNE_PM_MRK_ST_GPS, + PNE_PM_PMC5_OVERFLOW, + PNE_PM_FPU_FRSP_FCONV, + PNE_PM_FXU0_BUSY_FXU1_IDLE, + PNE_PM_GRP_CMPL, + PNE_PM_MRK_FPU_FIN, + PNE_PM_MRK_INST_FIN, + PNE_PM_PMC6_OVERFLOW, + PNE_PM_ST_REF_L1, + PNE_PM_EXT_INT, + PNE_PM_FPU_FMOV_FEST, + PNE_PM_LD_REF_L1, + PNE_PM_LSU_LDF, + PNE_PM_MRK_LSU_FIN, + PNE_PM_PMC7_OVERFLOW, + PNE_PM_TB_BIT_TRANS, + NATNAME_GUARD, +}; +#endif //_PAPI_PPC970_EVENTS_H diff --git a/src/components/perfctr_ppc/ppc970_events_map.c b/src/components/perfctr_ppc/ppc970_events_map.c new file mode 100644 index 0000000..fe8b4ae --- /dev/null +++ b/src/components/perfctr_ppc/ppc970_events_map.c @@ -0,0 +1,447 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: ppc970_events_map.c +* Author: Maynard Johnson +* maynardj@us.ibm.com +* Mods: +* +* +* This file MUST be kept synchronised with the events file. 
+* +*/ +#include "perfctr-ppc64.h" + +PPC64_native_map_t native_name_map[MAX_NATNAME_MAP_INDEX] = { + {"PM_BRQ_FULL_CYC", -1} + , + {"PM_CR_MAP_FULL_CYC", -1} + , + {"PM_CYC", -1} + , + {"PM_DATA_FROM_L2", -1} + , + {"PM_DATA_TABLEWALK_CYC", -1} + , + {"PM_DSLB_MISS", -1} + , + {"PM_DTLB_MISS", -1} + , + {"PM_FPR_MAP_FULL_CYC", -1} + , + {"PM_FPU0_ALL", -1} + , + {"PM_FPU0_DENORM", -1} + , + {"PM_FPU0_FDIV", -1} + , + {"PM_FPU0_FMA", -1} + , + {"PM_FPU0_FSQRT", -1} + , + {"PM_FPU0_FULL_CYC", -1} + , + {"PM_FPU0_SINGLE", -1} + , + {"PM_FPU0_STALL3", -1} + , + {"PM_FPU0_STF", -1} + , + {"PM_FPU1_ALL", -1} + , + {"PM_FPU1_DENORM", -1} + , + {"PM_FPU1_FDIV", -1} + , + {"PM_FPU1_FMA", -1} + , + {"PM_FPU1_FSQRT", -1} + , + {"PM_FPU1_FULL_CYC", -1} + , + {"PM_FPU1_SINGLE", -1} + , + {"PM_FPU1_STALL3", -1} + , + {"PM_FPU1_STF", -1} + , + {"PM_FPU_DENORM", -1} + , + {"PM_FPU_FDIV", -1} + , + {"PM_GCT_EMPTY_CYC", -1} + , + {"PM_GCT_FULL_CYC", -1} + , + {"PM_GRP_BR_MPRED", -1} + , + {"PM_GRP_BR_REDIR", -1} + , + {"PM_GRP_DISP_REJECT", -1} + , + {"PM_GRP_DISP_VALID", -1} + , + {"PM_IC_PREF_INSTALL", -1} + , + {"PM_IC_PREF_REQ", -1} + , + {"PM_IERAT_XLATE_WR", -1} + , + {"PM_INST_CMPL", -1} + , + {"PM_INST_DISP", -1} + , + {"PM_INST_FROM_L1", -1} + , + {"PM_INST_FROM_L2", -1} + , + {"PM_ISLB_MISS", -1} + , + {"PM_ITLB_MISS", -1} + , + {"PM_LARX_LSU0", -1} + , + {"PM_LR_CTR_MAP_FULL_CYC", -1} + , + {"PM_LSU0_DERAT_MISS", -1} + , + {"PM_LSU0_FLUSH_LRQ", -1} + , + {"PM_LSU0_FLUSH_SRQ", -1} + , + {"PM_LSU0_FLUSH_ULD", -1} + , + {"PM_LSU0_FLUSH_UST", -1} + , + {"PM_LSU0_REJECT_ERAT_MISS", -1} + , + {"PM_LSU0_REJECT_LMQ_FULL", -1} + , + {"PM_LSU0_REJECT_RELOAD_CDF", -1} + , + {"PM_LSU0_REJECT_SRQ", -1} + , + {"PM_LSU0_SRQ_STFWD", -1} + , + {"PM_LSU1_DERAT_MISS", -1} + , + {"PM_LSU1_FLUSH_LRQ", -1} + , + {"PM_LSU1_FLUSH_SRQ", -1} + , + {"PM_LSU1_FLUSH_ULD", -1} + , + {"PM_LSU1_FLUSH_UST", -1} + , + {"PM_LSU1_REJECT_ERAT_MISS", -1} + , + {"PM_LSU1_REJECT_LMQ_FULL", -1} + , + 
{"PM_LSU1_REJECT_RELOAD_CDF", -1} + , + {"PM_LSU1_REJECT_SRQ", -1} + , + {"PM_LSU1_SRQ_STFWD", -1} + , + {"PM_LSU_FLUSH_ULD", -1} + , + {"PM_LSU_LRQ_S0_ALLOC", -1} + , + {"PM_LSU_LRQ_S0_VALID", -1} + , + {"PM_LSU_REJECT_SRQ", -1} + , + {"PM_LSU_SRQ_S0_ALLOC", -1} + , + {"PM_LSU_SRQ_S0_VALID", -1} + , + {"PM_LSU_SRQ_STFWD", -1} + , + {"PM_MRK_DATA_FROM_L2", -1} + , + {"PM_MRK_GRP_DISP", -1} + , + {"PM_MRK_IMR_RELOAD", -1} + , + {"PM_MRK_LD_MISS_L1", -1} + , + {"PM_MRK_LD_MISS_L1_LSU0", -1} + , + {"PM_MRK_LD_MISS_L1_LSU1", -1} + , + {"PM_MRK_STCX_FAIL", -1} + , + {"PM_MRK_ST_CMPL", -1} + , + {"PM_MRK_ST_MISS_L1", -1} + , + {"PM_PMC8_OVERFLOW", -1} + , + {"PM_RUN_CYC", -1} + , + {"PM_SNOOP_TLBIE", -1} + , + {"PM_STCX_FAIL", -1} + , + {"PM_STCX_PASS", -1} + , + {"PM_ST_MISS_L1", -1} + , + {"PM_SUSPENDED", -1} + , + {"PM_XER_MAP_FULL_CYC", -1} + , + {"PM_FPU_FMA", -1} + , + {"PM_FPU_STALL3", -1} + , + {"PM_GCT_EMPTY_SRQ_FULL", -1} + , + {"PM_GRP_DISP", -1} + , + {"PM_INST_FROM_MEM", -1} + , + {"PM_LSU_FLUSH_UST", -1} + , + {"PM_LSU_LMQ_SRQ_EMPTY_CYC", -1} + , + {"PM_LSU_REJECT_LMQ_FULL", -1} + , + {"PM_MRK_BRU_FIN", -1} + , + {"PM_PMC1_OVERFLOW", -1} + , + {"PM_THRESH_TIMEO", -1} + , + {"PM_WORK_HELD", -1} + , + {"PM_BR_ISSUED", -1} + , + {"PM_BR_MPRED_CR", -1} + , + {"PM_BR_MPRED_TA", -1} + , + {"PM_CRQ_FULL_CYC", -1} + , + {"PM_DATA_FROM_MEM", -1} + , + {"PM_DC_INV_L2", -1} + , + {"PM_DC_PREF_OUT_OF_STREAMS", -1} + , + {"PM_DC_PREF_STREAM_ALLOC", -1} + , + {"PM_EE_OFF", -1} + , + {"PM_EE_OFF_EXT_INT", -1} + , + {"PM_FLUSH_BR_MPRED", -1} + , + {"PM_FLUSH_LSU_BR_MPRED", -1} + , + {"PM_FPU0_FEST", -1} + , + {"PM_FPU0_FIN", -1} + , + {"PM_FPU0_FMOV_FEST", -1} + , + {"PM_FPU0_FPSCR", -1} + , + {"PM_FPU0_FRSP_FCONV", -1} + , + {"PM_FPU1_FEST", -1} + , + {"PM_FPU1_FIN", -1} + , + {"PM_FPU1_FMOV_FEST", -1} + , + {"PM_FPU1_FRSP_FCONV", -1} + , + {"PM_FPU_FEST", -1} + , + {"PM_FXLS0_FULL_CYC", -1} + , + {"PM_FXLS1_FULL_CYC", -1} + , + {"PM_FXU0_FIN", -1} + , + {"PM_FXU1_FIN", 
-1} + , + {"PM_FXU_FIN", -1} + , + {"PM_GPR_MAP_FULL_CYC", -1} + , + {"PM_GRP_DISP_BLK_SB_CYC", -1} + , + {"PM_HV_CYC", -1} + , + {"PM_INST_FROM_PREF", -1} + , + {"PM_L1_DCACHE_RELOAD_VALID", -1} + , + {"PM_L1_PREF", -1} + , + {"PM_L1_WRITE_CYC", -1} + , + {"PM_L2_PREF", -1} + , + {"PM_LD_MISS_L1", -1} + , + {"PM_LD_MISS_L1_LSU0", -1} + , + {"PM_LD_MISS_L1_LSU1", -1} + , + {"PM_LD_REF_L1_LSU0", -1} + , + {"PM_LD_REF_L1_LSU1", -1} + , + {"PM_LSU0_LDF", -1} + , + {"PM_LSU1_LDF", -1} + , + {"PM_LSU_FLUSH", -1} + , + {"PM_LSU_LMQ_FULL_CYC", -1} + , + {"PM_LSU_LMQ_LHR_MERGE", -1} + , + {"PM_LSU_LMQ_S0_ALLOC", -1} + , + {"PM_LSU_LMQ_S0_VALID", -1} + , + {"PM_LSU_LRQ_FULL_CYC", -1} + , + {"PM_LSU_SRQ_FULL_CYC", -1} + , + {"PM_LSU_SRQ_SYNC_CYC", -1} + , + {"PM_MRK_DATA_FROM_MEM", -1} + , + {"PM_MRK_L1_RELOAD_VALID", -1} + , + {"PM_MRK_LSU0_FLUSH_LRQ", -1} + , + {"PM_MRK_LSU0_FLUSH_SRQ", -1} + , + {"PM_MRK_LSU0_FLUSH_ULD", -1} + , + {"PM_MRK_LSU0_FLUSH_UST", -1} + , + {"PM_MRK_LSU1_FLUSH_LRQ", -1} + , + {"PM_MRK_LSU1_FLUSH_SRQ", -1} + , + {"PM_MRK_LSU1_FLUSH_ULD", -1} + , + {"PM_MRK_LSU1_FLUSH_UST", -1} + , + {"PM_MRK_LSU_SRQ_INST_VALID", -1} + , + {"PM_MRK_ST_CMPL_INT", -1} + , + {"PM_MRK_VMX_FIN", -1} + , + {"PM_PMC2_OVERFLOW", -1} + , + {"PM_STOP_COMPLETION", -1} + , + {"PM_ST_REF_L1_LSU0", -1} + , + {"PM_ST_REF_L1_LSU1", -1} + , + {"PM_0INST_FETCH", -1} + , + {"PM_FPU_FIN", -1} + , + {"PM_FXU1_BUSY_FXU0_IDLE", -1} + , + {"PM_LSU_SRQ_EMPTY_CYC", -1} + , + {"PM_MRK_CRU_FIN", -1} + , + {"PM_MRK_GRP_CMPL", -1} + , + {"PM_PMC3_OVERFLOW", -1} + , + {"PM_1PLUS_PPC_CMPL", -1} + , + {"PM_DATA_FROM_L25_SHR", -1} + , + {"PM_FPU_ALL", -1} + , + {"PM_FPU_SINGLE", -1} + , + {"PM_FXU_IDLE", -1} + , + {"PM_GRP_DISP_SUCCESS", -1} + , + {"PM_GRP_MRK", -1} + , + {"PM_INST_FROM_L25_SHR", -1} + , + {"PM_LSU_FLUSH_SRQ", -1} + , + {"PM_LSU_REJECT_ERAT_MISS", -1} + , + {"PM_MRK_DATA_FROM_L25_SHR", -1} + , + {"PM_MRK_GRP_TIMEO", -1} + , + {"PM_PMC4_OVERFLOW", -1} + , + {"PM_DATA_FROM_L25_MOD", 
-1} + , + {"PM_FPU_FSQRT", -1} + , + {"PM_FPU_STF", -1} + , + {"PM_FXU_BUSY", -1} + , + {"PM_INST_FROM_L25_MOD", -1} + , + {"PM_LSU_DERAT_MISS", -1} + , + {"PM_LSU_FLUSH_LRQ", -1} + , + {"PM_LSU_REJECT_RELOAD_CDF", -1} + , + {"PM_MRK_DATA_FROM_L25_MOD", -1} + , + {"PM_MRK_FXU_FIN", -1} + , + {"PM_MRK_GRP_ISSUED", -1} + , + {"PM_MRK_ST_GPS", -1} + , + {"PM_PMC5_OVERFLOW", -1} + , + {"PM_FPU_FRSP_FCONV", -1} + , + {"PM_FXU0_BUSY_FXU1_IDLE", -1} + , + {"PM_GRP_CMPL", -1} + , + {"PM_MRK_FPU_FIN", -1} + , + {"PM_MRK_INST_FIN", -1} + , + {"PM_PMC6_OVERFLOW", -1} + , + {"PM_ST_REF_L1", -1} + , + {"PM_EXT_INT", -1} + , + {"PM_FPU_FMOV_FEST", -1} + , + {"PM_LD_REF_L1", -1} + , + {"PM_LSU_LDF", -1} + , + {"PM_MRK_LSU_FIN", -1} + , + {"PM_PMC7_OVERFLOW", -1} + , + {"PM_TB_BIT_TRANS", -1} +}; diff --git a/src/components/perfmon2/Rules.perfmon2 b/src/components/perfmon2/Rules.perfmon2 new file mode 100644 index 0000000..8c28898 --- /dev/null +++ b/src/components/perfmon2/Rules.perfmon2 @@ -0,0 +1,6 @@ + +COMPSRCS += components/perfmon2/perfmon.c +COMPOBJS += perfmon.o + +perfmon.o: components/perfmon2/perfmon.c components/perfmon2/perfmon.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perfmon2/perfmon.c -o perfmon.o diff --git a/src/components/perfmon2/perfmon.c b/src/components/perfmon2/perfmon.c new file mode 100644 index 0000000..f3462ba --- /dev/null +++ b/src/components/perfmon2/perfmon.c @@ -0,0 +1,2264 @@ +/* +* File: perfmon.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: Brian Sheely +* bsheely@eecs.utk.edu +*/ + +/* TODO LIST: + - Events for all platforms + - Derived events for all platforms + - Latency profiling + - BTB/IPIEAR sampling + - Test on ITA2, Pentium 4 + - hwd_ntv_code_to_name + - Make native map carry major events, not umasks + - Enum event uses native_map not pfm() + - Hook up globals to be freed to sub_info + - Better feature bit support for IEAR +*/ + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include 
"papi_memory.h" +#include "papi_libpfm_events.h" +#include "extras.h" + +#include "perfmon.h" + +#include "linux-memory.h" +#include "linux-timer.h" +#include "linux-common.h" + +#ifdef __ia64__ +#include "perfmon/pfmlib_itanium2.h" +#include "perfmon/pfmlib_montecito.h" +#endif + +typedef unsigned uint; + +/* Advance declarations */ +static int _papi_pfm_set_overflow( EventSetInfo_t * ESI, int EventIndex, + int threshold ); +papi_vector_t _perfmon2_vector; + + +/* Static locals */ + +static int _perfmon2_pfm_pmu_type = -1; +static pfmlib_regmask_t _perfmon2_pfm_unavailable_pmcs; +static pfmlib_regmask_t _perfmon2_pfm_unavailable_pmds; + +/* Debug functions */ + +#ifdef DEBUG +static void +dump_smpl_arg( pfm_dfl_smpl_arg_t * arg ) +{ + SUBDBG( "SMPL_ARG.buf_size = %llu\n", + ( unsigned long long ) arg->buf_size ); + SUBDBG( "SMPL_ARG.buf_flags = %d\n", arg->buf_flags ); +} + +static void +dump_sets( pfarg_setdesc_t * set, int num_sets ) +{ + int i; + + for ( i = 0; i < num_sets; i++ ) { + SUBDBG( "SET[%d]\n", i ); + SUBDBG( "SET[%d].set_id = %d\n", i, set[i].set_id ); + // SUBDBG("SET[%d].set_id_next = %d\n",i,set[i].set_id_next); + SUBDBG( "SET[%d].set_flags = %d\n", i, set[i].set_flags ); + SUBDBG( "SET[%d].set_timeout = %llu\n", i, + ( unsigned long long ) set[i].set_timeout ); + // SUBDBG("SET[%d].set_mmap_offset = %#016llx\n",i,(unsigned long long)set[i].set_mmap_offset); + } +} + +static void +dump_setinfo( pfarg_setinfo_t * setinfo, int num_sets ) +{ + int i; + + for ( i = 0; i < num_sets; i++ ) { + SUBDBG( "SETINFO[%d]\n", i ); + SUBDBG( "SETINFO[%d].set_id = %d\n", i, setinfo[i].set_id ); + // SUBDBG("SETINFO[%d].set_id_next = %d\n",i,setinfo[i].set_id_next); + SUBDBG( "SETINFO[%d].set_flags = %d\n", i, setinfo[i].set_flags ); + SUBDBG( "SETINFO[%d].set_ovfl_pmds[0] = %#016llx\n", i, + ( unsigned long long ) setinfo[i].set_ovfl_pmds[0] ); + SUBDBG( "SETINFO[%d].set_runs = %llu\n", i, + ( unsigned long long ) setinfo[i].set_runs ); + SUBDBG( 
"SETINFO[%d].set_timeout = %llu\n", i, + ( unsigned long long ) setinfo[i].set_timeout ); + SUBDBG( "SETINFO[%d].set_act_duration = %llu\n", i, + ( unsigned long long ) setinfo[i].set_act_duration ); + // SUBDBG("SETINFO[%d].set_mmap_offset = %#016llx\n",i,(unsigned long long)setinfo[i].set_mmap_offset); + SUBDBG( "SETINFO[%d].set_avail_pmcs[0] = %#016llx\n", i, + ( unsigned long long ) setinfo[i].set_avail_pmcs[0] ); + SUBDBG( "SETINFO[%d].set_avail_pmds[0] = %#016llx\n", i, + ( unsigned long long ) setinfo[i].set_avail_pmds[0] ); + } +} + +static void +dump_pmc( pfm_control_state_t * ctl ) +{ + unsigned int i; + pfarg_pmc_t *pc = ctl->pc; + + for ( i = 0; i < ctl->out.pfp_pmc_count; i++ ) { + SUBDBG( "PC[%d]\n", i ); + SUBDBG( "PC[%d].reg_num = %d\n", i, pc[i].reg_num ); + SUBDBG( "PC[%d].reg_set = %d\n", i, pc[i].reg_set ); + SUBDBG( "PC[%d].reg_flags = %#08x\n", i, pc[i].reg_flags ); + SUBDBG( "PC[%d].reg_value = %#016llx\n", i, + ( unsigned long long ) pc[i].reg_value ); + } +} + +static void +dump_pmd( pfm_control_state_t * ctl ) +{ + unsigned int i; + pfarg_pmd_t *pd = ctl->pd; + + for ( i = 0; i < ctl->in.pfp_event_count; i++ ) { + SUBDBG( "PD[%d]\n", i ); + SUBDBG( "PD[%d].reg_num = %d\n", i, pd[i].reg_num ); + SUBDBG( "PD[%d].reg_set = %d\n", i, pd[i].reg_set ); + SUBDBG( "PD[%d].reg_flags = %#08x\n", i, pd[i].reg_flags ); + SUBDBG( "PD[%d].reg_value = %#016llx\n", i, + ( unsigned long long ) pd[i].reg_value ); + SUBDBG( "PD[%d].reg_long_reset = %llu\n", i, + ( unsigned long long ) pd[i].reg_long_reset ); + SUBDBG( "PD[%d].reg_short_reset = %llu\n", i, + ( unsigned long long ) pd[i].reg_short_reset ); + SUBDBG( "PD[%d].reg_last_reset_val = %llu\n", i, + ( unsigned long long ) pd[i].reg_last_reset_val ); + SUBDBG( "PD[%d].reg_ovfl_switch_cnt = %llu\n", i, + ( unsigned long long ) pd[i].reg_ovfl_switch_cnt ); + SUBDBG( "PD[%d].reg_reset_pmds[0] = %#016llx\n", i, + ( unsigned long long ) pd[i].reg_reset_pmds[0] ); + SUBDBG( "PD[%d].reg_smpl_pmds[0] = 
%#016llx\n", i, + ( unsigned long long ) pd[i].reg_smpl_pmds[0] ); + SUBDBG( "PD[%d].reg_smpl_eventid = %llu\n", i, + ( unsigned long long ) pd[i].reg_smpl_eventid ); + SUBDBG( "PD[%d].reg_random_mask = %llu\n", i, + ( unsigned long long ) pd[i].reg_random_mask ); + SUBDBG( "PD[%d].reg_random_seed = %d\n", i, pd[i].reg_random_seed ); + } +} + +static void +dump_smpl_hdr( pfm_dfl_smpl_hdr_t * hdr ) +{ + SUBDBG( "SMPL_HDR.hdr_count = %llu\n", + ( unsigned long long ) hdr->hdr_count ); + SUBDBG( "SMPL_HDR.hdr_cur_offs = %llu\n", + ( unsigned long long ) hdr->hdr_cur_offs ); + SUBDBG( "SMPL_HDR.hdr_overflows = %llu\n", + ( unsigned long long ) hdr->hdr_overflows ); + SUBDBG( "SMPL_HDR.hdr_buf_size = %llu\n", + ( unsigned long long ) hdr->hdr_buf_size ); + SUBDBG( "SMPL_HDR.hdr_min_buf_space = %llu\n", + ( unsigned long long ) hdr->hdr_min_buf_space ); + SUBDBG( "SMPL_HDR.hdr_version = %d\n", hdr->hdr_version ); + SUBDBG( "SMPL_HDR.hdr_buf_flags = %d\n", hdr->hdr_buf_flags ); +} + +static void +dump_smpl( pfm_dfl_smpl_entry_t * entry ) +{ + SUBDBG( "SMPL.pid = %d\n", entry->pid ); + SUBDBG( "SMPL.ovfl_pmd = %d\n", entry->ovfl_pmd ); + SUBDBG( "SMPL.last_reset_val = %llu\n", + ( unsigned long long ) entry->last_reset_val ); + SUBDBG( "SMPL.ip = %#llx\n", ( unsigned long long ) entry->ip ); + SUBDBG( "SMPL.tstamp = %llu\n", ( unsigned long long ) entry->tstamp ); + SUBDBG( "SMPL.cpu = %d\n", entry->cpu ); + SUBDBG( "SMPL.set = %d\n", entry->set ); + SUBDBG( "SMPL.tgid = %d\n", entry->tgid ); +} +#endif + +#define PFM_MAX_PMCDS 20 + +int +_papi_pfm_write_pmcs( pfm_context_t * ctx, pfm_control_state_t * ctl ) +{ + ( void ) ctx; /*unused */ + unsigned int i = 0; + int ret; + + SUBDBG( "PFM_WRITE_PMCS(%d,%p,%d)\n", ctl->ctx_fd, ctl->pc, + ctl->out.pfp_pmc_count ); + if ( ctl->out.pfp_pmc_count > PFM_MAX_PMCDS ) { + for ( i = 0; i < ctl->out.pfp_pmc_count - PFM_MAX_PMCDS; + i += PFM_MAX_PMCDS ) { + if ( ( ret = + pfm_write_pmcs( ctl->ctx_fd, ctl->pc + i, + PFM_MAX_PMCDS ) ) ) 
{ + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmc( ctl ) ); + PAPIERROR( "pfm_write_pmcs(%d,%p,%d): %s", ctl->ctx_fd, ctl->pc, + ctl->out.pfp_pmc_count, strerror( ret ) ); + return ( PAPI_ESYS ); + } + } + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmc( ctl ) ); + } + if ( ( ret = + pfm_write_pmcs( ctl->ctx_fd, ctl->pc + i, + ctl->out.pfp_pmc_count - i ) ) ) { + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmc( ctl ) ); + PAPIERROR( "pfm_write_pmcs(%d,%p,%d): %s", ctl->ctx_fd, ctl->pc, + ctl->out.pfp_pmc_count, strerror( ret ) ); + return ( PAPI_ESYS ); + } + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmc( ctl ) ); + + return PAPI_OK; +} + +int +_papi_pfm_write_pmds( pfm_context_t * ctx, pfm_control_state_t * ctl ) +{ + ( void ) ctx; /*unused */ + unsigned int i = 0; + int ret; + + SUBDBG( "PFM_WRITE_PMDS(%d,%p,%d)\n", ctl->ctx_fd, ctl->pd, + ctl->in.pfp_event_count ); + if ( ctl->in.pfp_event_count > PFM_MAX_PMCDS ) { + for ( i = 0; i < ctl->in.pfp_event_count - PFM_MAX_PMCDS; + i += PFM_MAX_PMCDS ) { + if ( ( ret = + pfm_write_pmds( ctl->ctx_fd, ctl->pd + i, + PFM_MAX_PMCDS ) ) ) { + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); + PAPIERROR( "pfm_write_pmds(%d,%p,%d): errno=%d %s", ctl->ctx_fd, + ctl->pd, ctl->in.pfp_event_count, errno, + strerror( ret ) ); + perror( "pfm_write_pmds" ); + return ( PAPI_ESYS ); + } + } + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); + } + if ( ( ret = + pfm_write_pmds( ctl->ctx_fd, ctl->pd + i, + ctl->in.pfp_event_count - i ) ) ) { + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); + PAPIERROR( "pfm_write_pmds(%d,%p,%d): errno=%d %s", ctl->ctx_fd, + ctl->pd, ctl->in.pfp_event_count, errno, strerror( ret ) ); + perror( "pfm_write_pmds" ); + return ( PAPI_ESYS ); + } + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); + + return PAPI_OK; +} + +int +_papi_pfm_read_pmds( pfm_context_t * ctx, pfm_control_state_t * ctl ) +{ + ( void ) ctx; /*unused */ + unsigned int i = 0; + int ret; + + SUBDBG( "PFM_READ_PMDS(%d,%p,%d)\n", ctl->ctx_fd, ctl->pd, + ctl->in.pfp_event_count ); + if 
( ctl->in.pfp_event_count > PFM_MAX_PMCDS ) { + for ( i = 0; i < ctl->in.pfp_event_count - PFM_MAX_PMCDS; + i += PFM_MAX_PMCDS ) { + if ( ( ret = + pfm_read_pmds( ctl->ctx_fd, ctl->pd + i, + PFM_MAX_PMCDS ) ) ) { + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); + PAPIERROR( "pfm_read_pmds(%d,%p,%d): %s", ctl->ctx_fd, ctl->pd, + ctl->in.pfp_event_count, strerror( ret ) ); + return ( ( errno == EBADF ) ? PAPI_ECLOST : PAPI_ESYS ); + } + } + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); + } + if ( ( ret = + pfm_read_pmds( ctl->ctx_fd, ctl->pd + i, + ctl->in.pfp_event_count - i ) ) ) { + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); + PAPIERROR( "pfm_read_pmds(%d,%p,%d): %s", ctl->ctx_fd, ctl->pd, + ctl->in.pfp_event_count, strerror( ret ) ); + return ( ( errno == EBADF ) ? PAPI_ECLOST : PAPI_ESYS ); + } + DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); + + return PAPI_OK; +} + + +/* This routine effectively does argument checking as the real magic will happen + in compute_kernel_args. This just gets the value back from the kernel. 
*/ + +static int +check_multiplex_timeout( int ctx_fd, unsigned long *timeout_ns ) +{ + int ret; + pfarg_setdesc_t set[2]; + + memset( set, 0, sizeof ( pfarg_setdesc_t ) * 2 ); + set[1].set_id = 1; + set[1].set_flags = PFM_SETFL_TIME_SWITCH; + set[1].set_timeout = *timeout_ns; + SUBDBG( "Multiplexing interval requested is %llu ns.\n", + ( unsigned long long ) set[1].set_timeout ); + + /* Create a test eventset */ + + SUBDBG( "PFM_CREATE_EVTSETS(%d,%p,1)\n", ctx_fd, &set[1] ); + if ( ( ret = pfm_create_evtsets( ctx_fd, &set[1], 1 ) ) != PFMLIB_SUCCESS ) { + DEBUGCALL( DEBUG_SUBSTRATE, dump_sets( &set[1], 1 ) ); + PAPIERROR( "pfm_create_evtsets(%d,%p,%d): %s", ctx_fd, &set[1], 1, + strerror( ret ) ); + return ( PAPI_ESYS ); + } + + SUBDBG( "Multiplexing interval returned is %llu ns.\n", + ( unsigned long long ) set[1].set_timeout ); + *timeout_ns = set[1].set_timeout; + + /* Delete the second eventset */ + + pfm_delete_evtsets( ctx_fd, &set[1], 1 ); + + return ( PAPI_OK ); +} + +/* The below function is stolen from libpfm from Stephane Eranian */ +static int +detect_timeout_and_unavail_pmu_regs( pfmlib_regmask_t * r_pmcs, + pfmlib_regmask_t * r_pmds, + unsigned long *timeout_ns ) +{ + pfarg_ctx_t ctx; + pfarg_setinfo_t setf; + unsigned int i; + int ret, j, myfd; + + memset( r_pmcs, 0, sizeof ( *r_pmcs ) ); + memset( r_pmds, 0, sizeof ( *r_pmds ) ); + + memset( &ctx, 0, sizeof ( ctx ) ); + memset( &setf, 0, sizeof ( setf ) ); + /* + * if no context descriptor is passed, then create + * a temporary context + */ + SUBDBG( "PFM_CREATE_CONTEXT(%p,%p,%p,%d)\n", &ctx, NULL, NULL, 0 ); + myfd = pfm_create_context( &ctx, NULL, NULL, 0 ); + if ( myfd == -1 ) { + PAPIERROR( "detect_unavail_pmu_regs:pfm_create_context(): %s", + strerror( errno ) ); + return ( PAPI_ESYS ); + } + SUBDBG( "PFM_CREATE_CONTEXT returned fd %d\n", myfd ); + /* + * retrieve available register bitmasks from set0 + * which is guaranteed to exist for every context + */ + ret = pfm_getinfo_evtsets( myfd, 
&setf, 1 ); + if ( ret != PFMLIB_SUCCESS ) { + PAPIERROR( "pfm_getinfo_evtsets(): %s", strerror( ret ) ); + return ( PAPI_ESYS ); + } + DEBUGCALL( DEBUG_SUBSTRATE, dump_setinfo( &setf, 1 ) ); + if ( r_pmcs ) + for ( i = 0; i < PFM_PMC_BV; i++ ) { + for ( j = 0; j < 64; j++ ) { + if ( ( setf.set_avail_pmcs[i] & ( 1ULL << j ) ) == 0 ) + pfm_regmask_set( r_pmcs, ( i << 6 ) + j ); + } + } + if ( r_pmds ) + for ( i = 0; i < PFM_PMD_BV; i++ ) { + for ( j = 0; j < 64; j++ ) { + if ( ( setf.set_avail_pmds[i] & ( 1ULL << j ) ) == 0 ) + pfm_regmask_set( r_pmds, ( i << 6 ) + j ); + } + } + check_multiplex_timeout( myfd, timeout_ns ); + i = close( myfd ); + SUBDBG( "CLOSE fd %d returned %d\n", myfd, i ); + return PAPI_OK; +} + +/* BEGIN COMMON CODE */ + +static inline int +compute_kernel_args( hwd_control_state_t * ctl0 ) +{ + pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; + pfmlib_input_param_t *inp = &ctl->in; + pfmlib_output_param_t *outp = &ctl->out; + pfmlib_input_param_t tmpin; + pfmlib_output_param_t tmpout; +#if 0 + /* This will be used to fixup the overflow and sample args after re-allocation */ + pfarg_pmd_t oldpd; +#endif + pfarg_pmd_t *pd = ctl->pd; + pfarg_pmc_t *pc = ctl->pc; + pfarg_setdesc_t *sets = ctl->set; + pfarg_setinfo_t *setinfos = ctl->setinfo; + int *num_sets = &ctl->num_sets; + unsigned int set = 0; + int donepc = 0, donepd = 0, ret, j; + unsigned int i, dispatch_count = inp->pfp_event_count; + int togo = inp->pfp_event_count, done = 0; + + /* Save old PD array so we can reconstruct certain flags. 
*/ + /* This can be removed when we have higher level code call */ + /* set_profile,set_overflow etc when there is hardware */ + /* (component) support, but this change won't happen for PAPI 3.5 */ + + SUBDBG + ( "entry multiplexed %d, pfp_event_count %d, num_cntrs %d, num_sets %d\n", + ctl->multiplexed, inp->pfp_event_count, _perfmon2_vector.cmp_info.num_cntrs, + *num_sets ); + if ( ( ctl->multiplexed ) && + ( inp->pfp_event_count > + ( unsigned int ) _perfmon2_vector.cmp_info.num_cntrs ) ) { + dispatch_count = _perfmon2_vector.cmp_info.num_cntrs; + } + + while ( togo ) { + again: + memset( &tmpin, 0x0, sizeof ( tmpin ) ); + memset( &tmpout, 0x0, sizeof ( tmpout ) ); + + SUBDBG( "togo %d, done %d, dispatch_count %d, num_cntrs %d\n", togo, + done, dispatch_count, _perfmon2_vector.cmp_info.num_cntrs ); + tmpin.pfp_event_count = dispatch_count; + tmpin.pfp_dfl_plm = inp->pfp_dfl_plm; + + /* Make sure we tell dispatch that these PMC's are not available */ + memcpy( &tmpin.pfp_unavail_pmcs, &_perfmon2_pfm_unavailable_pmcs, + sizeof ( _perfmon2_pfm_unavailable_pmcs ) ); + + for ( i = 0, j = done; i < dispatch_count; i++, j++ ) { + memcpy( tmpin.pfp_events + i, inp->pfp_events + j, + sizeof ( pfmlib_event_t ) ); + } + + if ( ( ret = + pfm_dispatch_events( &tmpin, NULL, &tmpout, + NULL ) ) != PFMLIB_SUCCESS ) { + if ( ctl->multiplexed ) { + dispatch_count--; + if ( dispatch_count == 0 ) { + PAPIERROR( "pfm_dispatch_events(): %s", + pfm_strerror( ret ) ); + return ( _papi_libpfm_error( ret ) ); + } + SUBDBG + ( "Dispatch failed because of counter conflict, trying again with %d counters.\n", + dispatch_count ); + goto again; + } + PAPIERROR( "pfm_dispatch_events(): %s", pfm_strerror( ret ) ); + return ( _papi_libpfm_error( ret ) ); + } + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. 
+ * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for ( i = 0; i < tmpout.pfp_pmc_count; i++, donepc++ ) { + pc[donepc].reg_num = tmpout.pfp_pmcs[i].reg_num; + pc[donepc].reg_value = tmpout.pfp_pmcs[i].reg_value; + pc[donepc].reg_set = set; + SUBDBG( "PC%d (i%d) is reg num %d, value %llx, set %d\n", donepc, i, + pc[donepc].reg_num, + ( unsigned long long ) pc[donepc].reg_value, + pc[donepc].reg_set ); + } + + /* figure out pmd mapping from output pmc */ + +#if defined(HAVE_PFM_REG_EVT_IDX) + for ( i = 0, j = 0; i < tmpin.pfp_event_count; i++, donepd++ ) { + pd[donepd].reg_num = tmpout.pfp_pmcs[j].reg_pmd_num; + pd[donepd].reg_set = set; + SUBDBG( "PD%d (i%d,j%d) is reg num %d, set %d\n", donepd, i, j, + pd[donepd].reg_num, pd[donepd].reg_set ); + + /* Skip over entries that map to the same PMD, + PIV has 2 PMCS for every PMD */ + + for ( ; j < tmpout.pfp_pmc_count; j++ ) + if ( tmpout.pfp_pmcs[j].reg_evt_idx != i ) + break; + } +#else + for ( i = 0; i < tmpout.pfp_pmd_count; i++, donepd++ ) { + pd[donepd].reg_num = tmpout.pfp_pmds[i].reg_num; + pd[donepd].reg_set = set; + SUBDBG( "PD%d (i%d) is reg num %d, set %d\n", donepd, i, + pd[donepd].reg_num, pd[donepd].reg_set ); + } +#endif + + togo -= dispatch_count; + done += dispatch_count; + if ( togo > _perfmon2_vector.cmp_info.num_cntrs ) + dispatch_count = _perfmon2_vector.cmp_info.num_cntrs; + else + dispatch_count = togo; + + setinfos[set].set_id = set; + sets[set].set_id = set; + set++; + } + + *num_sets = set; + outp->pfp_pmc_count = donepc; + + if ( ctl->multiplexed && ( set > 1 ) ) { + for ( i = 0; i < set; i++ ) { + sets[i].set_flags = PFM_SETFL_TIME_SWITCH; + sets[i].set_timeout = ctl->multiplexed; + } + } + SUBDBG + ( "exit multiplexed %d (ns switch time), pfp_pmc_count %d, num_sets %d\n", + ctl->multiplexed, 
outp->pfp_pmc_count, *num_sets ); + return ( PAPI_OK ); +} + +int +tune_up_fd( int ctx_fd ) +{ + int ret; + + /* set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., + * fd not visible to child. */ + ret = fcntl( ctx_fd, F_SETFD, FD_CLOEXEC ); + if ( ret == -1 ) { + PAPIERROR( "cannot fcntl(FD_CLOEXEC) on %d: %s", ctx_fd, + strerror( errno ) ); + return ( PAPI_ESYS ); + } + /* setup asynchronous notification on the file descriptor */ + ret = fcntl( ctx_fd, F_SETFL, fcntl( ctx_fd, F_GETFL, 0 ) | O_ASYNC ); + if ( ret == -1 ) { + PAPIERROR( "cannot fcntl(O_ASYNC) on %d: %s", ctx_fd, + strerror( errno ) ); + return ( PAPI_ESYS ); + } + /* get ownership of the descriptor */ + ret = fcntl( ctx_fd, F_SETOWN, mygettid( ) ); + if ( ret == -1 ) { + PAPIERROR( "cannot fcntl(F_SETOWN) on %d: %s", ctx_fd, + strerror( errno ) ); + return ( PAPI_ESYS ); + } + /* + * when you explicitely declare that you want a particular signal, + * even with you use the default signal, the kernel will send more + * information concerning the event to the signal handler. + * + * In particular, it will send the file descriptor from which the + * event is originating which can be quite useful when monitoring + * multiple tasks from a single thread. 
+	 */
+	ret = fcntl( ctx_fd, F_SETSIG, _perfmon2_vector.cmp_info.hardware_intr_sig );
+	if ( ret == -1 ) {
+		PAPIERROR( "cannot fcntl(F_SETSIG,%d) on %d: %s",
+				   _perfmon2_vector.cmp_info.hardware_intr_sig, ctx_fd,
+				   strerror( errno ) );
+		return ( PAPI_ESYS );
+	}
+	return ( PAPI_OK );
+}
+
+/*
+ * Prepare ctl for monitoring thread `tid` rather than the calling thread.
+ *
+ * Allocates a zeroed pfarg_ctx_t / pfarg_load_t pair, creates a perfmon
+ * context and stores the descriptor, the context and the load arguments
+ * (load_pid = tid) in ctl, viewed as a pfm_control_state_t.
+ *
+ * The target has to be under ptrace control already: a PTRACE_ATTACH that
+ * succeeds shows it was not, so it is undone and the call is refused.
+ *
+ * Returns PAPI_OK on success; PAPI_ENOMEM if an allocation fails,
+ * PAPI_EINVAL if the ptrace probe rules the target out, PAPI_ESYS if the
+ * context cannot be created.  On any failure both allocations are released
+ * and ctl is left untouched.
+ */
+static int
+attach( hwd_control_state_t * ctl, unsigned long tid )
+{
+	pfm_control_state_t *state = ( pfm_control_state_t * ) ctl;
+	pfarg_ctx_t *newctx = calloc( 1, sizeof ( *newctx ) );
+	pfarg_load_t *load_args = calloc( 1, sizeof ( *load_args ) );
+	int ret;
+
+	if ( ( newctx == NULL ) || ( load_args == NULL ) ) {
+		/* one of the two may have succeeded; the common exit frees it */
+		ret = PAPI_ENOMEM;
+		goto fail;
+	}
+
+	/* Make sure the process exists and is being ptraced() */
+
+	ret = ptrace( PTRACE_ATTACH, tid, NULL, NULL );
+	if ( ret == 0 ) {
+		ptrace( PTRACE_DETACH, tid, NULL, NULL );
+		PAPIERROR( "Process/thread %lu is not being ptraced", tid );
+		ret = PAPI_EINVAL;
+		goto fail;
+	}
+	/* If we get here, then we should hope that the process is being
+	   ptraced, if not, then we probably can't attach to it. */
+
+	if ( ( ret == -1 ) && ( errno != EPERM ) ) {
+		PAPIERROR( "Process/thread %lu cannot be ptraced: %s", tid,
+				   strerror( errno ) );
+		ret = PAPI_EINVAL;
+		goto fail;
+	}
+
+	SUBDBG( "PFM_CREATE_CONTEXT(%p,%p,%p,%d)\n", newctx, NULL, NULL, 0 );
+	if ( ( ret = pfm_create_context( newctx, NULL, NULL, 0 ) ) == -1 ) {
+		PAPIERROR( "attach:pfm_create_context(): %s", strerror( errno ) );
+		ret = PAPI_ESYS;
+		goto fail;
+	}
+	SUBDBG( "PFM_CREATE_CONTEXT returned fd %d\n", ret );
+	/* result deliberately not checked, as before; tune_up_fd logs its own
+	   failures through PAPIERROR */
+	tune_up_fd( ret );
+
+	state->ctx_fd = ret;
+	state->ctx = newctx;
+	load_args->load_pid = tid;
+	state->load = load_args;
+
+	return ( PAPI_OK );
+
+  fail:
+	free( newctx );
+	free( load_args );
+	return ( ret );
+}
+
+/*
+ * Undo attach(): close ctl's descriptor, free the context and load
+ * arguments it points to, and make ctl refer to the ctx, ctx_fd and load
+ * members of ctx (viewed as pfm_context_t) again.  Always returns PAPI_OK.
+ */
+static int
+detach( hwd_context_t * ctx, hwd_control_state_t * ctl )
+{
+	int i;
+
+	i = close( ( ( pfm_control_state_t * ) ctl )->ctx_fd );
+	SUBDBG( "CLOSE fd %d returned %d\n",
+			( ( pfm_control_state_t * ) ctl )->ctx_fd, i );
+	(void) i;
+
+	/* Restore to main threads context */
+	free( ( ( pfm_control_state_t * ) ctl )->ctx );
+	( ( pfm_control_state_t * ) ctl )->ctx = &( ( pfm_context_t * ) ctx )->ctx;
+	( ( pfm_control_state_t * ) ctl )->ctx_fd =
+		( ( pfm_context_t * ) ctx )->ctx_fd;
+	free( ( ( pfm_control_state_t * ) ctl )->load );
+	( ( pfm_control_state_t * ) ctl )->load =
+		&( ( pfm_context_t * ) ctx )->load;
+
+	return ( PAPI_OK );
+}
+
+static inline int
+set_domain( hwd_control_state_t * ctl0, int domain )
+{
+	pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0;
+	int mode = 0, did = 0;
+	pfmlib_input_param_t *inp = &ctl->in;
+
+	if ( domain & PAPI_DOM_USER ) {
+		did = 1;
+		mode |= PFM_PLM3;
+	}
+
+	if ( domain & PAPI_DOM_KERNEL ) {
+		did = 1;
+		mode |= PFM_PLM0;
+	}
+
+	if ( domain & PAPI_DOM_SUPERVISOR ) {
+		did = 1;
+		mode |= PFM_PLM1;
+	}
+
+	if ( domain & PAPI_DOM_OTHER ) {
+		did = 1;
+		mode |= PFM_PLM2;
+	}
+
+	if ( !did )
+		return ( PAPI_EINVAL );
+
+	inp->pfp_dfl_plm =
mode; + + return ( compute_kernel_args( ctl ) ); +} + +static inline int +set_granularity( hwd_control_state_t * this_state, int domain ) +{ + ( void ) this_state; /*unused */ + switch ( domain ) { + case PAPI_GRN_PROCG: + case PAPI_GRN_SYS: + case PAPI_GRN_SYS_CPU: + case PAPI_GRN_PROC: + return PAPI_ECMP; + case PAPI_GRN_THR: + break; + default: + return PAPI_EINVAL; + } + return PAPI_OK; +} + +/* This function should tell your kernel extension that your children + inherit performance register information and propagate the values up + upon child exit and parent wait. */ + +static inline int +set_inherit( int arg ) +{ + ( void ) arg; /*unused */ + return PAPI_ECMP; +} + +static int +get_string_from_file( char *file, char *str, int len ) +{ + FILE *f = fopen( file, "r" ); + char buf[PAPI_HUGE_STR_LEN]; + if ( f == NULL ) { + PAPIERROR( "fopen(%s): %s", file, strerror( errno ) ); + return ( PAPI_ESYS ); + } + if ( fscanf( f, "%s\n", buf ) != 1 ) { + PAPIERROR( "fscanf(%s, %%s\\n): Unable to scan 1 token", file ); + fclose( f ); + return PAPI_ESYS; + } + strncpy( str, buf, ( len > PAPI_HUGE_STR_LEN ? PAPI_HUGE_STR_LEN : len ) ); + fclose( f ); + return ( PAPI_OK ); +} + +int +_papi_pfm_init_component( int cidx ) +{ + int retval; + char buf[PAPI_HUGE_STR_LEN]; + + /* The following checks the PFMLIB version + against the perfmon2 kernel version... 
*/ + strncpy( _perfmon2_vector.cmp_info.support_version, buf, + sizeof ( _perfmon2_vector.cmp_info.support_version ) ); + + retval = get_string_from_file( "/sys/kernel/perfmon/version", + _perfmon2_vector.cmp_info.kernel_version, + sizeof ( _perfmon2_vector.cmp_info.kernel_version ) ); + if ( retval != PAPI_OK ) { + strncpy(_perfmon2_vector.cmp_info.disabled_reason, + "/sys/kernel/perfmon/version not found",PAPI_MAX_STR_LEN); + return retval; + } + +#ifdef PFM_VERSION + sprintf( buf, "%d.%d", PFM_VERSION_MAJOR( PFM_VERSION ), + PFM_VERSION_MINOR( PFM_VERSION ) ); + SUBDBG( "Perfmon2 library versions...kernel: %s library: %s\n", + _perfmon2_vector.cmp_info.kernel_version, buf ); + if ( strcmp( _perfmon2_vector.cmp_info.kernel_version, buf ) != 0 ) { + /* do a little exception processing; 81 is compatible with 80 */ + if ( !( ( PFM_VERSION_MINOR( PFM_VERSION ) == 81 ) && + ( strncmp( _perfmon2_vector.cmp_info.kernel_version, "2.8", 3 ) == + 0 ) ) ) { + PAPIERROR( "Version mismatch of libpfm: compiled %s " + "vs. 
installed %s\n", + buf, _perfmon2_vector.cmp_info.kernel_version ); + return PAPI_ESYS; + } + } +#endif + + _perfmon2_vector.cmp_info.hardware_intr_sig = SIGRTMIN + 2, + + + /* Run the libpfm-specific setup */ + retval=_papi_libpfm_init(&_perfmon2_vector, cidx); + if (retval) return retval; + + /* Load the module, find out if any PMC's/PMD's are off limits */ + + /* Perfmon2 timeouts are based on the clock tick, we need to check + them otherwise it will complain at us when we multiplex */ + + unsigned long min_timeout_ns; + + struct timespec ts; + + if ( syscall( __NR_clock_getres, CLOCK_REALTIME, &ts ) == -1 ) { + PAPIERROR( "Could not detect proper HZ rate, multiplexing may fail\n" ); + min_timeout_ns = 10000000; + } else { + min_timeout_ns = ts.tv_nsec; + } + + /* This will fail if we've done timeout detection wrong */ + retval=detect_timeout_and_unavail_pmu_regs( &_perfmon2_pfm_unavailable_pmcs, + &_perfmon2_pfm_unavailable_pmds, + &min_timeout_ns ); + if ( retval != PAPI_OK ) { + return ( retval ); + } + + if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_IBM ) { + /* powerpc */ + _perfmon2_vector.cmp_info.available_domains |= PAPI_DOM_KERNEL | + PAPI_DOM_SUPERVISOR; + if (strcmp(_papi_hwi_system_info.hw_info.model_string, "POWER6" ) == 0) { + _perfmon2_vector.cmp_info.default_domain = PAPI_DOM_USER | + PAPI_DOM_KERNEL | + PAPI_DOM_SUPERVISOR; + } + } else { + _perfmon2_vector.cmp_info.available_domains |= PAPI_DOM_KERNEL; + } + + if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_SUN ) { + switch ( _perfmon2_pfm_pmu_type ) { +#ifdef PFMLIB_SPARC_ULTRA12_PMU + case PFMLIB_SPARC_ULTRA12_PMU: + case PFMLIB_SPARC_ULTRA3_PMU: + case PFMLIB_SPARC_ULTRA3I_PMU: + case PFMLIB_SPARC_ULTRA3PLUS_PMU: + case PFMLIB_SPARC_ULTRA4PLUS_PMU: + break; +#endif + default: + _perfmon2_vector.cmp_info.available_domains |= + PAPI_DOM_SUPERVISOR; + break; + } + } + + if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_CRAY ) { + 
_perfmon2_vector.cmp_info.available_domains |= PAPI_DOM_OTHER; + } + + if ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) || + ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_AMD ) ) { + _perfmon2_vector.cmp_info.fast_counter_read = 1; + _perfmon2_vector.cmp_info.fast_real_timer = 1; + _perfmon2_vector.cmp_info.cntr_umasks = 1; + } + + return PAPI_OK; +} + +int +_papi_pfm_shutdown_component( ) +{ + return PAPI_OK; +} + +static int +_papi_pfm_init_thread( hwd_context_t * thr_ctx ) +{ + pfarg_load_t load_args; + pfarg_ctx_t newctx; + int ret, ctx_fd; + +#if defined(USE_PROC_PTTIMER) + ret = init_proc_thread_timer( thr_ctx ); + if ( ret != PAPI_OK ) + return ( ret ); +#endif + + memset( &newctx, 0, sizeof ( newctx ) ); + memset( &load_args, 0, sizeof ( load_args ) ); + + if ( ( ret = pfm_create_context( &newctx, NULL, NULL, 0 ) ) == -1 ) { + PAPIERROR( "pfm_create_context(): %s", + strerror( errno ) ); + return ( PAPI_ESYS ); + } + SUBDBG( "PFM_CREATE_CONTEXT returned fd %d\n", ret ); + tune_up_fd( ret ); + ctx_fd = ret; + + memcpy( &( ( pfm_context_t * ) thr_ctx )->ctx, &newctx, sizeof ( newctx ) ); + ( ( pfm_context_t * ) thr_ctx )->ctx_fd = ctx_fd; + load_args.load_pid = mygettid( ); + memcpy( &( ( pfm_context_t * ) thr_ctx )->load, &load_args, + sizeof ( load_args ) ); + + return ( PAPI_OK ); +} + +/* reset the hardware counters */ +int +_papi_pfm_reset( hwd_context_t * ctx, hwd_control_state_t * ctl ) +{ + unsigned int i; + int ret; + + /* Read could have clobbered the values */ + for ( i = 0; i < ( ( pfm_control_state_t * ) ctl )->in.pfp_event_count; + i++ ) { + if ( ( ( pfm_control_state_t * ) ctl )->pd[i]. 
+ reg_flags & PFM_REGFL_OVFL_NOTIFY ) + ( ( pfm_control_state_t * ) ctl )->pd[i].reg_value = + ( ( pfm_control_state_t * ) ctl )->pd[i].reg_long_reset; + else + ( ( pfm_control_state_t * ) ctl )->pd[i].reg_value = 0ULL; + } + + ret = + _papi_pfm_write_pmds( ( pfm_context_t * ) ctx, + ( pfm_control_state_t * ) ctl ); + if ( ret != PAPI_OK ) + return PAPI_ESYS; + + return ( PAPI_OK ); +} + +/* write(set) the hardware counters */ +int +_papi_pfm_write( hwd_context_t * ctx, hwd_control_state_t * ctl, + long long *from ) +{ + unsigned int i; + int ret; + + /* Read could have clobbered the values */ + for ( i = 0; i < ( ( pfm_control_state_t * ) ctl )->in.pfp_event_count; + i++ ) { + if ( ( ( pfm_control_state_t * ) ctl )->pd[i]. + reg_flags & PFM_REGFL_OVFL_NOTIFY ) + ( ( pfm_control_state_t * ) ctl )->pd[i].reg_value = + from[i] + + ( ( pfm_control_state_t * ) ctl )->pd[i].reg_long_reset; + else + ( ( pfm_control_state_t * ) ctl )->pd[i].reg_value = from[i]; + } + + ret = + _papi_pfm_write_pmds( ( pfm_context_t * ) ctx, + ( pfm_control_state_t * ) ctl ); + if ( ret != PAPI_OK ) + return PAPI_ESYS; + + + return ( PAPI_OK ); +} + +int +_papi_pfm_read( hwd_context_t * ctx0, hwd_control_state_t * ctl0, + long long **events, int flags ) +{ + ( void ) flags; /*unused */ + unsigned int i; + int ret; + long long tot_runs = 0LL; + pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; + pfm_context_t *ctx = ( pfm_context_t * ) ctx0; + + ret = _papi_pfm_read_pmds( ctx, ctl ); + if ( ret != PAPI_OK ) + return PAPI_ESYS; + + /* Copy the values over */ + + for ( i = 0; i < ctl->in.pfp_event_count; i++ ) { + if ( ctl->pd[i].reg_flags & PFM_REGFL_OVFL_NOTIFY ) + ctl->counts[i] = ctl->pd[i].reg_value - ctl->pd[i].reg_long_reset; + else + ctl->counts[i] = ctl->pd[i].reg_value; + SUBDBG( "PMD[%d] = %lld (LLD),%llu (LLU)\n", i, + ( unsigned long long ) ctl->counts[i], + ( unsigned long long ) ctl->pd[i].reg_value ); + } + *events = ctl->counts; + + /* If we're not multiplexing, bail 
#if defined(__crayxt)
/* When non-zero, _papi_pfm_start() creates a brand new perfmon context
   before programming the registers. */
int _papi_hwd_start_create_context = 0; /* CrayPat checkpoint support */
#endif /* XT */

/*
 * Start counting on a control state.
 *
 * Sequence: (Cray XT only, on request) create a new context; create the
 * event sets when more than one is in use; write the PMCs; zero every PMD
 * that is not flagged for overflow notification; write the PMDs; load the
 * context and start it.
 *
 * Returns PAPI_OK, or PAPI_ESYS as soon as any step fails.
 */
int
_papi_pfm_start( hwd_context_t * ctx0, hwd_control_state_t * ctl0 )
{
    unsigned int i;
    int ret;
    pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0;
    pfm_context_t *ctx = ( pfm_context_t * ) ctx0;

#if defined(__crayxt)
    if ( _papi_hwd_start_create_context ) {
        pfarg_ctx_t tmp;

        memset( &tmp, 0, sizeof ( tmp ) );
        if ( ( ret = pfm_create_context( &tmp, NULL, NULL, 0 ) ) == -1 ) {
            PAPIERROR( "_papi_hwd_init:pfm_create_context(): %s",
                       strerror( errno ) );
            return ( PAPI_ESYS );
        }
        tune_up_fd( ret );
        /* Both the control state and the thread context adopt the new fd */
        ctl->ctx_fd = ctx->ctx_fd = ret;
    }
#endif /* XT */

    /* Extra event sets only exist when multiplexing over several sets */
    if ( ctl->num_sets > 1 ) {
        SUBDBG( "PFM_CREATE_EVTSETS(%d,%p,%d)\n", ctl->ctx_fd, ctl->set,
                ctl->num_sets );
        if ( ( ret =
               pfm_create_evtsets( ctl->ctx_fd, ctl->set,
                                   ctl->num_sets ) ) != PFMLIB_SUCCESS ) {
            DEBUGCALL( DEBUG_SUBSTRATE, dump_sets( ctl->set, ctl->num_sets ) );
            PAPIERROR( "pfm_create_evtsets(%d,%p,%d): errno=%d %s",
                       ctl->ctx_fd, ctl->set, ctl->num_sets, errno,
                       strerror( ret ) );
            perror( "pfm_create_evtsets" );
            return ( PAPI_ESYS );
        }
        DEBUGCALL( DEBUG_SUBSTRATE, dump_sets( ctl->set, ctl->num_sets ) );
    }

    /*
     * Now program the registers
     *
     * We don't use the same variable to indicate the number of elements passed to
     * the kernel because, as we said earlier, pc may contain more elements than
     * the number of events (pmd) we specified, i.e., contains more than counting
     * monitors.
     */

    ret = _papi_pfm_write_pmcs( ctx, ctl );
    if ( ret != PAPI_OK )
        return PAPI_ESYS;

    /* Set counters to zero as per PAPI_start man page, unless it is set to overflow */

    for ( i = 0; i < ctl->in.pfp_event_count; i++ )
        if ( !( ctl->pd[i].reg_flags & PFM_REGFL_OVFL_NOTIFY ) )
            ctl->pd[i].reg_value = 0ULL;

    /*
     * To be read, each PMD must be either written or declared
     * as being part of a sample (reg_smpl_pmds)
     */

    ret = _papi_pfm_write_pmds( ctx, ctl );
    if ( ret != PAPI_OK )
        return PAPI_ESYS;

    SUBDBG( "PFM_LOAD_CONTEXT(%d,%p(%u))\n", ctl->ctx_fd, ctl->load,
            ctl->load->load_pid );
    if ( ( ret = pfm_load_context( ctl->ctx_fd, ctl->load ) ) ) {
        PAPIERROR( "pfm_load_context(%d,%p(%u)): %s", ctl->ctx_fd, ctl->load,
                   ctl->load->load_pid, strerror( ret ) );
        return PAPI_ESYS;
    }

    SUBDBG( "PFM_START(%d,%p)\n", ctl->ctx_fd, NULL );
    if ( ( ret = pfm_start( ctl->ctx_fd, NULL ) ) ) {
        PAPIERROR( "pfm_start(%d): %s", ctl->ctx_fd, strerror( ret ) );
        return ( PAPI_ESYS );
    }
    return PAPI_OK;
}
+ */ + + ret = _papi_pfm_write_pmcs( ctx, ctl ); + if ( ret != PAPI_OK ) + return PAPI_ESYS; + + /* Set counters to zero as per PAPI_start man page, unless it is set to overflow */ + + for ( i = 0; i < ctl->in.pfp_event_count; i++ ) + if ( !( ctl->pd[i].reg_flags & PFM_REGFL_OVFL_NOTIFY ) ) + ctl->pd[i].reg_value = 0ULL; + + /* + * To be read, each PMD must be either written or declared + * as being part of a sample (reg_smpl_pmds) + */ + + ret = _papi_pfm_write_pmds( ctx, ctl ); + if ( ret != PAPI_OK ) + return PAPI_ESYS; + + SUBDBG( "PFM_LOAD_CONTEXT(%d,%p(%u))\n", ctl->ctx_fd, ctl->load, + ctl->load->load_pid ); + if ( ( ret = pfm_load_context( ctl->ctx_fd, ctl->load ) ) ) { + PAPIERROR( "pfm_load_context(%d,%p(%u)): %s", ctl->ctx_fd, ctl->load, + ctl->load->load_pid, strerror( ret ) ); + return PAPI_ESYS; + } + + SUBDBG( "PFM_START(%d,%p)\n", ctl->ctx_fd, NULL ); + if ( ( ret = pfm_start( ctl->ctx_fd, NULL ) ) ) { + PAPIERROR( "pfm_start(%d): %s", ctl->ctx_fd, strerror( ret ) ); + return ( PAPI_ESYS ); + } + return PAPI_OK; +} + +int +_papi_pfm_stop( hwd_context_t * ctx0, hwd_control_state_t * ctl0 ) +{ + ( void ) ctx0; /*unused */ + int ret; + pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; +// pfm_context_t *ctx = (pfm_context_t *)ctx0; + + SUBDBG( "PFM_STOP(%d)\n", ctl->ctx_fd ); + if ( ( ret = pfm_stop( ctl->ctx_fd ) ) ) { + /* If this thread is attached to another thread, and that thread + has exited, we can safely discard the error here. 
/*
 * Round a requested interval up to the interval timer resolution
 * (_papi_os_info.itimer_res_ns).  Requests at or below the resolution
 * yield exactly one resolution unit; larger requests are moved to the next
 * multiple above them -- note that a request that is already an exact
 * multiple is still bumped by one full unit.
 */
static inline int
round_requested_ns( int ns )
{
    if ( ns <= _papi_os_info.itimer_res_ns ) {
        return _papi_os_info.itimer_res_ns;
    } else {
        int leftover_ns = ns % _papi_os_info.itimer_res_ns;
        return ( ns - leftover_ns + _papi_os_info.itimer_res_ns );
    }
}

/*
 * Component control entry point: dispatches on `code` to the option
 * handlers of this file.
 *
 * ctx     thread context; only used by the PAPI_DETACH case
 * code    PAPI option code
 * option  option payload; the member read depends on `code`
 *
 * Returns the handler's result, PAPI_EINVAL for a mismatched
 * itimer/signal pair, or PAPI_ENOSUPP for an unhandled code.
 */
int
_papi_pfm_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option )
{
    switch ( code ) {
    case PAPI_MULTIPLEX:
    {
        /* The rounded interval is stored back into the option and into
           the event set's control state */
        option->multiplex.ns = round_requested_ns( option->multiplex.ns );
        ( ( pfm_control_state_t * ) ( option->multiplex.ESI->ctl_state ) )->
            multiplexed = option->multiplex.ns;
        return ( PAPI_OK );
    }

    case PAPI_ATTACH:
        return ( attach
                 ( ( pfm_control_state_t * ) ( option->attach.ESI->ctl_state ),
                   option->attach.tid ) );
    case PAPI_DETACH:
        return ( detach
                 ( ctx,
                   ( pfm_control_state_t * ) ( option->attach.ESI->
                                               ctl_state ) ) );

    case PAPI_DOMAIN:
        return ( set_domain
                 ( ( pfm_control_state_t * ) ( option->domain.ESI->ctl_state ),
                   option->domain.domain ) );
    case PAPI_GRANUL:
        return ( set_granularity
                 ( ( pfm_control_state_t * ) ( option->granularity.ESI->
                                               ctl_state ),
                   option->granularity.granularity ) );
#if 0
    /* Disabled: address-range restriction cases, compiled out */
    case PAPI_DATA_ADDRESS:
        ret =
            set_default_domain( ( pfm_control_state_t * ) ( option->
                                                            address_range.ESI->
                                                            ctl_state ),
                                option->address_range.domain );
        if ( ret != PAPI_OK )
            return ( ret );
        set_drange( ctx,
                    ( pfm_control_state_t * ) ( option->address_range.ESI->
                                                ctl_state ), option );
        return ( PAPI_OK );
    case PAPI_INSTR_ADDRESS:
        ret =
            set_default_domain( ( pfm_control_state_t * ) ( option->
                                                            address_range.ESI->
                                                            ctl_state ),
                                option->address_range.domain );
        if ( ret != PAPI_OK )
            return ( ret );
        set_irange( ctx,
                    ( pfm_control_state_t * ) ( option->address_range.ESI->
                                                ctl_state ), option );
        return ( PAPI_OK );
#endif


    case PAPI_DEF_ITIMER:
    {
        /* flags are currently ignored, eventually the flags will be able
           to specify whether or not we use POSIX itimers (clock_gettimer) */
        /* Each standard interval timer must be paired with its own signal */
        if ( ( option->itimer.itimer_num == ITIMER_REAL ) &&
             ( option->itimer.itimer_sig != SIGALRM ) )
            return PAPI_EINVAL;
        if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) &&
             ( option->itimer.itimer_sig != SIGVTALRM ) )
            return PAPI_EINVAL;
        if ( ( option->itimer.itimer_num == ITIMER_PROF ) &&
             ( option->itimer.itimer_sig != SIGPROF ) )
            return PAPI_EINVAL;
        if ( option->itimer.ns > 0 )
            option->itimer.ns = round_requested_ns( option->itimer.ns );
        /* At this point, we assume the user knows what he or
           she is doing, they maybe doing something arch specific */
        return PAPI_OK;
    }

    case PAPI_DEF_MPX_NS:
    {
        option->multiplex.ns = round_requested_ns( option->multiplex.ns );
        return ( PAPI_OK );
    }
    case PAPI_DEF_ITIMER_NS:
    {
        option->itimer.ns = round_requested_ns( option->itimer.ns );
        return ( PAPI_OK );
    }
    default:
        return ( PAPI_ENOSUPP );
    }
}
option->itimer.ns ); + /* At this point, we assume the user knows what he or + she is doing, they maybe doing something arch specific */ + return PAPI_OK; + } + + case PAPI_DEF_MPX_NS: + { + option->multiplex.ns = round_requested_ns( option->multiplex.ns ); + return ( PAPI_OK ); + } + case PAPI_DEF_ITIMER_NS: + { + option->itimer.ns = round_requested_ns( option->itimer.ns ); + return ( PAPI_OK ); + } + default: + return ( PAPI_ENOSUPP ); + } +} + +int +_papi_pfm_shutdown( hwd_context_t * ctx0 ) +{ + pfm_context_t *ctx = ( pfm_context_t * ) ctx0; + int ret; +#if defined(USE_PROC_PTTIMER) + close( ctx->stat_fd ); +#endif + + + ret = close( ctx->ctx_fd ); + SUBDBG( "CLOSE fd %d returned %d\n", ctx->ctx_fd, ret ); + (void) ret; + + return ( PAPI_OK ); +} + +/* This will need to be modified for the Pentium IV */ + +static inline int +find_profile_index( EventSetInfo_t * ESI, int pmd, int *flags, + unsigned int *native_index, int *profile_index ) +{ + int pos, esi_index, count; + pfm_control_state_t *ctl = ( pfm_control_state_t * ) ESI->ctl_state; + pfarg_pmd_t *pd; + unsigned int i; + + pd = ctl->pd; + + /* Find virtual PMD index, the one we actually read from the physical PMD number that + overflowed. This index is the one related to the profile buffer. 
#if defined(__ia64__)
/* The four predicates below return 1 when the detected PMU type matches
   and libpfm classifies the native event as a data (DEAR) or instruction
   (IEAR) event address register event, 0 otherwise. */
static inline int
is_montecito_and_dear( unsigned int native_index )
{
    if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) {
        if ( pfm_mont_is_dear( native_index ) )
            return ( 1 );
    }
    return ( 0 );
}
static inline int
is_montecito_and_iear( unsigned int native_index )
{
    if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) {
        if ( pfm_mont_is_iear( native_index ) )
            return ( 1 );
    }
    return ( 0 );
}
static inline int
is_itanium2_and_dear( unsigned int native_index )
{
    if ( _perfmon2_pfm_pmu_type == PFMLIB_ITANIUM2_PMU ) {
        if ( pfm_ita2_is_dear( native_index ) )
            return ( 1 );
    }
    return ( 0 );
}
static inline int
is_itanium2_and_iear( unsigned int native_index )
{
    if ( _perfmon2_pfm_pmu_type == PFMLIB_ITANIUM2_PMU ) {
        if ( pfm_ita2_is_iear( native_index ) )
            return ( 1 );
    }
    return ( 0 );
}
#endif

/* Bits per bit-vector word, and log2 of that value */
#define BPL (sizeof(uint64_t)<<3)
#define LBPL 6

/*
 * Set bit `rnum` in a bit vector made of 64-bit words.
 *
 * bv    bit vector; must hold at least (rnum / 64) + 1 words
 * rnum  bit (register) number to set
 *
 * The mask is built from a 64-bit one: with `1UL` the shift count can reach
 * 63, which is undefined where unsigned long is only 32 bits wide.
 */
static inline void
pfm_bv_set( uint64_t * bv, uint16_t rnum )
{
    bv[rnum >> LBPL] |= ( uint64_t ) 1 << ( rnum & ( BPL - 1 ) );
}
LBPL] |= 1UL << ( rnum & ( BPL - 1 ) ); +} + +static inline int +setup_ear_event( unsigned int native_index, pfarg_pmd_t * pd, int flags ) +{ + ( void ) flags; /*unused */ +#if defined(__ia64__) + if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) { + if ( pfm_mont_is_dear( native_index ) ) { /* 2,3,17 */ + pfm_bv_set( pd[0].reg_smpl_pmds, 32 ); + pfm_bv_set( pd[0].reg_smpl_pmds, 33 ); + pfm_bv_set( pd[0].reg_smpl_pmds, 36 ); + pfm_bv_set( pd[0].reg_reset_pmds, 36 ); + return ( 1 ); + } else if ( pfm_mont_is_iear( native_index ) ) { /* O,1 MK */ + pfm_bv_set( pd[0].reg_smpl_pmds, 34 ); + pfm_bv_set( pd[0].reg_smpl_pmds, 35 ); + pfm_bv_set( pd[0].reg_reset_pmds, 34 ); + return ( 1 ); + } + return ( 0 ); + } else if ( _perfmon2_pfm_pmu_type == PFMLIB_ITANIUM2_PMU ) { + if ( pfm_mont_is_dear( native_index ) ) { /* 2,3,17 */ + pfm_bv_set( pd[0].reg_smpl_pmds, 2 ); + pfm_bv_set( pd[0].reg_smpl_pmds, 3 ); + pfm_bv_set( pd[0].reg_smpl_pmds, 17 ); + pfm_bv_set( pd[0].reg_reset_pmds, 17 ); + return ( 1 ); + } else if ( pfm_mont_is_iear( native_index ) ) { /* O,1 MK */ + pfm_bv_set( pd[0].reg_smpl_pmds, 0 ); + pfm_bv_set( pd[0].reg_smpl_pmds, 1 ); + pfm_bv_set( pd[0].reg_reset_pmds, 0 ); + return ( 1 ); + } + return ( 0 ); + } +#else + ( void ) native_index; /*unused */ + ( void ) pd; /*unused */ +#endif + return ( 0 ); +} + +static inline int +process_smpl_entry( unsigned int native_pfm_index, int flags, + pfm_dfl_smpl_entry_t ** ent, caddr_t * pc ) +{ +#ifndef __ia64__ + ( void ) native_pfm_index; /*unused */ + ( void ) flags; /*unused */ +#endif + SUBDBG( "process_smpl_entry(%d,%d,%p,%p)\n", native_pfm_index, flags, ent, + pc ); + +#ifdef __ia64__ + /* Fixup EAR stuff here */ + if ( is_montecito_and_dear( native_pfm_index ) ) { + pfm_mont_pmd_reg_t data_addr; + pfm_mont_pmd_reg_t latency; + pfm_mont_pmd_reg_t load_addr; + unsigned long newent; + + if ( ( flags & ( PAPI_PROFIL_DATA_EAR | PAPI_PROFIL_INST_EAR ) ) == 0 ) + goto safety; + + /* Skip the header */ + ++( *ent 
); + + // PMD32 has data address on Montecito + // PMD33 has latency on Montecito + // PMD36 has instruction address on Montecito + data_addr = *( pfm_mont_pmd_reg_t * ) * ent; + latency = + *( pfm_mont_pmd_reg_t * ) ( ( unsigned long ) *ent + + sizeof ( data_addr ) ); + load_addr = + *( pfm_mont_pmd_reg_t * ) ( ( unsigned long ) *ent + + sizeof ( data_addr ) + + sizeof ( latency ) ); + + SUBDBG( "PMD[32]: %#016llx\n", + ( unsigned long long ) data_addr.pmd_val ); + SUBDBG( "PMD[33]: %#016llx\n", + ( unsigned long long ) latency.pmd_val ); + SUBDBG( "PMD[36]: %#016llx\n", + ( unsigned long long ) load_addr.pmd_val ); + + if ( ( !load_addr.pmd36_mont_reg.dear_vl ) || + ( !load_addr.pmd33_mont_reg.dear_stat ) ) { + SUBDBG + ( "Invalid DEAR sample found, dear_vl = %d, dear_stat = %#x\n", + load_addr.pmd36_mont_reg.dear_vl, + load_addr.pmd33_mont_reg.dear_stat ); + bail1: + newent = ( unsigned long ) *ent; + newent += 3 * sizeof ( pfm_mont_pmd_reg_t ); + *ent = ( pfm_dfl_smpl_entry_t * ) newent; + return 0; + } + + if ( flags & PAPI_PROFIL_DATA_EAR ) + *pc = ( caddr_t ) data_addr.pmd_val; + else if ( flags & PAPI_PROFIL_INST_EAR ) { + unsigned long tmp = + ( ( load_addr.pmd36_mont_reg.dear_iaddr + + ( unsigned long ) load_addr.pmd36_mont_reg. + dear_bn ) << 4 ) | ( unsigned long ) load_addr. + pmd36_mont_reg.dear_slot; + *pc = ( caddr_t ) tmp; + } else { + PAPIERROR( "BUG!" 
); + goto bail1; + } + + newent = ( unsigned long ) *ent; + newent += 3 * sizeof ( pfm_mont_pmd_reg_t ); + *ent = ( pfm_dfl_smpl_entry_t * ) newent; + return 0; + } else if ( is_montecito_and_iear( native_pfm_index ) ) { + pfm_mont_pmd_reg_t latency; + pfm_mont_pmd_reg_t icache_line_addr; + unsigned long newent; + + if ( ( flags & PAPI_PROFIL_INST_EAR ) == 0 ) + goto safety; + + /* Skip the header */ + ++( *ent ); + + // PMD34 has data address on Montecito + // PMD35 has latency on Montecito + icache_line_addr = *( pfm_mont_pmd_reg_t * ) * ent; + latency = + *( pfm_mont_pmd_reg_t * ) ( ( unsigned long ) *ent + + sizeof ( icache_line_addr ) ); + + SUBDBG( "PMD[34]: %#016llx\n", + ( unsigned long long ) icache_line_addr.pmd_val ); + SUBDBG( "PMD[35]: %#016llx\n", + ( unsigned long long ) latency.pmd_val ); + + if ( ( icache_line_addr.pmd34_mont_reg.iear_stat & 0x1 ) == 0 ) { + SUBDBG( "Invalid IEAR sample found, iear_stat = %#x\n", + icache_line_addr.pmd34_mont_reg.iear_stat ); + bail2: + newent = ( unsigned long ) *ent; + newent += 2 * sizeof ( pfm_mont_pmd_reg_t ); + *ent = ( pfm_dfl_smpl_entry_t * ) newent; + return ( 0 ); + } + + if ( flags & PAPI_PROFIL_INST_EAR ) { + unsigned long tmp = icache_line_addr.pmd34_mont_reg.iear_iaddr << 5; + *pc = ( caddr_t ) tmp; + } else { + PAPIERROR( "BUG!" 
); + goto bail2; + } + + newent = ( unsigned long ) *ent; + newent += 2 * sizeof ( pfm_mont_pmd_reg_t ); + *ent = ( pfm_dfl_smpl_entry_t * ) newent; + return 0; + } else if ( is_itanium2_and_dear( native_pfm_index ) ) { + pfm_ita2_pmd_reg_t data_addr; + pfm_ita2_pmd_reg_t latency; + pfm_ita2_pmd_reg_t load_addr; + unsigned long newent; + + if ( ( flags & ( PAPI_PROFIL_DATA_EAR | PAPI_PROFIL_INST_EAR ) ) == 0 ) + goto safety; + + /* Skip the header */ + ++( *ent ); + + // PMD2 has data address on Itanium 2 + // PMD3 has latency on Itanium 2 + // PMD17 has instruction address on Itanium 2 + data_addr = *( pfm_ita2_pmd_reg_t * ) * ent; + latency = + *( pfm_ita2_pmd_reg_t * ) ( ( unsigned long ) *ent + + sizeof ( data_addr ) ); + load_addr = + *( pfm_ita2_pmd_reg_t * ) ( ( unsigned long ) *ent + + sizeof ( data_addr ) + + sizeof ( latency ) ); + + SUBDBG( "PMD[2]: %#016llx\n", + ( unsigned long long ) data_addr.pmd_val ); + SUBDBG( "PMD[3]: %#016llx\n", ( unsigned long long ) latency.pmd_val ); + SUBDBG( "PMD[17]: %#016llx\n", + ( unsigned long long ) load_addr.pmd_val ); + + if ( ( !load_addr.pmd17_ita2_reg.dear_vl ) || + ( !load_addr.pmd3_ita2_reg.dear_stat ) ) { + SUBDBG + ( "Invalid DEAR sample found, dear_vl = %d, dear_stat = %#x\n", + load_addr.pmd17_ita2_reg.dear_vl, + load_addr.pmd3_ita2_reg.dear_stat ); + bail3: + newent = ( unsigned long ) *ent; + newent += 3 * sizeof ( pfm_mont_pmd_reg_t ); + *ent = ( pfm_dfl_smpl_entry_t * ) newent; + return 0; + } + + if ( flags & PAPI_PROFIL_DATA_EAR ) + *pc = ( caddr_t ) data_addr.pmd_val; + else if ( flags & PAPI_PROFIL_INST_EAR ) { + unsigned long tmp = + ( ( load_addr.pmd17_ita2_reg.dear_iaddr + + ( unsigned long ) load_addr.pmd17_ita2_reg. + dear_bn ) << 4 ) | ( unsigned long ) load_addr. + pmd17_ita2_reg.dear_slot; + *pc = ( caddr_t ) tmp; + } else { + PAPIERROR( "BUG!" 
); + goto bail3; + } + + newent = ( unsigned long ) *ent; + newent += 3 * sizeof ( pfm_ita2_pmd_reg_t ); + *ent = ( pfm_dfl_smpl_entry_t * ) newent; + return 0; + } else if ( is_itanium2_and_iear( native_pfm_index ) ) { + pfm_ita2_pmd_reg_t latency; + pfm_ita2_pmd_reg_t icache_line_addr; + unsigned long newent; + + if ( ( flags & PAPI_PROFIL_INST_EAR ) == 0 ) + goto safety; + + /* Skip the header */ + ++( *ent ); + + // PMD0 has address on Itanium 2 + // PMD1 has latency on Itanium 2 + icache_line_addr = *( pfm_ita2_pmd_reg_t * ) * ent; + latency = + *( pfm_ita2_pmd_reg_t * ) ( ( unsigned long ) *ent + + sizeof ( icache_line_addr ) ); + + SUBDBG( "PMD[0]: %#016llx\n", + ( unsigned long long ) icache_line_addr.pmd_val ); + SUBDBG( "PMD[1]: %#016llx\n", ( unsigned long long ) latency.pmd_val ); + + if ( ( icache_line_addr.pmd0_ita2_reg.iear_stat & 0x1 ) == 0 ) { + SUBDBG( "Invalid IEAR sample found, iear_stat = %#x\n", + icache_line_addr.pmd0_ita2_reg.iear_stat ); + bail4: + newent = ( unsigned long ) *ent; + newent += 2 * sizeof ( pfm_mont_pmd_reg_t ); + *ent = ( pfm_dfl_smpl_entry_t * ) newent; + return ( 0 ); + } + + if ( flags & PAPI_PROFIL_INST_EAR ) { + unsigned long tmp = icache_line_addr.pmd0_ita2_reg.iear_iaddr << 5; + *pc = ( caddr_t ) tmp; + } else { + PAPIERROR( "BUG!" 
); + goto bail4; + } + + newent = ( unsigned long ) *ent; + newent += 2 * sizeof ( pfm_ita2_pmd_reg_t ); + *ent = ( pfm_dfl_smpl_entry_t * ) newent; + return 0; + } +#if 0 + ( is_btb( native_pfm_index ) ) { + // PMD48-63,39 on Montecito + // PMD8-15,16 on Itanium 2 + } +#endif + else + safety: +#endif + { + *pc = ( caddr_t ) ( ( size_t ) ( ( *ent )->ip ) ); + ++( *ent ); + return ( 0 ); + } +} + +static inline int +process_smpl_buf( int num_smpl_pmds, int entry_size, ThreadInfo_t ** thr ) +{ + ( void ) num_smpl_pmds; /*unused */ + ( void ) entry_size; /*unused */ + int cidx = _perfmon2_vector.cmp_info.CmpIdx; + pfm_dfl_smpl_entry_t *ent; + uint64_t entry, count; + pfm_dfl_smpl_hdr_t *hdr = + ( ( pfm_context_t * ) ( *thr )->context[cidx] )->smpl_buf; + int ret, profile_index, flags; + unsigned int native_pfm_index; + caddr_t pc = NULL; + long long weight; + + DEBUGCALL( DEBUG_SUBSTRATE, dump_smpl_hdr( hdr ) ); + count = hdr->hdr_count; + ent = ( pfm_dfl_smpl_entry_t * ) ( hdr + 1 ); + entry = 0; + + SUBDBG( "This buffer has %llu samples in it.\n", + ( unsigned long long ) count ); + while ( count-- ) { + SUBDBG( "Processing sample entry %llu\n", + ( unsigned long long ) entry ); + DEBUGCALL( DEBUG_SUBSTRATE, dump_smpl( ent ) ); + + /* Find the index of the profile buffers if we are profiling on many events */ + + ret = + find_profile_index( ( *thr )->running_eventset[cidx], ent->ovfl_pmd, + &flags, &native_pfm_index, &profile_index ); + if ( ret != PAPI_OK ) + return ( ret ); + + weight = process_smpl_entry( native_pfm_index, flags, &ent, &pc ); + + _papi_hwi_dispatch_profile( ( *thr )->running_eventset[cidx], pc, + weight, profile_index ); + + entry++; + } + return ( PAPI_OK ); +} + + +/* This function used when hardware overflows ARE working + or when software overflows are forced */ + +static void +_papi_pfm_dispatch_timer( int n, hwd_siginfo_t * info, void *uc ) +{ + _papi_hwi_context_t ctx; +#ifdef HAVE_PFM_MSG_TYPE + pfm_msg_t msg; +#else + pfarg_msg_t msg; 
+#endif + int ret, wanted_fd, fd = info->si_fd; + caddr_t address; + ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 ); + int cidx = _perfmon2_vector.cmp_info.CmpIdx; + + if ( thread == NULL ) { + PAPIERROR( "thread == NULL in _papi_pfm_dispatch_timer!" ); + if ( n == _perfmon2_vector.cmp_info.hardware_intr_sig ) { + ret = read( fd, &msg, sizeof ( msg ) ); + pfm_restart( fd ); + } + return; + } + + if ( thread->running_eventset[cidx] == NULL ) { + PAPIERROR + ( "thread->running_eventset == NULL in _papi_pfm_dispatch_timer!" ); + if ( n == _perfmon2_vector.cmp_info.hardware_intr_sig ) { + ret = read( fd, &msg, sizeof ( msg ) ); + pfm_restart( fd ); + } + return; + } + + if ( thread->running_eventset[cidx]->overflow.flags == 0 ) { + PAPIERROR + ( "thread->running_eventset->overflow.flags == 0 in _papi_pfm_dispatch_timer!" ); + if ( n == _perfmon2_vector.cmp_info.hardware_intr_sig ) { + ret = read( fd, &msg, sizeof ( msg ) ); + pfm_restart( fd ); + } + return; + } + + ctx.si = info; + ctx.ucontext = ( hwd_ucontext_t * ) uc; + + if ( thread->running_eventset[cidx]->overflow. 
+ flags & PAPI_OVERFLOW_FORCE_SW ) { + address = GET_OVERFLOW_ADDRESS( ctx ); + _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, NULL, + 0, 0, &thread, cidx ); + } else { + if ( thread->running_eventset[cidx]->overflow.flags == + PAPI_OVERFLOW_HARDWARE ) { + wanted_fd = + ( ( pfm_control_state_t * ) ( thread->running_eventset[cidx]-> + ctl_state ) )->ctx_fd; + } else { + wanted_fd = ( ( pfm_context_t * ) thread->context[cidx] )->ctx_fd; + } + if ( wanted_fd != fd ) { + SUBDBG( "expected fd %d, got %d in _papi_hwi_dispatch_timer!", + wanted_fd, fd ); + if ( n == _perfmon2_vector.cmp_info.hardware_intr_sig ) { + ret = read( fd, &msg, sizeof ( msg ) ); + pfm_restart( fd ); + } + return; + } + retry: + ret = read( fd, &msg, sizeof ( msg ) ); + if ( ret == -1 ) { + if ( errno == EINTR ) { + SUBDBG( "read(%d) interrupted, retrying\n", fd ); + goto retry; + } else { + PAPIERROR( "read(%d): errno %d", fd, errno ); + } + } else if ( ret != sizeof ( msg ) ) { + PAPIERROR( "read(%d): short %d vs. %d bytes", fd, ret, + sizeof ( msg ) ); + ret = -1; + } + + if ( msg.type != PFM_MSG_OVFL ) { + PAPIERROR( "unexpected msg type %d", msg.type ); + ret = -1; + } +#if 0 + if ( msg.pfm_ovfl_msg.msg_ovfl_tid != mygettid( ) ) { + PAPIERROR( "unmatched thread id %lx vs. %lx", + msg.pfm_ovfl_msg.msg_ovfl_tid, mygettid( ) ); + ret = -1; + } +#endif + + if ( ret != -1 ) { + if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) && + !( thread->running_eventset[cidx]->profile. + flags & PAPI_PROFIL_FORCE_SW ) ) + process_smpl_buf( 0, sizeof ( pfm_dfl_smpl_entry_t ), &thread ); + else { + /* PAPI assumes that the overflow vector contains the register index of the + overflowing native event. That is generally true, but Stephane used some + tricks to offset the fixed counters on Core2 (Core? i7?) by 16. 
This hack + corrects for that hack in a (hopefully) transparent manner */ + unsigned long i, vector = msg.pfm_ovfl_msg.msg_ovfl_pmds[0]; + pfm_control_state_t *ctl = + ( pfm_control_state_t * ) thread->running_eventset[cidx]-> + ctl_state; + for ( i = 0; i < ctl->in.pfp_event_count; i++ ) { + /* We're only comparing to pmds[0]. A more robust implementation would + compare to pmds[0-3]. The bit mask must be converted to an index + for the comparison to work */ + if ( ctl->pd[i].reg_num == + ffsl( msg.pfm_ovfl_msg.msg_ovfl_pmds[0] ) - 1 ) { + /* if a match is found, convert the index back to a bitmask */ + vector = 1 << i; + break; + } + } + _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, + ( caddr_t ) ( ( size_t ) + msg. + pfm_ovfl_msg. + msg_ovfl_ip ), + NULL, vector, 0, &thread, + cidx ); + } + } + + if ( ( ret = pfm_restart( fd ) ) ) { + PAPIERROR( "pfm_restart(%d): %s", fd, strerror( ret ) ); + } + } +} + +static int +_papi_pfm_stop_profiling( ThreadInfo_t * thread, EventSetInfo_t * ESI ) +{ + ( void ) ESI; /*unused */ + /* Process any remaining samples in the sample buffer */ + return ( process_smpl_buf( 0, sizeof ( pfm_dfl_smpl_entry_t ), &thread ) ); +} + +static int +_papi_pfm_set_profile( EventSetInfo_t * ESI, int EventIndex, int threshold ) +{ + int cidx = _perfmon2_vector.cmp_info.CmpIdx; + pfm_control_state_t *ctl = ( pfm_control_state_t * ) ( ESI->ctl_state ); + pfm_context_t *ctx = ( pfm_context_t * ) ( ESI->master->context[cidx] ); + pfarg_ctx_t newctx; + void *buf_addr = NULL; + pfm_dfl_smpl_arg_t buf_arg; + pfm_dfl_smpl_hdr_t *hdr; + int i, ret, ctx_fd; + + memset( &newctx, 0, sizeof ( newctx ) ); + + if ( threshold == 0 ) { + SUBDBG( "MUNMAP(%p,%lld)\n", ctx->smpl_buf, + ( unsigned long long ) ctx->smpl.buf_size ); + munmap( ctx->smpl_buf, ctx->smpl.buf_size ); + + i = close( ctl->ctx_fd ); + SUBDBG( "CLOSE fd %d returned %d\n", ctl->ctx_fd, i ); + (void) i; + + /* Thread has master context */ + + ctl->ctx_fd = ctx->ctx_fd; + ctl->ctx = 
&ctx->ctx; + memset( &ctx->smpl, 0, sizeof ( buf_arg ) ); + ctx->smpl_buf = NULL; + ret = _papi_pfm_set_overflow( ESI, EventIndex, threshold ); +//#warning "This should be handled somewhere else" + ESI->state &= ~( PAPI_OVERFLOWING ); + ESI->overflow.flags &= ~( PAPI_OVERFLOW_HARDWARE ); + + return ( ret ); + } + + memset( &buf_arg, 0, sizeof ( buf_arg ) ); + buf_arg.buf_size = 2 * getpagesize( ); + + SUBDBG( "PFM_CREATE_CONTEXT(%p,%s,%p,%d)\n", &newctx, PFM_DFL_SMPL_NAME, + &buf_arg, ( int ) sizeof ( buf_arg ) ); + if ( ( ret = + pfm_create_context( &newctx, PFM_DFL_SMPL_NAME, &buf_arg, + sizeof ( buf_arg ) ) ) == -1 ) { + DEBUGCALL( DEBUG_SUBSTRATE, dump_smpl_arg( &buf_arg ) ); + PAPIERROR( "_papi_hwd_set_profile:pfm_create_context(): %s", + strerror( errno ) ); + return ( PAPI_ESYS ); + } + ctx_fd = ret; + SUBDBG( "PFM_CREATE_CONTEXT returned fd %d\n", ctx_fd ); + tune_up_fd( ret ); + + SUBDBG( "MMAP(NULL,%lld,%d,%d,%d,0)\n", + ( unsigned long long ) buf_arg.buf_size, PROT_READ, MAP_PRIVATE, + ctx_fd ); + buf_addr = + mmap( NULL, ( size_t ) buf_arg.buf_size, PROT_READ, MAP_PRIVATE, ctx_fd, + 0 ); + if ( buf_addr == MAP_FAILED ) { /* the size is cast so the argument matches its %llu conversion */ + PAPIERROR( "mmap(NULL,%llu,%d,%d,%d,0): %s", ( unsigned long long ) buf_arg.buf_size, PROT_READ, + MAP_PRIVATE, ctx_fd, strerror( errno ) ); + close( ctx_fd ); + return ( PAPI_ESYS ); + } + SUBDBG( "Sample buffer is located at %p\n", buf_addr ); + + hdr = ( pfm_dfl_smpl_hdr_t * ) buf_addr; + SUBDBG( "hdr_cur_offs=%llu version=%u.%u\n", + ( unsigned long long ) hdr->hdr_cur_offs, + PFM_VERSION_MAJOR( hdr->hdr_version ), + PFM_VERSION_MINOR( hdr->hdr_version ) ); + + if ( PFM_VERSION_MAJOR( hdr->hdr_version ) < 1 ) { + PAPIERROR( "invalid buffer format version %d", + PFM_VERSION_MAJOR( hdr->hdr_version ) ); + munmap( buf_addr, buf_arg.buf_size ); + close( ctx_fd ); + return PAPI_ESYS; + } + + ret = _papi_pfm_set_overflow( ESI, EventIndex, threshold ); + if ( ret != PAPI_OK ) { + munmap( buf_addr, buf_arg.buf_size ); + close( ctx_fd ); + return ( ret ); + } + +
/* Look up the native event code */ + + if ( ESI->profile.flags & ( PAPI_PROFIL_DATA_EAR | PAPI_PROFIL_INST_EAR ) ) { + pfarg_pmd_t *pd; + int pos, native_index; + pd = ctl->pd; + pos = ESI->EventInfoArray[EventIndex].pos[0]; + native_index = + ( ( pfm_register_t * ) ( ESI->NativeInfoArray[pos].ni_bits ) )-> + event; + setup_ear_event( native_index, &pd[pos], ESI->profile.flags ); + } + + if ( ESI->profile.flags & PAPI_PROFIL_RANDOM ) { + pfarg_pmd_t *pd; + int pos; + pd = ctl->pd; + pos = ESI->EventInfoArray[EventIndex].pos[0]; + pd[pos].reg_random_seed = 5; + pd[pos].reg_random_mask = 0xff; + } + + /* Now close our context it is safe */ + + // close(ctx->ctx_fd); + + /* Copy the new data to the threads context control block */ + + ctl->ctx_fd = ctx_fd; + memcpy( &ctx->smpl, &buf_arg, sizeof ( buf_arg ) ); + ctx->smpl_buf = buf_addr; + + return ( PAPI_OK ); +} + + + +static int +_papi_pfm_set_overflow( EventSetInfo_t * ESI, int EventIndex, int threshold ) +{ + pfm_control_state_t *this_state = + ( pfm_control_state_t * ) ( ESI->ctl_state ); + int j, retval = PAPI_OK, *pos; + + /* Which counter are we on, this looks suspicious because of the pos[0], + but this could be because of derived events. We should do more here + to figure out exactly what the position is, because the event may + actually have more than one position. */ + + pos = ESI->EventInfoArray[EventIndex].pos; + j = pos[0]; + SUBDBG( "Hardware counter %d used in overflow, threshold %d\n", j, + threshold ); + + if ( threshold == 0 ) { + /* If this counter isn't set to overflow */ + + if ( ( this_state->pd[j].reg_flags & PFM_REGFL_OVFL_NOTIFY ) == 0 ) + return ( PAPI_EINVAL ); + + /* Remove the signal handler */ + + retval = _papi_hwi_stop_signal( _perfmon2_vector.cmp_info.hardware_intr_sig ); + if ( retval != PAPI_OK ) + return ( retval ); + + /* Disable overflow */ + + this_state->pd[j].reg_flags ^= PFM_REGFL_OVFL_NOTIFY; + + /* + * we may want to reset the other PMDs on + * every overflow. 
If we do not set + * this, the non-overflowed counters + * will be untouched. + + if (inp.pfp_event_count > 1) + this_state->pd[j].reg_reset_pmds[0] ^= 1UL << counter_to_reset */ + + /* Clear the overflow period */ + + this_state->pd[j].reg_value = 0; + this_state->pd[j].reg_long_reset = 0; + this_state->pd[j].reg_short_reset = 0; + this_state->pd[j].reg_random_seed = 0; + this_state->pd[j].reg_random_mask = 0; + } else { + /* Enable the signal handler */ + + retval = + _papi_hwi_start_signal( _perfmon2_vector.cmp_info.hardware_intr_sig, 1, + _perfmon2_vector.cmp_info.CmpIdx ); + if ( retval != PAPI_OK ) + return ( retval ); + + /* Set it to overflow */ + + this_state->pd[j].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * we may want to reset the other PMDs on + * every overflow. If we do not set + * this, the non-overflowed counters + * will be untouched. + + if (inp.pfp_event_count > 1) + this_state->pd[j].reg_reset_pmds[0] |= 1UL << counter_to_reset */ + + /* Set the overflow period */ + + this_state->pd[j].reg_value = -( unsigned long long ) threshold + 1; + this_state->pd[j].reg_short_reset = + -( unsigned long long ) threshold + 1; + this_state->pd[j].reg_long_reset = + -( unsigned long long ) threshold + 1; + } + return ( retval ); +} +/* Reset a control state: zero the library input/output blocks and the pc, pd, set and setinfo arrays, clear ctx, ctx_fd and load, then apply the default domain. */ +static int +_papi_pfm_init_control_state( hwd_control_state_t * ctl0 ) +{ + pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; + pfmlib_input_param_t *inp = &ctl->in; + pfmlib_output_param_t *outp = &ctl->out; + pfarg_pmd_t *pd = ctl->pd; + pfarg_pmc_t *pc = ctl->pc; + pfarg_setdesc_t *set = ctl->set; + pfarg_setinfo_t *setinfo = ctl->setinfo; + + memset( inp, 0, sizeof ( *inp ) ); + memset( outp, 0, sizeof ( *outp ) ); /* sized by the output block itself, which has its own type */ + memset( pc, 0, sizeof ( ctl->pc ) ); + memset( pd, 0, sizeof ( ctl->pd ) ); + memset( set, 0, sizeof ( ctl->set ) ); + memset( setinfo, 0, sizeof ( ctl->setinfo ) ); + /* Will be filled by update now...until this gets another arg */ + ctl->ctx = NULL; + ctl->ctx_fd = -1; + ctl->load = NULL; +
set_domain( ctl, _perfmon2_vector.cmp_info.default_domain ); + return ( PAPI_OK ); +} + +static int +_papi_pfm_allocate_registers( EventSetInfo_t * ESI ) +{ + int i, j; + for ( i = 0; i < ESI->NativeCount; i++ ) { + if ( _papi_libpfm_ntv_code_to_bits + ( ESI->NativeInfoArray[i].ni_event, + ESI->NativeInfoArray[i].ni_bits ) != PAPI_OK ) + goto bail; + } + return PAPI_OK; + bail: + for ( j = 0; j < i; j++ ) + memset( ESI->NativeInfoArray[j].ni_bits, 0x0, + sizeof ( pfm_register_t ) ); + return PAPI_ECNFLCT; +} + +/* This function clears the current contents of the control structure and + updates it with whatever resources are allocated for all the native events + in the native info structure array. */ + +static int +_papi_pfm_update_control_state( hwd_control_state_t * ctl0, + NativeInfo_t * native, int count, + hwd_context_t * ctx0 ) +{ + pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; + pfm_context_t *ctx = ( pfm_context_t * ) ctx0; + int i = 0, ret; + int last_reg_set = 0, reg_set_done = 0, offset = 0; + pfmlib_input_param_t tmpin, *inp = &ctl->in; + pfmlib_output_param_t tmpout, *outp = &ctl->out; + pfarg_pmd_t *pd = ctl->pd; + + if ( count == 0 ) { + SUBDBG( "Called with count == 0\n" ); + inp->pfp_event_count = 0; + outp->pfp_pmc_count = 0; + memset( inp->pfp_events, 0x0, sizeof ( inp->pfp_events ) ); + return ( PAPI_OK ); + } + + memcpy( &tmpin, inp, sizeof ( tmpin ) ); + memcpy( &tmpout, outp, sizeof ( tmpout ) ); + + for ( i = 0; i < count; i++ ) { + SUBDBG + ( "Stuffing native event index %d (code %#x) into input structure.\n", + i, ( ( pfm_register_t * ) native[i].ni_bits )->event ); + memcpy( inp->pfp_events + i, native[i].ni_bits, + sizeof ( pfmlib_event_t ) ); + } + inp->pfp_event_count = count; + + /* let the library figure out the values for the PMCS */ + + ret = compute_kernel_args( ctl ); + if ( ret != PAPI_OK ) { + /* Restore values */ + memcpy( inp, &tmpin, sizeof ( tmpin ) ); + memcpy( outp, &tmpout, sizeof ( tmpout ) ); + return ( ret 
); + } + + /* Update the native structure, because the allocation is done here. */ + + last_reg_set = pd[0].reg_set; + for ( i = 0; i < count; i++ ) { + if ( pd[i].reg_set != last_reg_set ) { + offset += reg_set_done; + reg_set_done = 0; + } + reg_set_done++; + + native[i].ni_position = i; + SUBDBG( "native event index %d (code %#x) is at PMD offset %d\n", i, + ( ( pfm_register_t * ) native[i].ni_bits )->event, + native[i].ni_position ); + } + + /* If structure has not yet been filled with a context, fill it + from the thread's context. This should happen in init_control_state + when we give that a *ctx argument */ + + if ( ctl->ctx == NULL ) { + ctl->ctx = &ctx->ctx; + ctl->ctx_fd = ctx->ctx_fd; + ctl->load = &ctx->load; + } + + return ( PAPI_OK ); +} + + +papi_vector_t _perfmon2_vector = { + .cmp_info = { + /* default component information (unspecified values initialized to 0) */ + .name = "perfmon", + .description = "Linux perfmon2 CPU counters", + .version = "3.8", + + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + + .hardware_intr = 1, + .kernel_multiplex = 1, + .kernel_profile = 1, + .num_mpx_cntrs = PFMLIB_MAX_PMDS, + + /* component specific cmp_info initializations */ + .fast_real_timer = 1, + .fast_virtual_timer = 0, + .attach = 1, + .attach_must_ptrace = 1, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( pfm_context_t ), + .control_state = sizeof ( pfm_control_state_t ), + .reg_value = sizeof ( pfm_register_t ), + .reg_alloc = sizeof ( pfm_reg_alloc_t ), + }, + /* function pointers in this component */ + .init_control_state = _papi_pfm_init_control_state, + .start = _papi_pfm_start, + .stop = _papi_pfm_stop, + .read = _papi_pfm_read, + .shutdown_thread = _papi_pfm_shutdown, + .shutdown_component = _papi_pfm_shutdown_component, + .ctl = _papi_pfm_ctl, + .update_control_state = 
_papi_pfm_update_control_state, + .set_domain = set_domain, + .reset = _papi_pfm_reset, + .set_overflow = _papi_pfm_set_overflow, + .set_profile = _papi_pfm_set_profile, + .stop_profiling = _papi_pfm_stop_profiling, + .init_component = _papi_pfm_init_component, + .dispatch_timer = _papi_pfm_dispatch_timer, + .init_thread = _papi_pfm_init_thread, + .allocate_registers = _papi_pfm_allocate_registers, + .write = _papi_pfm_write, + + /* from the counter name library */ + .ntv_enum_events = _papi_libpfm_ntv_enum_events, + .ntv_name_to_code = _papi_libpfm_ntv_name_to_code, + .ntv_code_to_name = _papi_libpfm_ntv_code_to_name, + .ntv_code_to_descr = _papi_libpfm_ntv_code_to_descr, + .ntv_code_to_bits = _papi_libpfm_ntv_code_to_bits, + +}; diff --git a/src/components/perfmon2/perfmon.h b/src/components/perfmon2/perfmon.h new file mode 100644 index 0000000..f5bd240 --- /dev/null +++ b/src/components/perfmon2/perfmon.h @@ -0,0 +1,103 @@ +#ifndef _PAPI_PERFMON_H +#define _PAPI_PERFMON_H +/* +* File: perfmon.h +* Author: Philip Mucci +* mucci@cs.utk.edu +* +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "perfmon/pfmlib.h" +#include "perfmon/perfmon_dfl_smpl.h" +#include "papi_lock.h" + + +#include "linux-context.h" + +#if defined(DEBUG) +#define DEBUGCALL(a,b) { if (ISLEVEL(a)) { b; } } +#else +#define DEBUGCALL(a,b) +#endif + +typedef pfmlib_event_t pfm_register_t; +typedef int pfm_register_map_t; +typedef int pfm_reg_alloc_t; + +#define MAX_COUNTERS PFMLIB_MAX_PMCS +#define MAX_COUNTER_TERMS PFMLIB_MAX_PMCS + +typedef struct +{ + /* Context structure to kernel, different for attached */ + int ctx_fd; + pfarg_ctx_t *ctx; + /* Load structure to kernel, different for attached */ + pfarg_load_t *load; + /* Which counters to use? 
Bits encode counters to use, may be duplicates */ + pfm_register_map_t bits; + /* Buffer to pass to library to control the counters */ + pfmlib_input_param_t in; + /* Buffer to pass from the library to control the counters */ + pfmlib_output_param_t out; + /* Is this eventset multiplexed? Actually it holds the microseconds of the switching interval, 0 if not mpx. */ + int multiplexed; + /* Arguments to kernel for multiplexing, first number of sets */ + int num_sets; + /* Arguments to kernel to set up the sets */ + pfarg_setdesc_t set[PFMLIB_MAX_PMDS]; + /* Buffer to get information out of the sets when reading */ + pfarg_setinfo_t setinfo[PFMLIB_MAX_PMDS]; + /* Arguments to the kernel */ + pfarg_pmc_t pc[PFMLIB_MAX_PMCS]; + /* Arguments to the kernel */ + pfarg_pmd_t pd[PFMLIB_MAX_PMDS]; + /* Buffer to gather counters */ + long long counts[PFMLIB_MAX_PMDS]; +} pfm_control_state_t; + +typedef struct +{ +#if defined(USE_PROC_PTTIMER) + int stat_fd; +#endif + /* Main context structure to kernel */ + int ctx_fd; + pfarg_ctx_t ctx; + /* Main load structure to kernel */ + pfarg_load_t load; + /* Structure to inform the kernel about sampling */ + pfm_dfl_smpl_arg_t smpl; + /* Address of mmap()'ed sample buffer */ + void *smpl_buf; +} pfm_context_t; + +/* typedefs to conform to PAPI component layer code. */ +/* these are void * in the PAPI framework layer code. 
*/ +typedef pfm_reg_alloc_t cmp_reg_alloc_t; +typedef pfm_register_t cmp_register_t; +typedef pfm_control_state_t cmp_control_state_t; +typedef pfm_context_t cmp_context_t; + +#endif diff --git a/src/components/perfmon_ia64/Rules.perfmon_ia64 b/src/components/perfmon_ia64/Rules.perfmon_ia64 new file mode 100644 index 0000000..78e5eca --- /dev/null +++ b/src/components/perfmon_ia64/Rules.perfmon_ia64 @@ -0,0 +1,6 @@ + +COMPSRCS += components/perfmon_ia64/perfmon-ia64.c +COMPOBJS += perfmon-ia64.o + +perfmon-ia64.o: components/perfmon_ia64/perfmon-ia64.c components/perfmon_ia64/perfmon-ia64.h + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/perfmon_ia64/perfmon-ia64.c -o perfmon-ia64.o diff --git a/src/components/perfmon_ia64/perfmon-ia64.c b/src/components/perfmon_ia64/perfmon-ia64.c new file mode 100644 index 0000000..3033a15 --- /dev/null +++ b/src/components/perfmon_ia64/perfmon-ia64.c @@ -0,0 +1,3179 @@ +/* +* File: perfmon-ia64.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: Kevin London +* london@cs.utk.edu +* Per Ekman +* pek@pdc.kth.se +* Zhou Min +* min@cs.utk.edu +*/ + + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "threads.h" +#include "papi_memory.h" +#include "papi_lock.h" + +#include "linux-memory.h" +#include "linux-timer.h" +#include "linux-common.h" + +#if defined(__INTEL_COMPILER) + +#define hweight64(x) _m64_popcnt(x) + +#elif defined(__GNUC__) + +static inline int +hweight64( unsigned long x ) +{ + unsigned long result; + __asm__( "popcnt %0=%1": "=r"( result ):"r"( x ) ); + return ( int ) result; +} + +#else +#error "you need to provide inline assembly from your compiler" +#endif + +extern int _perfmon2_pfm_pmu_type; +extern papi_vector_t _ia64_vector; + +#define OVFL_SIGNAL SIGPROF +#define PFMW_PEVT_EVTCOUNT(evt) (evt->inp.pfp_event_count) +#define PFMW_PEVT_EVENT(evt,idx) (evt->inp.pfp_events[idx].event) +#define PFMW_PEVT_PLM(evt,idx) (evt->inp.pfp_events[idx].plm) +#define PFMW_PEVT_DFLPLM(evt) 
(evt->inp.pfp_dfl_plm) +#define PFMW_PEVT_PFPPC(evt) (evt->pc) +#define PFMW_PEVT_PFPPD(evt) (evt->pd) +#define PFMW_PEVT_PFPPC_COUNT(evt) (evt->outp.pfp_pmc_count) +#define PFMW_PEVT_PFPPC_REG_NUM(evt,idx) (evt->outp.pfp_pmcs[idx].reg_num) +#define PFMW_PEVT_PFPPC_REG_VAL(evt,idx) (evt->pc[idx].reg_value) +#define PFMW_PEVT_PFPPC_REG_FLG(evt,idx) (evt->pc[idx].reg_flags) +#define PFMW_ARCH_REG_PMCVAL(reg) (reg.pmc_val) +#define PFMW_ARCH_REG_PMDVAL(reg) (reg.pmd_val) + +#define PFMON_MONT_MAX_IBRS 8 +#define PFMON_MONT_MAX_DBRS 8 + +#define PFMON_ITA2_MAX_IBRS 8 +#define PFMON_ITA2_MAX_DBRS 8 +/* + #if defined(ITANIUM3) + #define PFMW_ARCH_REG_PMCPLM(reg) (reg.pmc_mont_counter_reg.pmc_plm) + #define PFMW_ARCH_REG_PMCES(reg) (reg.pmc_mont_counter_reg.pmc_es) + typedef pfm_mont_pmc_reg_t pfmw_arch_pmc_reg_t; + typedef pfm_mont_pmd_reg_t pfmw_arch_pmd_reg_t; + #elif defined(ITANIUM2) + #define PFMW_ARCH_REG_PMCPLM(reg) (reg.pmc_ita2_counter_reg.pmc_plm) + #define PFMW_ARCH_REG_PMCES(reg) (reg.pmc_ita2_counter_reg.pmc_es) + typedef pfm_ita2_pmc_reg_t pfmw_arch_pmc_reg_t; + typedef pfm_ita2_pmd_reg_t pfmw_arch_pmd_reg_t; + #else + #define PFMW_ARCH_REG_PMCPLM(reg) (reg.pmc_ita_count_reg.pmc_plm) + #define PFMW_ARCH_REG_PMCES(reg) (reg.pmc_ita_count_reg.pmc_es) + typedef pfm_ita_pmc_reg_t pfmw_arch_pmc_reg_t; + typedef pfm_ita_pmd_reg_t pfmw_arch_pmd_reg_t; + #endif +*/ +typedef pfm_default_smpl_hdr_t pfmw_smpl_hdr_t; +typedef pfm_default_smpl_entry_t pfmw_smpl_entry_t; + +static void +pfmw_start( hwd_context_t * ctx ) +{ + pfm_self_start( ( ( ia64_context_t * ) ctx )->fd ); +} + +static void +pfmw_stop( hwd_context_t * ctx ) +{ + pfm_self_stop( ( ( ia64_context_t * ) ctx )->fd ); +} + +static int +pfmw_perfmonctl( pid_t tid, int fd, int cmd, void *arg, int narg ) +{ + ( void ) tid; /*unused */ + return ( perfmonctl( fd, cmd, arg, narg ) ); +} + +static int +pfmw_destroy_context( hwd_context_t * thr_ctx ) +{ + int ret; + ret = close( ( ( ia64_context_t * ) thr_ctx )->fd 
); + if ( ret ) + return PAPI_ESYS; + else + return PAPI_OK; +} + +static int +pfmw_dispatch_events( pfmw_param_t * evt ) +{ + int ret; + unsigned int i; +/* + PFMW_PEVT_DFLPLM(evt) = PFM_PLM3; +*/ +#ifdef PFMLIB_MONTECITO_PMU + if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) + ret = + pfm_dispatch_events( &evt->inp, + ( pfmlib_mont_input_param_t * ) evt->mod_inp, + &evt->outp, + ( pfmlib_mont_output_param_t * ) evt-> + mod_outp ); + else +#endif + ret = + pfm_dispatch_events( &evt->inp, + ( pfmlib_ita2_input_param_t * ) evt->mod_inp, + &evt->outp, + ( pfmlib_ita2_output_param_t * ) evt-> + mod_outp ); + if ( ret ) { + return PAPI_ESYS; + } else { + for ( i = 0; i < evt->outp.pfp_pmc_count; i++ ) { + evt->pc[i].reg_num = evt->outp.pfp_pmcs[i].reg_num; + evt->pc[i].reg_value = evt->outp.pfp_pmcs[i].reg_value; + } +#if defined(HAVE_PFMLIB_OUTPUT_PFP_PMD_COUNT) + for ( i = 0; i < evt->outp.pfp_pmd_count; i++ ) { + evt->pd[i].reg_num = evt->outp.pfp_pmds[i].reg_num; + } +#else + /* This is really broken */ + for ( i = 0; i < evt->inp.pfp_event_count; i++ ) { + evt->pd[i].reg_num = evt->pc[i].reg_num; + } +#endif + return PAPI_OK; + } +} + +static int +pfmw_create_ctx_common( hwd_context_t * ctx ) +{ + pfarg_load_t load_args; + int ret; + + memset( &load_args, 0, sizeof ( load_args ) ); + /* + * we want to monitor ourself + */ + + load_args.load_pid = ( ( ia64_context_t * ) ctx )->tid; + + SUBDBG( "PFM_LOAD_CONTEXT FD %d, PID %d\n", + ( ( ia64_context_t * ) ctx )->fd, + ( ( ia64_context_t * ) ctx )->tid ); + if ( perfmonctl + ( ( ( ia64_context_t * ) ctx )->fd, PFM_LOAD_CONTEXT, &load_args, + 1 ) == -1 ) { + PAPIERROR( "perfmonctl(PFM_LOAD_CONTEXT) errno %d", errno ); + return ( PAPI_ESYS ); + } + /* + * setup asynchronous notification on the file descriptor + */ + ret = + fcntl( ( ( ia64_context_t * ) ctx )->fd, F_SETFL, + fcntl( ( ( ia64_context_t * ) ctx )->fd, F_GETFL, + 0 ) | O_ASYNC ); + if ( ret == -1 ) { + PAPIERROR( "fcntl(%d,F_SETFL,O_ASYNC) errno %d", + 
( ( ia64_context_t * ) ctx )->fd, errno ); + return ( PAPI_ESYS ); + } + + /* + * get ownership of the descriptor + */ + + ret = + fcntl( ( ( ia64_context_t * ) ctx )->fd, F_SETOWN, + ( ( ia64_context_t * ) ctx )->tid ); + if ( ret == -1 ) { + PAPIERROR( "fcntl(%d,F_SETOWN) errno %d", + ( ( ia64_context_t * ) ctx )->fd, errno ); + return ( PAPI_ESYS ); + } + + ret = + fcntl( ( ( ia64_context_t * ) ctx )->fd, F_SETSIG, + _ia64_vector.cmp_info.hardware_intr_sig ); + if ( ret == -1 ) { + PAPIERROR( "fcntl(%d,F_SETSIG) errno %d", + ( ( ia64_context_t * ) ctx )->fd, errno ); + return ( PAPI_ESYS ); + } + + /* set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., + * fd not visible to child. */ + + ret = fcntl( ( ( ia64_context_t * ) ctx )->fd, F_SETFD, FD_CLOEXEC ); + if ( ret == -1 ) { + PAPIERROR( "fcntl(%d,FD_CLOEXEC) errno %d", + ( ( ia64_context_t * ) ctx )->fd, errno ); + return ( PAPI_ESYS ); + } + + return ( PAPI_OK ); + +} + +static int +pfmw_create_context( hwd_context_t * thr_ctx ) +{ + pfarg_context_t ctx; + memset( &ctx, 0, sizeof ( ctx ) ); + + SUBDBG( "PFM_CREATE_CONTEXT on 0\n" ); + if ( perfmonctl( 0, PFM_CREATE_CONTEXT, &ctx, 1 ) == -1 ) { + PAPIERROR( "perfmonctl(PFM_CREATE_CONTEXT) errno %d", errno ); + return ( PAPI_ESYS ); + } + ( ( ia64_context_t * ) thr_ctx )->fd = ctx.ctx_fd; + ( ( ia64_context_t * ) thr_ctx )->tid = mygettid( ); + SUBDBG( "PFM_CREATE_CONTEXT returns FD %d, TID %d\n", + ( int ) ( ( ia64_context_t * ) thr_ctx )->fd, + ( int ) ( ( ia64_context_t * ) thr_ctx )->tid ); + + return ( pfmw_create_ctx_common( thr_ctx ) ); +} +/* For each position listed for event `index` (list ends at -1), store `value` in reg_smpl_pmds[0] of the PMC whose register number matches that counter. */ +static int +set_pmds_to_write( EventSetInfo_t * ESI, int index, unsigned long value ) +{ + int *pos, count, i; + unsigned int hwcntr; + ia64_control_state_t *this_state = + ( ia64_control_state_t * ) ESI->ctl_state; + pfmw_param_t *pevt = &( this_state->evt ); + + pos = ESI->EventInfoArray[index].pos; + count = 0; + while ( count < MAX_COUNTERS && pos[count] != -1 ) { /* bound is tested first so pos[] is never read at index MAX_COUNTERS */ + hwcntr = pos[count] +
PMU_FIRST_COUNTER; + for ( i = 0; i < MAX_COUNTERS; i++ ) { + if ( PFMW_PEVT_PFPPC_REG_NUM( pevt, i ) == hwcntr ) { + this_state->evt.pc[i].reg_smpl_pmds[0] = value; + break; + } + } + count++; + } + return ( PAPI_OK ); +} + +static int +_pfm_decode_native_event( unsigned int EventCode, unsigned int *event, + unsigned int *umask ); + +static int +pfmw_recreate_context( EventSetInfo_t * ESI, hwd_context_t * thr_ctx, + void **smpl_vaddr, int EventIndex ) +{ + pfm_default_smpl_ctx_arg_t ctx; + pfm_uuid_t buf_fmt_id = PFM_DEFAULT_SMPL_UUID; + int ctx_fd; + unsigned int native_index, EventCode; + int pos; + //hwd_context_t *thr_ctx = (hwd_context_t *) &ESI->master->context; +#ifdef PFMLIB_MONTECITO_PMU + unsigned int umask; +#endif + + pos = ESI->EventInfoArray[EventIndex].pos[0]; + EventCode = ESI->EventInfoArray[EventIndex].event_code; +#ifdef PFMLIB_MONTECITO_PMU + if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) { + if ( _pfm_decode_native_event + ( ESI->NativeInfoArray[pos].ni_event, &native_index, + &umask ) != PAPI_OK ) + return ( PAPI_ENOEVNT ); + } else +#endif + native_index = + ESI->NativeInfoArray[pos].ni_event & PAPI_NATIVE_AND_MASK; + + memset( &ctx, 0, sizeof ( ctx ) ); + /* + * We initialize the format specific information. + * The format is identified by its UUID which must be copied + * into the ctx_buf_fmt_id field. + */ + memcpy( ctx.ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof ( pfm_uuid_t ) ); + /* + * the size of the buffer is indicated in bytes (not entries). + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. 
+ */ + ctx.buf_arg.buf_size = 4096; + /* + * now create the context for self monitoring/per-task + */ + SUBDBG( "PFM_CREATE_CONTEXT on 0\n" ); + if ( perfmonctl( 0, PFM_CREATE_CONTEXT, &ctx, 1 ) == -1 ) { + if ( errno == ENOSYS ) + PAPIERROR + ( "Your kernel does not have performance monitoring support" ); + else + PAPIERROR( "perfmonctl(PFM_CREATE_CONTEXT) errno %d", errno ); + return ( PAPI_ESYS ); + } + /* + * extract the file descriptor we will use to + * identify this newly created context + */ + ctx_fd = ctx.ctx_arg.ctx_fd; + /* save the fd into the thread context struct */ + ( ( ia64_context_t * ) thr_ctx )->fd = ctx_fd; + ( ( ia64_context_t * ) thr_ctx )->tid = mygettid( ); + SUBDBG( "PFM_CREATE_CONTEXT returns FD %d, TID %d\n", + ( int ) ( ( ia64_context_t * ) thr_ctx )->fd, + ( int ) ( ( ia64_context_t * ) thr_ctx )->tid ); + /* indicate which PMD to include in the sample */ +/* DEAR and BTB events */ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + if ( pfm_ita_is_dear( native_index ) ) + set_pmds_to_write( ESI, EventIndex, DEAR_REGS_MASK ); + else if ( pfm_ita_is_btb( native_index ) + || EventCode == ( unsigned int ) PAPI_BR_INS ) + set_pmds_to_write( ESI, EventIndex, BTB_REGS_MASK ); + break; + case PFMLIB_ITANIUM2_PMU: + if ( pfm_ita2_is_dear( native_index ) ) + set_pmds_to_write( ESI, EventIndex, DEAR_REGS_MASK ); + else if ( pfm_ita2_is_btb( native_index ) + || EventCode == ( unsigned int ) PAPI_BR_INS ) + set_pmds_to_write( ESI, EventIndex, BTB_REGS_MASK ); + break; + case PFMLIB_MONTECITO_PMU: + if ( pfm_mont_is_dear( native_index ) ) + set_pmds_to_write( ESI, EventIndex, MONT_DEAR_REGS_MASK ); + else if ( pfm_mont_is_etb( native_index ) || + EventCode == ( unsigned int ) PAPI_BR_INS ) + set_pmds_to_write( ESI, EventIndex, MONT_ETB_REGS_MASK ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } + + *smpl_vaddr = ctx.ctx_arg.ctx_smpl_vaddr; + + 
return ( pfmw_create_ctx_common( thr_ctx ) ); +} + +static int +pfmw_get_event_name( char *name, unsigned int idx ) +{ + unsigned int total; + + pfm_get_num_events( &total ); + if ( idx >= total ) + return PAPI_ENOEVNT; + if ( pfm_get_event_name( idx, name, PAPI_MAX_STR_LEN ) == PFMLIB_SUCCESS ) + return PAPI_OK; + else + return PAPI_ENOEVNT; +} +/* Copy the library's description of event `idx` into `dest` (at most `len` bytes including the terminator); on lookup failure `dest` becomes the empty string. */ +static void +pfmw_get_event_description( unsigned int idx, char *dest, int len ) +{ + char *descr; + + if ( pfm_get_event_description( idx, &descr ) == PFMLIB_SUCCESS ) { + snprintf( dest, ( size_t ) len, "%s", descr ); /* truncates like strncpy but always NUL-terminates */ + free( descr ); + } else + *dest = '\0'; +} + +static int +pfmw_is_dear( unsigned int i ) +{ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( pfm_ita_is_dear( i ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( pfm_ita2_is_dear( i ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( pfm_mont_is_dear( i ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +static int +pfmw_is_iear( unsigned int i ) +{ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( pfm_ita_is_iear( i ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( pfm_ita2_is_iear( i ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( pfm_mont_is_iear( i ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +static int +pfmw_support_darr( unsigned int i ) +{ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( pfm_ita_support_darr( i ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( pfm_ita2_support_darr( i ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( pfm_mont_support_darr( i ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +static int +pfmw_support_iarr( unsigned int i ) +{ + switch (
_perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( pfm_ita_support_iarr( i ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( pfm_ita2_support_iarr( i ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( pfm_mont_support_iarr( i ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +static int +pfmw_support_opcm( unsigned int i ) +{ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( pfm_ita_support_opcm( i ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( pfm_ita2_support_opcm( i ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( pfm_mont_support_opcm( i ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +static void +check_ibrp_events( hwd_control_state_t * current_state ) +{ + ia64_control_state_t *this_state = ( ia64_control_state_t * ) current_state; + pfmw_param_t *evt = &( this_state->evt ); + unsigned long umasks_retired[4]; + unsigned long umask; + unsigned int j, i, seen_retired, ibrp, idx; + int code; + int retired_code, incr; + pfmlib_ita2_output_param_t *ita2_output_param; + pfmlib_mont_output_param_t *mont_output_param; + +#if defined(PFMLIB_ITANIUM2_PMU) || defined(PFMLIB_MONTECITO_PMU) +char *retired_events[] = { + "IA64_TAGGED_INST_RETIRED_IBRP0_PMC8", + "IA64_TAGGED_INST_RETIRED_IBRP1_PMC9", + "IA64_TAGGED_INST_RETIRED_IBRP2_PMC8", + "IA64_TAGGED_INST_RETIRED_IBRP3_PMC9", + NULL +}; +#endif + + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM2_PMU: + ita2_output_param = + &( this_state->ita_lib_param.ita2_param.ita2_output_param ); + /* + * in fine mode, it is enough to use the event + * which only monitors the first debug register + * pair. The two pairs making up the range + * are guaranteed to be consecutive in rr_br[]. + */ + incr = pfm_ita2_irange_is_fine( &evt->outp, ita2_output_param ) ? 
4 : 2; + + for ( i = 0; retired_events[i]; i++ ) { + pfm_find_event( retired_events[i], &idx ); + pfm_ita2_get_event_umask( idx, umasks_retired + i ); + } + + pfm_get_event_code( idx, &retired_code ); + + /* + * print a warning message when the using IA64_TAGGED_INST_RETIRED_IBRP* which does + * not completely cover the all the debug register pairs used to make up the range. + * This could otherwise lead to misinterpretation of the results. + */ + for ( i = 0; i < ita2_output_param->pfp_ita2_irange.rr_nbr_used; + i += incr ) { + + ibrp = ita2_output_param->pfp_ita2_irange.rr_br[i].reg_num >> 1; + + seen_retired = 0; + for ( j = 0; j < evt->inp.pfp_event_count; j++ ) { + pfm_get_event_code( evt->inp.pfp_events[j].event, &code ); + if ( code != retired_code ) + continue; + seen_retired = 1; + pfm_ita2_get_event_umask( evt->inp.pfp_events[j].event, + &umask ); + if ( umask == umasks_retired[ibrp] ) + break; + } + if ( seen_retired && j == evt->inp.pfp_event_count ) + printf + ( "warning: code range uses IBR pair %d which is not monitored using %s\n", + ibrp, retired_events[ibrp] ); + } + + break; + case PFMLIB_MONTECITO_PMU: + mont_output_param = + &( this_state->ita_lib_param.mont_param.mont_output_param ); + /* + * in fine mode, it is enough to use the event + * which only monitors the first debug register + * pair. The two pairs making up the range + * are guaranteed to be consecutive in rr_br[]. + */ + incr = pfm_mont_irange_is_fine( &evt->outp, mont_output_param ) ? 4 : 2; + + for ( i = 0; retired_events[i]; i++ ) { + pfm_find_event( retired_events[i], &idx ); + pfm_mont_get_event_umask( idx, umasks_retired + i ); + } + + pfm_get_event_code( idx, &retired_code ); + + /* + * print a warning message when the using IA64_TAGGED_INST_RETIRED_IBRP* which does + * not completely cover the all the debug register pairs used to make up the range. + * This could otherwise lead to misinterpretation of the results. 
+ */ + for ( i = 0; i < mont_output_param->pfp_mont_irange.rr_nbr_used; + i += incr ) { + + ibrp = mont_output_param->pfp_mont_irange.rr_br[i].reg_num >> 1; + + seen_retired = 0; + for ( j = 0; j < evt->inp.pfp_event_count; j++ ) { + pfm_get_event_code( evt->inp.pfp_events[j].event, &code ); + if ( code != retired_code ) + continue; + seen_retired = 1; + pfm_mont_get_event_umask( evt->inp.pfp_events[j].event, + &umask ); + if ( umask == umasks_retired[ibrp] ) + break; + } + if ( seen_retired && j == evt->inp.pfp_event_count ) + printf + ( "warning: code range uses IBR pair %d which is not monitored using %s\n", + ibrp, retired_events[ibrp] ); + } + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + } +} + +static int +install_irange( hwd_context_t * pctx, hwd_control_state_t * current_state ) +{ + ia64_control_state_t *this_state = ( ia64_control_state_t * ) current_state; + unsigned int i, used_dbr; + int r; + int pid = ( ( ia64_context_t * ) pctx )->fd; + + pfmlib_ita2_output_param_t *ita2_output_param; + pfarg_dbreg_t ita2_dbreg[PFMON_ITA2_MAX_IBRS]; + pfmlib_mont_output_param_t *mont_output_param; + pfarg_dbreg_t mont_dbreg[PFMON_MONT_MAX_IBRS]; + + memset( mont_dbreg, 0, sizeof ( mont_dbreg ) ); + memset( ita2_dbreg, 0, sizeof ( ita2_dbreg ) ); + check_ibrp_events( current_state ); + + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM2_PMU: + ita2_output_param = + &( this_state->ita_lib_param.ita2_param.ita2_output_param ); + used_dbr = ita2_output_param->pfp_ita2_irange.rr_nbr_used; + + for ( i = 0; i < used_dbr; i++ ) { + ita2_dbreg[i].dbreg_num = + ita2_output_param->pfp_ita2_irange.rr_br[i].reg_num; + ita2_dbreg[i].dbreg_value = + ita2_output_param->pfp_ita2_irange.rr_br[i].reg_value; + } + + r = perfmonctl( pid, PFM_WRITE_IBRS, ita2_dbreg, + ita2_output_param->pfp_ita2_irange.rr_nbr_used ); + if ( r == -1 ) { + SUBDBG( "cannot install code range restriction: %s\n", + strerror( errno ) ); 
+ return ( PAPI_ESYS ); + } + return ( PAPI_OK ); + break; + case PFMLIB_MONTECITO_PMU: + mont_output_param = + &( this_state->ita_lib_param.mont_param.mont_output_param ); + + used_dbr = mont_output_param->pfp_mont_irange.rr_nbr_used; + + for ( i = 0; i < used_dbr; i++ ) { + mont_dbreg[i].dbreg_num = + mont_output_param->pfp_mont_irange.rr_br[i].reg_num; + mont_dbreg[i].dbreg_value = + mont_output_param->pfp_mont_irange.rr_br[i].reg_value; + } + + r = perfmonctl( pid, PFM_WRITE_IBRS, mont_dbreg, + mont_output_param->pfp_mont_irange.rr_nbr_used ); + if ( r == -1 ) { + SUBDBG( "cannot install code range restriction: %s\n", + strerror( errno ) ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return PAPI_ENOIMPL; + } +} + +static int +install_drange( hwd_context_t * pctx, hwd_control_state_t * current_state ) +{ + ia64_control_state_t *this_state = ( ia64_control_state_t * ) current_state; + unsigned int i, used_dbr; + int r; + int pid = ( ( ia64_context_t * ) pctx )->fd; + + pfmlib_ita2_output_param_t *ita2_output_param; + pfarg_dbreg_t ita2_dbreg[PFMON_ITA2_MAX_IBRS]; + pfmlib_mont_output_param_t *mont_output_param; + pfarg_dbreg_t mont_dbreg[PFMON_MONT_MAX_IBRS]; + + memset( mont_dbreg, 0, sizeof ( mont_dbreg ) ); + memset( ita2_dbreg, 0, sizeof ( ita2_dbreg ) ); + + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM2_PMU: + ita2_output_param = + &( this_state->ita_lib_param.ita2_param.ita2_output_param ); + used_dbr = ita2_output_param->pfp_ita2_drange.rr_nbr_used; + + for ( i = 0; i < used_dbr; i++ ) { + ita2_dbreg[i].dbreg_num = + ita2_output_param->pfp_ita2_drange.rr_br[i].reg_num; + ita2_dbreg[i].dbreg_value = + ita2_output_param->pfp_ita2_drange.rr_br[i].reg_value; + } + + r = perfmonctl( pid, PFM_WRITE_DBRS, ita2_dbreg, + ita2_output_param->pfp_ita2_drange.rr_nbr_used ); + if ( r == -1 ) { + SUBDBG( "cannot install data range 
restriction: %s\n", + strerror( errno ) ); + return ( PAPI_ESYS ); + } + return ( PAPI_OK ); + break; + case PFMLIB_MONTECITO_PMU: + mont_output_param = + &( this_state->ita_lib_param.mont_param.mont_output_param ); + used_dbr = mont_output_param->pfp_mont_drange.rr_nbr_used; + + for ( i = 0; i < used_dbr; i++ ) { + mont_dbreg[i].dbreg_num = + mont_output_param->pfp_mont_drange.rr_br[i].reg_num; + mont_dbreg[i].dbreg_value = + mont_output_param->pfp_mont_drange.rr_br[i].reg_value; + } + + r = perfmonctl( pid, PFM_WRITE_DBRS, mont_dbreg, + mont_output_param->pfp_mont_drange.rr_nbr_used ); + if ( r == -1 ) { + SUBDBG( "cannot install data range restriction: %s\n", + strerror( errno ) ); + return PAPI_ESYS; + } + return PAPI_OK; + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return PAPI_ENOIMPL; + } +} + +/* The routines set_{d,i}range() provide places to install the data and / or + instruction address range restrictions for counting qualified events. + These routines must set up or clear the appropriate local static data structures. + The actual work of loading the hardware registers must be done in update_ctl_state(). + Both drange and irange can be set on the same eventset. + If start=end=0, the feature is disabled. 
+*/ +static int +set_drange( hwd_context_t * ctx, hwd_control_state_t * current_state, + _papi_int_option_t * option ) +{ + int ret = PAPI_OK; + ia64_control_state_t *this_state = ( ia64_control_state_t * ) current_state; + pfmw_param_t *evt = &( this_state->evt ); + pfmlib_input_param_t *inp = &evt->inp; + pfmlib_ita2_input_param_t *ita2_inp = + &( this_state->ita_lib_param.ita2_param.ita2_input_param ); + pfmlib_ita2_output_param_t *ita2_outp = + &( this_state->ita_lib_param.ita2_param.ita2_output_param ); + pfmlib_mont_input_param_t *mont_inp = + &( this_state->ita_lib_param.mont_param.mont_input_param ); + pfmlib_mont_output_param_t *mont_outp = + &( this_state->ita_lib_param.mont_param.mont_output_param ); + + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM2_PMU: + + if ( ( unsigned long ) option->address_range.start == + ( unsigned long ) option->address_range.end || + ( ( unsigned long ) option->address_range.start == 0 && + ( unsigned long ) option->address_range.end == 0 ) ) + return ( PAPI_EINVAL ); + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + memset( &ita2_inp->pfp_ita2_drange, 0, + sizeof ( pfmlib_ita2_input_rr_t ) ); + memset( ita2_outp, 0, sizeof ( pfmlib_ita2_output_param_t ) ); + inp->pfp_dfl_plm = PFM_PLM3; + ita2_inp->pfp_ita2_drange.rr_used = 1; + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_start = + ( unsigned long ) option->address_range.start; + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_end = + ( unsigned long ) option->address_range.end; + SUBDBG + ( "++++ before data range : [%#016lx-%#016lx=%ld]: %d pair of debug registers used\n" + " start_offset:-%#lx end_offset:+%#lx\n", + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_start, + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_end, + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_end - + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_start, + ita2_outp->pfp_ita2_drange.rr_nbr_used >> 1, + ita2_outp->pfp_ita2_drange.rr_infos[0].rr_soff, + 
ita2_outp->pfp_ita2_drange.rr_infos[0].rr_eoff ); + + /* + * let the library figure out the values for the PMCS + */ + if ( ( ret = pfmw_dispatch_events( evt ) ) != PFMLIB_SUCCESS ) { + SUBDBG( "cannot configure events: %s\n", pfm_strerror( ret ) ); + } + + SUBDBG + ( "++++ data range : [%#016lx-%#016lx=%ld]: %d pair of debug registers used\n" + " start_offset:-%#lx end_offset:+%#lx\n", + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_start, + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_end, + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_end - + ita2_inp->pfp_ita2_drange.rr_limits[0].rr_start, + ita2_outp->pfp_ita2_drange.rr_nbr_used >> 1, + ita2_outp->pfp_ita2_drange.rr_infos[0].rr_soff, + ita2_outp->pfp_ita2_drange.rr_infos[0].rr_eoff ); + +/* if( ita2_inp->pfp_ita2_irange.rr_limits[0].rr_start!=0 || ita2_inp->pfp_ita2_irange.rr_limits[0].rr_end!=0 ) + if((ret=install_irange(ctx, current_state)) ==PAPI_OK){ + option->address_range.start_off=ita2_outp->pfp_ita2_irange.rr_infos[0].rr_soff; + option->address_range.end_off=ita2_outp->pfp_ita2_irange.rr_infos[0].rr_eoff; + } +*/ + if ( ( ret = install_drange( ctx, current_state ) ) == PAPI_OK ) { + option->address_range.start_off = + ita2_outp->pfp_ita2_drange.rr_infos[0].rr_soff; + option->address_range.end_off = + ita2_outp->pfp_ita2_drange.rr_infos[0].rr_eoff; + } + return ( ret ); + + break; + case PFMLIB_MONTECITO_PMU: + + if ( ( unsigned long ) option->address_range.start == + ( unsigned long ) option->address_range.end || + ( ( unsigned long ) option->address_range.start == 0 && + ( unsigned long ) option->address_range.end == 0 ) ) + return ( PAPI_EINVAL ); + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + memset( &mont_inp->pfp_mont_drange, 0, + sizeof ( pfmlib_mont_input_rr_t ) ); + memset( mont_outp, 0, sizeof ( pfmlib_mont_output_param_t ) ); + inp->pfp_dfl_plm = PFM_PLM3; + mont_inp->pfp_mont_drange.rr_used = 1; + mont_inp->pfp_mont_drange.rr_limits[0].rr_start = + ( unsigned long ) 
option->address_range.start; + mont_inp->pfp_mont_drange.rr_limits[0].rr_end = + ( unsigned long ) option->address_range.end; + SUBDBG + ( "++++ before data range : [%#016lx-%#016lx=%ld]: %d pair of debug registers used\n" + " start_offset:-%#lx end_offset:+%#lx\n", + mont_inp->pfp_mont_drange.rr_limits[0].rr_start, + mont_inp->pfp_mont_drange.rr_limits[0].rr_end, + mont_inp->pfp_mont_drange.rr_limits[0].rr_end - + mont_inp->pfp_mont_drange.rr_limits[0].rr_start, + mont_outp->pfp_mont_drange.rr_nbr_used >> 1, + mont_outp->pfp_mont_drange.rr_infos[0].rr_soff, + mont_outp->pfp_mont_drange.rr_infos[0].rr_eoff ); + /* + * let the library figure out the values for the PMCS + */ + if ( ( ret = pfmw_dispatch_events( evt ) ) != PFMLIB_SUCCESS ) { + SUBDBG( "cannot configure events: %s\n", pfm_strerror( ret ) ); + } + + SUBDBG + ( "++++ data range : [%#016lx-%#016lx=%ld]: %d pair of debug registers used\n" + " start_offset:-%#lx end_offset:+%#lx\n", + mont_inp->pfp_mont_drange.rr_limits[0].rr_start, + mont_inp->pfp_mont_drange.rr_limits[0].rr_end, + mont_inp->pfp_mont_drange.rr_limits[0].rr_end - + mont_inp->pfp_mont_drange.rr_limits[0].rr_start, + mont_outp->pfp_mont_drange.rr_nbr_used >> 1, + mont_outp->pfp_mont_drange.rr_infos[0].rr_soff, + mont_outp->pfp_mont_drange.rr_infos[0].rr_eoff ); + +/* if( ita2_inp->pfp_ita2_irange.rr_limits[0].rr_start!=0 || ita2_inp->pfp_ita2_irange.rr_limits[0].rr_end!=0 ) + if((ret=install_irange(ctx, current_state)) ==PAPI_OK){ + option->address_range.start_off=ita2_outp->pfp_ita2_irange.rr_infos[0].rr_soff; + option->address_range.end_off=ita2_outp->pfp_ita2_irange.rr_infos[0].rr_eoff; + } +*/ + if ( ( ret = install_drange( ctx, current_state ) ) == PAPI_OK ) { + option->address_range.start_off = + mont_outp->pfp_mont_drange.rr_infos[0].rr_soff; + option->address_range.end_off = + mont_outp->pfp_mont_drange.rr_infos[0].rr_eoff; + } + return ( ret ); + + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + 
_perfmon2_pfm_pmu_type ); + return PAPI_ENOIMPL; + } +} + +static int +set_irange( hwd_context_t * ctx, hwd_control_state_t * current_state, + _papi_int_option_t * option ) +{ + int ret = PAPI_OK; + ia64_control_state_t *this_state = ( ia64_control_state_t * ) current_state; + pfmw_param_t *evt = &( this_state->evt ); + pfmlib_input_param_t *inp = &evt->inp; + pfmlib_ita2_input_param_t *ita2_inp = + &( this_state->ita_lib_param.ita2_param.ita2_input_param ); + pfmlib_ita2_output_param_t *ita2_outp = + &( this_state->ita_lib_param.ita2_param.ita2_output_param ); + pfmlib_mont_input_param_t *mont_inp = + &( this_state->ita_lib_param.mont_param.mont_input_param ); + pfmlib_mont_output_param_t *mont_outp = + &( this_state->ita_lib_param.mont_param.mont_output_param ); + + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM2_PMU: + + if ( ( unsigned long ) option->address_range.start == + ( unsigned long ) option->address_range.end || + ( ( unsigned long ) option->address_range.start == 0 && + ( unsigned long ) option->address_range.end == 0 ) ) + return ( PAPI_EINVAL ); + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + memset( &ita2_inp->pfp_ita2_irange, 0, + sizeof ( pfmlib_ita2_input_rr_t ) ); + memset( ita2_outp, 0, sizeof ( pfmlib_ita2_output_param_t ) ); + inp->pfp_dfl_plm = PFM_PLM3; + ita2_inp->pfp_ita2_irange.rr_used = 1; + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_start = + ( unsigned long ) option->address_range.start; + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_end = + ( unsigned long ) option->address_range.end; + SUBDBG + ( "++++ before code range : [%#016lx-%#016lx=%ld]: %d pair of debug registers used\n" + " start_offset:-%#lx end_offset:+%#lx\n", + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_start, + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_end, + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_end - + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_start, + ita2_outp->pfp_ita2_irange.rr_nbr_used >> 1, + 
ita2_outp->pfp_ita2_irange.rr_infos[0].rr_soff, + ita2_outp->pfp_ita2_irange.rr_infos[0].rr_eoff ); + + /* + * let the library figure out the values for the PMCS + */ + if ( ( ret = pfmw_dispatch_events( evt ) ) != PFMLIB_SUCCESS ) { + SUBDBG( "cannot configure events: %s\n", pfm_strerror( ret ) ); + } + + SUBDBG + ( "++++ code range : [%#016lx-%#016lx=%ld]: %d pair of debug registers used\n" + " start_offset:-%#lx end_offset:+%#lx\n", + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_start, + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_end, + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_end - + ita2_inp->pfp_ita2_irange.rr_limits[0].rr_start, + ita2_outp->pfp_ita2_irange.rr_nbr_used >> 1, + ita2_outp->pfp_ita2_irange.rr_infos[0].rr_soff, + ita2_outp->pfp_ita2_irange.rr_infos[0].rr_eoff ); + if ( ( ret = install_irange( ctx, current_state ) ) == PAPI_OK ) { + option->address_range.start_off = + ita2_outp->pfp_ita2_irange.rr_infos[0].rr_soff; + option->address_range.end_off = + ita2_outp->pfp_ita2_irange.rr_infos[0].rr_eoff; + } + + break; + case PFMLIB_MONTECITO_PMU: + + if ( ( unsigned long ) option->address_range.start == + ( unsigned long ) option->address_range.end || + ( ( unsigned long ) option->address_range.start == 0 && + ( unsigned long ) option->address_range.end == 0 ) ) + return ( PAPI_EINVAL ); + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + memset( &mont_inp->pfp_mont_irange, 0, + sizeof ( pfmlib_mont_input_rr_t ) ); + memset( mont_outp, 0, sizeof ( pfmlib_mont_output_param_t ) ); + inp->pfp_dfl_plm = PFM_PLM3; + mont_inp->pfp_mont_irange.rr_used = 1; + mont_inp->pfp_mont_irange.rr_limits[0].rr_start = + ( unsigned long ) option->address_range.start; + mont_inp->pfp_mont_irange.rr_limits[0].rr_end = + ( unsigned long ) option->address_range.end; + SUBDBG + ( "++++ before code range : [%#016lx-%#016lx=%ld]: %d pair of debug registers used\n" + " start_offset:-%#lx end_offset:+%#lx\n", + mont_inp->pfp_mont_irange.rr_limits[0].rr_start, + 
mont_inp->pfp_mont_irange.rr_limits[0].rr_end, + mont_inp->pfp_mont_irange.rr_limits[0].rr_end - + mont_inp->pfp_mont_irange.rr_limits[0].rr_start, + mont_outp->pfp_mont_irange.rr_nbr_used >> 1, + mont_outp->pfp_mont_irange.rr_infos[0].rr_soff, + mont_outp->pfp_mont_irange.rr_infos[0].rr_eoff ); + + /* + * let the library figure out the values for the PMCS + */ + if ( ( ret = pfmw_dispatch_events( evt ) ) != PFMLIB_SUCCESS ) { + SUBDBG( "cannot configure events: %s\n", pfm_strerror( ret ) ); + } + + SUBDBG + ( "++++ code range : [%#016lx-%#016lx=%ld]: %d pair of debug registers used\n" + " start_offset:-%#lx end_offset:+%#lx\n", + mont_inp->pfp_mont_irange.rr_limits[0].rr_start, + mont_inp->pfp_mont_irange.rr_limits[0].rr_end, + mont_inp->pfp_mont_irange.rr_limits[0].rr_end - + mont_inp->pfp_mont_irange.rr_limits[0].rr_start, + mont_outp->pfp_mont_irange.rr_nbr_used >> 1, + mont_outp->pfp_mont_irange.rr_infos[0].rr_soff, + mont_outp->pfp_mont_irange.rr_infos[0].rr_eoff ); + if ( ( ret = install_irange( ctx, current_state ) ) == PAPI_OK ) { + option->address_range.start_off = + mont_outp->pfp_mont_irange.rr_infos[0].rr_soff; + option->address_range.end_off = + mont_outp->pfp_mont_irange.rr_infos[0].rr_eoff; + } + + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return PAPI_ENOIMPL; + } + + return ret; +} + +static int +pfmw_get_num_counters( int *num ) +{ + unsigned int tmp; + if ( pfm_get_num_counters( &tmp ) != PFMLIB_SUCCESS ) + return ( PAPI_ESYS ); + *num = tmp; + return ( PAPI_OK ); +} + +static int +pfmw_get_num_events( int *num ) +{ + unsigned int tmp; + if ( pfm_get_num_events( &tmp ) != PFMLIB_SUCCESS ) + return ( PAPI_ESYS ); + *num = tmp; + return ( PAPI_OK ); +} + + +/* Globals declared extern elsewhere */ + +hwi_search_t *preset_search_map; +extern papi_vector_t _ia64_vector; + +unsigned int PAPI_NATIVE_EVENT_AND_MASK = 0x000003ff; +unsigned int PAPI_NATIVE_EVENT_SHIFT = 0; +unsigned int 
PAPI_NATIVE_UMASK_AND_MASK = 0x03fffc00; +unsigned int PAPI_NATIVE_UMASK_MAX = 16; +unsigned int PAPI_NATIVE_UMASK_SHIFT = 10; + +/* Static locals */ + +int _perfmon2_pfm_pmu_type = -1; + +/* +static papi_svector_t _linux_ia64_table[] = { + {(void (*)())_papi_hwd_update_shlib_info, VEC_PAPI_HWD_UPDATE_SHLIB_INFO}, + {(void (*)())_papi_hwd_init, VEC_PAPI_HWD_INIT}, + {(void (*)())_papi_hwd_init_control_state, VEC_PAPI_HWD_INIT_CONTROL_STATE}, + {(void (*)())_papi_hwd_dispatch_timer, VEC_PAPI_HWD_DISPATCH_TIMER}, + {(void (*)())_papi_hwd_ctl, VEC_PAPI_HWD_CTL}, + {(void (*)())_papi_hwd_get_real_usec, VEC_PAPI_HWD_GET_REAL_USEC}, + {(void (*)())_papi_hwd_get_real_cycles, VEC_PAPI_HWD_GET_REAL_CYCLES}, + {(void (*)())_papi_hwd_get_virt_cycles, VEC_PAPI_HWD_GET_VIRT_CYCLES}, + {(void (*)())_papi_hwd_get_virt_usec, VEC_PAPI_HWD_GET_VIRT_USEC}, + {(void (*)())_papi_hwd_update_control_state,VEC_PAPI_HWD_UPDATE_CONTROL_STATE}, + {(void (*)())_papi_hwd_start, VEC_PAPI_HWD_START }, + {(void (*)())_papi_hwd_stop, VEC_PAPI_HWD_STOP }, + {(void (*)())_papi_hwd_read, VEC_PAPI_HWD_READ }, + {(void (*)())_papi_hwd_shutdown, VEC_PAPI_HWD_SHUTDOWN }, + {(void (*)())_papi_hwd_reset, VEC_PAPI_HWD_RESET}, + {(void (*)())_papi_hwd_set_profile, VEC_PAPI_HWD_SET_PROFILE}, + {(void (*)())_papi_hwd_stop_profiling, VEC_PAPI_HWD_STOP_PROFILING}, + {(void (*)())_papi_hwd_get_dmem_info, VEC_PAPI_HWD_GET_DMEM_INFO}, + {(void (*)())_papi_hwd_set_overflow, VEC_PAPI_HWD_SET_OVERFLOW}, + {(void (*)())_papi_hwd_ntv_enum_events, VEC_PAPI_HWD_NTV_ENUM_EVENTS}, + {(void (*)())_papi_hwd_ntv_code_to_name, VEC_PAPI_HWD_NTV_CODE_TO_NAME}, + {(void (*)())_papi_hwd_ntv_code_to_descr, VEC_PAPI_HWD_NTV_CODE_TO_DESCR}, + {NULL, VEC_PAPI_END} +}; +*/ + +static itanium_preset_search_t ia1_preset_search_map[] = { + {PAPI_L1_TCM, DERIVED_ADD, + {"L1D_READ_MISSES_RETIRED", "L2_INST_DEMAND_READS"}, {0}}, + {PAPI_L1_ICM, 0, {"L2_INST_DEMAND_READS"}, {0}}, + {PAPI_L1_DCM, 0, {"L1D_READ_MISSES_RETIRED"}, {0}}, + 
{PAPI_L2_TCM, 0, {"L2_MISSES"}, {0}}, + {PAPI_L2_DCM, DERIVED_SUB, {"L2_MISSES", "L3_READS_INST_READS_ALL"}, {0}}, + {PAPI_L2_ICM, 0, {"L3_READS_INST_READS_ALL"}, {0}}, + {PAPI_L3_TCM, 0, {"L3_MISSES"}, {0}}, + {PAPI_L3_ICM, 0, {"L3_READS_INST_READS_MISS"}, {0}}, + {PAPI_L3_DCM, DERIVED_ADD, + {"L3_READS_DATA_READS_MISS", "L3_WRITES_DATA_WRITES_MISS"}, {0}}, + {PAPI_L3_LDM, 0, {"L3_READS_DATA_READS_MISS"}, {0}}, + {PAPI_L3_STM, 0, {"L3_WRITES_DATA_WRITES_MISS"}, {0}}, + {PAPI_L1_LDM, 0, {"L1D_READ_MISSES_RETIRED"}, {0}}, + {PAPI_L2_LDM, 0, {"L3_READS_DATA_READS_ALL"}, {0}}, + {PAPI_L2_STM, 0, {"L3_WRITES_ALL_WRITES_ALL"}, {0}}, + {PAPI_L3_DCH, DERIVED_ADD, + {"L3_READS_DATA_READS_HIT", "L3_WRITES_DATA_WRITES_HIT"}, {0}}, + {PAPI_L1_DCH, DERIVED_SUB, {"L1D_READS_RETIRED", "L1D_READ_MISSES_RETIRED"}, + {0}}, + {PAPI_L1_DCA, 0, {"L1D_READS_RETIRED"}, {0}}, + {PAPI_L2_DCA, 0, {"L2_DATA_REFERENCES_ALL"}, {0}}, + {PAPI_L3_DCA, DERIVED_ADD, + {"L3_READS_DATA_READS_ALL", "L3_WRITES_DATA_WRITES_ALL"}, {0}}, + {PAPI_L2_DCR, 0, {"L2_DATA_REFERENCES_READS"}, {0}}, + {PAPI_L3_DCR, 0, {"L3_READS_DATA_READS_ALL"}, {0}}, + {PAPI_L2_DCW, 0, {"L2_DATA_REFERENCES_WRITES"}, {0}}, + {PAPI_L3_DCW, 0, {"L3_WRITES_DATA_WRITES_ALL"}, {0}}, + {PAPI_L3_ICH, 0, {"L3_READS_INST_READS_HIT"}, {0}}, + {PAPI_L1_ICR, DERIVED_ADD, {"L1I_PREFETCH_READS", "L1I_DEMAND_READS"}, {0}}, + {PAPI_L2_ICR, DERIVED_ADD, + {"L2_INST_DEMAND_READS", "L2_INST_PREFETCH_READS"}, {0}}, + {PAPI_L3_ICR, 0, {"L3_READS_INST_READS_ALL"}, {0}}, + {PAPI_TLB_DM, 0, {"DTLB_MISSES"}, {0}}, + {PAPI_TLB_IM, 0, {"ITLB_MISSES_FETCH"}, {0}}, + {PAPI_MEM_SCY, 0, {"MEMORY_CYCLE"}, {0}}, + {PAPI_STL_ICY, 0, {"UNSTALLED_BACKEND_CYCLE"}, {0}}, + {PAPI_BR_INS, 0, {"BRANCH_EVENT"}, {0}}, + {PAPI_BR_PRC, 0, {"BRANCH_PREDICTOR_ALL_CORRECT_PREDICTIONS"}, {0}}, + {PAPI_BR_MSP, DERIVED_ADD, + {"BRANCH_PREDICTOR_ALL_WRONG_PATH", "BRANCH_PREDICTOR_ALL_WRONG_TARGET"}, + {0}}, + {PAPI_TOT_CYC, 0, {"CPU_CYCLES"}, {0}}, + {PAPI_FP_OPS, DERIVED_ADD, 
{"FP_OPS_RETIRED_HI", "FP_OPS_RETIRED_LO"}, {0}}, + {PAPI_TOT_INS, 0, {"IA64_INST_RETIRED"}, {0}}, + {PAPI_LD_INS, 0, {"LOADS_RETIRED"}, {0}}, + {PAPI_SR_INS, 0, {"STORES_RETIRED"}, {0}}, + {PAPI_LST_INS, DERIVED_ADD, {"LOADS_RETIRED", "STORES_RETIRED"}, {0}}, + {0, 0, {0}, {0}} +}; + +static itanium_preset_search_t ia2_preset_search_map[] = { + {PAPI_CA_SNP, 0, {"BUS_SNOOPS_SELF"}, {0}}, + {PAPI_CA_INV, DERIVED_ADD, + {"BUS_MEM_READ_BRIL_SELF", "BUS_MEM_READ_BIL_SELF"}, {0}}, + {PAPI_TLB_TL, DERIVED_ADD, {"ITLB_MISSES_FETCH_L2ITLB", "L2DTLB_MISSES"}, + {0}}, + {PAPI_STL_ICY, 0, {"DISP_STALLED"}, {0}}, + {PAPI_STL_CCY, 0, {"BACK_END_BUBBLE_ALL"}, {0}}, + {PAPI_TOT_IIS, 0, {"INST_DISPERSED"}, {0}}, + {PAPI_RES_STL, 0, {"BE_EXE_BUBBLE_ALL"}, {0}}, + {PAPI_FP_STAL, 0, {"BE_EXE_BUBBLE_FRALL"}, {0}}, + {PAPI_L2_TCR, DERIVED_ADD, + {"L2_DATA_REFERENCES_L2_DATA_READS", "L2_INST_DEMAND_READS", + "L2_INST_PREFETCHES"}, {0}}, + {PAPI_L1_TCM, DERIVED_ADD, {"L2_INST_DEMAND_READS", "L1D_READ_MISSES_ALL"}, + {0}}, + {PAPI_L1_ICM, 0, {"L2_INST_DEMAND_READS"}, {0}}, + {PAPI_L1_DCM, 0, {"L1D_READ_MISSES_ALL"}, {0}}, + {PAPI_L2_TCM, 0, {"L2_MISSES"}, {0}}, + {PAPI_L2_DCM, DERIVED_SUB, {"L2_MISSES", "L3_READS_INST_FETCH_ALL"}, {0}}, + {PAPI_L2_ICM, 0, {"L3_READS_INST_FETCH_ALL"}, {0}}, + {PAPI_L3_TCM, 0, {"L3_MISSES"}, {0}}, + {PAPI_L3_ICM, 0, {"L3_READS_INST_FETCH_MISS"}, {0}}, + {PAPI_L3_DCM, DERIVED_ADD, + {"L3_READS_DATA_READ_MISS", "L3_WRITES_DATA_WRITE_MISS"}, {0}}, + {PAPI_L3_LDM, 0, {"L3_READS_ALL_MISS"}, {0}}, + {PAPI_L3_STM, 0, {"L3_WRITES_DATA_WRITE_MISS"}, {0}}, + {PAPI_L1_LDM, DERIVED_ADD, {"L1D_READ_MISSES_ALL", "L2_INST_DEMAND_READS"}, + {0}}, + {PAPI_L2_LDM, 0, {"L3_READS_ALL_ALL"}, {0}}, + {PAPI_L2_STM, 0, {"L3_WRITES_ALL_ALL"}, {0}}, + {PAPI_L1_DCH, DERIVED_SUB, {"L1D_READS_SET1", "L1D_READ_MISSES_ALL"}, {0}}, + {PAPI_L2_DCH, DERIVED_SUB, {"L2_DATA_REFERENCES_L2_ALL", "L2_MISSES"}, {0}}, + {PAPI_L3_DCH, DERIVED_ADD, + {"L3_READS_DATA_READ_HIT", 
"L3_WRITES_DATA_WRITE_HIT"}, {0}}, + {PAPI_L1_DCA, 0, {"L1D_READS_SET1"}, {0}}, + {PAPI_L2_DCA, 0, {"L2_DATA_REFERENCES_L2_ALL"}, {0}}, + {PAPI_L3_DCA, DERIVED_ADD, + {"L3_READS_DATA_READ_ALL", "L3_WRITES_DATA_WRITE_ALL"}, {0}}, + {PAPI_L1_DCR, 0, {"L1D_READS_SET1"}, {0}}, + {PAPI_L2_DCR, 0, {"L2_DATA_REFERENCES_L2_DATA_READS"}, {0}}, + {PAPI_L3_DCR, 0, {"L3_READS_DATA_READ_ALL"}, {0}}, + {PAPI_L2_DCW, 0, {"L2_DATA_REFERENCES_L2_DATA_WRITES"}, {0}}, + {PAPI_L3_DCW, 0, {"L3_WRITES_DATA_WRITE_ALL"}, {0}}, + {PAPI_L3_ICH, 0, {"L3_READS_DINST_FETCH_HIT"}, {0}}, + {PAPI_L1_ICR, DERIVED_ADD, {"L1I_PREFETCHES", "L1I_READS"}, {0}}, + {PAPI_L2_ICR, DERIVED_ADD, {"L2_INST_DEMAND_READS", "L2_INST_PREFETCHES"}, + {0}}, + {PAPI_L3_ICR, 0, {"L3_READS_INST_FETCH_ALL"}, {0}}, + {PAPI_L1_ICA, DERIVED_ADD, {"L1I_PREFETCHES", "L1I_READS"}, {0}}, + {PAPI_L2_TCH, DERIVED_SUB, {"L2_REFERENCES", "L2_MISSES"}, {0}}, + {PAPI_L3_TCH, DERIVED_SUB, {"L3_REFERENCES", "L3_MISSES"}, {0}}, + {PAPI_L2_TCA, 0, {"L2_REFERENCES"}, {0}}, + {PAPI_L3_TCA, 0, {"L3_REFERENCES"}, {0}}, + {PAPI_L3_TCR, 0, {"L3_READS_ALL_ALL"}, {0}}, + {PAPI_L3_TCW, 0, {"L3_WRITES_ALL_ALL"}, {0}}, + {PAPI_TLB_DM, 0, {"L2DTLB_MISSES"}, {0}}, + {PAPI_TLB_IM, 0, {"ITLB_MISSES_FETCH_L2ITLB"}, {0}}, + {PAPI_BR_INS, 0, {"BRANCH_EVENT"}, {0}}, + {PAPI_BR_PRC, 0, {"BR_MISPRED_DETAIL_ALL_CORRECT_PRED"}, {0}}, + {PAPI_BR_MSP, DERIVED_ADD, + {"BR_MISPRED_DETAIL_ALL_WRONG_PATH", "BR_MISPRED_DETAIL_ALL_WRONG_TARGET"}, + {0}}, + {PAPI_TOT_CYC, 0, {"CPU_CYCLES"}, {0}}, + {PAPI_FP_OPS, 0, {"FP_OPS_RETIRED"}, {0}}, + {PAPI_TOT_INS, DERIVED_ADD, {"IA64_INST_RETIRED", "IA32_INST_RETIRED"}, + {0}}, + {PAPI_LD_INS, 0, {"LOADS_RETIRED"}, {0}}, + {PAPI_SR_INS, 0, {"STORES_RETIRED"}, {0}}, + {PAPI_L2_ICA, 0, {"L2_INST_DEMAND_READS"}, {0}}, + {PAPI_L3_ICA, 0, {"L3_READS_INST_FETCH_ALL"}, {0}}, + {PAPI_L1_TCR, DERIVED_ADD, {"L1D_READS_SET0", "L1I_READS"}, {0}}, + {PAPI_L1_TCA, DERIVED_ADD, {"L1D_READS_SET0", "L1I_READS"}, {0}}, + {PAPI_L2_TCW, 0, 
{"L2_DATA_REFERENCES_L2_DATA_WRITES"}, {0}}, + {0, 0, {0}, {0}} +}; + +static itanium_preset_search_t ia3_preset_search_map[] = { +/* not sure */ + {PAPI_CA_SNP, 0, {"BUS_SNOOP_STALL_CYCLES_ANY"}, {0}}, + {PAPI_CA_INV, DERIVED_ADD, + {"BUS_MEM_READ_BRIL_SELF", "BUS_MEM_READ_BIL_SELF"}, {0}}, +/* should be OK */ + {PAPI_TLB_TL, DERIVED_ADD, {"ITLB_MISSES_FETCH_L2ITLB", "L2DTLB_MISSES"}, + {0}}, + {PAPI_STL_ICY, 0, {"DISP_STALLED"}, {0}}, + {PAPI_STL_CCY, 0, {"BACK_END_BUBBLE_ALL"}, {0}}, + {PAPI_TOT_IIS, 0, {"INST_DISPERSED"}, {0}}, + {PAPI_RES_STL, 0, {"BE_EXE_BUBBLE_ALL"}, {0}}, + {PAPI_FP_STAL, 0, {"BE_EXE_BUBBLE_FRALL"}, {0}}, +/* should be OK */ + {PAPI_L2_TCR, DERIVED_ADD, + {"L2D_REFERENCES_READS", "L2I_READS_ALL_DMND", "L2I_READS_ALL_PFTCH"}, + {0}}, +/* what is the correct name here: L2I_READS_ALL_DMND or L2I_DEMANDS_READ ? + * do not have papi_native_avail at this time, going to use L2I_READS_ALL_DMND always + * just replace on demand + */ + {PAPI_L1_TCM, DERIVED_ADD, {"L2I_READS_ALL_DMND", "L1D_READ_MISSES_ALL"}, + {0}}, + {PAPI_L1_ICM, 0, {"L2I_READS_ALL_DMND"}, {0}}, + {PAPI_L1_DCM, 0, {"L1D_READ_MISSES_ALL"}, {0}}, + {PAPI_L2_TCM, 0, {"L2I_READS_MISS_ALL", "L2D_MISSES"}, {0}}, + {PAPI_L2_DCM, DERIVED_SUB, {"L2D_MISSES"}, {0}}, + {PAPI_L2_ICM, 0, {"L2I_READS_MISS_ALL"}, {0}}, + {PAPI_L3_TCM, 0, {"L3_MISSES"}, {0}}, + {PAPI_L3_ICM, 0, {"L3_READS_INST_FETCH_MISS:M:E:S:I"}, {0}}, + {PAPI_L3_DCM, DERIVED_ADD, + {"L3_READS_DATA_READ_MISS:M:E:S:I", "L3_WRITES_DATA_WRITE_MISS:M:E:S:I"}, + {0}}, + {PAPI_L3_LDM, 0, {"L3_READS_ALL_MISS:M:E:S:I"}, {0}}, + {PAPI_L3_STM, 0, {"L3_WRITES_DATA_WRITE_MISS:M:E:S:I"}, {0}}, +/* why L2_INST_DEMAND_READS has been added here for the Itanium II ? 
+ * OLD: {PAPI_L1_LDM, DERIVED_ADD, {"L1D_READ_MISSES_ALL", "L2_INST_DEMAND_READS", 0, 0}} + */ + {PAPI_L1_LDM, 0, {"L1D_READ_MISSES_ALL"}, {0}}, + {PAPI_L2_LDM, 0, {"L3_READS_ALL_ALL:M:E:S:I"}, {0}}, + {PAPI_L2_STM, 0, {"L3_WRITES_ALL_ALL:M:E:S:I"}, {0}}, + {PAPI_L1_DCH, DERIVED_SUB, {"L1D_READS_SET1", "L1D_READ_MISSES_ALL"}, {0}}, + {PAPI_L2_DCH, DERIVED_SUB, {"L2D_REFERENCES_ALL", "L2D_MISSES"}, {0}}, + {PAPI_L3_DCH, DERIVED_ADD, + {"L3_READS_DATA_READ_HIT:M:E:S:I", "L3_WRITES_DATA_WRITE_HIT:M:E:S:I"}, + {0}}, + {PAPI_L1_DCA, 0, {"L1D_READS_SET1"}, {0}}, + {PAPI_L2_DCA, 0, {"L2D_REFERENCES_ALL"}, {0}}, + {PAPI_L3_DCA, 0, {"L3_REFERENCES"}, {0}}, + {PAPI_L1_DCR, 0, {"L1D_READS_SET1"}, {0}}, + {PAPI_L2_DCR, 0, {"L2D_REFERENCES_READS"}, {0}}, + {PAPI_L3_DCR, 0, {"L3_READS_DATA_READ_ALL:M:E:S:I"}, {0}}, + {PAPI_L2_DCW, 0, {"L2D_REFERENCES_WRITES"}, {0}}, + {PAPI_L3_DCW, 0, {"L3_WRITES_DATA_WRITE_ALL:M:E:S:I"}, {0}}, + {PAPI_L3_ICH, 0, {"L3_READS_DINST_FETCH_HIT:M:E:S:I"}, {0}}, + {PAPI_L1_ICR, DERIVED_ADD, {"L1I_PREFETCHES", "L1I_READS"}, {0}}, + {PAPI_L2_ICR, DERIVED_ADD, {"L2I_READS_ALL_DMND", "L2I_PREFETCHES"}, {0}}, + {PAPI_L3_ICR, 0, {"L3_READS_INST_FETCH_ALL:M:E:S:I"}, {0}}, + {PAPI_L1_ICA, DERIVED_ADD, {"L1I_PREFETCHES", "L1I_READS"}, {0}}, + {PAPI_L2_TCH, DERIVED_SUB, {"L2I_READS_HIT_ALL", "L2D_INSERT_HITS"}, {0}}, + {PAPI_L3_TCH, DERIVED_SUB, {"L3_REFERENCES", "L3_MISSES"}, {0}}, + {PAPI_L2_TCA, DERIVED_ADD, {"L2I_READS_ALL_ALL", "L2D_REFERENCES_ALL"}, + {0}}, + {PAPI_L3_TCA, 0, {"L3_REFERENCES"}, {0}}, + {PAPI_L3_TCR, 0, {"L3_READS_ALL_ALL:M:E:S:I"}, {0}}, + {PAPI_L3_TCW, 0, {"L3_WRITES_ALL_ALL:M:E:S:I"}, {0}}, + {PAPI_TLB_DM, 0, {"L2DTLB_MISSES"}, {0}}, + {PAPI_TLB_IM, 0, {"ITLB_MISSES_FETCH_L2ITLB"}, {0}}, + {PAPI_BR_INS, 0, {"BRANCH_EVENT"}, {0}}, + {PAPI_BR_PRC, 0, {"BR_MISPRED_DETAIL_ALL_CORRECT_PRED"}, {0}}, + {PAPI_BR_MSP, DERIVED_ADD, + {"BR_MISPRED_DETAIL_ALL_WRONG_PATH", "BR_MISPRED_DETAIL_ALL_WRONG_TARGET"}, + {0}}, + {PAPI_TOT_CYC, 0, 
{"CPU_OP_CYCLES_ALL"}, {0}}, + {PAPI_FP_OPS, 0, {"FP_OPS_RETIRED"}, {0}}, +// {PAPI_TOT_INS, DERIVED_ADD, {"IA64_INST_RETIRED", "IA32_INST_RETIRED"}, {0}}, + {PAPI_TOT_INS, 0, {"IA64_INST_RETIRED"}, {0}}, + {PAPI_LD_INS, 0, {"LOADS_RETIRED"}, {0}}, + {PAPI_SR_INS, 0, {"STORES_RETIRED"}, {0}}, + {PAPI_L2_ICA, 0, {"L2I_DEMAND_READS"}, {0}}, + {PAPI_L3_ICA, 0, {"L3_READS_INST_FETCH_ALL:M:E:S:I"}, {0}}, + {PAPI_L1_TCR, 0, {"L2I_READS_ALL_ALL"}, {0}}, +/* Why are TCA READS+READS_SET0? I used the same as PAPI_L1_TCR, because its an write through cache + * OLD: {PAPI_L1_TCA, DERIVED_ADD, {"L1D_READS_SET0", "L1I_READS"}, {0}}, + */ + {PAPI_L1_TCA, DERIVED_ADD, + {"L1I_PREFETCHES", "L1I_READS", "L1D_READS_SET0"}, {0}}, + {PAPI_L2_TCW, 0, {"L2D_REFERENCES_WRITES"}, {0}}, + {0, 0, {0}, {0}} +}; + +/* This component should never malloc anything. All allocation should be + done by the high level API. */ + + +/***************************************************************************** + * Code to support unit masks; only needed by Montecito and above * + *****************************************************************************/ +static int _ia64_modify_event( unsigned int event, int modifier ); + +/* Break a PAPI native event code into its composite event code and pfm mask bits */ +static int +_pfm_decode_native_event( unsigned int EventCode, unsigned int *event, + unsigned int *umask ) +{ + unsigned int tevent, major, minor; + + tevent = EventCode & PAPI_NATIVE_AND_MASK; + major = ( tevent & PAPI_NATIVE_EVENT_AND_MASK ) >> PAPI_NATIVE_EVENT_SHIFT; + if ( major >= ( unsigned int ) _ia64_vector.cmp_info.num_native_events ) + return ( PAPI_ENOEVNT ); + + minor = ( tevent & PAPI_NATIVE_UMASK_AND_MASK ) >> PAPI_NATIVE_UMASK_SHIFT; + *event = major; + *umask = minor; + SUBDBG( "EventCode %#08x is event %d, umask %#x\n", EventCode, major, + minor ); + return ( PAPI_OK ); +} + +/* This routine is used to step through all possible combinations of umask + values. 
It assumes that mask contains a valid combination of array indices + for this event. */ +static int +encode_native_event_raw( unsigned int event, unsigned int mask ) +{ + unsigned int tmp = event << PAPI_NATIVE_EVENT_SHIFT; + SUBDBG( "Old native index was %#08x with %#08x mask\n", tmp, mask ); + tmp = tmp | ( mask << PAPI_NATIVE_UMASK_SHIFT ); + SUBDBG( "New encoding is %#08x\n", tmp | PAPI_NATIVE_MASK ); + return ( tmp | PAPI_NATIVE_MASK ); +} + +/* convert a collection of pfm mask bits into an array of pfm mask indices */ +static int +prepare_umask( unsigned int foo, unsigned int *values ) +{ + unsigned int tmp = foo, i, j = 0; + + SUBDBG( "umask %#x\n", tmp ); + if ( foo == 0 ) + return 0; + while ( ( i = ffs( tmp ) ) ) { + tmp = tmp ^ ( 1 << ( i - 1 ) ); + values[j] = i - 1; + SUBDBG( "umask %d is %d\n", j, values[j] ); + j++; + } + return ( j ); +} + +int +_papi_pfm_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + unsigned int event, umask, num_masks; + int ret; + + if ( modifier == PAPI_ENUM_FIRST ) { + *EventCode = PAPI_NATIVE_MASK; /* assumes first native event is always 0x4000000 */ + return ( PAPI_OK ); + } + + if ( _pfm_decode_native_event( *EventCode, &event, &umask ) != PAPI_OK ) + return ( PAPI_ENOEVNT ); + + ret = pfm_get_num_event_masks( event, &num_masks ); + SUBDBG( "pfm_get_num_event_masks: event=%d num_masks=%d\n", event, + num_masks ); + if ( ret != PFMLIB_SUCCESS ) { + PAPIERROR( "pfm_get_num_event_masks(%d,%p): %s", event, &num_masks, + pfm_strerror( ret ) ); + return ( PAPI_ENOEVNT ); + } + if ( num_masks > PAPI_NATIVE_UMASK_MAX ) + num_masks = PAPI_NATIVE_UMASK_MAX; + SUBDBG( "This is umask %d of %d\n", umask, num_masks ); + + if ( modifier == PAPI_ENUM_EVENTS ) { + if ( event < ( unsigned int ) _ia64_vector.cmp_info.num_native_events - 1 ) { + *EventCode = encode_native_event_raw( event + 1, 0 ); + return ( PAPI_OK ); + } + return ( PAPI_ENOEVNT ); + } else if ( modifier == PAPI_NTV_ENUM_UMASK_COMBOS ) { + if ( umask + 1 < ( 
unsigned ) ( 1 << num_masks ) ) { + *EventCode = encode_native_event_raw( event, umask + 1 ); + return ( PAPI_OK ); + } + return ( PAPI_ENOEVNT ); + } else if ( modifier == PAPI_NTV_ENUM_UMASKS ) { + int thisbit = ffs( umask ); + + SUBDBG( "First bit is %d in %08x\b\n", thisbit - 1, umask ); + thisbit = 1 << thisbit; + + if ( thisbit & ( ( 1 << num_masks ) - 1 ) ) { + *EventCode = encode_native_event_raw( event, thisbit ); + return ( PAPI_OK ); + } + return ( PAPI_ENOEVNT ); + } else { + while ( event++ < + ( unsigned int ) _ia64_vector.cmp_info.num_native_events - 1 ) { + *EventCode = encode_native_event_raw( event + 1, 0 ); + if ( _ia64_modify_event( event + 1, modifier ) ) + return ( PAPI_OK ); + } + return ( PAPI_ENOEVNT ); + } +} + +static int +_papi_pfm_ntv_name_to_code( const char *name, unsigned int *event_code ) +{ + pfmlib_event_t event; + unsigned int i, mask = 0; + int ret; + + SUBDBG( "pfm_find_full_event(%s,%p)\n", name, &event ); + ret = pfm_find_full_event( name, &event ); + if ( ret == PFMLIB_SUCCESS ) { + /* we can only capture PAPI_NATIVE_UMASK_MAX or fewer masks */ + if ( event.num_masks > PAPI_NATIVE_UMASK_MAX ) { + SUBDBG( "num_masks (%d) > max masks (%d)\n", event.num_masks, + PAPI_NATIVE_UMASK_MAX ); + return ( PAPI_ENOEVNT ); + } else { + /* no mask index can exceed PAPI_NATIVE_UMASK_MAX */ + for ( i = 0; i < event.num_masks; i++ ) { + if ( event.unit_masks[i] > PAPI_NATIVE_UMASK_MAX ) { + SUBDBG( "mask index (%d) > max masks (%d)\n", + event.unit_masks[i], PAPI_NATIVE_UMASK_MAX ); + return ( PAPI_ENOEVNT ); + } + mask |= 1 << event.unit_masks[i]; + } + *event_code = encode_native_event_raw( event.event, mask ); + SUBDBG( "event_code: %#x event: %d num_masks: %d\n", *event_code, + event.event, event.num_masks ); + return ( PAPI_OK ); + } + } else if ( ret == PFMLIB_ERR_UMASK ) { + ret = pfm_find_event( name, &event.event ); + if ( ret == PFMLIB_SUCCESS ) { + *event_code = encode_native_event_raw( event.event, 0 ); + return ( PAPI_OK ); + } 
+ } + return ( PAPI_ENOEVNT ); +} + +int +_papi_pfm_ntv_code_to_name( unsigned int EventCode, char *ntv_name, int len ) +{ + int ret; + unsigned int event, umask; + pfmlib_event_t gete; + + memset( &gete, 0, sizeof ( gete ) ); + + if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) + return ( PAPI_ENOEVNT ); + + gete.event = event; + gete.num_masks = prepare_umask( umask, gete.unit_masks ); + if ( gete.num_masks == 0 ) + ret = pfm_get_event_name( gete.event, ntv_name, len ); + else + ret = pfm_get_full_event_name( &gete, ntv_name, len ); + if ( ret != PFMLIB_SUCCESS ) { + char tmp[PAPI_2MAX_STR_LEN]; + pfm_get_event_name( gete.event, tmp, sizeof ( tmp ) ); + PAPIERROR + ( "pfm_get_full_event_name(%p(event %d,%s,%d masks),%p,%d): %d -- %s", + &gete, gete.event, tmp, gete.num_masks, ntv_name, len, ret, + pfm_strerror( ret ) ); + if ( ret == PFMLIB_ERR_FULL ) + return PAPI_EBUF; + return PAPI_ECMP; + } + return PAPI_OK; +} + +int +_papi_pfm_ntv_code_to_descr( unsigned int EventCode, char *ntv_descr, int len ) +{ + unsigned int event, umask; + char *eventd, **maskd, *tmp; + int i, ret, total_len = 0; + pfmlib_event_t gete; + + memset( &gete, 0, sizeof ( gete ) ); + + if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) + return ( PAPI_ENOEVNT ); + + ret = pfm_get_event_description( event, &eventd ); + if ( ret != PFMLIB_SUCCESS ) { + PAPIERROR( "pfm_get_event_description(%d,%p): %s", + event, &eventd, pfm_strerror( ret ) ); + return ( PAPI_ENOEVNT ); + } + + if ( ( gete.num_masks = prepare_umask( umask, gete.unit_masks ) ) ) { + maskd = ( char ** ) malloc( gete.num_masks * sizeof ( char * ) ); + if ( maskd == NULL ) { + free( eventd ); + return ( PAPI_ENOMEM ); + } + for ( i = 0; i < ( int ) gete.num_masks; i++ ) { + ret = + pfm_get_event_mask_description( event, gete.unit_masks[i], + &maskd[i] ); + if ( ret != PFMLIB_SUCCESS ) { + PAPIERROR( "pfm_get_event_mask_description(%d,%d,%p): %s", + event, umask, &maskd, pfm_strerror( ret 
) ); + free( eventd ); + for ( ; i >= 0; i-- ) + free( maskd[i] ); + free( maskd ); + return ( PAPI_EINVAL ); + } + total_len += strlen( maskd[i] ); + } + tmp = + ( char * ) malloc( strlen( eventd ) + strlen( ", masks:" ) + + total_len + gete.num_masks + 1 ); + if ( tmp == NULL ) { + for ( i = gete.num_masks - 1; i >= 0; i-- ) + free( maskd[i] ); + free( maskd ); + free( eventd ); + } + tmp[0] = '\0'; + strcat( tmp, eventd ); + strcat( tmp, ", masks:" ); + for ( i = 0; i < ( int ) gete.num_masks; i++ ) { + if ( i != 0 ) + strcat( tmp, "," ); + strcat( tmp, maskd[i] ); + free( maskd[i] ); + } + free( maskd ); + } else { + tmp = ( char * ) malloc( strlen( eventd ) + 1 ); + if ( tmp == NULL ) { + free( eventd ); + return ( PAPI_ENOMEM ); + } + tmp[0] = '\0'; + strcat( tmp, eventd ); + free( eventd ); + } + strncpy( ntv_descr, tmp, len ); + if ( strlen( tmp ) > ( unsigned int ) len - 1 ) + ret = PAPI_EBUF; + else + ret = PAPI_OK; + free( tmp ); + return ( ret ); +} + +/***************************************************************************** + *****************************************************************************/ + +/* The values defined in this file may be X86-specific (2 general + purpose counters, 1 special purpose counter, etc.*/ + +/* PAPI stuff */ + +/* Low level functions, should not handle errors, just return codes. */ + +/* I want to keep the old way to define the preset search map. 
+ In Itanium2, there are more than 400 native events, if I use the + index directly, it will be difficult for people to debug, so I + still keep the old way to define preset search table, but + I add this function to generate the preset search map in papi3 +*/ +int +generate_preset_search_map( hwi_search_t ** maploc, + itanium_preset_search_t * oldmap, int num_cnt ) +{ + ( void ) num_cnt; /*unused */ + int pnum, i = 0, cnt; + char **findme; + hwi_search_t *psmap; + + /* Count up the presets */ + while ( oldmap[i].preset ) + i++; + /* Add null entry */ + i++; + + psmap = ( hwi_search_t * ) papi_malloc( i * sizeof ( hwi_search_t ) ); + if ( psmap == NULL ) + return ( PAPI_ENOMEM ); + memset( psmap, 0x0, i * sizeof ( hwi_search_t ) ); + + pnum = 0; /* preset event counter */ + for ( i = 0; i <= PAPI_MAX_PRESET_EVENTS; i++ ) { + if ( oldmap[i].preset == 0 ) + break; + pnum++; + psmap[i].event_code = oldmap[i].preset; + psmap[i].data.derived = oldmap[i].derived; + strcpy( psmap[i].data.operation, oldmap[i].operation ); + findme = oldmap[i].findme; + cnt = 0; + while ( *findme != NULL ) { + if ( cnt == MAX_COUNTER_TERMS ) { + PAPIERROR( "Count (%d) == MAX_COUNTER_TERMS (%d)\n", cnt, + MAX_COUNTER_TERMS ); + papi_free( psmap ); + return ( PAPI_EBUG ); + } + if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) { + if ( _papi_pfm_ntv_name_to_code + ( *findme, + ( unsigned int * ) &psmap[i].data.native[cnt] ) != + PAPI_OK ) { + PAPIERROR( "_papi_pfm_ntv_name_to_code(%s) failed\n", + *findme ); + papi_free( psmap ); + return ( PAPI_EBUG ); + } else + psmap[i].data.native[cnt] ^= PAPI_NATIVE_MASK; + } else { + if ( pfm_find_event_byname + ( *findme, + ( unsigned int * ) &psmap[i].data.native[cnt] ) != + PFMLIB_SUCCESS ) { + PAPIERROR( "pfm_find_event_byname(%s) failed\n", *findme ); + papi_free( psmap ); + return ( PAPI_EBUG ); + } else + psmap[i].data.native[cnt] ^= PAPI_NATIVE_MASK; + } + + findme++; + cnt++; + } + psmap[i].data.native[cnt] = PAPI_NULL; + } + + *maploc = 
/* Scan a text stream for the first line that contains search_str and also
 * contains a ':' character.
 *
 * f           stream to read from, starting at its current position
 * search_str  substring to look for
 * line        caller-provided buffer; up to 256 bytes are read into it per
 *             line, so it must be at least that large
 *
 * Returns a pointer into `line` at the first ':' of the matching line, or
 * NULL when the stream is exhausted without such a line.
 */
static char *
search_cpu_info( FILE * f, char *search_str, char *line )
{
	for ( ;; ) {
		char *colon;

		if ( fgets( line, 256, f ) == NULL )
			return ( NULL );

		if ( strstr( line, search_str ) == NULL )
			continue;

		/* a match without a ':' is not usable; keep reading */
		colon = strchr( line, ':' );
		if ( colon != NULL )
			return ( colon );
	}
}
This sucks, we should always call it here -PJM */ + + for ( i = 0; i < _ia64_vector.cmp_info.num_cntrs; i++ ) { + if ( PFMW_PEVT_PFPPC_REG_NUM( evt, i ) ) { + pfm_ita_pmc_reg_t value; + SUBDBG( "slot %d, register %lud active, config value %#lx\n", + i, ( unsigned long ) ( PFMW_PEVT_PFPPC_REG_NUM( evt, i ) ), + PFMW_PEVT_PFPPC_REG_VAL( evt, i ) ); + + PFMW_ARCH_REG_PMCVAL( value ) = PFMW_PEVT_PFPPC_REG_VAL( evt, i ); + value.pmc_ita_count_reg.pmc_plm = mode; + PFMW_PEVT_PFPPC_REG_VAL( evt, i ) = PFMW_ARCH_REG_PMCVAL( value ); + + SUBDBG( "new config value %#lx\n", + PFMW_PEVT_PFPPC_REG_VAL( evt, i ) ); + } + } + + return PAPI_OK; +} + +int +_ia64_ita2_set_domain( hwd_control_state_t * this_state, int domain ) +{ + int mode = 0, did = 0, i; + pfmw_param_t *evt = &this_state->evt; + + if ( domain & PAPI_DOM_USER ) { + did = 1; + mode |= PFM_PLM3; + } + + if ( domain & PAPI_DOM_KERNEL ) { + did = 1; + mode |= PFM_PLM0; + } + + if ( !did ) + return ( PAPI_EINVAL ); + + PFMW_PEVT_DFLPLM( evt ) = mode; + + /* Bug fix in case we don't call pfmw_dispatch_events after this code */ + /* Who did this? 
This sucks, we should always call it here -PJM */ + + for ( i = 0; i < _ia64_vector.cmp_info.num_cntrs; i++ ) { + if ( PFMW_PEVT_PFPPC_REG_NUM( evt, i ) ) { + pfm_ita2_pmc_reg_t value; + SUBDBG( "slot %d, register %lud active, config value %#lx\n", + i, ( unsigned long ) ( PFMW_PEVT_PFPPC_REG_NUM( evt, i ) ), + PFMW_PEVT_PFPPC_REG_VAL( evt, i ) ); + + PFMW_ARCH_REG_PMCVAL( value ) = PFMW_PEVT_PFPPC_REG_VAL( evt, i ); + value.pmc_ita2_counter_reg.pmc_plm = mode; + PFMW_PEVT_PFPPC_REG_VAL( evt, i ) = PFMW_ARCH_REG_PMCVAL( value ); + + SUBDBG( "new config value %#lx\n", + PFMW_PEVT_PFPPC_REG_VAL( evt, i ) ); + } + } + + return ( PAPI_OK ); +} + +int +_ia64_mont_set_domain( hwd_control_state_t * this_state, int domain ) +{ + int mode = 0, did = 0, i; + pfmw_param_t *evt = &( ( ia64_control_state_t * ) this_state )->evt; + + if ( domain & PAPI_DOM_USER ) { + did = 1; + mode |= PFM_PLM3; + } + + if ( domain & PAPI_DOM_KERNEL ) { + did = 1; + mode |= PFM_PLM0; + } + + if ( !did ) + return ( PAPI_EINVAL ); + + PFMW_PEVT_DFLPLM( evt ) = mode; + + /* Bug fix in case we don't call pfmw_dispatch_events after this code */ + /* Who did this? 
This sucks, we should always call it here -PJM */ + + for ( i = 0; i < _ia64_vector.cmp_info.num_cntrs; i++ ) { + if ( PFMW_PEVT_PFPPC_REG_NUM( evt, i ) ) { + pfm_mont_pmc_reg_t value; + SUBDBG( "slot %d, register %lud active, config value %#lx\n", + i, ( unsigned long ) ( PFMW_PEVT_PFPPC_REG_NUM( evt, i ) ), + PFMW_PEVT_PFPPC_REG_VAL( evt, i ) ); + + PFMW_ARCH_REG_PMCVAL( value ) = PFMW_PEVT_PFPPC_REG_VAL( evt, i ); + value.pmc_mont_counter_reg.pmc_plm = mode; + PFMW_PEVT_PFPPC_REG_VAL( evt, i ) = PFMW_ARCH_REG_PMCVAL( value ); + + SUBDBG( "new config value %#lx\n", + PFMW_PEVT_PFPPC_REG_VAL( evt, i ) ); + } + } + + return ( PAPI_OK ); +} + +int +_ia64_set_domain( hwd_control_state_t * this_state, int domain ) +{ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( _ia64_ita_set_domain( this_state, domain ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( _ia64_ita2_set_domain( this_state, domain ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( _ia64_mont_set_domain( this_state, domain ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +static int +set_granularity( hwd_control_state_t * this_state, int domain ) +{ + ( void ) this_state; /*unused */ + switch ( domain ) { + case PAPI_GRN_PROCG: + case PAPI_GRN_SYS: + case PAPI_GRN_SYS_CPU: + case PAPI_GRN_PROC: + return PAPI_ECMP; + case PAPI_GRN_THR: + break; + default: + return PAPI_EINVAL; + } + return PAPI_OK; +} + +int +_ia64_ita_read( hwd_context_t * ctx, hwd_control_state_t * machdep, + long long **events, int flags ) +{ + ( void ) flags; /*unused */ + unsigned int i; + pfarg_reg_t readem[_ia64_vector.cmp_info.num_cntrs]; + + pfmw_stop( ( ia64_context_t * ) ctx ); + memset( readem, 0x0, sizeof readem ); + +/* read the 4 counters, the high level function will process the + mapping for papi event to hardware counter +*/ + for ( i = 0; i < ( unsigned int ) 
_ia64_vector.cmp_info.num_cntrs; i++ ) { + readem[i].reg_num = PMU_FIRST_COUNTER + i; + } + + if ( pfmw_perfmonctl + ( ( ( ia64_context_t * ) ctx )->tid, ( ( ia64_context_t * ) ctx )->fd, + PFM_READ_PMDS, readem, _ia64_vector.cmp_info.num_cntrs ) == -1 ) { + SUBDBG( "perfmonctl error READ_PMDS errno %d\n", errno ); + pfmw_start( ( ia64_context_t * ) ctx ); + return PAPI_ESYS; + } + + for ( i = 0; i < ( unsigned int ) _ia64_vector.cmp_info.num_cntrs; i++ ) { + ( ( ia64_control_state_t * ) machdep )->counters[i] = + readem[i].reg_value; + SUBDBG( "read counters is %ld\n", readem[i].reg_value ); + } + + pfmw_param_t *pevt = &( ( ( ia64_control_state_t * ) machdep )->evt ); + pfm_ita_pmc_reg_t flop_hack; + /* special case, We need to scale FP_OPS_HI */ + for ( i = 0; i < PFMW_PEVT_EVTCOUNT( pevt ); i++ ) { + PFMW_ARCH_REG_PMCVAL( flop_hack ) = PFMW_PEVT_PFPPC_REG_VAL( pevt, i ); + if ( flop_hack.pmc_ita_count_reg.pmc_es == 0xa ) + ( ( ia64_control_state_t * ) machdep )->counters[i] *= 4; + } + + *events = ( ( ia64_control_state_t * ) machdep )->counters; + pfmw_start( ( ia64_context_t * ) ctx ); + return PAPI_OK; +} + + +int +_ia64_ita23_read( hwd_context_t * ctx, hwd_control_state_t * machdep, + long long **events, int flags ) +{ + ( void ) flags; /*unused */ + int i; + pfarg_reg_t readem[_ia64_vector.cmp_info.num_cntrs]; + + pfmw_stop( ( ia64_context_t * ) ctx ); + memset( readem, 0x0, sizeof readem ); + +/* read the 4 counters, the high level function will process the + mapping for papi event to hardware counter +*/ + for ( i = 0; i < _ia64_vector.cmp_info.num_cntrs; i++ ) { + readem[i].reg_num = PMU_FIRST_COUNTER + i; + } + + if ( pfmw_perfmonctl + ( ( ( ia64_context_t * ) ctx )->tid, ( ( ia64_context_t * ) ctx )->fd, + PFM_READ_PMDS, readem, _ia64_vector.cmp_info.num_cntrs ) == -1 ) { + SUBDBG( "perfmonctl error READ_PMDS errno %d\n", errno ); + pfmw_start( ( ia64_context_t * ) ctx ); + return PAPI_ESYS; + } + + for ( i = 0; i < _ia64_vector.cmp_info.num_cntrs; 
i++ ) { + ( ( ia64_control_state_t * ) machdep )->counters[i] = + readem[i].reg_value; + SUBDBG( "read counters is %ld\n", readem[i].reg_value ); + } + + *events = ( ( ia64_control_state_t * ) machdep )->counters; + pfmw_start( ( ia64_context_t * ) ctx ); + return PAPI_OK; +} + +int +_ia64_read( hwd_context_t * ctx, hwd_control_state_t * machdep, + long long **events, int flags ) +{ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( _ia64_ita_read( ctx, machdep, events, flags ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( _ia64_ita23_read( ctx, machdep, events, flags ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( _ia64_ita23_read( ctx, machdep, events, flags ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +/* This function should tell your kernel extension that your children + inherit performance register information and propagate the values up + upon child exit and parent wait. */ + +static int +set_inherit( int arg ) +{ + ( void ) arg; /*unused */ + return PAPI_ECMP; +} + +static int +set_default_domain( hwd_control_state_t * this_state, int domain ) +{ + return ( _ia64_set_domain( this_state, domain ) ); +} + +static int +set_default_granularity( hwd_control_state_t * this_state, int granularity ) +{ + return ( set_granularity( this_state, granularity ) ); +} + + + + +int +_ia64_init_component( int cidx ) +{ + ( void ) cidx; /*unused */ + int i, retval, type; + unsigned int version; + pfmlib_options_t pfmlib_options; + itanium_preset_search_t *ia_preset_search_map = NULL; + + /* Always initialize globals dynamically to handle forks properly. */ + + preset_search_map = NULL; + + /* Opened once for all threads. 
*/ + if ( pfm_initialize( ) != PFMLIB_SUCCESS ) + return ( PAPI_ESYS ); + + if ( pfm_get_version( &version ) != PFMLIB_SUCCESS ) + return PAPI_ECMP; + + if ( PFM_VERSION_MAJOR( version ) != PFM_VERSION_MAJOR( PFMLIB_VERSION ) ) { + PAPIERROR( "Version mismatch of libpfm: compiled %#x vs. installed %#x", + PFM_VERSION_MAJOR( PFMLIB_VERSION ), + PFM_VERSION_MAJOR( version ) ); + return PAPI_ECMP; + } + + memset( &pfmlib_options, 0, sizeof ( pfmlib_options ) ); +#ifdef DEBUG + if ( ISLEVEL( DEBUG_SUBSTRATE ) ) { + pfmlib_options.pfm_debug = 1; + pfmlib_options.pfm_verbose = 1; + } +#endif + + if ( pfm_set_options( &pfmlib_options ) ) + return ( PAPI_ESYS ); + + if ( pfm_get_pmu_type( &type ) != PFMLIB_SUCCESS ) + return ( PAPI_ESYS ); + + _perfmon2_pfm_pmu_type = type; + + /* Setup presets */ + + switch ( type ) { + case PFMLIB_ITANIUM_PMU: + ia_preset_search_map = ia1_preset_search_map; + break; + case PFMLIB_ITANIUM2_PMU: + ia_preset_search_map = ia2_preset_search_map; + break; + case PFMLIB_MONTECITO_PMU: + ia_preset_search_map = ia3_preset_search_map; + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", type ); + return ( PAPI_EBUG ); + } + + int ncnt, nnev; + + retval = pfmw_get_num_events( &nnev ); + if ( retval != PAPI_OK ) + return ( retval ); + + retval = pfmw_get_num_counters( &ncnt ); + if ( retval != PAPI_OK ) + return ( retval ); + + sprintf( _ia64_vector.cmp_info.support_version, + "%08x", PFMLIB_VERSION ); + sprintf( _ia64_vector.cmp_info.kernel_version, + "%08x", 2 << 16 ); /* 2.0 */ + + _ia64_vector.cmp_info.num_native_events = nnev; + _ia64_vector.cmp_info.num_cntrs = ncnt; + _ia64_vector.cmp_info.num_mpx_cntrs = ncnt; + + _ia64_vector.cmp_info.clock_ticks = sysconf( _SC_CLK_TCK ); + /* Put the signal handler in use to consume PFM_END_MSG's */ + _papi_hwi_start_signal( _ia64_vector.cmp_info.hardware_intr_sig, 1, + _ia64_vector.cmp_info.CmpIdx ); + + retval = mmtimer_setup(); + if ( retval ) + return ( retval ); + + 
retval = + generate_preset_search_map( &preset_search_map, ia_preset_search_map, + _ia64_vector.cmp_info.num_cntrs ); + if ( retval ) + return ( retval ); + + retval = _papi_hwi_setup_all_presets( preset_search_map, NULL ); + if ( retval ) + return ( retval ); + + /* get_memory_info has a CPU model argument that is not used, + * faking it here with hw_info.model which is not set by this + * component + */ + retval = _linux_get_memory_info( &_papi_hwi_system_info.hw_info, + _papi_hwi_system_info.hw_info.model ); + if ( retval ) + return ( retval ); + + return ( PAPI_OK ); +} + +int +_ia64_init( hwd_context_t * zero ) +{ +#if defined(USE_PROC_PTTIMER) + { + char buf[LINE_MAX]; + int fd; + sprintf( buf, "/proc/%d/task/%d/stat", getpid( ), mygettid( ) ); + fd = open( buf, O_RDONLY ); + if ( fd == -1 ) { + PAPIERROR( "open(%s)", buf ); + return ( PAPI_ESYS ); + } + zero->stat_fd = fd; + } +#endif + return ( pfmw_create_context( zero ) ); +} + +/* reset the hardware counters */ +int +_ia64_reset( hwd_context_t * ctx, hwd_control_state_t * machdep ) +{ + pfmw_param_t *pevt = &( machdep->evt ); + pfarg_reg_t writeem[MAX_COUNTERS]; + int i; + + pfmw_stop( ctx ); + memset( writeem, 0, sizeof writeem ); + for ( i = 0; i < _ia64_vector.cmp_info.num_cntrs; i++ ) { + /* Writing doesn't matter, we're just zeroing the counter. 
*/ + writeem[i].reg_num = PMU_FIRST_COUNTER + i; + if ( PFMW_PEVT_PFPPC_REG_FLG( pevt, i ) & PFM_REGFL_OVFL_NOTIFY ) + writeem[i].reg_value = machdep->pd[i].reg_long_reset; + } + if ( pfmw_perfmonctl + ( ctx->tid, ctx->fd, PFM_WRITE_PMDS, writeem, + _ia64_vector.cmp_info.num_cntrs ) == -1 ) { + PAPIERROR( "perfmonctl(PFM_WRITE_PMDS) errno %d", errno ); + return PAPI_ESYS; + } + pfmw_start( ctx ); + return ( PAPI_OK ); +} + +int +_ia64_start( hwd_context_t * ctx, hwd_control_state_t * current_state ) +{ + int i; + pfmw_param_t *pevt = &( current_state->evt ); + + pfmw_stop( ctx ); + +/* write PMCS */ + if ( pfmw_perfmonctl( ctx->tid, ctx->fd, PFM_WRITE_PMCS, + PFMW_PEVT_PFPPC( pevt ), + PFMW_PEVT_PFPPC_COUNT( pevt ) ) == -1 ) { + PAPIERROR( "perfmonctl(PFM_WRITE_PMCS) errno %d", errno ); + return ( PAPI_ESYS ); + } + if ( pfmw_perfmonctl + ( ctx->tid, ctx->fd, PFM_WRITE_PMDS, PFMW_PEVT_PFPPD( pevt ), + PFMW_PEVT_EVTCOUNT( pevt ) ) == -1 ) { + PAPIERROR( "perfmonctl(PFM_WRITE_PMDS) errno %d", errno ); + return ( PAPI_ESYS ); + } + +/* set the initial value of the hardware counter , if PAPI_overflow or + PAPI_profil are called, then the initial value is the threshold +*/ + for ( i = 0; i < _ia64_vector.cmp_info.num_cntrs; i++ ) + current_state->pd[i].reg_num = PMU_FIRST_COUNTER + i; + + if ( pfmw_perfmonctl( ctx->tid, ctx->fd, + PFM_WRITE_PMDS, current_state->pd, + _ia64_vector.cmp_info.num_cntrs ) == -1 ) { + PAPIERROR( "perfmonctl(WRITE_PMDS) errno %d", errno ); + return ( PAPI_ESYS ); + } + + pfmw_start( ctx ); + + return PAPI_OK; +} + +int +_ia64_stop( hwd_context_t * ctx, hwd_control_state_t * zero ) +{ + ( void ) zero; /*unused */ + pfmw_stop( ctx ); + return PAPI_OK; +} + +static int +round_requested_ns( int ns ) +{ + if ( ns < _papi_os_info.itimer_res_ns ) { + return _papi_os_info.itimer_res_ns; + } else { + int leftover_ns = ns % _papi_os_info.itimer_res_ns; + return ns + leftover_ns; + } +} + +int +_ia64_ctl( hwd_context_t * zero, int code, 
_papi_int_option_t * option ) +{ + int ret; + switch ( code ) { + case PAPI_DEFDOM: + return ( set_default_domain( option->domain.ESI->ctl_state, + option->domain.domain ) ); + case PAPI_DOMAIN: + return ( _ia64_set_domain + ( option->domain.ESI->ctl_state, option->domain.domain ) ); + case PAPI_DEFGRN: + return ( set_default_granularity + ( option->granularity.ESI->ctl_state, + option->granularity.granularity ) ); + case PAPI_GRANUL: + return ( set_granularity( option->granularity.ESI->ctl_state, + option->granularity.granularity ) ); +#if 0 + case PAPI_INHERIT: + return ( set_inherit( option->inherit.inherit ) ); +#endif + case PAPI_DATA_ADDRESS: + ret = + set_default_domain( option->address_range.ESI->ctl_state, + option->address_range.domain ); + if ( ret != PAPI_OK ) + return ( ret ); + set_drange( zero, option->address_range.ESI->ctl_state, option ); + return ( PAPI_OK ); + case PAPI_INSTR_ADDRESS: + ret = + set_default_domain( option->address_range.ESI->ctl_state, + option->address_range.domain ); + if ( ret != PAPI_OK ) + return ( ret ); + set_irange( zero, option->address_range.ESI->ctl_state, option ); + return ( PAPI_OK ); + case PAPI_DEF_ITIMER:{ + /* flags are currently ignored, eventually the flags will be able + to specify whether or not we use POSIX itimers (clock_gettimer) */ + if ( ( option->itimer.itimer_num == ITIMER_REAL ) && + ( option->itimer.itimer_sig != SIGALRM ) ) + return PAPI_EINVAL; + if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) && + ( option->itimer.itimer_sig != SIGVTALRM ) ) + return PAPI_EINVAL; + if ( ( option->itimer.itimer_num == ITIMER_PROF ) && + ( option->itimer.itimer_sig != SIGPROF ) ) + return PAPI_EINVAL; + if ( option->itimer.ns > 0 ) + option->itimer.ns = round_requested_ns( option->itimer.ns ); + /* At this point, we assume the user knows what he or + she is doing, they maybe doing something arch specific */ + return PAPI_OK; + } + case PAPI_DEF_MPX_NS:{ + option->multiplex.ns = round_requested_ns( 
option->multiplex.ns ); + return ( PAPI_OK ); + } + case PAPI_DEF_ITIMER_NS:{ + option->itimer.ns = round_requested_ns( option->itimer.ns ); + return ( PAPI_OK ); + } + default: + return ( PAPI_EINVAL ); + } +} + +int +_ia64_shutdown( hwd_context_t * ctx ) +{ +#if defined(USE_PROC_PTTIMER) + close( ctx->stat_fd ); +#endif + + return ( pfmw_destroy_context( ctx ) ); +} + +static int +ia64_ita_process_profile_buffer( ThreadInfo_t * thread, EventSetInfo_t * ESI ) +{ + ( void ) thread; /*unused */ + pfmw_smpl_hdr_t *hdr; + pfmw_smpl_entry_t *ent; + unsigned long buf_pos; + unsigned long entry_size; + int ret, reg_num, count, pos; + unsigned int i, EventCode = 0, eventindex, native_index = 0; + ia64_control_state_t *this_state; + pfm_ita_pmd_reg_t *reg; + unsigned long overflow_vector, pc; + + + if ( ( ESI->state & PAPI_PROFILING ) == 0 ) + return ( PAPI_EBUG ); + + this_state = ( ia64_control_state_t * ) ( ESI->ctl_state ); + hdr = ( pfmw_smpl_hdr_t * ) this_state->smpl_vaddr; + + entry_size = sizeof ( pfmw_smpl_entry_t ); + + /* + * walk through all the entries recorded in the buffer + */ + buf_pos = ( unsigned long ) ( hdr + 1 ); + for ( i = 0; i < hdr->hdr_count; i++ ) { + ret = 0; + ent = ( pfmw_smpl_entry_t * ) buf_pos; + /* PFM30 only one PMD overflows in each sample */ + overflow_vector = 1 << ent->ovfl_pmd; + + SUBDBG( "Entry %d PID:%d CPU:%d ovfl_vector:%#lx IIP:%#016lx\n", + i, ent->pid, ent->cpu, overflow_vector, ent->ip ); + + while ( overflow_vector ) { + reg_num = ffs( overflow_vector ) - 1; + /* find the event code */ + for ( count = 0; count < ESI->profile.event_counter; count++ ) { + eventindex = ESI->profile.EventIndex[count]; + pos = ESI->EventInfoArray[eventindex].pos[0]; + if ( pos + PMU_FIRST_COUNTER == reg_num ) { + EventCode = ESI->profile.EventCode[count]; + native_index = + ESI->NativeInfoArray[pos]. 
+ ni_event & PAPI_NATIVE_AND_MASK; + break; + } + } + /* something is wrong */ + if ( count == ESI->profile.event_counter ) { + PAPIERROR + ( "wrong count: %d vs. ESI->profile.event_counter %d\n", + count, ESI->profile.event_counter ); + return ( PAPI_EBUG ); + } + + /* print entry header */ + pc = ent->ip; + if ( pfm_ita_is_dear( native_index ) ) { + reg = ( pfm_ita_pmd_reg_t * ) ( ent + 1 ); + reg++; + reg++; + pc = ( reg->pmd17_ita_reg.dear_iaddr << 4 ) | ( reg-> + pmd17_ita_reg. + dear_slot ); + /* adjust pointer position */ + buf_pos += ( hweight64( DEAR_REGS_MASK ) << 3 ); + } + + _papi_hwi_dispatch_profile( ESI, ( caddr_t ) pc, ( long long ) 0, + count ); + overflow_vector ^= ( unsigned long ) 1 << reg_num; + } + /* move to next entry */ + buf_pos += entry_size; + } /* end of if */ + return ( PAPI_OK ); +} + +static int +ia64_ita2_process_profile_buffer( ThreadInfo_t * thread, EventSetInfo_t * ESI ) +{ + ( void ) thread; /*unused */ + pfmw_smpl_hdr_t *hdr; + pfmw_smpl_entry_t *ent; + unsigned long buf_pos; + unsigned long entry_size; + int ret, reg_num, count, pos; + unsigned int i, EventCode = 0, eventindex, native_index = 0; + ia64_control_state_t *this_state; + pfm_ita2_pmd_reg_t *reg; + unsigned long overflow_vector, pc; + + + if ( ( ESI->state & PAPI_PROFILING ) == 0 ) + return ( PAPI_EBUG ); + + this_state = ( ia64_control_state_t * ) ( ESI->ctl_state ); + hdr = ( pfmw_smpl_hdr_t * ) ( this_state->smpl_vaddr ); + + entry_size = sizeof ( pfmw_smpl_entry_t ); + + /* + * walk through all the entries recorded in the buffer + */ + buf_pos = ( unsigned long ) ( hdr + 1 ); + for ( i = 0; i < hdr->hdr_count; i++ ) { + ret = 0; + ent = ( pfmw_smpl_entry_t * ) buf_pos; + /* PFM30 only one PMD overflows in each sample */ + overflow_vector = 1 << ent->ovfl_pmd; + + SUBDBG( "Entry %d PID:%d CPU:%d ovfl_vector:%#lx IIP:%#016lx\n", + i, ent->pid, ent->cpu, overflow_vector, ent->ip ); + + while ( overflow_vector ) { + reg_num = ffs( overflow_vector ) - 1; + /* find 
the event code */ + for ( count = 0; count < ESI->profile.event_counter; count++ ) { + eventindex = ESI->profile.EventIndex[count]; + pos = ESI->EventInfoArray[eventindex].pos[0]; + if ( pos + PMU_FIRST_COUNTER == reg_num ) { + EventCode = ESI->profile.EventCode[count]; + native_index = + ESI->NativeInfoArray[pos]. + ni_event & PAPI_NATIVE_AND_MASK; + break; + } + } + /* something is wrong */ + if ( count == ESI->profile.event_counter ) { + PAPIERROR + ( "wrong count: %d vs. ESI->profile.event_counter %d\n", + count, ESI->profile.event_counter ); + return ( PAPI_EBUG ); + } + + /* print entry header */ + pc = ent->ip; + if ( pfm_ita2_is_dear( native_index ) ) { + reg = ( pfm_ita2_pmd_reg_t * ) ( ent + 1 ); + reg++; + reg++; + pc = ( ( reg->pmd17_ita2_reg.dear_iaddr + + reg->pmd17_ita2_reg.dear_bn ) << 4 ) + | reg->pmd17_ita2_reg.dear_slot; + + /* adjust pointer position */ + buf_pos += ( hweight64( DEAR_REGS_MASK ) << 3 ); + } + + _papi_hwi_dispatch_profile( ESI, ( caddr_t ) pc, ( long long ) 0, + count ); + overflow_vector ^= ( unsigned long ) 1 << reg_num; + } + /* move to next entry */ + buf_pos += entry_size; + } /* end of if */ + return ( PAPI_OK ); +} + +static int +ia64_mont_process_profile_buffer( ThreadInfo_t * thread, EventSetInfo_t * ESI ) +{ + ( void ) thread; /*unused */ + pfmw_smpl_hdr_t *hdr; + pfmw_smpl_entry_t *ent; + unsigned long buf_pos; + unsigned long entry_size; + int ret, reg_num, count, pos; + unsigned int i, EventCode = 0, eventindex, native_index = 0; + ia64_control_state_t *this_state; + pfm_mont_pmd_reg_t *reg; + unsigned long overflow_vector, pc; + unsigned int umask; + + + if ( ( ESI->state & PAPI_PROFILING ) == 0 ) + return ( PAPI_EBUG ); + + this_state = ( ia64_control_state_t * ) ESI->ctl_state; + hdr = ( pfmw_smpl_hdr_t * ) this_state->smpl_vaddr; + + entry_size = sizeof ( pfmw_smpl_entry_t ); + + /* + * walk through all the entries recorded in the buffer + */ + buf_pos = ( unsigned long ) ( hdr + 1 ); + for ( i = 0; i < 
hdr->hdr_count; i++ ) { + ret = 0; + ent = ( pfmw_smpl_entry_t * ) buf_pos; + /* PFM30 only one PMD overflows in each sample */ + overflow_vector = 1 << ent->ovfl_pmd; + + SUBDBG( "Entry %d PID:%d CPU:%d ovfl_vector:%#lx IIP:%#016lx\n", + i, ent->pid, ent->cpu, overflow_vector, ent->ip ); + + while ( overflow_vector ) { + reg_num = ffs( overflow_vector ) - 1; + /* find the event code */ + for ( count = 0; count < ESI->profile.event_counter; count++ ) { + eventindex = ESI->profile.EventIndex[count]; + pos = ESI->EventInfoArray[eventindex].pos[0]; + if ( pos + PMU_FIRST_COUNTER == reg_num ) { + EventCode = ESI->profile.EventCode[count]; + if ( _pfm_decode_native_event + ( ESI->NativeInfoArray[pos].ni_event, &native_index, + &umask ) != PAPI_OK ) + return ( PAPI_ENOEVNT ); + break; + } + } + /* something is wrong */ + if ( count == ESI->profile.event_counter ) { + PAPIERROR + ( "wrong count: %d vs. ESI->profile.event_counter %d\n", + count, ESI->profile.event_counter ); + return ( PAPI_EBUG ); + } + + /* print entry header */ + pc = ent->ip; + if ( pfm_mont_is_dear( native_index ) ) { + reg = ( pfm_mont_pmd_reg_t * ) ( ent + 1 ); + reg++; + reg++; + pc = ( ( reg->pmd36_mont_reg.dear_iaddr + + reg->pmd36_mont_reg.dear_bn ) << 4 ) + | reg->pmd36_mont_reg.dear_slot; + /* adjust pointer position */ + buf_pos += ( hweight64( DEAR_REGS_MASK ) << 3 ); + } + + _papi_hwi_dispatch_profile( ESI, ( caddr_t ) pc, ( long long ) 0, + count ); + overflow_vector ^= ( unsigned long ) 1 << reg_num; + } + /* move to next entry */ + buf_pos += entry_size; + } /* end of if */ + return ( PAPI_OK ); +} + +static int +ia64_process_profile_buffer( ThreadInfo_t * thread, EventSetInfo_t * ESI ) +{ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( ia64_ita_process_profile_buffer( thread, ESI ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( ia64_ita2_process_profile_buffer( thread, ESI ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( 
ia64_mont_process_profile_buffer( thread, ESI ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +static void +ia64_dispatch_sigprof( int n, hwd_siginfo_t * info, hwd_ucontext_t *sc ) +{ + ( void ) n; /*unused */ + _papi_hwi_context_t ctx; + ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 ); + caddr_t address; + int cidx = _ia64_vector.cmp_info.CmpIdx; + +#if defined(DEBUG) + if ( thread == NULL ) { + PAPIERROR( "thread == NULL in _papi_hwd_dispatch_timer!" ); + return; + } +#endif + + ctx.si = info; + ctx.ucontext = sc; + address = GET_OVERFLOW_ADDRESS( ( ctx ) ); + + if ( ( thread == NULL ) || ( thread->running_eventset[cidx] == NULL ) ) { + SUBDBG( "%p, %p\n", thread, thread->running_eventset[cidx] ); + return; + } + + if ( thread->running_eventset[cidx]->overflow. + flags & PAPI_OVERFLOW_FORCE_SW ) { + _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, NULL, 0, + 0, &thread, cidx ); + return; + } + + pfm_msg_t msg; + int ret, fd; + fd = info->si_fd; + retry: + ret = read( fd, &msg, sizeof ( msg ) ); + if ( ret == -1 ) { + if ( errno == EINTR ) { + SUBDBG( "read(%d) interrupted, retrying\n", fd ); + goto retry; + } else { + PAPIERROR( "read(%d): errno %d", fd, errno ); + } + } else if ( ret != sizeof ( msg ) ) { + PAPIERROR( "read(%d): short %d vs. 
%d bytes", fd, ret, sizeof ( msg ) ); + ret = -1; + } +#if defined(HAVE_PFM_MSG_TYPE) + if ( msg.type == PFM_MSG_END ) { + SUBDBG( "PFM_MSG_END\n" ); + return; + } + if ( msg.type != PFM_MSG_OVFL ) { + PAPIERROR( "unexpected msg type %d", msg.type ); + return; + } +#else + if ( msg.pfm_gen_msg.msg_type == PFM_MSG_END ) { + SUBDBG( "PFM_MSG_END\n" ); + return; + } + if ( msg.pfm_gen_msg.msg_type != PFM_MSG_OVFL ) { + PAPIERROR( "unexpected msg type %d", msg.pfm_gen_msg.msg_type ); + return; + } +#endif + if ( ret != -1 ) { + if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) && + !( thread->running_eventset[cidx]->profile. + flags & PAPI_PROFIL_FORCE_SW ) ) + ia64_process_profile_buffer( thread, + thread->running_eventset[cidx] ); + else + _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, + NULL, + msg.pfm_ovfl_msg. + msg_ovfl_pmds[0] >> + PMU_FIRST_COUNTER, 0, + &thread, cidx ); + } + if ( pfmw_perfmonctl( 0, fd, PFM_RESTART, 0, 0 ) == -1 ) { + PAPIERROR( "perfmonctl(PFM_RESTART) errno %d, %s", errno, + strerror( errno ) ); + return; + } +} + +void +_ia64_dispatch_timer( int signal, hwd_siginfo_t * info, void *context ) +{ + ia64_dispatch_sigprof( signal, info, context ); +} + +static int +set_notify( EventSetInfo_t * ESI, int index, int value ) +{ + int *pos, count, i; + unsigned int hwcntr; + pfmw_param_t *pevt = + &( ( ( ia64_control_state_t * ) ESI->ctl_state )->evt ); + + pos = ESI->EventInfoArray[index].pos; + count = 0; + while ( pos[count] != -1 && count < _ia64_vector.cmp_info.num_cntrs ) { + hwcntr = pos[count] + PMU_FIRST_COUNTER; + for ( i = 0; i < _ia64_vector.cmp_info.num_cntrs; i++ ) { + if ( PFMW_PEVT_PFPPC_REG_NUM( pevt, i ) == hwcntr ) { + SUBDBG( "Found hw counter %d in %d, flags %d\n", hwcntr, i, + value ); + PFMW_PEVT_PFPPC_REG_FLG( pevt, i ) = value; +/* + #ifdef PFM30 + if (value) + pevt->pc[i].reg_reset_pmds[0] = 1UL << pevt->pc[i].reg_num; + else + pevt->pc[i].reg_reset_pmds[0] = 0; + #endif +*/ + break; + } + } + 
count++; + } + return ( PAPI_OK ); +} + +int +_ia64_stop_profiling( ThreadInfo_t * thread, EventSetInfo_t * ESI ) +{ + int cidx = _ia64_vector.cmp_info.CmpIdx; + + pfmw_stop( thread->context[cidx] ); + return ( ia64_process_profile_buffer( thread, ESI ) ); +} + + +int +_ia64_set_profile( EventSetInfo_t * ESI, int EventIndex, int threshold ) +{ + int cidx = _ia64_vector.cmp_info.CmpIdx; + hwd_control_state_t *this_state = ESI->ctl_state; + hwd_context_t *ctx = ESI->master->context[cidx]; + int ret; + + ret = _ia64_vector.set_overflow( ESI, EventIndex, threshold ); + if ( ret != PAPI_OK ) + return ret; + ret = pfmw_destroy_context( ctx ); + if ( ret != PAPI_OK ) + return ret; + if ( threshold == 0 ) + ret = pfmw_create_context( ctx ); + else + ret = + pfmw_recreate_context( ESI, ctx, &this_state->smpl_vaddr, + EventIndex ); + +//#warning "This should be handled in the high level layers" + ESI->state ^= PAPI_OVERFLOWING; + ESI->overflow.flags ^= PAPI_OVERFLOW_HARDWARE; + + return ( ret ); +} + +int +_ia64_set_overflow( EventSetInfo_t * ESI, int EventIndex, int threshold ) +{ + hwd_control_state_t *this_state = ESI->ctl_state; + int j, retval = PAPI_OK, *pos; + int cidx = _ia64_vector.cmp_info.CmpIdx; + + pos = ESI->EventInfoArray[EventIndex].pos; + j = pos[0]; + SUBDBG( "Hardware counter %d used in overflow, threshold %d\n", j, + threshold ); + + if ( threshold == 0 ) { + /* Remove the signal handler */ + + retval = _papi_hwi_stop_signal( _ia64_vector.cmp_info.hardware_intr_sig ); + if ( retval != PAPI_OK ) + return ( retval ); + + /* Remove the overflow notifier on the proper event. */ + + set_notify( ESI, EventIndex, 0 ); + + this_state->pd[j].reg_value = 0; + this_state->pd[j].reg_long_reset = 0; + this_state->pd[j].reg_short_reset = 0; + } else { + retval = + _papi_hwi_start_signal( _ia64_vector.cmp_info.hardware_intr_sig, 1, + cidx ); + if ( retval != PAPI_OK ) + return ( retval ); + + /* Set the overflow notifier on the proper event. 
Remember that selector */ + + set_notify( ESI, EventIndex, PFM_REGFL_OVFL_NOTIFY ); + + this_state->pd[j].reg_value = + ( ~0UL ) - ( unsigned long ) threshold + 1; + this_state->pd[j].reg_short_reset = + ( ~0UL ) - ( unsigned long ) threshold + 1; + this_state->pd[j].reg_long_reset = + ( ~0UL ) - ( unsigned long ) threshold + 1; + + } + return ( retval ); +} + +int +_ia64_ntv_code_to_name( unsigned int EventCode, char *ntv_name, int len ) +{ + if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) + return ( _papi_pfm_ntv_code_to_name( EventCode, ntv_name, len ) ); + else { + char name[PAPI_MAX_STR_LEN]; + int ret = 0; + + ret = pfmw_get_event_name( name, EventCode ^ PAPI_NATIVE_MASK ); /* NOTE(review): ret was never assigned, leaving the check below dead; assumes pfmw_get_event_name() returns a PAPI status - confirm against its definition */ + + if ( ret != PAPI_OK ) + return ( PAPI_ENOEVNT ); + + strncpy( ntv_name, name, len ); if ( len > 0 ) ntv_name[len - 1] = '\0'; /* strncpy does not terminate when name fills all len bytes */ + return ( PAPI_OK ); + } +} + +int +_ia64_ntv_code_to_descr( unsigned int EventCode, char *ntv_descr, int len ) +{ + if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) + return ( _papi_pfm_ntv_code_to_descr( EventCode, ntv_descr, len ) ); + else { +#if defined(HAVE_PFM_GET_EVENT_DESCRIPTION) + pfmw_get_event_description( EventCode ^ PAPI_NATIVE_MASK, ntv_descr, + len ); + return ( PAPI_OK ); +#else + return ( _ia64_ntv_code_to_name( EventCode, ntv_descr, len ) ); +#endif + } +} + +static int +_ia64_modify_event( unsigned int event, int modifier ) +{ + switch ( modifier ) { + case PAPI_NTV_ENUM_IARR: + return ( pfmw_support_iarr( event ) ); + case PAPI_NTV_ENUM_DARR: + return ( pfmw_support_darr( event ) ); + case PAPI_NTV_ENUM_OPCM: + return ( pfmw_support_opcm( event ) ); + case PAPI_NTV_ENUM_DEAR: + return ( pfmw_is_dear( event ) ); + case PAPI_NTV_ENUM_IEAR: + return ( pfmw_is_iear( event ) ); + default: + return ( 1 ); + } +} + +int +_ia64_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) + return ( _papi_pfm_ntv_enum_events( EventCode, modifier ) ); + else { + int index = *EventCode & PAPI_NATIVE_AND_MASK; + + if ( modifier == 
PAPI_ENUM_FIRST ) { + *EventCode = PAPI_NATIVE_MASK; + return ( PAPI_OK ); + } + + while ( index++ < _ia64_vector.cmp_info.num_native_events - 1 ) { + *EventCode += 1; + if ( _ia64_modify_event + ( ( *EventCode ^ PAPI_NATIVE_MASK ), modifier ) ) + return ( PAPI_OK ); + } + return ( PAPI_ENOEVNT ); + } +} + +int +_ia64_ita_init_control_state( hwd_control_state_t * this_state ) +{ + pfmw_param_t *evt; + pfmw_ita1_param_t *param; + ia64_control_state_t *ptr; + + ptr = ( ia64_control_state_t * ) this_state; + evt = &( ptr->evt ); + + param = &( ptr->ita_lib_param.ita_param ); + memset( evt, 0, sizeof ( pfmw_param_t ) ); + memset( param, 0, sizeof ( pfmw_ita1_param_t ) ); + + _ia64_ita_set_domain( this_state, _ia64_vector.cmp_info.default_domain ); +/* set library parameter pointer */ + + return ( PAPI_OK ); +} + +int +_ia64_ita2_init_control_state( hwd_control_state_t * this_state ) +{ + pfmw_param_t *evt; + pfmw_ita2_param_t *param; + ia64_control_state_t *ptr; + + ptr = ( ia64_control_state_t * ) this_state; + evt = &( ptr->evt ); + + param = &( ptr->ita_lib_param.ita2_param ); + memset( evt, 0, sizeof ( pfmw_param_t ) ); + memset( param, 0, sizeof ( pfmw_ita2_param_t ) ); + + _ia64_ita2_set_domain( this_state, _ia64_vector.cmp_info.default_domain ); +/* set library parameter pointer */ + evt->mod_inp = &( param->ita2_input_param ); + evt->mod_outp = &( param->ita2_output_param ); + + return ( PAPI_OK ); +} + +int +_ia64_mont_init_control_state( hwd_control_state_t * this_state ) +{ + pfmw_param_t *evt; + pfmw_mont_param_t *param; + ia64_control_state_t *ptr; + + ptr = ( ia64_control_state_t * ) this_state; + evt = &( ptr->evt ); + + param = &( ptr->ita_lib_param.mont_param ); + memset( evt, 0, sizeof ( pfmw_param_t ) ); + memset( param, 0, sizeof ( pfmw_mont_param_t ) ); + + _ia64_mont_set_domain( this_state, _ia64_vector.cmp_info.default_domain ); +/* set library parameter pointer */ + evt->mod_inp = &( param->mont_input_param ); + evt->mod_outp = &( 
param->mont_output_param ); + + return ( PAPI_OK ); +} + +int +_ia64_init_control_state( hwd_control_state_t * this_state ) +{ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( _ia64_ita_init_control_state( this_state ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( _ia64_ita2_init_control_state( this_state ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( _ia64_mont_init_control_state( this_state ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +void +_ia64_remove_native( hwd_control_state_t * this_state, + NativeInfo_t * nativeInfo ) +{ + ( void ) this_state; /*unused */ + ( void ) nativeInfo; /*unused */ + return; +} + +int +_ia64_mont_update_control_state( hwd_control_state_t * this_state, + NativeInfo_t * native, int count, + hwd_context_t * zero ) +{ + ( void ) zero; /*unused */ + int org_cnt; + pfmw_param_t *evt = &this_state->evt; + pfmw_param_t copy_evt; + unsigned int i, j, event, umask, EventCode; + pfmlib_event_t gete; + char name[128]; + + if ( count == 0 ) { + for ( i = 0; i < ( unsigned int ) _ia64_vector.cmp_info.num_cntrs; i++ ) + PFMW_PEVT_EVENT( evt, i ) = 0; + PFMW_PEVT_EVTCOUNT( evt ) = 0; + memset( PFMW_PEVT_PFPPC( evt ), 0, sizeof ( PFMW_PEVT_PFPPC( evt ) ) ); + memset( &evt->inp.pfp_unavail_pmcs, 0, sizeof ( pfmlib_regmask_t ) ); + return ( PAPI_OK ); + } + +/* save the old data */ + org_cnt = PFMW_PEVT_EVTCOUNT( evt ); + + memcpy( &copy_evt, evt, sizeof ( pfmw_param_t ) ); + + for ( i = 0; i < ( unsigned int ) _ia64_vector.cmp_info.num_cntrs; i++ ) + PFMW_PEVT_EVENT( evt, i ) = 0; + PFMW_PEVT_EVTCOUNT( evt ) = 0; + memset( PFMW_PEVT_PFPPC( evt ), 0, sizeof ( PFMW_PEVT_PFPPC( evt ) ) ); + memset( &evt->inp.pfp_unavail_pmcs, 0, sizeof ( pfmlib_regmask_t ) ); + + SUBDBG( " original count is %d\n", org_cnt ); + +/* add new native events to the evt structure */ + for ( i = 0; i < ( unsigned int ) count; i++ ) { + 
memset( &gete, 0, sizeof ( gete ) ); + EventCode = native[i].ni_event; + _papi_pfm_ntv_code_to_name( EventCode, name, 128 ); + if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) { + memcpy( evt, &copy_evt, sizeof ( pfmw_param_t ) ); return ( PAPI_ENOEVNT ); } /* evt was cleared above; restore the snapshot before bailing out, as the conflict path below does */ + + SUBDBG( " evtcode=%#x evtindex=%d name: %s\n", EventCode, event, + name ); + + PFMW_PEVT_EVENT( evt, i ) = event; + evt->inp.pfp_events[i].num_masks = 0; + gete.event = event; + gete.num_masks = prepare_umask( umask, gete.unit_masks ); + if ( gete.num_masks ) { + evt->inp.pfp_events[i].num_masks = gete.num_masks; + for ( j = 0; j < gete.num_masks; j++ ) + evt->inp.pfp_events[i].unit_masks[j] = gete.unit_masks[j]; + } + } + PFMW_PEVT_EVTCOUNT( evt ) = count; + /* Recalculate the pfmlib_param_t structure, may also signal conflict */ + if ( pfmw_dispatch_events( evt ) ) { + SUBDBG( "pfmw_dispatch_events fail\n" ); + /* recover the old data */ + PFMW_PEVT_EVTCOUNT( evt ) = org_cnt; + /*for (i = 0; i < _ia64_vector.cmp_info.num_cntrs; i++) + PFMW_PEVT_EVENT(evt,i) = events[i]; + */ + memcpy( evt, &copy_evt, sizeof ( pfmw_param_t ) ); + return ( PAPI_ECNFLCT ); + } + SUBDBG( "event_count=%d\n", PFMW_PEVT_EVTCOUNT( evt ) ); + + for ( i = 0; i < PFMW_PEVT_EVTCOUNT( evt ); i++ ) { + native[i].ni_position = PFMW_PEVT_PFPPC_REG_NUM( evt, i ) + - PMU_FIRST_COUNTER; + SUBDBG( "event_code is %d, reg_num is %d\n", + native[i].ni_event & PAPI_NATIVE_AND_MASK, + native[i].ni_position ); + } + + return ( PAPI_OK ); +} + +int +_ia64_ita_update_control_state( hwd_control_state_t * this_state, + NativeInfo_t * native, int count, + hwd_context_t * zero ) +{ + ( void ) zero; /*unused */ + int index, org_cnt; + unsigned int i; + pfmw_param_t *evt = &this_state->evt; + pfmw_param_t copy_evt; + + if ( count == 0 ) { + for ( i = 0; i < ( unsigned int ) _ia64_vector.cmp_info.num_cntrs; i++ ) + PFMW_PEVT_EVENT( evt, i ) = 0; + PFMW_PEVT_EVTCOUNT( evt ) = 0; + memset( PFMW_PEVT_PFPPC( evt ), 0, sizeof ( PFMW_PEVT_PFPPC( evt ) ) ); + memset( &evt->inp.pfp_unavail_pmcs, 0, 
sizeof ( pfmlib_regmask_t ) ); + return ( PAPI_OK ); + } + +/* save the old data */ + org_cnt = PFMW_PEVT_EVTCOUNT( evt ); + + memcpy( &copy_evt, evt, sizeof ( pfmw_param_t ) ); + for ( i = 0; i < ( unsigned int ) _ia64_vector.cmp_info.num_cntrs; i++ ) + PFMW_PEVT_EVENT( evt, i ) = 0; + PFMW_PEVT_EVTCOUNT( evt ) = 0; + memset( PFMW_PEVT_PFPPC( evt ), 0, sizeof ( PFMW_PEVT_PFPPC( evt ) ) ); + memset( &evt->inp.pfp_unavail_pmcs, 0, sizeof ( pfmlib_regmask_t ) ); + + SUBDBG( " original count is %d\n", org_cnt ); + +/* add new native events to the evt structure */ + for ( i = 0; i < ( unsigned int ) count; i++ ) { + index = native[i].ni_event & PAPI_NATIVE_AND_MASK; + PFMW_PEVT_EVENT( evt, i ) = index; + } + PFMW_PEVT_EVTCOUNT( evt ) = count; + /* Recalculate the pfmlib_param_t structure, may also signal conflict */ + if ( pfmw_dispatch_events( evt ) ) { + SUBDBG( "pfmw_dispatch_events fail\n" ); + /* recover the old data */ + PFMW_PEVT_EVTCOUNT( evt ) = org_cnt; + /*for (i = 0; i < _ia64_vector.cmp_info.num_cntrs; i++) + PFMW_PEVT_EVENT(evt,i) = events[i]; + */ + memcpy( evt, &copy_evt, sizeof ( pfmw_param_t ) ); + return ( PAPI_ECNFLCT ); + } + SUBDBG( "event_count=%d\n", PFMW_PEVT_EVTCOUNT( evt ) ); + + for ( i = 0; i < PFMW_PEVT_EVTCOUNT( evt ); i++ ) { + native[i].ni_position = PFMW_PEVT_PFPPC_REG_NUM( evt, i ) + - PMU_FIRST_COUNTER; + SUBDBG( "event_code is %d, reg_num is %d\n", + native[i].ni_event & PAPI_NATIVE_AND_MASK, + native[i].ni_position ); + } + + return ( PAPI_OK ); +} + +int +_ia64_update_control_state( hwd_control_state_t * this_state, + NativeInfo_t * native, int count, + hwd_context_t * zero ) +{ + switch ( _perfmon2_pfm_pmu_type ) { + case PFMLIB_ITANIUM_PMU: + return ( _ia64_ita_update_control_state + ( this_state, native, count, zero ) ); + break; + case PFMLIB_ITANIUM2_PMU: + return ( _ia64_ita_update_control_state + ( this_state, native, count, zero ) ); + break; + case PFMLIB_MONTECITO_PMU: + return ( _ia64_mont_update_control_state + ( this_state, 
native, count, zero ) ); + break; + default: + PAPIERROR( "PMU type %d is not supported by this component", + _perfmon2_pfm_pmu_type ); + return ( PAPI_EBUG ); + } +} + +papi_vector_t _ia64_vector = { + .cmp_info = { + .name = "perfmon-ia64.c", + .version = "5.0", + + /* default component information (unspecified values initialized to 0) */ + .default_domain = PAPI_DOM_USER, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + .hardware_intr = 1, + /* component specific cmp_info initializations */ + .fast_real_timer = 1, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + .kernel_profile = 1, + .cntr_umasks = 1, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( ia64_context_t ), + .control_state = sizeof ( ia64_control_state_t ), + .reg_value = sizeof ( ia64_register_t ), + .reg_alloc = sizeof ( ia64_reg_alloc_t ), + } + , + + /* function pointers in this component */ + .init_control_state = _ia64_init_control_state, + .start = _ia64_start, + .stop = _ia64_stop, + .read = _ia64_read, + .shutdown_thread = _ia64_shutdown, + .ctl = _ia64_ctl, + .update_control_state = _ia64_update_control_state, + .set_domain = _ia64_set_domain, + .reset = _ia64_reset, + .set_overflow = _ia64_set_overflow, + .set_profile = _ia64_set_profile, + .stop_profiling = _ia64_stop_profiling, + .init_component = _ia64_init_component, + .dispatch_timer = _ia64_dispatch_timer, + .init_thread = _ia64_init, + + .ntv_enum_events = _ia64_ntv_enum_events, + .ntv_code_to_name = _ia64_ntv_code_to_name, + .ntv_code_to_descr = _ia64_ntv_code_to_descr, + +}; diff --git a/src/components/perfmon_ia64/perfmon-ia64.h b/src/components/perfmon_ia64/perfmon-ia64.h new file mode 100644 index 0000000..8287dca --- /dev/null +++ b/src/components/perfmon_ia64/perfmon-ia64.h @@ -0,0 +1,176 @@ +#ifndef _PAPI_PERFMON_IA64_H 
+#define _PAPI_PERFMON_IA64_H +/* +* File: perfmon-ia64.h +* CVS: $Id$ +* Author: Philip Mucci +* mucci@cs.utk.edu +* +* Kevin London +* london@cs.utk.edu +* +* Mods: Per Ekman +* pek@pdc.kth.se +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(HAVE_MMTIMER) +#include +#include +#include +#ifndef MMTIMER_FULLNAME +#define MMTIMER_FULLNAME "/dev/mmtimer" +#endif +#endif + +#ifdef __INTEL_COMPILER +#include +#include +#endif + +#include "papi_defines.h" +#include "config.h" +#include "perfmon/pfmlib.h" +#include "perfmon/perfmon.h" +#include "perfmon/perfmon_default_smpl.h" +#include "perfmon/pfmlib_montecito.h" +#include "perfmon/pfmlib_itanium2.h" +#include "perfmon/pfmlib_itanium.h" + +typedef int ia64_register_t; +typedef int ia64_register_map_t; +typedef int ia64_reg_alloc_t; + + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +typedef struct param_t +{ + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + void *mod_inp; /* model specific input parameters to libpfm */ + void *mod_outp; /* model specific output parameters from libpfm */ +} pfmw_param_t; +// #ifdef ITANIUM3 +typedef struct mont_param_t +{ + pfmlib_mont_input_param_t mont_input_param; + pfmlib_mont_output_param_t mont_output_param; +} pfmw_mont_param_t; +// typedef pfmw_mont_param_t pfmw_ita_param_t; +// #elif defined(ITANIUM2) +typedef struct ita2_param_t +{ + pfmlib_ita2_input_param_t ita2_input_param; + pfmlib_ita2_output_param_t ita2_output_param; +} pfmw_ita2_param_t; +// typedef pfmw_ita2_param_t pfmw_ita_param_t; +// #else +typedef int pfmw_ita1_param_t; +// #endif + +#define PMU_FIRST_COUNTER 4 + +typedef union +{ + pfmw_ita1_param_t ita_param; + pfmw_ita2_param_t ita2_param; + pfmw_mont_param_t mont_param; +} pfmw_ita_param_t; + 
+ +#define MAX_COUNTERS 12 +#define MAX_COUNTER_TERMS MAX_COUNTERS + +typedef struct ia64_control_state +{ + /* Which counters to use? Bits encode counters to use, may be duplicates */ + ia64_register_map_t bits; + + pfmw_ita_param_t ita_lib_param; + + /* Buffer to pass to kernel to control the counters */ + pfmw_param_t evt; + + long long counters[MAX_COUNTERS]; + pfarg_reg_t pd[NUM_PMDS]; + +/* sampling buffer address */ + void *smpl_vaddr; + /* Buffer to pass to library to control the counters */ +} ia64_control_state_t; + + +typedef struct itanium_preset_search +{ + /* Preset code */ + int preset; + /* Derived code */ + int derived; + /* Strings to look for */ + char *( findme[MAX_COUNTERS] ); + char operation[MAX_COUNTERS * 5]; +} itanium_preset_search_t; + +typedef struct +{ + int fd; /* file descriptor */ + pid_t tid; /* thread id */ +#if defined(USE_PROC_PTTIMER) + int stat_fd; +#endif +} ia64_context_t; + +#undef hwd_context_t +typedef ia64_context_t hwd_context_t; + +#include "linux-context.h" + +//#undef hwd_ucontext_t +//typedef struct sigcontext hwd_ucontext_t; + +/* Override void* definitions from PAPI framework layer */ +/* with typedefs to conform to PAPI component layer code. 
*/ +#undef hwd_reg_alloc_t +typedef ia64_reg_alloc_t hwd_reg_alloc_t; +#undef hwd_register_t +typedef ia64_register_t hwd_register_t; +#undef hwd_control_state_t +typedef ia64_control_state_t hwd_control_state_t; + +#define SMPL_BUF_NENTRIES 64 +#define M_PMD(x) (1UL<<(x)) + +#define MONT_DEAR_REGS_MASK (M_PMD(32)|M_PMD(33)|M_PMD(36)) +#define MONT_ETB_REGS_MASK (M_PMD(38)| M_PMD(39)| \ + M_PMD(48)|M_PMD(49)|M_PMD(50)|M_PMD(51)|M_PMD(52)|M_PMD(53)|M_PMD(54)|M_PMD(55)|\ + M_PMD(56)|M_PMD(57)|M_PMD(58)|M_PMD(59)|M_PMD(60)|M_PMD(61)|M_PMD(62)|M_PMD(63)) + +#define DEAR_REGS_MASK (M_PMD(2)|M_PMD(3)|M_PMD(17)) +#define BTB_REGS_MASK (M_PMD(8)|M_PMD(9)|M_PMD(10)|M_PMD(11)|M_PMD(12)|M_PMD(13)|M_PMD(14)|M_PMD(15)|M_PMD(16)) + +#endif /* _PAPI_PERFMON_IA64_H */ diff --git a/src/components/powercap/README b/src/components/powercap/README new file mode 100644 index 0000000..eece6bf --- /dev/null +++ b/src/components/powercap/README @@ -0,0 +1,64 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Philip Vaccaro (ICL/UTK) +* @defgroup papi_components Components +* @brief Component Specific Readme file: powercap +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +powercap/ + +The PAPI powercap component supports measuring and capping power usage +on recent Intel architectures (Sandybridge or later) using the powercap interface exposed +through the Linux kernel. + +-------------------------------------------------- +MEASURING AND CAPPING POWER + +The powercap sysfs interface exposes energy counters and R/W register-like +power settings. The counters and R/W settings apply to a power domain on a system. + +For example, a single KNL chip exposes package power and DRAM power information. +On KNL this component can be used to read package/DRAM energy counters and set package/DRAM power limits. +There are two limits in the package domain and a single limit in the DRAM domain. 
The two limits +in the package domain correspond to long/short term limits. + +For all supported processors, each package/DRAM power limit has an associated +time window. The time window for each limit can also be changed, which changes the enforcement time window of +that limit. + +These counters and settings are exposed through this PAPI component and can be accessed just like any normal PAPI +counter. Running the "powercap_basic" test in the test directory will list all the events on a system. There is also a +"powercap_limit" test in the test directory that shows how a power limit is applied. + +-------------------------------------------------- +CONFIGURING THE PAPI POWERCAP COMPONENT + +At the higher src directory, configure with this component + % cd /src + % ./configure --with-components="powercap" +Follow the standard PAPI build (make) instructions + % make +To use the module, make sure that the libraries are accessible. + % export LD_LIBRARY_PATH=${PAPIDIR}/src:${PAPIDIR}/src/libpfm4/lib:${LD_LIBRARY_PATH} +To check the installation, the following should show some available counters + % ./utils/papi_native_avail | grep powercap + +-------------------------------------------------- +SYSTEM SETUP + +The actions described below will generally require superuser ability. +Note, these actions may have security and performance consequences, so +please make sure you know what you are doing. + + Ensure the "CONFIG_POWERCAP" and "CONFIG_INTEL_RAPL" kernel + modules are enabled. + + Use chmod to set site-appropriate access permissions (e.g. 
766) + for /sys/class/powercap/* + +-------------------------------------------------- diff --git a/src/components/powercap/Rules.powercap b/src/components/powercap/Rules.powercap new file mode 100644 index 0000000..52449e1 --- /dev/null +++ b/src/components/powercap/Rules.powercap @@ -0,0 +1,6 @@ + +COMPSRCS += components/powercap/linux-powercap.c +COMPOBJS += linux-powercap.o + +linux-powercap.o: components/powercap/linux-powercap.c + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/powercap/linux-powercap.c -o linux-powercap.o diff --git a/src/components/powercap/linux-powercap.c b/src/components/powercap/linux-powercap.c new file mode 100644 index 0000000..f6895b8 --- /dev/null +++ b/src/components/powercap/linux-powercap.c @@ -0,0 +1,522 @@ +/** + * @file linux-powercap.c + * @author Philip Vaccaro + * @ingroup papi_components + * @brief powercap component + * + * To work, the powercap kernel module must be loaded. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + + +typedef struct _powercap_register { + unsigned int selector; +} _powercap_register_t; + +typedef struct _powercap_native_event_entry { + char name[PAPI_MAX_STR_LEN]; + char units[PAPI_MIN_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + int socket_id; + int component_id; + int event_id; + int type; + int return_type; + _powercap_register_t resources; +} _powercap_native_event_entry_t; + +typedef struct _powercap_reg_alloc { + _powercap_register_t ra_bits; +} _powercap_reg_alloc_t; + +static char read_buff[PAPI_MAX_STR_LEN]; +static char write_buff[PAPI_MAX_STR_LEN]; + +static int num_events=0; + +// package events +#define PKG_ENERGY 0 +#define PKG_MAX_ENERGY_RANGE 1 +#define PKG_MAX_POWER_A 2 +#define PKG_POWER_LIMIT_A 3 +#define PKG_TIME_WINDOW_A 4 +#define PKG_MAX_POWER_B 5 +#define 
PKG_POWER_LIMIT_B 6 +#define PKG_TIME_WINDOW_B 7 +#define PKG_ENABLED 8 +#define PKG_NAME 9 + +#define PKG_NUM_EVENTS 10 +static int pkg_events[PKG_NUM_EVENTS] = {PKG_ENERGY, PKG_MAX_ENERGY_RANGE, PKG_MAX_POWER_A, PKG_POWER_LIMIT_A, PKG_TIME_WINDOW_A, PKG_MAX_POWER_B, PKG_POWER_LIMIT_B, PKG_TIME_WINDOW_B, PKG_ENABLED, PKG_NAME}; +static char *pkg_event_names[PKG_NUM_EVENTS] = {"ENERGY_UJ", "MAX_ENERGY_RANGE_UJ", "MAX_POWER_A_UW", "POWER_LIMIT_A_UW", "TIME_WINDOW_A_US", "MAX_POWER_B_UW", "POWER_LIMIT_B_UW", "TIME_WINDOW_B", "ENABLED", "NAME"}; +static char *pkg_sys_names[PKG_NUM_EVENTS] = {"energy_uj", "max_energy_range_uj", "constraint_0_max_power_uw", "constraint_0_power_limit_uw", "constraint_0_time_window_us", "constraint_1_max_power_uw", "constraint_1_power_limit_uw", "constraint_1_time_window_us", "enabled", "name"}; +static mode_t pkg_sys_flags[PKG_NUM_EVENTS] = {O_RDONLY, O_RDONLY, O_RDONLY, O_RDWR, O_RDONLY, O_RDONLY, O_RDWR, O_RDONLY, O_RDONLY, O_RDONLY}; + + +// non-package events +#define COMPONENT_ENERGY 10 +#define COMPONENT_MAX_ENERGY_RANGE 11 +#define COMPONENT_MAX_POWER_A 12 +#define COMPONENT_POWER_LIMIT_A 13 +#define COMPONENT_TIME_WINDOW_A 14 +#define COMPONENT_ENABLED 15 +#define COMPONENT_NAME 16 + +#define COMPONENT_NUM_EVENTS 7 +static int component_events[COMPONENT_NUM_EVENTS] = {COMPONENT_ENERGY, COMPONENT_MAX_ENERGY_RANGE, COMPONENT_MAX_POWER_A, COMPONENT_POWER_LIMIT_A, COMPONENT_TIME_WINDOW_A, COMPONENT_ENABLED, COMPONENT_NAME}; +static char *component_event_names[COMPONENT_NUM_EVENTS] = {"ENERGY_UJ", "MAX_ENERGY_RANGE_UJ", "MAX_POWER_A_UW", "POWER_LIMIT_A_UW", "TIME_WINDOW_A_US", "ENABLED", "NAME"}; +static char *component_sys_names[COMPONENT_NUM_EVENTS] = {"energy_uj", "max_energy_range_uj", "constraint_0_max_power_uw", "constraint_0_power_limit_uw", "constraint_0_time_window_us", "enabled", "name"}; +static mode_t component_sys_flags[COMPONENT_NUM_EVENTS] = {O_RDONLY, O_RDONLY, O_RDONLY, O_RDWR, O_RDONLY, O_RDONLY, O_RDONLY}; + 
+#define POWERCAP_MAX_COUNTERS (2 * (PKG_NUM_EVENTS + (3 * COMPONENT_NUM_EVENTS))) + +static _powercap_native_event_entry_t powercap_ntv_events[(2 * (PKG_NUM_EVENTS + (3 * COMPONENT_NUM_EVENTS)))]; + +static int event_fds[POWERCAP_MAX_COUNTERS]; + +typedef struct _powercap_control_state { + long long count[POWERCAP_MAX_COUNTERS]; + long long which_counter[POWERCAP_MAX_COUNTERS]; + long long lastupdate; +} _powercap_control_state_t; + +typedef struct _powercap_context { + long long start_value[POWERCAP_MAX_COUNTERS]; + _powercap_control_state_t state; +} _powercap_context_t; + +papi_vector_t _powercap_vector; + +/***************************************************************************/ +/****** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *******/ +/***************************************************************************/ + +/* Null terminated version of strncpy */ +static char * _local_strlcpy( char *dst, const char *src, size_t size ) +{ + char *retval = strncpy( dst, src, size ); + if ( size>0 ) dst[size-1] = '\0'; + return( retval ); +} + +static long long read_powercap_value( int index ) +{ + int sz = pread(event_fds[index], read_buff, PAPI_MAX_STR_LEN, 0); + read_buff[sz] = '\0'; + + return atoll(read_buff); +} + +static int write_powercap_value( int index, long long value ) +{ + snprintf(write_buff, sizeof(write_buff), "%lld", value); + int sz = pwrite(event_fds[index], write_buff, PAPI_MAX_STR_LEN, 0); + if(sz == -1) { + perror("Error in pwrite(): "); + } + return 1; +} + +/************************* PAPI Functions **********************************/ + +/* + * This is called whenever a thread is initialized + */ +static int _powercap_init_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + return PAPI_OK; +} + +/* + * Called when PAPI process is initialized (i.e. 
PAPI_library_init) + */ +static int _powercap_init_component( int cidx ) +{ + + int num_sockets = -1; + int s = -1, e = -1, c = -1; + + char events_dir[128]; + char event_path[128]; + + DIR *events; + + // get hw info + const PAPI_hw_info_t *hw_info; + hw_info=&( _papi_hwi_system_info.hw_info ); + + // check if intel processor + if ( hw_info->vendor!=PAPI_VENDOR_INTEL ) { + strncpy(_powercap_vector.cmp_info.disabled_reason, "Not an Intel processor", PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + + // store number of sockets for adding events + num_sockets = hw_info->sockets; + + num_events = 0; + for(s = 0; s < num_sockets; s++) { + + // compose string of a pkg directory path + snprintf(events_dir, sizeof(events_dir), "/sys/class/powercap/intel-rapl:%d/", s); + + // open directory to make sure it exists + events = opendir(events_dir); + + // not a valid pkg/component directory so continue + if (events == NULL) { continue; } + + // loop through pkg events and create powercap event entries + for (e = 0; e < PKG_NUM_EVENTS; e++) { + + // compose string to individual event + snprintf(event_path, sizeof(event_path), "%s%s", events_dir, pkg_sys_names[e]); + // not a valid pkg event path so continue + if (access(event_path, F_OK) == -1) { continue; } + + snprintf(powercap_ntv_events[num_events].name, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d", pkg_event_names[e], s); + //snprintf(powercap_ntv_events[num_events].description, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d", pkg_event_names[e], s); + //snprintf(powercap_ntv_events[num_events].units, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d", pkg_event_names[e], s); + powercap_ntv_events[num_events].return_type = PAPI_DATATYPE_UINT64; + powercap_ntv_events[num_events].type = pkg_events[e]; + + powercap_ntv_events[num_events].resources.selector = num_events + 1; + + event_fds[num_events] = open(event_path, O_SYNC|pkg_sys_flags[e]); + + if(powercap_ntv_events[num_events].type == 
PKG_NAME) { + int sz = pread(event_fds[num_events], read_buff, PAPI_MAX_STR_LEN, 0); + read_buff[sz] = '\0'; + snprintf(powercap_ntv_events[num_events].description, sizeof(powercap_ntv_events[num_events].description), "%s", read_buff); + } + + num_events++; + } + + // reset component count for each socket + c = 0; + snprintf(events_dir, sizeof(events_dir), "/sys/class/powercap/intel-rapl:%d:%d/", s, c); + while((events = opendir(events_dir)) != NULL) { + + // loop through pkg events and create powercap event entries + for (e = 0; e < COMPONENT_NUM_EVENTS; e++) { + + // compose string to individual event + snprintf(event_path, sizeof(event_path), "%s%s", events_dir, component_sys_names[e]); + + // not a valid pkg event path so continue + if (access(event_path, F_OK) == -1) { continue; } + + snprintf(powercap_ntv_events[num_events].name, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d_SUBZONE%d", component_event_names[e], s, c); + //snprintf(powercap_ntv_events[num_events].description, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d_SUBZONE%d", component_event_names[e], s, c); + //snprintf(powercap_ntv_events[num_events].units, sizeof(powercap_ntv_events[num_events].name), "%s:ZONE%d_SUBZONE%d", component_event_names[e], s, c); + powercap_ntv_events[num_events].return_type = PAPI_DATATYPE_UINT64; + powercap_ntv_events[num_events].type = component_events[e]; + + powercap_ntv_events[num_events].resources.selector = num_events + 1; + + event_fds[num_events] = open(event_path, O_SYNC|component_sys_flags[e]); + + if(powercap_ntv_events[num_events].type == COMPONENT_NAME) { + int sz = pread(event_fds[num_events], read_buff, PAPI_MAX_STR_LEN, 0); + read_buff[sz] = '\0'; + snprintf(powercap_ntv_events[num_events].description, sizeof(powercap_ntv_events[num_events].description), "%s", read_buff); + } + + num_events++; + } + + // test for next component + c++; + + // compose string of an pkg directory path + snprintf(events_dir, sizeof(events_dir), 
"/sys/class/powercap/intel-rapl:%d:%d/", s, c); + } + } + + /* Export the total number of events available */ + _powercap_vector.cmp_info.num_native_events = num_events; + _powercap_vector.cmp_info.num_cntrs = num_events; + _powercap_vector.cmp_info.num_mpx_cntrs = num_events; + + /* Export the component id */ + _powercap_vector.cmp_info.CmpIdx = cidx; + + return PAPI_OK; +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +static int _powercap_init_control_state( hwd_control_state_t *ctl ) +{ + _powercap_control_state_t* control = ( _powercap_control_state_t* ) ctl; + memset( control, 0, sizeof ( _powercap_control_state_t ) ); + return PAPI_OK; +} + +static int _powercap_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + _powercap_context_t* context = ( _powercap_context_t* ) ctx; + (void) ctl; + + int b; + for( b = 0; b < num_events; b++ ) { + context->start_value[b]=read_powercap_value(b); + } + + return PAPI_OK; +} + +static int _powercap_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + _powercap_control_state_t* control = ( _powercap_control_state_t* ) ctl; + + int c; + for( c = 0; c < num_events; c++ ) { + control->count[c]=read_powercap_value(c); + } + + return PAPI_OK; +} + +/* Shutdown a thread */ +static int +_powercap_shutdown_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + SUBDBG( "Enter\n" ); + return PAPI_OK; +} + + +static int +_powercap_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + (void) ctx; + (void) flags; + _powercap_control_state_t* control = ( _powercap_control_state_t* ) ctl; + + int i; + + for(i=0;icount[i]=read_powercap_value(control->which_counter[i]); + } + + *events = ( ( _powercap_control_state_t* ) ctl )->count; + + return PAPI_OK; +} + +static int _powercap_write( hwd_context_t * ctx, hwd_control_state_t * ctl, long long *values ) +{ + /* write values */ + ( void ) ctx; + _powercap_control_state_t *control = ( 
_powercap_control_state_t * ) ctl; + + int i; + + for(i=0;iwhich_counter[i]].type == PKG_POWER_LIMIT_A) || (powercap_ntv_events[control->which_counter[i]].type == PKG_POWER_LIMIT_B) ) { + write_powercap_value(control->which_counter[i], values[i]); + } + } + + return PAPI_OK; +} +/* + * Clean up what was setup in powercap_init_component(). + */ +static int _powercap_shutdown_component( void ) +{ + int i; + + /* Read counters into expected slot */ + for(i=0;iwhich_counter[i]=index; + native[i].ni_position = i; + } + + return PAPI_OK; + +} + +static int _powercap_set_domain( hwd_control_state_t *ctl, int domain ) +{ + ( void ) ctl; + if ( PAPI_DOM_ALL != domain ) + return PAPI_EINVAL; + + return PAPI_OK; +} + + +static int _powercap_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + ( void ) ctl; + return PAPI_OK; +} + +/* + * Native Event functions + */ +static int _powercap_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + int index; + switch ( modifier ) { + + case PAPI_ENUM_FIRST: + *EventCode = 0; + return PAPI_OK; + case PAPI_ENUM_EVENTS:index = *EventCode; + if (index < num_events - 1) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + + default:return PAPI_EINVAL; + } +} + +/* + * + */ +static int _powercap_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode & PAPI_NATIVE_AND_MASK; + + if ( index >= 0 && index < num_events ) { + _local_strlcpy( name, powercap_ntv_events[index].name, len ); + return PAPI_OK; + } + return PAPI_ENOEVNT; +} + +/* + * Copy the description of native event EventCode into name via _local_strlcpy with size len; returns PAPI_OK, or PAPI_ENOEVNT when EventCode is outside [0, num_events). + */ +static int _powercap_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode; + + /* reject when EITHER bound is violated; the previous '&&' could never be true, so bad codes indexed past the event table */ + if ( index < 0 || index >= num_events ) + return PAPI_ENOEVNT; + _local_strlcpy( name, powercap_ntv_events[index].description, len ); + return PAPI_OK; +} + +static int _powercap_ntv_code_to_info( unsigned int EventCode, PAPI_event_info_t *info ) +{ + int index = EventCode; + + if ( 
index < 0 || index >= num_events ) + return PAPI_ENOEVNT; + + _local_strlcpy( info->symbol, powercap_ntv_events[index].name, sizeof( info->symbol )); + _local_strlcpy( info->long_descr, powercap_ntv_events[index].description, sizeof( info->long_descr ) ); + _local_strlcpy( info->units, powercap_ntv_events[index].units, sizeof( info->units ) ); + + info->data_type = powercap_ntv_events[index].return_type; + return PAPI_OK; +} + + +papi_vector_t _powercap_vector = { + .cmp_info = { /* (unspecified values are initialized to 0) */ + .name = "powercap", + .short_name = "powercap", + .description = "Linux powercap energy measurements", + .version = "5.3.0", + .default_domain = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + .available_domains = PAPI_DOM_ALL, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( _powercap_context_t ), + .control_state = sizeof ( _powercap_control_state_t ), + .reg_value = sizeof ( _powercap_register_t ), + .reg_alloc = sizeof ( _powercap_reg_alloc_t ), + }, + /* function pointers in this component */ + .init_thread = _powercap_init_thread, + .init_component = _powercap_init_component, + .init_control_state = _powercap_init_control_state, + .update_control_state = _powercap_update_control_state, + .start = _powercap_start, + .stop = _powercap_stop, + .read = _powercap_read, + .write = _powercap_write, + .shutdown_thread = _powercap_shutdown_thread, + .shutdown_component = _powercap_shutdown_component, + .ctl = _powercap_ctl, + + .set_domain = _powercap_set_domain, + .reset = _powercap_reset, + + .ntv_enum_events = _powercap_ntv_enum_events, + .ntv_code_to_name = _powercap_ntv_code_to_name, + .ntv_code_to_descr = _powercap_ntv_code_to_descr, + .ntv_code_to_info = _powercap_ntv_code_to_info, +}; diff --git a/src/components/powercap/tests/Makefile b/src/components/powercap/tests/Makefile new file mode 
100644 index 0000000..4c968ce --- /dev/null +++ b/src/components/powercap/tests/Makefile @@ -0,0 +1,22 @@ +NAME=powercap +include ../../Makefile_comp_tests.target + +TESTS = powercap_basic powercap_limit + +powercap_tests: $(TESTS) + +powercap_basic.o: powercap_basic.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c powercap_basic.c -o powercap_basic.o -DBASIC_TEST=1 + +powercap_basic: powercap_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o powercap_basic powercap_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +powercap_limit.o: powercap_limit.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c powercap_limit.c -o powercap_limit.o + +powercap_limit: powercap_limit.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o powercap_limit powercap_limit.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o *~ diff --git a/src/components/powercap/tests/powercap_basic.c b/src/components/powercap/tests/powercap_basic.c new file mode 100644 index 0000000..6b0fefc --- /dev/null +++ b/src/components/powercap/tests/powercap_basic.c @@ -0,0 +1,286 @@ +/** + * @author PAPI team UTK/ICL + * Test case for powercap component + * @brief + * Tests basic functionality of powercap component + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define MAX_powercap_EVENTS 64 + +#ifdef BASIC_TEST + +void run_test( int quiet ) +{ + if ( !quiet ) { + printf( "Sleeping 1 second...\n" ); + } + sleep( 1 ); +} + +#else /* NOT BASIC_TEST */ + +#define MATRIX_SIZE 1024 +static double a[MATRIX_SIZE][MATRIX_SIZE]; +static double b[MATRIX_SIZE][MATRIX_SIZE]; +static double c[MATRIX_SIZE][MATRIX_SIZE]; + +/* Naive matrix multiply */ +void run_test( int quiet ) +{ + double s; + int i,j,k; + + if ( !quiet ) printf( "Doing a naive %dx%d MMM...\n",MATRIX_SIZE,MATRIX_SIZE ); + + for( i=0; i 1 ) { + if ( strstr( argv[1], "-w" ) ) { + do_wrap = 1; + } + } +#endif + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != 
PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__,"PAPI_library_init failed\n",retval ); + + if ( !TESTS_QUIET ) printf( "Trying all powercap events\n" ); + + numcmp = PAPI_num_components(); + + for( cid=0; cidname,"powercap" ) ) { + powercap_cid=cid; + if ( !TESTS_QUIET ) printf( "Found powercap component at cid %d\n",powercap_cid ); + if ( cmpinfo->disabled ) { + if ( !TESTS_QUIET ) { + printf( "powercap component disabled: %s\n", + cmpinfo->disabled_reason ); + } + test_skip( __FILE__,__LINE__,"powercap component disabled",0 ); + } + break; + } + } + + /* Component not found */ + if ( cid==numcmp ) + test_skip( __FILE__,__LINE__,"No powercap component found\n",0 ); + + /* Skip if component has no counters */ + if ( cmpinfo->num_cntrs==0 ) + test_skip( __FILE__,__LINE__,"No counters in the powercap component\n",0 ); + + /* Create EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset()",retval ); + + /* Add all events */ + code = PAPI_NATIVE_MASK; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, powercap_cid ); + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_names[num_events] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__,"Error from PAPI_event_code_to_name", retval ); + + retval = PAPI_get_event_info( code,&evinfo ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "Error getting event info\n",retval ); + + strncpy( event_descrs[num_events],evinfo.long_descr,sizeof( event_descrs[0] )-1 ); + strncpy( units[num_events],evinfo.units,sizeof( units[0] )-1 ); + // buffer must be null terminated to safely use strstr operation on it below + units[num_events][sizeof( units[0] )-1] = '\0'; + data_type[num_events] = evinfo.data_type; + retval = PAPI_add_event( EventSet, code ); + + if ( retval != PAPI_OK ) + break; /* We've hit an event limit */ + num_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, powercap_cid ); + } 
+ + values=calloc( num_events,sizeof( long long ) ); + if ( values==NULL ) + test_fail( __FILE__, __LINE__,"No memory",retval ); + + if ( !TESTS_QUIET ) printf( "\nStarting measurements...\n\n" ); + + /* Start Counting */ + before_time=PAPI_get_real_nsec(); + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start()",retval ); + + /* Run test */ + run_test( TESTS_QUIET ); + + /* Stop Counting */ + after_time=PAPI_get_real_nsec(); + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop()",retval ); + + elapsed_time=( ( double )( after_time-before_time ) )/1.0e9; + + if ( !TESTS_QUIET ) { + printf( "\nStopping measurements, took %.3fs, gathering results...\n\n", elapsed_time ); + + printf( "\n" ); + printf( "scaled energy measurements:\n" ); + for( i=0; i +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define MAX_powercap_EVENTS 64 + +int main ( int argc, char **argv ) +{ + (void) argv; + (void) argc; + int retval,cid,powercap_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long values[MAX_powercap_EVENTS]; + int limit_map[MAX_powercap_EVENTS]; + int num_events=0, num_limits=0; + int code; + char event_names[MAX_powercap_EVENTS][PAPI_MAX_STR_LEN]; + int r,i; + + const PAPI_component_info_t *cmpinfo = NULL; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__,"PAPI_library_init\n",retval ); + + if ( !TESTS_QUIET ) printf( "Trying all powercap events\n" ); + + numcmp = PAPI_num_components(); + + for( cid=0; cidname,"powercap" ) ) { + powercap_cid=cid; + if ( !TESTS_QUIET ) printf( "Found powercap component at cid %d\n",powercap_cid ); + if ( cmpinfo->disabled ) { + if ( !TESTS_QUIET ) { + printf( "powercap component disabled: %s\n", + cmpinfo->disabled_reason ); + } + test_skip( __FILE__,__LINE__,"powercap component disabled",0 ); + } + 
break; + } + } + + /* Component not found */ + if ( cid==numcmp ) + test_skip( __FILE__,__LINE__,"No powercap component found\n",0 ); + + /* Skip if component has no counters */ + if ( cmpinfo->num_cntrs==0 ) + test_skip( __FILE__,__LINE__,"No counters in the powercap component\n",0 ); + + /* Create EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset()",retval ); + + /* Add all package limit events */ + code = PAPI_NATIVE_MASK; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, powercap_cid ); + + + /* find all package power events */ + while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_names[num_events] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__,"PAPI_event_code_to_name", retval ); + + retval = PAPI_add_event(EventSet, code); + if (retval != PAPI_OK) + break; /* We've hit an event limit */ + + if (!(strstr(event_names[num_events],"SUBZONE")) && (strstr(event_names[num_events],"POWER_LIMIT"))) { + + limit_map[num_limits] = num_events; + num_limits++; + } + num_events++; + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, powercap_cid ); + } + + + /* start collecting power data */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start()",retval ); + + /* initial read of package limits */ + retval = PAPI_read( EventSet, values ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start()",retval ); + + + printf("\nCURRENT LIMITS\n"); + for( i=0; i +#include +#include +#include + +#include "papi.h" + +#define MAX_EVENTS 128 + +char events[MAX_EVENTS][BUFSIZ]; +char units[MAX_EVENTS][BUFSIZ]; +int data_type[MAX_EVENTS]; +char filenames[MAX_EVENTS][BUFSIZ]; + +FILE *fff[MAX_EVENTS]; + +static int num_events=0; + +int main (int argc, char **argv) +{ + + int retval,cid,rapl_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long values[MAX_EVENTS]; + int i,code,enum_retval; + 
PAPI_event_info_t evinfo; + const PAPI_component_info_t *cmpinfo = NULL; + long long start_time,before_time,after_time; + double elapsed_time,total_time; + char event_name[BUFSIZ]; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + fprintf(stderr,"PAPI_library_init failed\n"); + exit(1); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname,"powercap")) { + rapl_cid=cid; + printf("Found rapl component at cid %d\n", rapl_cid); + + if (cmpinfo->disabled) { + fprintf(stderr,"No rapl events found: %s\n", + cmpinfo->disabled_reason); + exit(1); + } + break; + } + } + + /* Component not found */ + if (cid==numcmp) { + fprintf(stderr,"No powercap component found\n"); + exit(1); + } + + /* Find Events */ + code = PAPI_NATIVE_MASK; + + enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + + while ( enum_retval == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + exit(1); + } + + printf("Found: %s\n",event_name); + strncpy(events[num_events],event_name,BUFSIZ); + sprintf(filenames[num_events],"results.%s",event_name); + + + /* Find additional event information: unit, data type */ + retval = PAPI_get_event_info(code, &evinfo); + if (retval != PAPI_OK) { + printf("Error getting event info for %#x\n",code); + exit(1); + } + + strncpy(units[num_events],evinfo.units,sizeof(units[0])-1); + /* buffer must be null terminated to safely use strstr operation on it below */ + units[num_events][sizeof(units[0])-1] = '\0'; + + data_type[num_events] = evinfo.data_type; + + num_events++; + + if (num_events==MAX_EVENTS) { + printf("Too many events! %d\n",num_events); + exit(1); + } + + enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + + } + + + + if (num_events==0) { + printf("Error! 
No RAPL events found!\n"); + exit(1); + } + + /* Open output files */ + for(i=0;i + +Note that one needs superuser privileges to grant the RAWIO capability to an executable, and that the executable cannot be located on a shared network file system partition. + +The dynamic linker on most operating systems will remove variables that control dynamic linking from the environment of executables with extended rights, such as setuid executables or executables with raised capabilities. One such variable is LD_LIBRARY_PATH. Therefore, executables that have the RAWIO capability can only load shared libraries from default system directories. +One can work around this restriction by either installing the shared libraries in system directories, linking statically against those libraries, or using the -rpath linker option to specify the full path to the shared libraries during the linking step. + +[1] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c903f0456bc69176912dee6dd25c6a66ee1aed00 + +*/ diff --git a/src/components/rapl/Rules.rapl b/src/components/rapl/Rules.rapl new file mode 100644 index 0000000..2213d2d --- /dev/null +++ b/src/components/rapl/Rules.rapl @@ -0,0 +1,6 @@ + +COMPSRCS += components/rapl/linux-rapl.c +COMPOBJS += linux-rapl.o + +linux-rapl.o: components/rapl/linux-rapl.c + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/rapl/linux-rapl.c -o linux-rapl.o diff --git a/src/components/rapl/linux-rapl.c b/src/components/rapl/linux-rapl.c new file mode 100644 index 0000000..b2fb35e --- /dev/null +++ b/src/components/rapl/linux-rapl.c @@ -0,0 +1,1147 @@ +/** + * @file linux-rapl.c + * @author Vince Weaver + * + * @ingroup papi_components + * + * @brief rapl component + * + * This component enables RAPL (Running Average Power Level) + * energy measurements on Intel SandyBridge/IvyBridge/Haswell + * + * To work, either msr_safe kernel module from LLNL + * (https://github.com/scalability-llnl/msr-safe), or + * the x86 generic MSR driver must 
be installed + * (CONFIG_X86_MSR) and the /dev/cpu/?/ files must have read permissions + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + + +/* + * Platform specific RAPL Domains. + * Note that PP1 RAPL Domain is supported on 062A only + * And DRAM RAPL Domain is supported on 062D only + */ + + +/* RAPL defines */ +#define MSR_RAPL_POWER_UNIT 0x606 + +/* Package */ +#define MSR_PKG_RAPL_POWER_LIMIT 0x610 +#define MSR_PKG_ENERGY_STATUS 0x611 +#define MSR_PKG_PERF_STATUS 0x613 +#define MSR_PKG_POWER_INFO 0x614 + +/* PP0 */ +#define MSR_PP0_POWER_LIMIT 0x638 +#define MSR_PP0_ENERGY_STATUS 0x639 +#define MSR_PP0_POLICY 0x63A +#define MSR_PP0_PERF_STATUS 0x63B + +/* PP1 */ +#define MSR_PP1_POWER_LIMIT 0x640 +#define MSR_PP1_ENERGY_STATUS 0x641 +#define MSR_PP1_POLICY 0x642 + +/* DRAM */ +#define MSR_DRAM_POWER_LIMIT 0x618 +#define MSR_DRAM_ENERGY_STATUS 0x619 +#define MSR_DRAM_PERF_STATUS 0x61B +#define MSR_DRAM_POWER_INFO 0x61C + +/* PSYS RAPL Domain */ +#define MSR_PLATFORM_ENERGY_STATUS 0x64d + +/* RAPL bitsmasks */ +#define POWER_UNIT_OFFSET 0 +#define POWER_UNIT_MASK 0x0f + +#define ENERGY_UNIT_OFFSET 0x08 +#define ENERGY_UNIT_MASK 0x1f + +#define TIME_UNIT_OFFSET 0x10 +#define TIME_UNIT_MASK 0x0f + +/* RAPL POWER UNIT MASKS */ +#define POWER_INFO_UNIT_MASK 0x7fff +#define THERMAL_SHIFT 0 +#define MINIMUM_POWER_SHIFT 16 +#define MAXIMUM_POWER_SHIFT 32 +#define MAXIMUM_TIME_WINDOW_SHIFT 48 + + +typedef struct _rapl_register +{ + unsigned int selector; +} _rapl_register_t; + +typedef struct _rapl_native_event_entry +{ + char name[PAPI_MAX_STR_LEN]; + char units[PAPI_MIN_STR_LEN]; + char description[PAPI_MAX_STR_LEN]; + int fd_offset; + int msr; + int type; + int return_type; + _rapl_register_t resources; +} _rapl_native_event_entry_t; + +typedef struct _rapl_reg_alloc +{ + _rapl_register_t ra_bits; +} 
_rapl_reg_alloc_t; + +/* actually 32? But setting this to be safe? */ +#define RAPL_MAX_COUNTERS 64 + +typedef struct _rapl_control_state +{ + int being_measured[RAPL_MAX_COUNTERS]; + long long count[RAPL_MAX_COUNTERS]; + int need_difference[RAPL_MAX_COUNTERS]; + long long lastupdate; +} _rapl_control_state_t; + + +typedef struct _rapl_context +{ + long long start_value[RAPL_MAX_COUNTERS]; + _rapl_control_state_t state; +} _rapl_context_t; + + +papi_vector_t _rapl_vector; + +struct fd_array_t { + int fd; + int open; +}; + +static _rapl_native_event_entry_t * rapl_native_events=NULL; +static int num_events = 0; +struct fd_array_t *fd_array=NULL; +static int num_packages=0,num_cpus=0; + +int power_divisor,time_divisor; +int cpu_energy_divisor,dram_energy_divisor; + +#define PACKAGE_ENERGY 0 +#define PACKAGE_THERMAL 1 +#define PACKAGE_MINIMUM 2 +#define PACKAGE_MAXIMUM 3 +#define PACKAGE_TIME_WINDOW 4 +#define PACKAGE_ENERGY_CNT 5 +#define PACKAGE_THERMAL_CNT 6 +#define PACKAGE_MINIMUM_CNT 7 +#define PACKAGE_MAXIMUM_CNT 8 +#define PACKAGE_TIME_WINDOW_CNT 9 +#define DRAM_ENERGY 10 +#define PLATFORM_ENERGY 11 + +/***************************************************************************/ +/****** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT *******/ +/***************************************************************************/ + + +static long long read_msr(int fd, int which) { + + uint64_t data; + + if ( fd<0 || pread(fd, &data, sizeof data, which) != sizeof data ) { + perror("rdmsr:pread"); + exit(127); + } + + return (long long)data; +} + +static int open_fd(int offset) { + + int fd=-1; + char filename[BUFSIZ]; + + if (fd_array[offset].open==0) { + sprintf(filename,"/dev/cpu/%d/msr_safe",offset); + fd = open(filename, O_RDONLY); + if (fd<0) { + sprintf(filename,"/dev/cpu/%d/msr",offset); + fd = open(filename, O_RDONLY); + } + if (fd>=0) { + fd_array[offset].fd=fd; + fd_array[offset].open=1; + } + } + else { + fd=fd_array[offset].fd; + } + + return fd; 
+} + +static long long read_rapl_value(int index) { + + int fd; + + fd=open_fd(rapl_native_events[index].fd_offset); + return read_msr(fd,rapl_native_events[index].msr); + +} + +static long long convert_rapl_energy(int index, long long value) { + + union { + long long ll; + double fp; + } return_val; + + return_val.ll = value; /* default case: return raw input value */ + + if (rapl_native_events[index].type==PACKAGE_ENERGY) { + return_val.ll = (long long)(((double)value/cpu_energy_divisor)*1e9); + } + + if (rapl_native_events[index].type==DRAM_ENERGY) { + return_val.ll = (long long)(((double)value/dram_energy_divisor)*1e9); + } + + if (rapl_native_events[index].type==PLATFORM_ENERGY) { + return_val.ll = (long long)(((double)value/cpu_energy_divisor)*1e9); + } + + if (rapl_native_events[index].type==PACKAGE_THERMAL) { + return_val.fp = (double) + ((value>>THERMAL_SHIFT)&POWER_INFO_UNIT_MASK) / + (double)power_divisor; + } + + if (rapl_native_events[index].type==PACKAGE_MINIMUM) { + return_val.fp = (double) + ((value>>MINIMUM_POWER_SHIFT)&POWER_INFO_UNIT_MASK)/ + (double)power_divisor; + } + + if (rapl_native_events[index].type==PACKAGE_MAXIMUM) { + return_val.fp = (double) + ((value>>MAXIMUM_POWER_SHIFT)&POWER_INFO_UNIT_MASK)/ + (double)power_divisor; + } + + if (rapl_native_events[index].type==PACKAGE_TIME_WINDOW) { + return_val.fp = (double) + ((value>>MAXIMUM_TIME_WINDOW_SHIFT)&POWER_INFO_UNIT_MASK)/ + (double)time_divisor; + } + + if (rapl_native_events[index].type==PACKAGE_THERMAL_CNT) { + return_val.ll = ((value>>THERMAL_SHIFT)&POWER_INFO_UNIT_MASK); + } + + if (rapl_native_events[index].type==PACKAGE_MINIMUM_CNT) { + return_val.ll = ((value>>MINIMUM_POWER_SHIFT)&POWER_INFO_UNIT_MASK); + } + + if (rapl_native_events[index].type==PACKAGE_MAXIMUM_CNT) { + return_val.ll = ((value>>MAXIMUM_POWER_SHIFT)&POWER_INFO_UNIT_MASK); + } + + if (rapl_native_events[index].type==PACKAGE_TIME_WINDOW_CNT) { + return_val.ll = 
((value>>MAXIMUM_TIME_WINDOW_SHIFT)&POWER_INFO_UNIT_MASK); + } + + return return_val.ll; +} + +static int +get_kernel_nr_cpus(void) +{ + FILE *fff; + int num_read, nr_cpus = 1; + fff=fopen("/sys/devices/system/cpu/kernel_max","r"); + if (fff==NULL) return nr_cpus; + num_read=fscanf(fff,"%d",&nr_cpus); + fclose(fff); + if (num_read==1) { + nr_cpus++; + } else { + nr_cpus = 1; + } + return nr_cpus; +} + +/************************* PAPI Functions **********************************/ + + +/* + * This is called whenever a thread is initialized + */ +static int +_rapl_init_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + + return PAPI_OK; +} + + + +/* + * Called when PAPI process is initialized (i.e. PAPI_library_init) + */ +static int +_rapl_init_component( int cidx ) +{ + int i,j,k,fd; + FILE *fff; + char filename[BUFSIZ]; + + int package_avail, dram_avail, pp0_avail, pp1_avail, psys_avail; + int different_units; + + long long result; + int package; + + const PAPI_hw_info_t *hw_info; + + int nr_cpus = get_kernel_nr_cpus(); + int packages[nr_cpus]; + int cpu_to_use[nr_cpus]; + + /* Fill with sentinel values */ + for (i=0; ivendor!=PAPI_VENDOR_INTEL) { + strncpy(_rapl_vector.cmp_info.disabled_reason, + "Not an Intel processor",PAPI_MAX_STR_LEN); + return PAPI_ENOSUPP; + } + + /* Make sure it is a family 6 Intel Chip */ + if (hw_info->cpuid_family!=6) { + /* Not a family 6 machine */ + strncpy(_rapl_vector.cmp_info.disabled_reason, + "CPU family not supported",PAPI_MAX_STR_LEN); + return PAPI_ENOIMPL; + } + + /* Detect RAPL support */ + switch(hw_info->cpuid_model) { + + /* Desktop / Laptops */ + + case 42: /* SandyBridge */ + case 58: /* IvyBridge */ + package_avail=1; + pp0_avail=1; + pp1_avail=1; + dram_avail=0; + psys_avail=0; + different_units=0; + break; + + case 60: /* Haswell */ + case 69: /* Haswell ULT */ + case 70: /* Haswell GT3E */ + case 92: /* Atom Goldmont */ + case 122: /* Atom Gemini Lake */ + case 95: /* Atom Denverton */ + package_avail=1; + 
pp0_avail=1; + pp1_avail=1; + dram_avail=1; + psys_avail=0; + different_units=0; + break; + + case 61: /* Broadwell */ + case 71: /* Broadwell-H (GT3E) */ + case 86: /* Broadwell XEON_D */ + package_avail=1; + pp0_avail=1; + pp1_avail=0; + dram_avail=1; + psys_avail=0; + different_units=0; + break; + + case 78: /* Skylake Mobile */ + case 94: /* Skylake Desktop (H/S) */ + case 142: /* Kabylake Mobile */ + case 158: /* Kabylake Desktop */ + package_avail=1; + pp0_avail=1; + pp1_avail=0; + dram_avail=1; + psys_avail=1; + different_units=0; + break; + + /* Server Class Machines */ + + case 45: /* SandyBridge-EP */ + case 62: /* IvyBridge-EP */ + package_avail=1; + pp0_avail=1; + pp1_avail=0; + dram_avail=1; + psys_avail=0; + different_units=0; + break; + + case 63: /* Haswell-EP */ + case 79: /* Broadwell-EP */ + case 85: /* Skylake-X */ + package_avail=1; + pp0_avail=1; + pp1_avail=0; + dram_avail=1; + psys_avail=0; + different_units=1; + break; + + + case 87: /* Knights Landing (KNL) */ + case 133: /* Knights Mill (KNM) */ + package_avail=1; + pp0_avail=0; + pp1_avail=0; + dram_avail=1; + psys_avail=0; + different_units=1; + break; + + default: /* not a supported model */ + strncpy(_rapl_vector.cmp_info.disabled_reason, + "CPU model not supported", + PAPI_MAX_STR_LEN); + return PAPI_ENOIMPL; + } + + /* Detect how many packages */ + j=0; + while(1) { + int num_read; + + sprintf(filename, + "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",j); + fff=fopen(filename,"r"); + if (fff==NULL) break; + num_read=fscanf(fff,"%d",&package); + fclose(fff); + if (num_read!=1) { + strcpy(_rapl_vector.cmp_info.disabled_reason, "Error reading file: "); + strncat(_rapl_vector.cmp_info.disabled_reason, filename, PAPI_MAX_STR_LEN - strlen(_rapl_vector.cmp_info.disabled_reason) - 1); + _rapl_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN-1] = '\0'; + return PAPI_ESYS; + } + + /* Check if a new package */ + if ((package >= 0) && (package < nr_cpus)) { + if (packages[package] 
== -1) { + SUBDBG("Found package %d out of total %d\n",package,num_packages); + packages[package]=package; + cpu_to_use[package]=j; + num_packages++; + } + } else { + SUBDBG("Package outside of allowed range\n"); + strncpy(_rapl_vector.cmp_info.disabled_reason, + "Package outside of allowed range",PAPI_MAX_STR_LEN); + return PAPI_ESYS; + } + + j++; + } + num_cpus=j; + + if (num_packages==0) { + SUBDBG("Can't access /dev/cpu/*/\n"); + strncpy(_rapl_vector.cmp_info.disabled_reason, + "Can't access /dev/cpu/*/",PAPI_MAX_STR_LEN); + return PAPI_ESYS; + } + + SUBDBG("Found %d packages with %d cpus\n",num_packages,num_cpus); + + /* Init fd_array */ + + fd_array=papi_calloc(num_cpus, sizeof(struct fd_array_t)); + if (fd_array==NULL) return PAPI_ENOMEM; + + fd=open_fd(cpu_to_use[0]); + if (fd<0) { + sprintf(_rapl_vector.cmp_info.disabled_reason, + "Can't open fd for cpu0: %s",strerror(errno)); + return PAPI_ESYS; + } + + /* Verify needed MSR is readable. In a guest VM it may not be readable*/ + if (pread(fd, &result, sizeof result, MSR_RAPL_POWER_UNIT) != sizeof result ) { + strncpy(_rapl_vector.cmp_info.disabled_reason, + "Unable to access RAPL registers",PAPI_MAX_STR_LEN); + return PAPI_ESYS; + } + + /* Calculate the units used */ + result=read_msr(fd,MSR_RAPL_POWER_UNIT); + + /* units are 0.5^UNIT_VALUE */ + /* which is the same as 1/(2^UNIT_VALUE) */ + + power_divisor=1<<((result>>POWER_UNIT_OFFSET)&POWER_UNIT_MASK); + cpu_energy_divisor=1<<((result>>ENERGY_UNIT_OFFSET)&ENERGY_UNIT_MASK); + time_divisor=1<<((result>>TIME_UNIT_OFFSET)&TIME_UNIT_MASK); + + /* Note! 
On Haswell-EP DRAM energy is fixed at 15.3uJ */ + /* see https://lkml.org/lkml/2015/3/20/582 */ + /* Knights Landing is the same */ + /* so is Broadwell-EP */ + if ( different_units ) { + dram_energy_divisor=1<<16; + } + else { + dram_energy_divisor=cpu_energy_divisor; + } + + SUBDBG("Power units = %.3fW\n",1.0/power_divisor); + SUBDBG("CPU Energy units = %.8fJ\n",1.0/cpu_energy_divisor); + SUBDBG("DRAM Energy units = %.8fJ\n",1.0/dram_energy_divisor); + SUBDBG("Time units = %.8fs\n",1.0/time_divisor); + + /* Allocate space for events */ + /* Include room for both counts and scaled values */ + + num_events= ((package_avail*num_packages) + + (pp0_avail*num_packages) + + (pp1_avail*num_packages) + + (dram_avail*num_packages) + + (psys_avail*num_packages) + + (4*num_packages)) * 2; + + rapl_native_events = (_rapl_native_event_entry_t*) + papi_calloc(num_events, sizeof(_rapl_native_event_entry_t)); + + + i = 0; + k = num_events/2; + + /* Create events for package power info */ + + for(j=0;jbeing_measured[i]=0; + } + + return PAPI_OK; +} + +static int +_rapl_start( hwd_context_t *ctx, hwd_control_state_t *ctl) +{ + _rapl_context_t* context = (_rapl_context_t*) ctx; + _rapl_control_state_t* control = (_rapl_control_state_t*) ctl; + long long now = PAPI_get_real_usec(); + int i; + + for( i = 0; i < RAPL_MAX_COUNTERS; i++ ) { + if ((control->being_measured[i]) && (control->need_difference[i])) { + context->start_value[i]=read_rapl_value(i); + } + } + + control->lastupdate = now; + + return PAPI_OK; +} + +static int +_rapl_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + + /* read values */ + _rapl_context_t* context = (_rapl_context_t*) ctx; + _rapl_control_state_t* control = (_rapl_control_state_t*) ctl; + long long now = PAPI_get_real_usec(); + int i; + long long temp; + + for ( i = 0; i < RAPL_MAX_COUNTERS; i++ ) { + if (control->being_measured[i]) { + temp = read_rapl_value(i); + if (context->start_value[i]) + if (control->need_difference[i]) { + /* test for 
wrap around */ + if (temp < context->start_value[i] ) { + SUBDBG("Wraparound!\nstart:\t%#016x\ttemp:\t%#016x", + (unsigned)context->start_value[i], (unsigned)temp); + temp += (0x100000000 - context->start_value[i]); + SUBDBG("\tresult:\t%#016x\n", (unsigned)temp); + } else { + temp -= context->start_value[i]; + } + } + control->count[i] = convert_rapl_energy( i, temp ); + } + } + control->lastupdate = now; + return PAPI_OK; +} + +/* Shutdown a thread */ +static int +_rapl_shutdown_thread( hwd_context_t *ctx ) +{ + ( void ) ctx; + return PAPI_OK; +} + +int +_rapl_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags) +{ + (void) flags; + + _rapl_stop( ctx, ctl ); + + /* Pass back a pointer to our results */ + *events = ((_rapl_control_state_t*) ctl)->count; + + return PAPI_OK; +} + + +/* + * Clean up what was setup in rapl_init_component(). + */ +static int +_rapl_shutdown_component( void ) +{ + int i; + + if (rapl_native_events) papi_free(rapl_native_events); + if (fd_array) { + for(i=0;ibeing_measured[i]=0; + } + + for( i = 0; i < count; i++ ) { + index=native[i].ni_event&PAPI_NATIVE_AND_MASK; + native[i].ni_position=rapl_native_events[index].resources.selector - 1; + control->being_measured[index]=1; + + /* Only need to subtract if it's a PACKAGE_ENERGY or ENERGY_CNT type */ + control->need_difference[index]= + (rapl_native_events[index].type==PACKAGE_ENERGY || + rapl_native_events[index].type==DRAM_ENERGY || + rapl_native_events[index].type==PLATFORM_ENERGY || + rapl_native_events[index].type==PACKAGE_ENERGY_CNT); + } + + return PAPI_OK; +} + + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode 
(like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +static int +_rapl_set_domain( hwd_control_state_t *ctl, int domain ) +{ + ( void ) ctl; + + /* In theory we only support system-wide mode */ + /* How to best handle that? */ + if ( PAPI_DOM_ALL != domain ) + return PAPI_EINVAL; + + return PAPI_OK; +} + + +static int +_rapl_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + ( void ) ctx; + ( void ) ctl; + + return PAPI_OK; +} + + +/* + * Native Event functions + */ +static int +_rapl_ntv_enum_events( unsigned int *EventCode, int modifier ) +{ + + int index; + + switch ( modifier ) { + + case PAPI_ENUM_FIRST: + + if (num_events==0) { + return PAPI_ENOEVNT; + } + *EventCode = 0; + + return PAPI_OK; + + + case PAPI_ENUM_EVENTS: + + index = *EventCode & PAPI_NATIVE_AND_MASK; + + if ( index < num_events - 1 ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + break; + + default: + return PAPI_EINVAL; + } + + return PAPI_EINVAL; +} + +/* + * + */ +static int +_rapl_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + + int index = EventCode & PAPI_NATIVE_AND_MASK; + + if ( index >= 0 && index < num_events ) { + strncpy( name, rapl_native_events[index].name, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + +/* + * + */ +static int +_rapl_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + int index = EventCode; + + if ( index >= 0 && index < num_events ) { + strncpy( name, rapl_native_events[index].description, len ); + return PAPI_OK; + } + return PAPI_ENOEVNT; +} + +static int +_rapl_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info) +{ + + int index = EventCode; + + if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT; + + strncpy( info->symbol, rapl_native_events[index].name, sizeof(info->symbol)-1); + info->symbol[sizeof(info->symbol)-1] = '\0'; + + strncpy( info->long_descr, rapl_native_events[index].description, 
sizeof(info->long_descr)-1); + info->long_descr[sizeof(info->long_descr)-1] = '\0'; + + strncpy( info->units, rapl_native_events[index].units, sizeof(info->units)-1); + info->units[sizeof(info->units)-1] = '\0'; + + info->data_type = rapl_native_events[index].return_type; + + return PAPI_OK; +} + + + +papi_vector_t _rapl_vector = { + .cmp_info = { /* (unspecified values are initialized to 0) */ + .name = "rapl", + .short_name = "rapl", + .description = "Linux RAPL energy measurements", + .version = "5.3.0", + .default_domain = PAPI_DOM_ALL, + .default_granularity = PAPI_GRN_SYS, + .available_granularities = PAPI_GRN_SYS, + .hardware_intr_sig = PAPI_INT_SIGNAL, + .available_domains = PAPI_DOM_ALL, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( _rapl_context_t ), + .control_state = sizeof ( _rapl_control_state_t ), + .reg_value = sizeof ( _rapl_register_t ), + .reg_alloc = sizeof ( _rapl_reg_alloc_t ), + }, + /* function pointers in this component */ + .init_thread = _rapl_init_thread, + .init_component = _rapl_init_component, + .init_control_state = _rapl_init_control_state, + .start = _rapl_start, + .stop = _rapl_stop, + .read = _rapl_read, + .shutdown_thread = _rapl_shutdown_thread, + .shutdown_component = _rapl_shutdown_component, + .ctl = _rapl_ctl, + + .update_control_state = _rapl_update_control_state, + .set_domain = _rapl_set_domain, + .reset = _rapl_reset, + + .ntv_enum_events = _rapl_ntv_enum_events, + .ntv_code_to_name = _rapl_ntv_code_to_name, + .ntv_code_to_descr = _rapl_ntv_code_to_descr, + .ntv_code_to_info = _rapl_ntv_code_to_info, +}; diff --git a/src/components/rapl/tests/Makefile b/src/components/rapl/tests/Makefile new file mode 100644 index 0000000..e32c057 --- /dev/null +++ b/src/components/rapl/tests/Makefile @@ -0,0 +1,40 @@ +NAME=rapl +include ../../Makefile_comp_tests.target + +TESTS = rapl_basic rapl_busy rapl_wraparound rapl_overflow + +DOLOOPS= $(testlibdir)/do_loops.o + 
+rapl_tests: $(TESTS) + +rapl_overflow.o: rapl_overflow.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c rapl_overflow.c + +rapl_overflow: rapl_overflow.o $(DOLOOPS) $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o rapl_overflow rapl_overflow.o $(DOLOOPS) $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +rapl_basic.o: rapl_basic.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c rapl_basic.c -o rapl_basic.o -DBASIC_TEST=1 + +rapl_basic: rapl_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o rapl_basic rapl_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +rapl_busy.o: rapl_basic.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c rapl_basic.c -o rapl_busy.o -DBUSY_TEST=1 + +rapl_busy: rapl_busy.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o rapl_busy rapl_busy.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +rapl_wraparound.o: rapl_basic.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c rapl_basic.c -o rapl_wraparound.o -DWRAP_TEST=1 + +rapl_wraparound: rapl_wraparound.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o rapl_wraparound rapl_wraparound.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + + +clean: + rm -f $(TESTS) *.o *~ + diff --git a/src/components/rapl/tests/rapl_basic.c b/src/components/rapl/tests/rapl_basic.c new file mode 100644 index 0000000..368c3eb --- /dev/null +++ b/src/components/rapl/tests/rapl_basic.c @@ -0,0 +1,370 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/** + * @author Vince Weaver + * + * test case for RAPL component + * + * @brief + * Tests basic functionality of RAPL component + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define MAX_RAPL_EVENTS 64 + + +#ifdef BASIC_TEST + +void run_test(int quiet) { + + if (!quiet) { + printf("Sleeping 1 second...\n"); + } + + /* Sleep */ + sleep(1); +} + +#else + +#define MATRIX_SIZE 1024 + + static double a[MATRIX_SIZE][MATRIX_SIZE]; + static double b[MATRIX_SIZE][MATRIX_SIZE]; + static double c[MATRIX_SIZE][MATRIX_SIZE]; + +/* Naive matrix 
multiply */ +void run_test(int quiet) { + + double s; + int i,j,k; + + if (!quiet) { + printf("Doing a naive %dx%d MMM...\n",MATRIX_SIZE,MATRIX_SIZE); + } + + for(i=0;i 1 ) { + if ( strstr( argv[1], "-w" ) ) { + do_wrap = 1; + } + } + +#endif + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!TESTS_QUIET) { + printf("Trying all RAPL events\n"); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname,"rapl")) { + + rapl_cid=cid; + + if (!TESTS_QUIET) { + printf("Found rapl component at cid %d\n",rapl_cid); + } + + if (cmpinfo->disabled) { + if (!TESTS_QUIET) { + printf("RAPL component disabled: %s\n", + cmpinfo->disabled_reason); + } + test_skip(__FILE__,__LINE__,"RAPL component disabled",0); + } + break; + } + } + + /* Component not found */ + if (cid==numcmp) { + test_skip(__FILE__,__LINE__,"No rapl component found\n",0); + } + + /* Create EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + /* Add all events */ + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, rapl_cid ); + + while ( r == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_names[num_events] ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + retval = PAPI_get_event_info(code,&evinfo); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, + "Error getting event info\n",retval); + } + + strncpy(units[num_events],evinfo.units,sizeof(units[0])-1); + // buffer must be null terminated to safely use strstr operation on it below + units[num_events][sizeof(units[0])-1] = '\0'; + + data_type[num_events] = evinfo.data_type; + + retval = 
PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + break; /* We've hit an event limit */ + } + num_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, rapl_cid ); + } + + values=calloc(num_events,sizeof(long long)); + if (values==NULL) { + test_fail(__FILE__, __LINE__, + "No memory",retval); + } + + if (!TESTS_QUIET) { + printf("\nStarting measurements...\n\n"); + } + + /* Start Counting */ + before_time=PAPI_get_real_nsec(); + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + /* Run test */ + run_test(TESTS_QUIET); + + /* Stop Counting */ + after_time=PAPI_get_real_nsec(); + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()",retval); + } + + elapsed_time=((double)(after_time-before_time))/1.0e9; + + if (!TESTS_QUIET) { + printf("\nStopping measurements, took %.3fs, gathering results...\n\n", + elapsed_time); + + printf("Scaled energy measurements:\n"); + + for(i=0;i +#include + +#include "papi.h" + +#include "do_loops.h" +#include "papi_test.h" + +static int total = 0; /* total overflows */ + +static long long values[2]; +static long long rapl_values[2]; +static long long old_rapl_values[2] = {0,0}; +static int rapl_backward=0; + +int EventSet2=PAPI_NULL; + +int quiet=0; + +void handler( int EventSet, void *address, + long long overflow_vector, void *context ) { + + ( void ) context; + ( void ) address; + ( void ) overflow_vector; + +#if 0 + fprintf( stderr, "handler(%d ) Overflow at %p! 
bit=%#llx \n", + EventSet, address, overflow_vector ); +#endif + + PAPI_read(EventSet,values); + if (!quiet) printf("%lld %lld\t",values[0],values[1]); + PAPI_read(EventSet2,rapl_values); + if (!quiet) printf("RAPL: %lld %lld\n",rapl_values[0],rapl_values[1]); + + if ((rapl_values[0]name,"rapl")) { + rapl_cid=cid; + if (!TESTS_QUIET) printf("Found rapl component at cid %d\n", + rapl_cid); + if (cmpinfo->num_native_events==0) { + test_skip(__FILE__,__LINE__,"No rapl events found",0); + } + break; + } + } + + /* Component not found */ + if (cid==numcmp) { + test_skip(__FILE__,__LINE__,"No rapl component found\n",0); + } + + + /* add PAPI_TOT_CYC and PAPI_TOT_INS */ + retval=PAPI_create_eventset(&EventSet); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_create_eventset",retval); + } + + retval=PAPI_add_event(EventSet,PAPI_TOT_CYC); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_add_event",retval); + } + + retval=PAPI_add_event(EventSet,PAPI_TOT_INS); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_add_event",retval); + } + + /* Add some RAPL events */ + retval=PAPI_create_eventset(&EventSet2); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_create_eventset",retval); + } + + /* Add an event for each packages 0-n */ + i = 0; + do { + char buffer[80]; + sprintf(&(buffer[0]), "rapl:::PACKAGE_ENERGY:PACKAGE%d", i); + retval=PAPI_add_named_event(EventSet2,buffer); + ++i; + /* protect against insane PAPI library, the value 64 is the same value as + * RAPL_MAX_COUNTERS in linux-rapl.c, and feels reasonable. 
*/ + } while ( 0 < retval && i < 64 ); + + PAPI_event=PAPI_TOT_CYC; + + /* arbitrary */ + mythreshold = 2000000; + if (!TESTS_QUIET) { + printf("Using %#x for the overflow event, threshold %d\n", + PAPI_event,mythreshold); + } + + /* Start the run calibration run */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_start",retval); + } + + do_ints(num_flops,TESTS_QUIET); + do_flops( 3000000 ); + + /* stop the calibration run */ + retval = PAPI_stop( EventSet, values0 ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_stop",retval); + } + + + /* set up overflow handler */ + retval = PAPI_overflow( EventSet,PAPI_event,mythreshold, 0, handler ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_overflow",retval); + } + + /* Start overflow run */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_start",retval); + } + retval = PAPI_start( EventSet2 ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_start",retval); + } + + do_ints(num_flops,TESTS_QUIET); + do_flops( num_flops ); + + /* stop overflow run */ + retval = PAPI_stop( EventSet, values1 ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_stop",retval); + } + + retval = PAPI_stop( EventSet2, values2 ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_stop",retval); + } + + retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_overflow",retval); + } + + retval = PAPI_event_code_to_name( PAPI_event, event_name1 ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__,"PAPI_event_code_to_name\n", retval); + } + + if (!TESTS_QUIET) { + printf("%s: %lld %lld\n",event_name1,values0[0],values1[0]); + } + + retval = PAPI_event_code_to_name( PAPI_TOT_INS, event_name1 ); + if (retval != PAPI_OK) { + test_fail(__FILE__, 
__LINE__,"PAPI_event_code_to_name\n",retval); + } + + if (!TESTS_QUIET) { + printf("%s: %lld %lld\n",event_name1,values0[1],values1[1]); + } + + retval = PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_cleanup_eventset",retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__,"PAPI_destroy_eventset",retval); + } + + if (rapl_backward) { + test_fail(__FILE__, __LINE__,"RAPL counts went backward!",0); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/components/rapl/utils/Makefile b/src/components/rapl/utils/Makefile new file mode 100644 index 0000000..67cde8e --- /dev/null +++ b/src/components/rapl/utils/Makefile @@ -0,0 +1,16 @@ +CC = gcc +CFLAGS = -O2 -Wall +LFLAGS = +PAPI_INCLUDE = ../../.. +PAPI_LIBRARY = ../../../libpapi.a + +all: rapl_plot + +rapl_plot: rapl_plot.o + $(CC) $(LFLAGS) -o rapl_plot rapl_plot.o $(PAPI_LIBRARY) + +rapl_plot.o: rapl_plot.c + $(CC) $(CFLAGS) -I$(PAPI_INCLUDE) -c rapl_plot.c + +clean: + rm -f *~ *.o rapl_plot results.* diff --git a/src/components/rapl/utils/README b/src/components/rapl/utils/README new file mode 100644 index 0000000..9d8591c --- /dev/null +++ b/src/components/rapl/utils/README @@ -0,0 +1,19 @@ +This tool can be used to gather Energy measurements on a SandyBridge +chip using RAPL. + +Be sure to configure PAPI with --with-components="rapl" and have +read permissions on the /dev/cpu/*/msr files. + +It works by using PAPI to poll the RAPL stats every 100ms. +It will dump each statistic to different files, which then +can be plotted. + +The measurements (in nJ) are dumped every 100ms. +You can adjust the frequency by changing the source code. + +You can then take those files and put them into your favorite plotting +program. You might want to edit the source to remove the extra +commentary from the data, the plotting program I use ignores things +surrounded by (* brackets. 
+ + diff --git a/src/components/rapl/utils/rapl_plot.c b/src/components/rapl/utils/rapl_plot.c new file mode 100644 index 0000000..377c192 --- /dev/null +++ b/src/components/rapl/utils/rapl_plot.c @@ -0,0 +1,229 @@ +/** + * @author Vince Weaver + */ + +#include +#include +#include +#include + +#include "papi.h" + +#define MAX_EVENTS 128 + +char events[MAX_EVENTS][BUFSIZ]; +char units[MAX_EVENTS][BUFSIZ]; +int data_type[MAX_EVENTS]; +char filenames[MAX_EVENTS][BUFSIZ]; + +FILE *fff[MAX_EVENTS]; + +static int num_events=0; + +int main (int argc, char **argv) +{ + + int retval,cid,rapl_cid=-1,numcmp; + int EventSet = PAPI_NULL; + long long values[MAX_EVENTS]; + int i,code,enum_retval; + PAPI_event_info_t evinfo; + const PAPI_component_info_t *cmpinfo = NULL; + long long start_time,before_time,after_time; + double elapsed_time,total_time; + char event_name[BUFSIZ]; + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + fprintf(stderr,"PAPI_library_init failed\n"); + exit(1); + } + + numcmp = PAPI_num_components(); + + for(cid=0; cidname,"rapl")) { + rapl_cid=cid; + printf("Found rapl component at cid %d\n", rapl_cid); + + if (cmpinfo->disabled) { + fprintf(stderr,"No rapl events found: %s\n", + cmpinfo->disabled_reason); + exit(1); + } + break; + } + } + + /* Component not found */ + if (cid==numcmp) { + fprintf(stderr,"No rapl component found\n"); + exit(1); + } + + /* Find Events */ + code = PAPI_NATIVE_MASK; + + enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); + + while ( enum_retval == PAPI_OK ) { + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + exit(1); + } + + printf("Found: %s\n",event_name); + strncpy(events[num_events],event_name,BUFSIZ); + sprintf(filenames[num_events],"results.%s",event_name); + + + /* Find additional event information: unit, data type */ + retval = PAPI_get_event_info(code, 
&evinfo); + if (retval != PAPI_OK) { + printf("Error getting event info for %#x\n",code); + exit(1); + } + + strncpy(units[num_events],evinfo.units,sizeof(units[0])-1); + /* buffer must be null terminated to safely use strstr operation on it below */ + units[num_events][sizeof(units[0])-1] = '\0'; + + data_type[num_events] = evinfo.data_type; + + num_events++; + + if (num_events==MAX_EVENTS) { + printf("Too many events! %d\n",num_events); + exit(1); + } + + enum_retval = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + + } + + + + if (num_events==0) { + printf("Error! No RAPL events found!\n"); + exit(1); + } + + /* Open output files */ + for(i=0;i +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +struct counter_info +{ + char *name; + char *description; + char *units; + unsigned long long value; +}; + +typedef struct counter_info STEALTIME_register_t; +typedef struct counter_info STEALTIME_native_event_entry_t; +typedef struct counter_info STEALTIME_reg_alloc_t; + + +struct STEALTIME_control_state +{ + long long *values; + int *which_counter; + int num_events; +}; + + +struct STEALTIME_context +{ + long long *start_count; + long long *current_count; + long long *value; +}; + + +static int num_events = 0; + +static struct counter_info *event_info=NULL; + +/* Advance declaration of buffer */ +papi_vector_t _stealtime_vector; + +/****************************************************************************** + ******** BEGIN FUNCTIONS USED INTERNALLY SPECIFIC TO THIS COMPONENT ******** + *****************************************************************************/ + +struct statinfo { + long long user; + long long nice; + long long system; + long long idle; + long long iowait; + long long irq; + long long softirq; + long long steal; + long long guest; +}; + +static int +read_stealtime( struct STEALTIME_context *context, int starting) { + + FILE 
*fff; + char buffer[BUFSIZ],*result; + int i,count; + struct statinfo our_stat; + + int hz=sysconf(_SC_CLK_TCK); + + + fff=fopen("/proc/stat","r"); + if (fff==NULL) { + return PAPI_ESYS; + } + + for(i=0;istart_count[i]=our_stat.steal; + } + context->current_count[i]=our_stat.steal; + + /* convert to us */ + context->value[i]=(context->current_count[i]-context->start_count[i])* + (1000000/hz); + } + + + fclose(fff); + + return PAPI_OK; + +} + + + +/***************************************************************************** + ******************* BEGIN PAPI's COMPONENT REQUIRED FUNCTIONS ************* + *****************************************************************************/ + +/* + * Component setup and shutdown + */ + +static int +_stealtime_init_component( int cidx ) +{ + + (void)cidx; + + FILE *fff; + char buffer[BUFSIZ],*result,string[BUFSIZ]; + int i; + + /* Make sure /proc/stat exists */ + fff=fopen("/proc/stat","r"); + if (fff==NULL) { + strncpy(_stealtime_vector.cmp_info.disabled_reason, + "Cannot open /proc/stat",PAPI_MAX_STR_LEN); + return PAPI_ESYS; + } + + num_events=0; + while(1) { + result=fgets(buffer,BUFSIZ,fff); + if (result==NULL) break; + + /* /proc/stat line with cpu stats always starts with "cpu" */ + + if (!strncmp(buffer,"cpu",3)) { + num_events++; + } + else { + break; + } + + } + + fclose(fff); + + if (num_events<1) { + strncpy(_stealtime_vector.cmp_info.disabled_reason, + "Cannot find enough CPU lines in /proc/stat", + PAPI_MAX_STR_LEN); + return PAPI_ESYS; + } + + event_info=calloc(num_events,sizeof(struct counter_info)); + if (event_info==NULL) { + return PAPI_ENOMEM; + } + + + sysconf(_SC_CLK_TCK); + event_info[0].name=strdup("TOTAL"); + event_info[0].description=strdup("Total amount of steal time"); + event_info[0].units=strdup("us"); + + for(i=1;istart_count=calloc(num_events,sizeof(long long)); + if (context->start_count==NULL) return PAPI_ENOMEM; + + context->current_count=calloc(num_events,sizeof(long long)); + if 
(context->current_count==NULL) return PAPI_ENOMEM; + + context->value=calloc(num_events,sizeof(long long)); + if (context->value==NULL) return PAPI_ENOMEM; + + return PAPI_OK; +} + + +/* + * + */ +static int +_stealtime_shutdown_component( void ) +{ + int i; + int num_events = _stealtime_vector.cmp_info.num_native_events; + if (event_info!=NULL) { + for (i=0; istart_count!=NULL) free(context->start_count); + if (context->current_count!=NULL) free(context->current_count); + if (context->value!=NULL) free(context->value); + + return PAPI_OK; +} + + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) functions + */ +static int +_stealtime_init_control_state( hwd_control_state_t *ctl ) +{ + + struct STEALTIME_control_state *control = + (struct STEALTIME_control_state *)ctl; + + control->values=NULL; + control->which_counter=NULL; + control->num_events=0; + + return PAPI_OK; +} + + +/* + * + */ +static int +_stealtime_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, + hwd_context_t *ctx ) +{ + + struct STEALTIME_control_state *control; + + ( void ) ctx; + int i, index; + + control= (struct STEALTIME_control_state *)ctl; + + if (count!=control->num_events) { + // printf("Resizing %d to %d\n",control->num_events,count); + control->which_counter=realloc(control->which_counter, + count*sizeof(int)); + control->values=realloc(control->values, + count*sizeof(long long)); + + } + + + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event; + control->which_counter[i]=index; + native[i].ni_position = i; + } + + control->num_events=count; + + return PAPI_OK; +} + + +/* + * + */ +static int +_stealtime_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + + (void)ctl; + + // struct STEALTIME_control_state *control; + struct STEALTIME_context *context; + + //control = (struct STEALTIME_control_state *)ctl; + context = (struct STEALTIME_context *)ctx; + + read_stealtime( context, 1 ); + + /* no need to update 
control, as we assume only one EventSet */ + /* is active at once, so starting things at the context level */ + /* is fine, since stealtime is system-wide */ + + return PAPI_OK; +} + + +/* + * + */ +static int +_stealtime_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + + (void) ctl; + + // struct STEALTIME_control_state *control; + struct STEALTIME_context *context; + + //control = (struct STEALTIME_control_state *)ctl; + context = (struct STEALTIME_context *)ctx; + + read_stealtime( context, 0 ); + + return PAPI_OK; + +} + + + +/* + * + */ +static int +_stealtime_read( hwd_context_t *ctx, hwd_control_state_t *ctl, + long long **events, int flags ) +{ + ( void ) flags; + + struct STEALTIME_control_state *control; + struct STEALTIME_context *context; + + int i; + + control = (struct STEALTIME_control_state *)ctl; + context = (struct STEALTIME_context *)ctx; + + read_stealtime( context, 0 ); + + for(i=0;inum_events;i++) { + control->values[i]= + context->value[control->which_counter[i]]; + } + + *events = control->values; + + return PAPI_OK; + +} + + + + +/* + * + */ +static int +_stealtime_reset( hwd_context_t * ctx, hwd_control_state_t * ctrl ) +{ + + /* re-initializes counter_start values to current */ + + _stealtime_start(ctx,ctrl); + + return PAPI_OK; +} + + +/* + * Unused stealtime write function + */ +/* static int */ +/* _stealtime_write( hwd_context_t * ctx, hwd_control_state_t * ctrl, long long *from ) */ +/* { */ +/* ( void ) ctx; */ +/* ( void ) ctrl; */ +/* ( void ) from; */ + +/* return PAPI_OK; */ +/* } */ + + +/* + * Functions for setting up various options + */ + +/* This function sets various options in the component + * The valid codes being passed in are PAPI_SET_DEFDOM, + * PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL * and PAPI_SET_INHERIT + */ +static int +_stealtime_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) +{ + ( void ) ctx; + ( void ) code; + ( void ) option; + + return PAPI_OK; +} + + +/* + * This 
function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context is counted + * PAPI_DOM_KERNEL is only the Kernel/OS context is counted + * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses) + * PAPI_DOM_ALL is all of the domains + */ +static int +_stealtime_set_domain( hwd_control_state_t * cntrl, int domain ) +{ + ( void ) cntrl; + int found = 0; + if ( PAPI_DOM_USER & domain ) { + found = 1; + } + if ( PAPI_DOM_KERNEL & domain ) { + found = 1; + } + if ( PAPI_DOM_OTHER & domain ) { + found = 1; + } + if ( !found ) + return PAPI_EINVAL; + + return PAPI_OK; +} + + +/* + * + */ +static int +_stealtime_ntv_code_to_name( unsigned int EventCode, char *name, int len ) +{ + + int event=EventCode; + + if (event >=0 && event < num_events) { + strncpy( name, event_info[event].name, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + + +/* + * + */ +static int +_stealtime_ntv_code_to_descr( unsigned int EventCode, char *name, int len ) +{ + + int event=EventCode; + + if (event >=0 && event < num_events) { + strncpy( name, event_info[event].description, len ); + return PAPI_OK; + } + + return PAPI_ENOEVNT; +} + + + +static int +_stealtime_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info) +{ + + int index = EventCode; + + if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT; + + strncpy( info->symbol, event_info[index].name,sizeof(info->symbol)); + info->symbol[sizeof(info->symbol)-1] = '\0'; + + strncpy( info->long_descr, event_info[index].description,sizeof(info->symbol)); + info->long_descr[sizeof(info->symbol)-1] = '\0'; + + strncpy( info->units, event_info[index].units,sizeof(info->units)); + info->units[sizeof(info->units)-1] = '\0'; + + return PAPI_OK; + +} + + + + +/* + * + */ +static int +_stealtime_ntv_enum_events( unsigned 
int *EventCode, int modifier ) +{ + + /* PAPI_ENUM_FIRST: report event 0, unless there are no events at all */ + if ( modifier == PAPI_ENUM_FIRST ) { + if (num_events==0) return PAPI_ENOEVNT; + *EventCode = 0; + return PAPI_OK; + } + + /* PAPI_ENUM_EVENTS: step to the next event while one remains */ + if ( modifier == PAPI_ENUM_EVENTS ) { + int index; + + index = *EventCode; + + if ( (index+1) < num_events ) { + *EventCode = *EventCode + 1; + return PAPI_OK; + } else { + return PAPI_ENOEVNT; + } + } + + /* any other modifier is rejected */ + return PAPI_EINVAL; +} + + +/* + * Component descriptor: static component information, the sizes of the + * component-private structures, and the function pointers that + * implement this component. + */ +papi_vector_t _stealtime_vector = { + .cmp_info = { + /* component information (unspecified values initialized to 0) */ + .name = "stealtime", + .short_name="stealtime", + .version = "5.0", + .description = "Stealtime filesystem statistics", + .default_domain = PAPI_DOM_USER, + .default_granularity = PAPI_GRN_THR, + .available_granularities = PAPI_GRN_THR, + .hardware_intr_sig = PAPI_INT_SIGNAL, + + /* component specific cmp_info initializations */ + .fast_real_timer = 0, + .fast_virtual_timer = 0, + .attach = 0, + .attach_must_ptrace = 0, + .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, + }, + + /* sizes of framework-opaque component-private structures */ + .size = { + .context = sizeof ( struct STEALTIME_context ), + .control_state = sizeof ( struct STEALTIME_control_state ), + .reg_value = sizeof ( STEALTIME_register_t ), + .reg_alloc = sizeof ( STEALTIME_reg_alloc_t ), + }, + + /* function pointers in this component */ + .init_thread = _stealtime_init_thread, + .init_component = _stealtime_init_component, + .init_control_state = _stealtime_init_control_state, + .start = _stealtime_start, + .stop = _stealtime_stop, + .read = _stealtime_read, + .shutdown_thread = _stealtime_shutdown_thread, + .shutdown_component = _stealtime_shutdown_component, + .ctl = _stealtime_ctl, + .update_control_state = _stealtime_update_control_state, + .set_domain = _stealtime_set_domain, + .reset = _stealtime_reset, + + .ntv_enum_events = _stealtime_ntv_enum_events, + .ntv_code_to_name = _stealtime_ntv_code_to_name, + .ntv_code_to_descr = _stealtime_ntv_code_to_descr, +
.ntv_code_to_info = _stealtime_ntv_code_to_info, +}; + + + + diff --git a/src/components/stealtime/tests/Makefile b/src/components/stealtime/tests/Makefile new file mode 100644 index 0000000..fc8dcc1 --- /dev/null +++ b/src/components/stealtime/tests/Makefile @@ -0,0 +1,20 @@ +NAME=stealtime +include ../../Makefile_comp_tests.target + +%.o:%.c + $(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $< + +TESTS = stealtime_basic + +stealtime_tests: $(TESTS) + +stealtime_basic: stealtime_basic.o $(UTILOBJS) $(PAPILIB) + $(CC) $(INCLUDE) -o stealtime_basic stealtime_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) + +clean: + rm -f $(TESTS) *.o + + + + + diff --git a/src/components/stealtime/tests/stealtime_basic.c b/src/components/stealtime/tests/stealtime_basic.c new file mode 100644 index 0000000..a303fbf --- /dev/null +++ b/src/components/stealtime/tests/stealtime_basic.c @@ -0,0 +1,131 @@ +/** + * @author Vince Weaver + * + * test case for stealtime component + * + * + * @brief + * Tests basic stealtime functionality + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "papi.h" +#include "papi_test.h" + +#define NUM_EVENTS 1 + +int main (int argc, char **argv) +{ + + int retval,cid,numcmp; + int EventSet = PAPI_NULL; + long long values[NUM_EVENTS]; + int code; + char event_name[PAPI_MAX_STR_LEN]; + int total_events=0; + int r; + const PAPI_component_info_t *cmpinfo = NULL; + int quiet=0; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* PAPI Initialization */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval); + } + + if (!quiet) { + printf("Trying all stealtime events\n"); + } + + numcmp = PAPI_num_components(); + + /* Walk every component and skip those whose name lacks "stealtime" */ + for(cid=0; cid<numcmp; cid++) { + + if ( (cmpinfo = PAPI_get_component_info(cid)) == NULL) { + test_fail(__FILE__, __LINE__,"PAPI_get_component_info failed\n", 0); + } + + if (strstr(cmpinfo->name,"stealtime")) { + if (!quiet) printf("\tFound stealtime component %d - %s\n", cid, cmpinfo->name); + } + else { + continue; + } + + code = PAPI_NATIVE_MASK; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid ); +
+ while ( r == PAPI_OK ) { + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + printf("Error translating %#x\n",code); + test_fail( __FILE__, __LINE__, + "PAPI_event_code_to_name", retval ); + } + + if (!quiet) printf(" %s ",event_name); + + /* each event is measured in its own freshly created event set */ + EventSet = PAPI_NULL; + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_create_eventset()",retval); + } + + retval = PAPI_add_event( EventSet, code ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_add_event()",retval); + } + + retval = PAPI_start( EventSet); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_start()",retval); + } + + retval = PAPI_stop( EventSet, values); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "PAPI_stop()",retval); + } + + if (!quiet) printf(" value: %lld\n",values[0]); + + retval = PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_cleanup_eventset()",retval); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, + "PAPI_destroy_eventset()",retval); + } + + total_events++; + + r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid ); + } + } + + if (total_events==0) { + test_skip(__FILE__,__LINE__,"No stealtime events found",0); + } + + if (!quiet) { + printf("Note: for this test the values are expected to all be 0\n\t unless run inside a VM on a busy system.\n"); + } + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/components/vmware/Makefile.vmware.in b/src/components/vmware/Makefile.vmware.in new file mode 100644 index 0000000..409a3d6 --- /dev/null +++ b/src/components/vmware/Makefile.vmware.in @@ -0,0 +1,2 @@ +VMWARE_INCDIR = @VMWARE_INCDIR@ +VMGUESTLIB = @VMGUESTLIB@ \ No newline at end of file diff --git a/src/components/vmware/PAPI-VMwareComponentDocument.pdf b/src/components/vmware/PAPI-VMwareComponentDocument.pdf new file mode
100644 index 0000000..5656cee Binary files /dev/null and b/src/components/vmware/PAPI-VMwareComponentDocument.pdf differ diff --git a/src/components/vmware/README b/src/components/vmware/README new file mode 100644 index 0000000..0106a7d --- /dev/null +++ b/src/components/vmware/README @@ -0,0 +1,20 @@ +/** +* @file: README +* CVS: $Id$ +* @author: Dan Terpstra +* terpstra@icl.utk.edu +* @defgroup papi_components Components +* @brief Component Specific Readme file: VMware +*/ + +/** @page component_readme Component Readme + +@section Component Specific Information + +vmware/ + +To make the generic VMware component do --with-vmware_incdir=< path_to_VMWare_Guest_SDK > from the component directory. + +For further information see the VMwareComponentDocument.txt file in the component directory, or the ComponentGuide pdf file. + +*/ diff --git a/src/components/vmware/Rules.vmware b/src/components/vmware/Rules.vmware new file mode 100644 index 0000000..149fcd6 --- /dev/null +++ b/src/components/vmware/Rules.vmware @@ -0,0 +1,10 @@ +include components/vmware/Makefile.vmware + +COMPSRCS += components/vmware/vmware.c +COMPOBJS += vmware.o + +CFLAGS += -I$(VMWARE_INCDIR) -DVMGUESTLIB=$(VMGUESTLIB) -DVMWARE_INCDIR=\"$(VMWARE_INCDIR)\" +LDFLAGS += $(LDL) + +vmware.o: components/vmware/vmware.c $(HEADERS) + $(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/vmware/vmware.c -o vmware.o diff --git a/src/components/vmware/VMwareComponentDocument.txt b/src/components/vmware/VMwareComponentDocument.txt new file mode 100644 index 0000000..49da3c7 --- /dev/null +++ b/src/components/vmware/VMwareComponentDocument.txt @@ -0,0 +1,188 @@ +PAPI-V VMware Component Document +Matthew R. Johnson +John Nelson +21 November 2011 +Revised: 23 January 2012 + + +This document is intended to detail the features of the PAPI-V VMware component, and more specifically the installation, usage, and pseudo performance counters available. 
In order to make this component possible, extensive research into the actual counters available, as well as the leveraging of the VMware Guest API [1], was needed. As this is the first of the PAPI-V components, we seem to be stepping into a new realm of performance measurements that, previously, has been a new frontier, or unexplored altogether. + + +Installation: + +To make PAPI with the VMware component you must go to the PAPI_ROOT/papi/src/components/vmware directory and configure with the flag: --with-vmware_incdir=<path>, where <path> is the path to the VMware Guest SDK for your machine. + +NOTE: The VMware Guest SDK is normally found in the following default vmware-tools path: + + /usr/lib/vmware-tools/GuestSDK + +or: + + /opt/GuestSDK + + e.g.: ./configure --with-vmware_incdir=/usr/lib/vmware-tools/GuestSDK + + +After running configure in the vmware directory, go to PAPI_ROOT/papi/src and configure again using the flag: + + --with-components=vmware + + e.g.: ./configure --with-components=vmware + +After running the main configure script you can then type make; the Makefiles have been automatically generated. If at any point you would like to uninstall PAPI and the VMware component, from the PAPI_ROOT/papi/src directory, just type: + + make clean clobber + +To make use of VMWare timekeeping pseudo-performance counters, the following configuration must be added through the vSphere client: + + monitor_control.pseudo_perfctr = TRUE + +As well as adding the + + --with-vmware_pseudo_perfctr + + WARNING: If you do not enable the monitor_control.pseudo_perfctr on the host side, and give configure the --with-vmware_pseudo_perfctr, you will get a segmentation fault upon readpmc trying to access protected memory without privileged access. This is expected behavior. + +flag during component configure in the vmware component directory. + +Available Performance Counters: + +Below is the list of available performance metrics available to PAPI through the VMware component. 
If at any time you would like to see a full list of counters available to PAPI, type ./papi_native_avail from within the utils directory. + +It is important to know that the counters VMWARE_HOST_TSC, VMWARE_ELAPSED_TIME, and VMWARE_ELAPSED_APPARENT are currently the only true to name register counters available from within a VMware virtual machine. Any Guest OS running on a VMware host must have the access enabled from within the VMware vSphere client managing the system for each virtual machine that wishes to use the VMware component; this exposes the counters to the virtual machine. All other counters you will see in the following list are software counters that are being exposed through the use of the VMware API [1]. + + + +Event Code | Symbol | Long Description | +-------------------------------------------------------------------------------- +0x44000000 | VMWARE_HOST_TSC | Physical host TSC | +-------------------------------------------------------------------------------- +0x44000001 | VMWARE_ELAPSED_TIME | Elapsed real time in ns. | +-------------------------------------------------------------------------------- +0x44000002 | VMWARE_ELAPSED_APPARENT | Elapsed apparent time in ns. | +-------------------------------------------------------------------------------- +0x44000003 | VMWARE_CPU_LIMIT | Retrieves the upper limit of processor use in | + | MHz available to the virtual machine. | +-------------------------------------------------------------------------------- +0x44000004 | VMWARE_CPU_RESERVATION | Retrieves the minimum processing power | + | in MHz reserved for the virtual machine. | +-------------------------------------------------------------------------------- +0x44000005 | VMWARE_CPU_SHARES | Retrieves the number of CPU shares allocated | + | to the virtual machine. 
| +-------------------------------------------------------------------------------- +0x44000006 | VMWARE_CPU_STOLEN | Retrieves the number of milliseconds that th | + | e virtual machine was in a ready state (able to transition to a r | + | un state), but was not scheduled to run. | +-------------------------------------------------------------------------------- +0x44000007 | VMWARE_CPU_USED | Retrieves the number of milliseconds during wh | + | ich the virtual machine has used the CPU. This value includes the | + | time used by the guest operating system and the time used by vir | + | tualization code for tasks for this virtual machine. You can comb | + | ine this value with the elapsed time (VMWARE_ELAPSED) to estimate | + | the effective virtual machine CPU speed. This value is a subset | + | of elapsedMs. | +-------------------------------------------------------------------------------- +0x44000008 | VMWARE_ELAPSED | Retrieves the number of milliseconds that have | + | passed in the virtual machine since it last started running on th | + | e server. The count of elapsed time restarts each time the virtua | + | l machine is powered on, resumed, or migrated using VMotion. This | + | value counts milliseconds, regardless of whether the virtual mac | + | hine is using processing power during that time. You can combine | + | this value with the CPU time used by the virtual machine (VMWARE_ | + | CPU_USED) to estimate the effective virtual machine CPU speed. c | + | puUsedMS is a subset of this value. | +-------------------------------------------------------------------------------- +0x44000009 | VMWARE_MEM_ACTIVE | Retrieves the amount of memory the virtual m | + | achine is actively using in MB—its estimated working set size. 
| +-------------------------------------------------------------------------------- +0x4400000a | VMWARE_MEM_BALLOONED | Retrieves the amount of memory that has b | + | een reclaimed from this virtual machine by the vSphere memory bal | + | loon driver (also referred to as the “vmmemctl” driver) in MB. | +-------------------------------------------------------------------------------- +0x4400000b | VMWARE_MEM_LIMIT | Retrieves the upper limit of memory that is a | + | vailable to the virtual machine in MB. | +-------------------------------------------------------------------------------- +0x4400000c | VMWARE_MEM_MAPPED | Retrieves the amount of memory that is alloc | + | ated to the virtual machine in MB. Memory that is ballooned, swap | + | ped, or has never been accessed is excluded. | +-------------------------------------------------------------------------------- +0x4400000d | VMWARE_MEM_OVERHEAD | Retrieves the amount of “overhead” mem | + | ory associated with this virtual machine that is currently consum | + | ed on the host system in MB. Overhead memory is additional memory | + | that is reserved for data structures required by the virtualizat | + | ion layer. | +-------------------------------------------------------------------------------- +0x4400000e | VMWARE_MEM_RESERVATION | Retrieves the minimum amount of memory | + | that is reserved for the virtual machine in MB. | +-------------------------------------------------------------------------------- +0x4400000f | VMWARE_MEM_SHARED | Retrieves the amount of physical memory asso | + | ciated with this virtual machine that is copy-on-write (COW) | + | shared on the host in MB. | +-------------------------------------------------------------------------------- +0x44000010 | VMWARE_MEM_SHARES | Retrieves the number of memory shares alloca | + | ted to the virtual machine. 
| +-------------------------------------------------------------------------------- +0x44000011 | VMWARE_MEM_SWAPPED | Retrieves the amount of memory that has bee | + | n reclaimed from this virtual machine by transparently swapping g | + | uest memory to disk in MB. | +-------------------------------------------------------------------------------- +0x44000012 | VMWARE_MEM_TARGET_SIZE | Retrieves the size of the target memory | + | allocation for this virtual machine in MB. | +-------------------------------------------------------------------------------- +0x44000013 | VMWARE_MEM_USED | Retrieves the estimated amount of physical hos | + | t memory currently consumed for this virtual machine’s physical | + | memory. | +-------------------------------------------------------------------------------- +0x44000014 | VMWARE_HOST_CPU | Retrieves the speed of the ESX system’s phys | + | ical CPU in MHz. | + + + + +Timekeeping Counters: + + + The pseudo-performance counter feature uses a trap to catch a privileged machine instruction issued by software running in the virtual machine and therefore has more overhead than reading a performance counter or the TSC on physical hardware. The feature will only trap correctly if the configuration setting is applied as described in Installation. + + +The timekeeping counters behave as follows: + + +VMWARE_HOST_TSC - Provides access to the Time Stamp Counter on the host machine which counts ticks since reset. +VMWARE_ELAPSED_TIME - Provides access to the elapsed time in ns since reset as measured on the host machine. +VMWARE_ELAPSED_APPARENT - Apparent time is the time visible to the Guest OS using virtualized timer devices. This timer may fall behind real time and catch up as needed. + + +Usage: + + + After installation of the VMware Component, you may use the papi_command_line interface, found in PAPI_ROOT/papi/src/utils, to read out an instantaneous value from a certain counter from the above list. 
If you would like to read out a counter, type: ./papi_command_line COUNTER_SYMBOL_NAME. + + +e.g.: To read out the value of the VMWARE_MEM_USED counter + + +user@vm1:~/papi/src/utils$ ./papi_command_line VMWARE_MEM_USED +Successfully added: VMWARE_MEM_USED + + +VMWARE_MEM_USED : 13074 + + +---------------------------------- +Verification: Checks for valid event name. +This utility lets you add events from the command line interface to see if they work. +command_line.c PASSED + + +For further information on PAPI and its API, and on how to incorporate these counters into a program of your own, please see the PAPI Documentation [2]. + + +________________ + + +References: + + +[1] VMware: http://www.vmware.com/support/developer/guest-sdk. Last accessed November 28, 2011 + [2] PAPI : http://icl.cs.utk.edu/projects/papi/wiki/Main_Page. Last accessed November 28, 2011 diff --git a/src/components/vmware/configure b/src/components/vmware/configure new file mode 100755 index 0000000..525c837 --- /dev/null +++ b/src/components/vmware/configure @@ -0,0 +1,4308 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.67. +# +# +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software +# Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. 
Look in the path if we contain no directory separator. +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. +fi +test x\$exitcode = x0 || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. 
+ as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + # We cannot yet assume a decent shell, so we have to provide a + # neutralization value for shells without unset; and this also + # works around shells that cannot unset nonexistent variables. + BASH_ENV=/dev/null + ENV=/dev/null + (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. 
+as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... 
+# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || + as_ln_s='cp -p' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -p' + fi +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in #( + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME= +PACKAGE_TARNAME= +PACKAGE_VERSION= +PACKAGE_STRING= +PACKAGE_BUGREPORT= +PACKAGE_URL= + +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='LTLIBOBJS +LIBOBJS +VMGUESTLIB +VMWARE_INCDIR +EGREP +GREP +CPP +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +with_vmware_incdir +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. 
+# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + 
-sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used" >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. 
+ ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures this package to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. 
+ +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + + cat <<\_ACEOF + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) +--with-vmware_incdir= Specify path to VMware GuestSDK includes + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to the package provider. +_ACEOF +ac_status=$? 
+fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +configure +generated by GNU Autoconf 2.67 + +Copyright (C) 2010 Free Software Foundation, Inc. 
+This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval "test \"\${$3+set}\"" = set; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval "test \"\${$3+set}\"" = set; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... 
" >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... 
" >&6; } +if eval "test \"\${$3+set}\"" = set; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. 
+ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval "test \"\${$3+set}\"" = set; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} + +} # ac_fn_c_check_header_compile +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by $as_me, which was +generated by GNU Autoconf 2.67. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5 ; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_CC+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5 ; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5 ; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5 ; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5 ; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if test "${ac_cv_objext+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5 ; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if test "${ac_cv_c_compiler_gnu+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... " >&6; } +if test "${ac_cv_prog_cc_g+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if test "${ac_cv_prog_cc_c89+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 
1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. 
+ # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5 ; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if test "${ac_cv_path_GREP+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... 
" >&6; } +if test "${ac_cv_path_EGREP+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if test "${ac_cv_header_stdc+set}" = set; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + +# Check whether --with-vmware_incdir was given. 
+if test "${with_vmware_incdir+set}" = set; then : + withval=$with_vmware_incdir; VMWARE_INCDIR=$withval + CFLAGS="$CFLAGS -I$withval" + ac_fn_c_check_header_mongrel "$LINENO" "vmGuestLib.h" "ac_cv_header_vmGuestLib_h" "$ac_includes_default" +if test "x$ac_cv_header_vmGuestLib_h" = x""yes; then : + VMGUESTLIB=1 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: vmGuestLib.h not found" >&5 +$as_echo "$as_me: WARNING: vmGuestLib.h not found" >&2;} +fi + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Component requires path to vmware includes" >&5 +$as_echo "$as_me: WARNING: Component requires path to vmware includes" >&2;} +fi + + + + +ac_config_files="$ac_config_files Makefile.vmware" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. 
+( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + test "x$cache_file" != "x/dev/null" && + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + cat confcache >$cache_file + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Transform confdefs.h into DEFS. +# Protect against shell expansion while executing Makefile rules. +# Protect against Makefile macro expansion. 
+# +# If the first sed substitution is executed (which looks for macros that +# take arguments), then branch to the quote section. Otherwise, +# look for a macro that doesn't take arguments. +ac_script=' +:mline +/\\$/{ + N + s,\\\n,, + b mline +} +t clear +:clear +s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g +t quote +s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g +t quote +b any +:quote +s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g +s/\[/\\&/g +s/\]/\\&/g +s/\$/$$/g +H +:any +${ + g + s/^\n// + s/\n/ /g + p +} +' +DEFS=`sed -n "$ac_script" confdefs.h` + + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: ${CONFIG_STATUS=./config.status} +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. 
Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || + as_ln_s='cp -p' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -p' + fi +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +if test -x / >/dev/null 2>&1; then + as_test_x='test -x' +else + if ls -dL / >/dev/null 2>&1; then + as_ls_L_option=L + else + as_ls_L_option= + fi + as_test_x=' + eval sh -c '\'' + if test -d "$1"; then + test -d "$1/."; + else + case $1 in #( + -*)set "./$1";; + esac; + case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( + ???[sx]*):;;*)false;;esac;fi + '\'' sh + ' +fi +as_executable_p=$as_test_x + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. 
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by $as_me, which was +generated by GNU Autoconf 2.67. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + +Configuration files: +$config_files + +Report bugs to the package provider." 
+ +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.67, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2010 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h | --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." 
;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "Makefile.vmware") CONFIG_FILES="$CONFIG_FILES Makefile.vmware" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5 ;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= + trap 'exit_status=$? + { test -z "$tmp" || test ! 
-d "$tmp" || rm -fr "$tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + + +eval set X " :F $CONFIG_FILES " +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5 ;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5 ;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. 
+ case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. 
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. +ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? 
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$tmp/stdin" + case $ac_file in + -) cat "$tmp/out" && rm -f "$tmp/out";; + *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + + + + esac + +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. 
+# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/src/components/vmware/configure.in b/src/components/vmware/configure.in new file mode 100644 index 0000000..b3ccd11 --- /dev/null +++ b/src/components/vmware/configure.in @@ -0,0 +1,15 @@ +AC_INIT +AC_ARG_WITH(vmware_incdir, + [--with-vmware_incdir= Specify path to VMware GuestSDK includes], + [VMWARE_INCDIR=$withval + CFLAGS="$CFLAGS -I$withval" + AC_CHECK_HEADER([vmGuestLib.h], + [VMGUESTLIB=1], + [AC_MSG_WARN([vmGuestLib.h not found])], + )], + [AC_MSG_WARN([Component requires path to vmware includes])]) + +AC_SUBST(VMWARE_INCDIR) +AC_SUBST(VMGUESTLIB) +AC_CONFIG_FILES([Makefile.vmware]) +AC_OUTPUT diff --git a/src/components/vmware/tests/Makefile b/src/components/vmware/tests/Makefile new file mode 100644 index 0000000..9dbc67b --- /dev/null +++ b/src/components/vmware/tests/Makefile @@ -0,0 +1,20 @@ +NAME=vmware +include ../../Makefile_comp_tests + +%.o:%.c + $(CC) $(CFLAGS) 
$(INCLUDE) -c -o $@ $<
+
+TESTS = vmware_basic
+
+vmware_tests: $(TESTS)
+
+vmware_basic: vmware_basic.o $(UTILOBJS) $(PAPILIB)
+	$(CC) $(CFLAGS) $(INCLUDE) -o vmware_basic vmware_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS)
+
+clean:
+	rm -f $(TESTS) *.o
+
+
+
+
+
diff --git a/src/components/vmware/tests/vmware_basic.c b/src/components/vmware/tests/vmware_basic.c
new file mode 100644
index 0000000..560ff5b
--- /dev/null
+++ b/src/components/vmware/tests/vmware_basic.c
@@ -0,0 +1,150 @@
+/**
+ * @author  Vince Weaver
+ *
+ * test case for vmware component
+ *
+ *
+ * @brief
+ *   Tests basic vmware functionality
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#define NUM_EVENTS 1
+
+/**
+ * Walk every PAPI component, and for each one whose name contains
+ * "vmware" enumerate its native events.  Each event is added to a fresh
+ * event set, started, stopped after a short sleep, and its value printed
+ * (unless TESTS_QUIET is set).  Any PAPI call that fails is reported
+ * through test_fail(); the test is skipped when no event was exercised.
+ */
+int main (int argc, char **argv)
+{
+
+	int retval,cid,numcmp;
+	int EventSet = PAPI_NULL;
+	long long values[NUM_EVENTS];
+	int code;
+	char event_name[PAPI_MAX_STR_LEN];
+	int total_events=0;
+	int r;
+	const PAPI_component_info_t *cmpinfo = NULL;
+
+	/* Set TESTS_QUIET variable */
+	tests_quiet( argc, argv );
+
+	/* PAPI Initialization */
+	retval = PAPI_library_init( PAPI_VER_CURRENT );
+	if ( retval != PAPI_VER_CURRENT ) {
+		test_fail(__FILE__, __LINE__,"PAPI_library_init failed\n",retval);
+	}
+
+	if (!TESTS_QUIET) {
+		printf("Trying all vmware events\n");
+	}
+
+	/* Find our Component */
+
+	numcmp = PAPI_num_components();
+
+	for(cid=0; cid<numcmp; cid++) {
+
+		if ( (cmpinfo = PAPI_get_component_info(cid)) == NULL) {
+			test_fail(__FILE__, __LINE__,"PAPI_get_component_info failed\n", 0);
+		}
+
+		/* Only the vmware component is of interest; skip all others */
+		if (strstr(cmpinfo->name,"vmware")) {
+			if (!TESTS_QUIET) printf("\tFound vmware component %d - %s\n", cid, cmpinfo->name);
+		}
+		else {
+			continue;
+		}
+
+		PAPI_event_info_t info;
+
+		/* Try all events one by one */
+
+		code = PAPI_NATIVE_MASK;
+
+		r = PAPI_enum_cmp_event( &code, PAPI_ENUM_FIRST, cid );
+
+		while ( r == PAPI_OK ) {
+
+			retval=PAPI_get_event_info(code,&info);
+			if (retval!=PAPI_OK) {
+				printf("Error getting event info\n");
+				test_fail( __FILE__, __LINE__,
+					"PAPI_get_event_info", retval );
+			}
+
+			retval = PAPI_event_code_to_name( code, event_name );
+			if ( retval != PAPI_OK ) {
+				printf("Error translating %#x\n",code);
+				test_fail( __FILE__, __LINE__,
+					"PAPI_event_code_to_name", retval );
+			}
+
+			if (!TESTS_QUIET) printf(" %s ",event_name);
+
+			EventSet = PAPI_NULL;
+
+			retval = PAPI_create_eventset( &EventSet );
+			if (retval != PAPI_OK) {
+				test_fail(__FILE__, __LINE__,
+					"PAPI_create_eventset()",retval);
+			}
+
+			retval = PAPI_add_event( EventSet, code );
+			if (retval != PAPI_OK) {
+				test_fail(__FILE__, __LINE__,
+					"PAPI_add_event()",retval);
+			}
+
+			/* start */
+			retval = PAPI_start( EventSet);
+			if (retval != PAPI_OK) {
+				test_fail(__FILE__, __LINE__, "PAPI_start()",retval);
+			}
+
+			/* do something */
+			usleep(100);
+
+			/* stop */
+			retval = PAPI_stop( EventSet, values);
+			if (retval != PAPI_OK) {
+				/* name the call that actually failed */
+				test_fail(__FILE__, __LINE__, "PAPI_stop()",retval);
+			}
+
+			if (!TESTS_QUIET) printf(" value: %lld %s\n",values[0],
+					info.units);
+
+			retval = PAPI_cleanup_eventset( EventSet );
+			if (retval != PAPI_OK) {
+				test_fail(__FILE__, __LINE__,
+					"PAPI_cleanup_eventset()",retval);
+			}
+
+			retval = PAPI_destroy_eventset( &EventSet );
+			if (retval != PAPI_OK) {
+				test_fail(__FILE__, __LINE__,
+					"PAPI_destroy_eventset()",retval);
+			}
+
+			total_events++;
+
+			r = PAPI_enum_cmp_event( &code, PAPI_ENUM_EVENTS, cid );
+		}
+	}
+
+	if (total_events==0) {
+		test_skip(__FILE__,__LINE__,"No vmware events found",0);
+	}
+
+	if (!TESTS_QUIET) {
+		printf("\n");
+	}
+
+	test_pass( __FILE__ );
+
+	return 0;
+}
+
diff --git a/src/components/vmware/vmware.c b/src/components/vmware/vmware.c
new file mode 100644
index 0000000..b858601
--- /dev/null
+++ b/src/components/vmware/vmware.c
@@ -0,0 +1,1285 @@
+/****************************/
+/* THIS IS OPEN SOURCE CODE */
+/****************************/
+
+/**
+ * @file vmware.c
+ * @author Matt Johnson
+ * mrj@eecs.utk.edu
+ * @author John Nelson
+ * jnelso37@eecs.utk.edu
+ * @author Vince Weaver
+ * vweaver1@eecs.utk.edu
+ *
+ * @ingroup papi_components
+ *
+ * VMware component
+ *
+ * @brief
+ * This is the VMware component for PAPI-V.
It will allow user access to + * hardware information available from a VMware virtual machine. + */ + +#include +#include +#include +#include + +#include +#include + +/* Headers required by PAPI */ +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" + +#define VMWARE_MAX_COUNTERS 256 + +#define VMWARE_CPU_LIMIT_MHZ 0 +#define VMWARE_CPU_RESERVATION_MHZ 1 +#define VMWARE_CPU_SHARES 2 +#define VMWARE_CPU_STOLEN_MS 3 +#define VMWARE_CPU_USED_MS 4 +#define VMWARE_ELAPSED_MS 5 + +#define VMWARE_MEM_ACTIVE_MB 6 +#define VMWARE_MEM_BALLOONED_MB 7 +#define VMWARE_MEM_LIMIT_MB 8 +#define VMWARE_MEM_MAPPED_MB 9 +#define VMWARE_MEM_OVERHEAD_MB 10 +#define VMWARE_MEM_RESERVATION_MB 11 +#define VMWARE_MEM_SHARED_MB 12 +#define VMWARE_MEM_SHARES 13 +#define VMWARE_MEM_SWAPPED_MB 14 +#define VMWARE_MEM_TARGET_SIZE_MB 15 +#define VMWARE_MEM_USED_MB 16 + +#define VMWARE_HOST_CPU_MHZ 17 + +/* The following 3 require VMWARE_PSEUDO_PERFORMANCE env_var to be set. 
*/
+
+#define VMWARE_HOST_TSC 18
+#define VMWARE_ELAPSED_TIME 19
+#define VMWARE_ELAPSED_APPARENT 20
+
+/* Begin PAPI definitions */
+papi_vector_t _vmware_vector;
+
+
+void (*_dl_non_dynamic_init)(void) __attribute__((weak));
+
+/** Structure that stores private information for each event */
+struct _vmware_register {
+	unsigned int selector;
+	/**< Signifies which counter slot is being used */
+	/**< Indexed from 1 as 0 has a special meaning */
+};
+
+/** This structure is used to build the table of events */
+struct _vmware_native_event_entry {
+	char name[PAPI_MAX_STR_LEN]; /**< Name of the counter */
+	char description[PAPI_HUGE_STR_LEN]; /**< Description of counter */
+	char units[PAPI_MIN_STR_LEN]; /**< Unit string for the value */
+	int which_counter; /**< One of the VMWARE_* counter indices */
+	int report_difference;
+};
+
+struct _vmware_reg_alloc {
+	struct _vmware_register ra_bits;
+};
+
+
+/**
+ * Execute the x86 RDPMC instruction for counter index c and return the
+ * 64-bit result assembled from EDX (high half) and EAX (low half).
+ *
+ * Declared static inline: a plain "inline" definition does not provide
+ * an external definition in C99/C11, so a call that the compiler chooses
+ * not to inline (e.g. at -O0) would otherwise fail to link.
+ *
+ * NOTE(review): RDPMC presumably faults when the instruction is not
+ * permitted for the caller -- confirm callers gate this appropriately.
+ */
+static inline uint64_t rdpmc(int c)
+{
+	uint32_t low, high;
+	__asm__ __volatile__("rdpmc" : "=a" (low), "=d" (high) : "c" (c));
+	return (uint64_t)high << 32 | (uint64_t)low;
+}
+
+
+
+#ifdef VMGUESTLIB
+/* Headers required by VMware */
+#include "vmGuestLib.h"
+
+/* Functions to dynamically load from the GuestLib library.
*/ +char const * (*GuestLib_GetErrorText)(VMGuestLibError); +VMGuestLibError (*GuestLib_OpenHandle)(VMGuestLibHandle*); +VMGuestLibError (*GuestLib_CloseHandle)(VMGuestLibHandle); +VMGuestLibError (*GuestLib_UpdateInfo)(VMGuestLibHandle handle); +VMGuestLibError (*GuestLib_GetSessionId)(VMGuestLibHandle handle, VMSessionId *id); +VMGuestLibError (*GuestLib_GetCpuReservationMHz)(VMGuestLibHandle handle, uint32 *cpuReservationMHz); +VMGuestLibError (*GuestLib_GetCpuLimitMHz)(VMGuestLibHandle handle, uint32 *cpuLimitMHz); +VMGuestLibError (*GuestLib_GetCpuShares)(VMGuestLibHandle handle, uint32 *cpuShares); +VMGuestLibError (*GuestLib_GetCpuUsedMs)(VMGuestLibHandle handle, uint64 *cpuUsedMs); +VMGuestLibError (*GuestLib_GetHostProcessorSpeed)(VMGuestLibHandle handle, uint32 *mhz); +VMGuestLibError (*GuestLib_GetMemReservationMB)(VMGuestLibHandle handle, uint32 *memReservationMB); +VMGuestLibError (*GuestLib_GetMemLimitMB)(VMGuestLibHandle handle, uint32 *memLimitMB); +VMGuestLibError (*GuestLib_GetMemShares)(VMGuestLibHandle handle, uint32 *memShares); +VMGuestLibError (*GuestLib_GetMemMappedMB)(VMGuestLibHandle handle, uint32 *memMappedMB); +VMGuestLibError (*GuestLib_GetMemActiveMB)(VMGuestLibHandle handle, uint32 *memActiveMB); +VMGuestLibError (*GuestLib_GetMemOverheadMB)(VMGuestLibHandle handle, uint32 *memOverheadMB); +VMGuestLibError (*GuestLib_GetMemBalloonedMB)(VMGuestLibHandle handle, uint32 *memBalloonedMB); +VMGuestLibError (*GuestLib_GetMemSwappedMB)(VMGuestLibHandle handle, uint32 *memSwappedMB); +VMGuestLibError (*GuestLib_GetMemSharedMB)(VMGuestLibHandle handle, uint32 *memSharedMB); +VMGuestLibError (*GuestLib_GetMemSharedSavedMB)(VMGuestLibHandle handle, uint32 *memSharedSavedMB); +VMGuestLibError (*GuestLib_GetMemUsedMB)(VMGuestLibHandle handle, uint32 *memUsedMB); +VMGuestLibError (*GuestLib_GetElapsedMs)(VMGuestLibHandle handle, uint64 *elapsedMs); +VMGuestLibError (*GuestLib_GetResourcePoolPath)(VMGuestLibHandle handle, size_t *bufferSize, char 
*pathBuffer); +VMGuestLibError (*GuestLib_GetCpuStolenMs)(VMGuestLibHandle handle, uint64 *cpuStolenMs); +VMGuestLibError (*GuestLib_GetMemTargetSizeMB)(VMGuestLibHandle handle, uint64 *memTargetSizeMB); +VMGuestLibError (*GuestLib_GetHostNumCpuCores)(VMGuestLibHandle handle, uint32 *hostNumCpuCores); +VMGuestLibError (*GuestLib_GetHostCpuUsedMs)(VMGuestLibHandle handle, uint64 *hostCpuUsedMs); +VMGuestLibError (*GuestLib_GetHostMemSwappedMB)(VMGuestLibHandle handle, uint64 *hostMemSwappedMB); +VMGuestLibError (*GuestLib_GetHostMemSharedMB)(VMGuestLibHandle handle, uint64 *hostMemSharedMB); +VMGuestLibError (*GuestLib_GetHostMemUsedMB)(VMGuestLibHandle handle, uint64 *hostMemUsedMB); +VMGuestLibError (*GuestLib_GetHostMemPhysMB)(VMGuestLibHandle handle, uint64 *hostMemPhysMB); +VMGuestLibError (*GuestLib_GetHostMemPhysFreeMB)(VMGuestLibHandle handle, uint64 *hostMemPhysFreeMB); +VMGuestLibError (*GuestLib_GetHostMemKernOvhdMB)(VMGuestLibHandle handle, uint64 *hostMemKernOvhdMB); +VMGuestLibError (*GuestLib_GetHostMemMappedMB)(VMGuestLibHandle handle, uint64 *hostMemMappedMB); +VMGuestLibError (*GuestLib_GetHostMemUnmappedMB)(VMGuestLibHandle handle, uint64 *hostMemUnmappedMB); + + +static void *dlHandle = NULL; + + +/* + * Macro to load a single GuestLib function from the shared library. 
+ */
+
+/*
+ * Expands inside a function that returns a PAPI status code and has a
+ * local "char const *dlErrStr".  On lookup failure the enclosing
+ * function returns PAPI_ECMP (returning FALSE here would be
+ * indistinguishable from PAPI_OK).  dlerror() is called first so that a
+ * stale error from an earlier dl* call is not mistaken for a failure of
+ * this dlsym().
+ */
+#define LOAD_ONE_FUNC(funcname) \
+do { \
+	(void) dlerror(); \
+	funcname = dlsym(dlHandle, "VM" #funcname); \
+	if ((dlErrStr = dlerror()) != NULL) { \
+		fprintf(stderr, "Failed to load \'%s\': \'%s\'\n", \
+			#funcname, dlErrStr); \
+		return PAPI_ECMP; \
+	} \
+} while (0)
+
+#endif
+
+/** Holds control flags, usually out-of band configuration of the hardware */
+struct _vmware_control_state {
+	long long value[VMWARE_MAX_COUNTERS];
+	int which_counter[VMWARE_MAX_COUNTERS];
+	int num_events;
+};
+
+/** Holds per-thread information */
+struct _vmware_context {
+	long long values[VMWARE_MAX_COUNTERS];
+	long long start_values[VMWARE_MAX_COUNTERS];
+#ifdef VMGUESTLIB
+	VMGuestLibHandle glHandle;
+#endif
+};
+
+
+
+
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * LoadFunctions --
+ *
+ * Load the functions from the shared library.
+ *
+ * The library is looked up first by bare name (libvmGuestLib.so), then
+ * under VMWARE_INCDIR/lib/lib64 and VMWARE_INCDIR/lib/lib32.
+ *
+ * Results:
+ * PAPI_OK on success (always, when built without VMGUESTLIB)
+ * PAPI_ENOSUPP if libc appears to be statically linked
+ * PAPI_ECMP if the library or any of its symbols cannot be loaded
+ *
+ * Side effects:
+ * Sets dlHandle and the GuestLib_* function pointers; on the static
+ * libc path writes _vmware_vector.cmp_info.disabled_reason; prints
+ * diagnostics to stderr on failure.
+ *
+ * Credit: VMware
+ *-----------------------------------------------------------------------------
+ */
+
+static int
+LoadFunctions(void)
+{
+
+#ifdef VMGUESTLIB
+	/*
+	 * First, try to load the shared library.
+	 */
+
+	/* Attempt to guess if we were statically linked to libc, if so bail */
+	if ( _dl_non_dynamic_init != NULL ) {
+		strncpy(_vmware_vector.cmp_info.disabled_reason, "The VMware component does not support statically linking of libc.", PAPI_MAX_STR_LEN);
+		return PAPI_ENOSUPP;
+	}
+
+	char const *dlErrStr;
+	char filename[BUFSIZ];
+
+	/* snprintf bounds the write: VMWARE_INCDIR has arbitrary length */
+	snprintf(filename, sizeof filename, "%s","libvmGuestLib.so");
+	dlHandle = dlopen(filename, RTLD_NOW);
+	if (!dlHandle) {
+		dlErrStr = dlerror();
+		fprintf(stderr, "dlopen of %s failed: \'%s\'\n", filename,
+			dlErrStr);
+
+		snprintf(filename, sizeof filename,
+			 "%s/lib/lib64/libvmGuestLib.so",VMWARE_INCDIR);
+		dlHandle = dlopen(filename, RTLD_NOW);
+		if (!dlHandle) {
+			dlErrStr = dlerror();
+			fprintf(stderr, "dlopen of %s failed: \'%s\'\n", filename,
+				dlErrStr);
+
+			snprintf(filename, sizeof filename,
+				 "%s/lib/lib32/libvmGuestLib.so",VMWARE_INCDIR);
+			dlHandle = dlopen(filename, RTLD_NOW);
+			if (!dlHandle) {
+				dlErrStr = dlerror();
+				fprintf(stderr, "dlopen of %s failed: \'%s\'\n", filename,
+					dlErrStr);
+				return PAPI_ECMP;
+			}
+		}
+	}
+
+	/* Load all the individual library functions.
+	 */
+	LOAD_ONE_FUNC(GuestLib_GetErrorText);
+	LOAD_ONE_FUNC(GuestLib_OpenHandle);
+	LOAD_ONE_FUNC(GuestLib_CloseHandle);
+	LOAD_ONE_FUNC(GuestLib_UpdateInfo);
+	LOAD_ONE_FUNC(GuestLib_GetSessionId);
+	LOAD_ONE_FUNC(GuestLib_GetCpuReservationMHz);
+	LOAD_ONE_FUNC(GuestLib_GetCpuLimitMHz);
+	LOAD_ONE_FUNC(GuestLib_GetCpuShares);
+	LOAD_ONE_FUNC(GuestLib_GetCpuUsedMs);
+	LOAD_ONE_FUNC(GuestLib_GetHostProcessorSpeed);
+	LOAD_ONE_FUNC(GuestLib_GetMemReservationMB);
+	LOAD_ONE_FUNC(GuestLib_GetMemLimitMB);
+	LOAD_ONE_FUNC(GuestLib_GetMemShares);
+	LOAD_ONE_FUNC(GuestLib_GetMemMappedMB);
+	LOAD_ONE_FUNC(GuestLib_GetMemActiveMB);
+	LOAD_ONE_FUNC(GuestLib_GetMemOverheadMB);
+	LOAD_ONE_FUNC(GuestLib_GetMemBalloonedMB);
+	LOAD_ONE_FUNC(GuestLib_GetMemSwappedMB);
+	LOAD_ONE_FUNC(GuestLib_GetMemSharedMB);
+	LOAD_ONE_FUNC(GuestLib_GetMemSharedSavedMB);
+	LOAD_ONE_FUNC(GuestLib_GetMemUsedMB);
+	LOAD_ONE_FUNC(GuestLib_GetElapsedMs);
+	LOAD_ONE_FUNC(GuestLib_GetResourcePoolPath);
+	LOAD_ONE_FUNC(GuestLib_GetCpuStolenMs);
+	LOAD_ONE_FUNC(GuestLib_GetMemTargetSizeMB);
+	LOAD_ONE_FUNC(GuestLib_GetHostNumCpuCores);
+	LOAD_ONE_FUNC(GuestLib_GetHostCpuUsedMs);
+	LOAD_ONE_FUNC(GuestLib_GetHostMemSwappedMB);
+	LOAD_ONE_FUNC(GuestLib_GetHostMemSharedMB);
+	LOAD_ONE_FUNC(GuestLib_GetHostMemUsedMB);
+	LOAD_ONE_FUNC(GuestLib_GetHostMemPhysMB);
+	LOAD_ONE_FUNC(GuestLib_GetHostMemPhysFreeMB);
+	LOAD_ONE_FUNC(GuestLib_GetHostMemKernOvhdMB);
+	LOAD_ONE_FUNC(GuestLib_GetHostMemMappedMB);
+	LOAD_ONE_FUNC(GuestLib_GetHostMemUnmappedMB);
+#endif
+	return PAPI_OK;
+}
+
+
+
+/** This table contains the native events */
+static struct _vmware_native_event_entry *_vmware_native_table;
+/** number of events in the table*/
+static int num_events = 0;
+static int use_pseudo=0;
+static int use_guestlib=0;
+
+/************************************************************************/
+/* Below is the actual "hardware implementation" of our VMWARE counters */
+/************************************************************************/
+
+/**
+ * Refresh context->values[] from the enabled back ends.
+ *
+ * When use_pseudo is set, the three pseudo-performance slots are read
+ * with rdpmc().  When built with VMGUESTLIB and use_guestlib is set, the
+ * GuestLib info is updated and every GuestLib-backed slot is re-read.
+ * A few getters are tolerated as "not supported" (the slot is set to 0
+ * and a notice is printed); any other GuestLib failure aborts the read.
+ *
+ * @param context   per-thread state whose values[] (and, optionally,
+ *                  start_values[]) are overwritten
+ * @param starting  non-zero to also snapshot values[] into
+ *                  start_values[]
+ * @return PAPI_OK on success, PAPI_ECMP on a GuestLib failure
+ */
+static long long
+_vmware_hardware_read( struct _vmware_context *context, int starting)
+{
+
+	int i;
+
+	if (use_pseudo) {
+		context->values[VMWARE_HOST_TSC]=rdpmc(0x10000);
+		context->values[VMWARE_ELAPSED_TIME]=rdpmc(0x10001);
+		context->values[VMWARE_ELAPSED_APPARENT]=rdpmc(0x10002);
+	}
+
+
+#ifdef VMGUESTLIB
+	static VMSessionId sessionId = 0;
+	VMSessionId tmpSession;
+	/* zero-initialised: the temporaries are stored before the getter's
+	   status is examined, so they must never be indeterminate */
+	uint32_t temp32 = 0;
+	uint64_t temp64 = 0;
+	VMGuestLibError glError;
+
+	if (use_guestlib) {
+
+		glError = GuestLib_UpdateInfo(context->glHandle);
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr,"UpdateInfo failed: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		/* Retrieve and check the session ID */
+		glError = GuestLib_GetSessionId(context->glHandle, &tmpSession);
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get session ID: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		if (tmpSession == 0) {
+			fprintf(stderr, "Error: Got zero sessionId from GuestLib\n");
+			return PAPI_ECMP;
+		}
+
+		/* Remember the latest session id (first one seen, or a changed one) */
+		if (tmpSession != sessionId) {
+			sessionId = tmpSession;
+		}
+
+		glError = GuestLib_GetCpuLimitMHz(context->glHandle,&temp32);
+		context->values[VMWARE_CPU_LIMIT_MHZ]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr,"Failed to get CPU limit: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetCpuReservationMHz(context->glHandle,&temp32);
+		context->values[VMWARE_CPU_RESERVATION_MHZ]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr,"Failed to get CPU reservation: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetCpuShares(context->glHandle,&temp32);
+		context->values[VMWARE_CPU_SHARES]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr,"Failed to get cpu shares: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetCpuStolenMs(context->glHandle,&temp64);
+		context->values[VMWARE_CPU_STOLEN_MS]=temp64;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			if (glError == VMGUESTLIB_ERROR_UNSUPPORTED_VERSION) {
+				context->values[VMWARE_CPU_STOLEN_MS]=0;
+				fprintf(stderr, "Skipping CPU stolen, not supported...\n");
+			} else {
+				fprintf(stderr, "Failed to get CPU stolen: %s\n",
+					GuestLib_GetErrorText(glError));
+				return PAPI_ECMP;
+			}
+		}
+
+		glError = GuestLib_GetCpuUsedMs(context->glHandle,&temp64);
+		context->values[VMWARE_CPU_USED_MS]=temp64;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get used ms: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetElapsedMs(context->glHandle, &temp64);
+		context->values[VMWARE_ELAPSED_MS]=temp64;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get elapsed ms: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetMemActiveMB(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_ACTIVE_MB]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get active mem: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetMemBalloonedMB(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_BALLOONED_MB]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get ballooned mem: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetMemLimitMB(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_LIMIT_MB]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr,"Failed to get mem limit: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetMemMappedMB(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_MAPPED_MB]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get mapped mem: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetMemOverheadMB(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_OVERHEAD_MB]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get overhead mem: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetMemReservationMB(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_RESERVATION_MB]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get mem reservation: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetMemSharedMB(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_SHARED_MB]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get shared mem: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetMemShares(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_SHARES]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			if (glError == VMGUESTLIB_ERROR_NOT_AVAILABLE) {
+				context->values[VMWARE_MEM_SHARES]=0;
+				fprintf(stderr, "Skipping mem shares, not supported...\n");
+			} else {
+				fprintf(stderr, "Failed to get mem shares: %s\n",
+					GuestLib_GetErrorText(glError));
+				return PAPI_ECMP;
+			}
+		}
+
+		glError = GuestLib_GetMemSwappedMB(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_SWAPPED_MB]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get swapped mem: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetMemTargetSizeMB(context->glHandle, &temp64);
+		context->values[VMWARE_MEM_TARGET_SIZE_MB]=temp64;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			if (glError == VMGUESTLIB_ERROR_UNSUPPORTED_VERSION) {
+				context->values[VMWARE_MEM_TARGET_SIZE_MB]=0;
+				fprintf(stderr, "Skipping target mem size, not supported...\n");
+			} else {
+				fprintf(stderr, "Failed to get target mem size: %s\n",
+					GuestLib_GetErrorText(glError));
+				return PAPI_ECMP;
+			}
+		}
+
+		glError = GuestLib_GetMemUsedMB(context->glHandle, &temp32);
+		context->values[VMWARE_MEM_USED_MB]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get used mem: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+
+		glError = GuestLib_GetHostProcessorSpeed(context->glHandle, &temp32);
+		context->values[VMWARE_HOST_CPU_MHZ]=temp32;
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr, "Failed to get host proc speed: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+	}
+
+#endif
+
+	if (starting) {
+
+		/* snapshot the freshly read values as the start-of-run baseline */
+		for(i=0;i<VMWARE_MAX_COUNTERS;i++) {
+			context->start_values[i]=context->values[i];
+		}
+
+	}
+
+	return PAPI_OK;
+}
+
+/********************************************************************/
+/* Below are the functions required by the PAPI component interface */
+/********************************************************************/
+
+/**
+ * This is called whenever a thread is initialized.
+ *
+ * When built with VMGUESTLIB and use_guestlib is set, opens a GuestLib
+ * handle and stores it in the thread's context.
+ *
+ * @param ctx  per-thread context, treated as a struct _vmware_context
+ * @return PAPI_OK, or PAPI_ECMP if the GuestLib handle cannot be opened
+ */
+int
+_vmware_init_thread( hwd_context_t *ctx )
+{
+	(void) ctx;
+
+
+#ifdef VMGUESTLIB
+
+	struct _vmware_context *context;
+	VMGuestLibError glError;
+
+	context=(struct _vmware_context *)ctx;
+
+	if (use_guestlib) {
+		glError = GuestLib_OpenHandle(&(context->glHandle));
+		if (glError != VMGUESTLIB_ERROR_SUCCESS) {
+			fprintf(stderr,"OpenHandle failed: %s\n",
+				GuestLib_GetErrorText(glError));
+			return PAPI_ECMP;
+		}
+	}
+
+#endif
+
+	return PAPI_OK;
+}
+
+
+/** Initialize hardware counters, setup the function vector table
+ * and get hardware information, this routine is called when the
+ * PAPI process is initialized (IE PAPI_library_init)
+ */
+int
+_vmware_init_component( int cidx )
+{
+
+	(void)
cidx; + + int result; + + SUBDBG( "_vmware_init_component..." ); + + /* Initialize and try to load the VMware library */ + /* Try to load the library. */ + result=LoadFunctions(); + + if (result!=PAPI_OK) { + strncpy(_vmware_vector.cmp_info.disabled_reason, + "GuestLibTest: Failed to load shared library", + PAPI_MAX_STR_LEN); + return PAPI_ECMP; + } + + /* we know in advance how many events we want */ + /* for actual hardware this might have to be determined dynamically */ + + /* Allocate memory for the our event table */ + _vmware_native_table = ( struct _vmware_native_event_entry * ) + calloc( VMWARE_MAX_COUNTERS, sizeof ( struct _vmware_native_event_entry )); + if ( _vmware_native_table == NULL ) { + return PAPI_ENOMEM; + } + + +#ifdef VMGUESTLIB + + /* Detect if GuestLib works */ + { + + VMGuestLibError glError; + VMGuestLibHandle glHandle; + + use_guestlib=0; + + /* try to open */ + glError = GuestLib_OpenHandle(&glHandle); + if (glError != VMGUESTLIB_ERROR_SUCCESS) { + fprintf(stderr,"OpenHandle failed: %s\n", + GuestLib_GetErrorText(glError)); + } + else { + /* open worked, try to update */ + glError = GuestLib_UpdateInfo(glHandle); + if (glError != VMGUESTLIB_ERROR_SUCCESS) { + fprintf(stderr,"UpdateInfo failed: %s\n", + GuestLib_GetErrorText(glError)); + } + else { + /* update worked, things work! 
*/ + use_guestlib=1; + } + /* shut things down */ + glError = GuestLib_CloseHandle(glHandle); + } + + } + + + + if (use_guestlib) { + + /* fill in the event table parameters */ + strcpy( _vmware_native_table[num_events].name, + "CPU_LIMIT" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the upper limit of processor use in MHz " + "available to the virtual machine.", + PAPI_HUGE_STR_LEN); + strcpy( _vmware_native_table[num_events].units,"MHz"); + _vmware_native_table[num_events].which_counter= + VMWARE_CPU_LIMIT_MHZ; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "CPU_RESERVATION" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the minimum processing power in MHz " + "reserved for the virtual machine.", + PAPI_HUGE_STR_LEN); + strcpy( _vmware_native_table[num_events].units,"MHz"); + _vmware_native_table[num_events].which_counter= + VMWARE_CPU_RESERVATION_MHZ; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "CPU_SHARES" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the number of CPU shares allocated " + "to the virtual machine.", + PAPI_HUGE_STR_LEN); + strcpy( _vmware_native_table[num_events].units,"shares"); + _vmware_native_table[num_events].which_counter= + VMWARE_CPU_SHARES; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "CPU_STOLEN" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the number of milliseconds that the " + "virtual machine was in a ready state (able to " + "transition to a run state), but was not scheduled to run.", + PAPI_HUGE_STR_LEN); + strcpy( _vmware_native_table[num_events].units,"ms"); + _vmware_native_table[num_events].which_counter= + VMWARE_CPU_STOLEN_MS; + _vmware_native_table[num_events].report_difference=0; 
+ num_events++; + + strcpy( _vmware_native_table[num_events].name, + "CPU_USED" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the number of milliseconds during which " + "the virtual machine has used the CPU. This value " + "includes the time used by the guest operating system " + "and the time used by virtualization code for tasks for " + "this virtual machine. You can combine this value with " + "the elapsed time (VMWARE_ELAPSED) to estimate the " + "effective virtual machine CPU speed. This value is a " + "subset of elapsedMs.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"ms"); + _vmware_native_table[num_events].which_counter= + VMWARE_CPU_USED_MS; + _vmware_native_table[num_events].report_difference=1; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "ELAPSED" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the number of milliseconds that have passed " + "in the virtual machine since it last started running on " + "the server. The count of elapsed time restarts each time " + "the virtual machine is powered on, resumed, or migrated " + "using VMotion. This value counts milliseconds, regardless " + "of whether the virtual machine is using processing power " + "during that time. You can combine this value with the CPU " + "time used by the virtual machine (VMWARE_CPU_USED) to " + "estimate the effective virtual machine xCPU speed. 
" + "cpuUsedMS is a subset of this value.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"ms"); + _vmware_native_table[num_events].which_counter= + VMWARE_ELAPSED_MS; + _vmware_native_table[num_events].report_difference=1; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_ACTIVE" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the amount of memory the virtual machine is " + "actively using in MB - Its estimated working set size.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_ACTIVE_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_BALLOONED" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the amount of memory that has been reclaimed " + "from this virtual machine by the vSphere memory balloon " + "driver (also referred to as the 'vmemctl' driver) in MB.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_BALLOONED_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_LIMIT" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the upper limit of memory that is available " + "to the virtual machine in MB.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_LIMIT_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_MAPPED" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the amount of memory that is allocated to " + "the virtual machine in MB. 
Memory that is ballooned, " + "swapped, or has never been accessed is excluded.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_MAPPED_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_OVERHEAD" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the amount of 'overhead' memory associated " + "with this virtual machine that is currently consumed " + "on the host system in MB. Overhead memory is additional " + "memory that is reserved for data structures required by " + "the virtualization layer.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_OVERHEAD_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_RESERVATION" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the minimum amount of memory that is " + "reserved for the virtual machine in MB.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_RESERVATION_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_SHARED" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the amount of physical memory associated " + "with this virtual machine that is copy-on-write (COW) " + "shared on the host in MB.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_SHARED_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_SHARES" ); + strncpy( 
_vmware_native_table[num_events].description, + "Retrieves the number of memory shares allocated to " + "the virtual machine.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"shares"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_SHARES; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_SWAPPED" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the amount of memory that has been reclaimed " + "from this virtual machine by transparently swapping " + "guest memory to disk in MB.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_SWAPPED_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_TARGET_SIZE" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the size of the target memory allocation " + "for this virtual machine in MB.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_TARGET_SIZE_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "MEM_USED" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the estimated amount of physical host memory " + "currently consumed for this virtual machine's " + "physical memory.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MB"); + _vmware_native_table[num_events].which_counter= + VMWARE_MEM_USED_MB; + _vmware_native_table[num_events].report_difference=0; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "HOST_CPU" ); + strncpy( _vmware_native_table[num_events].description, + "Retrieves the speed of the ESX system's physical " + "CPU in MHz.", + 
PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"MHz"); + _vmware_native_table[num_events].which_counter= + VMWARE_HOST_CPU_MHZ; + _vmware_native_table[num_events].report_difference=0; + num_events++; + } + +#endif + + /* For VMWare Pseudo Performance Counters */ + if ( getenv( "PAPI_VMWARE_PSEUDOPERFORMANCE" ) ) { + + use_pseudo=1; + + strcpy( _vmware_native_table[num_events].name, + "HOST_TSC" ); + strncpy( _vmware_native_table[num_events].description, + "Physical host TSC", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"cycles"); + _vmware_native_table[num_events].which_counter= + VMWARE_HOST_TSC; + _vmware_native_table[num_events].report_difference=1; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "ELAPSED_TIME" ); + strncpy( _vmware_native_table[num_events].description, + "Elapsed real time in ns.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"ns"); + _vmware_native_table[num_events].which_counter= + VMWARE_ELAPSED_TIME; + _vmware_native_table[num_events].report_difference=1; + num_events++; + + strcpy( _vmware_native_table[num_events].name, + "ELAPSED_APPARENT" ); + strncpy( _vmware_native_table[num_events].description, + "Elapsed apparent time in ns.", + PAPI_HUGE_STR_LEN ); + strcpy( _vmware_native_table[num_events].units,"ns"); + _vmware_native_table[num_events].which_counter= + VMWARE_ELAPSED_APPARENT; + _vmware_native_table[num_events].report_difference=1; + num_events++; + } + + if (num_events==0) { + strncpy(_vmware_vector.cmp_info.disabled_reason, + "VMware SDK not installed, and PAPI_VMWARE_PSEUDOPERFORMANCE not set", + PAPI_MAX_STR_LEN); + return PAPI_ECMP; + } + + _vmware_vector.cmp_info.num_native_events = num_events; + + return PAPI_OK; +} + +/** Setup the counter control structure */ +int +_vmware_init_control_state( hwd_control_state_t *ctl ) +{ + (void) ctl; + + return PAPI_OK; +} + +/** Enumerate Native Events + @param EventCode is the event of 
interest
+    @param modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS
+ */
+int
+_vmware_ntv_enum_events( unsigned int *EventCode, int modifier )
+{
+
+    switch ( modifier ) {
+        /* return EventCode of first event */
+    case PAPI_ENUM_FIRST:
+        if (num_events==0) return PAPI_ENOEVNT;
+        *EventCode = 0;
+        return PAPI_OK;
+        /* return EventCode of the event following the passed-in one */
+    case PAPI_ENUM_EVENTS:{
+        int index = *EventCode;
+
+        /* codes above INT_MAX become negative here; they name no event */
+        if ( index >= 0 && index < num_events - 1 ) {
+            *EventCode = *EventCode + 1;
+            return PAPI_OK;
+        }
+        return PAPI_ENOEVNT;
+    }
+    default:
+        return PAPI_EINVAL;
+    }
+}
+
+/** Fill in the symbol, long description and units of a native event
+    @param EventCode is the native event code (index into the event table)
+    @param info is the structure to fill in
+    @return PAPI_OK, or PAPI_ENOEVNT if EventCode names no event
+ */
+int
+_vmware_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
+{
+
+    int index = EventCode;
+
+    if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT;
+
+    /* Copy at most size-1 bytes and terminate explicitly: strncpy     */
+    /* leaves the destination unterminated when the source fills it.   */
+    strncpy( info->symbol, _vmware_native_table[index].name,
+             sizeof(info->symbol) - 1 );
+    info->symbol[sizeof(info->symbol) - 1] = '\0';
+
+    /* bounded by the destination's own size, not that of info->symbol */
+    strncpy( info->long_descr, _vmware_native_table[index].description,
+             sizeof(info->long_descr) - 1 );
+    info->long_descr[sizeof(info->long_descr) - 1] = '\0';
+
+    strncpy( info->units, _vmware_native_table[index].units,
+             sizeof(info->units) - 1 );
+    info->units[sizeof(info->units) - 1] = '\0';
+
+    return PAPI_OK;
+}
+
+
+/** Takes a native event code and passes back the name
+    @param EventCode is the native event code
+    @param name is a pointer for the name to be copied to
+    @param len is the size of the string
+    @return PAPI_OK, or PAPI_ENOEVNT if EventCode names no event
+            (name is left untouched in that case)
+ */
+int
+_vmware_ntv_code_to_name( unsigned int EventCode, char *name, int len )
+{
+    int index = EventCode;
+
+    if ( index < 0 || index >= num_events ) {
+        return PAPI_ENOEVNT;
+    }
+
+    if ( len > 0 ) {
+        strncpy( name, _vmware_native_table[index].name, len );
+        name[len - 1] = '\0';   /* strncpy does not terminate on truncation */
+    }
+    return PAPI_OK;
+}
+
+/** Takes a native event code and passes back the event description
+    @param EventCode is the native event code
+    @param name is a pointer for the description to be copied to
+    @param len is the size of the string
+    @return PAPI_OK, or PAPI_ENOEVNT if EventCode names no event
+            (name is left untouched in that case)
+ */
+int
+_vmware_ntv_code_to_descr( unsigned int EventCode, char *name, int len )
+{
+    int index = EventCode;
+
+    if ( index < 0 || index >= num_events ) {
+        return PAPI_ENOEVNT;
+    }
+
+    if ( len > 0 ) {
+        strncpy( name, _vmware_native_table[index].description, len );
+        name[len - 1] = '\0';   /* strncpy does not terminate on truncation */
+    }
+    return PAPI_OK;
+}
+
+/** Triggered by eventset operations like add or remove */ +int +_vmware_update_control_state( hwd_control_state_t *ctl, + NativeInfo_t *native, + int count, + hwd_context_t *ctx ) +{ + (void) ctx; + + struct _vmware_control_state *control; + + int i, index; + + control=(struct _vmware_control_state *)ctl; + + for ( i = 0; i < count; i++ ) { + index = native[i].ni_event; + control->which_counter[i]=_vmware_native_table[index].which_counter; + native[i].ni_position = i; + } + control->num_events=count; + + return PAPI_OK; +} + +/** Triggered by PAPI_start() */ +int +_vmware_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + struct _vmware_context *context; + (void) ctl; + + context=(struct _vmware_context *)ctx; + + _vmware_hardware_read( context, 1 ); + + return PAPI_OK; +} + +/** Triggered by PAPI_stop() */ +int +_vmware_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + + struct _vmware_context *context; + (void) ctl; + + context=(struct _vmware_context *)ctx; + + _vmware_hardware_read( context, 0 ); + + return PAPI_OK; +} + +/** Triggered by PAPI_read() */ +int +_vmware_read( hwd_context_t *ctx, + hwd_control_state_t *ctl, + long_long **events, int flags ) +{ + + struct _vmware_context *context; + struct _vmware_control_state *control; + + (void) flags; + int i; + + context=(struct _vmware_context *)ctx; + control=(struct _vmware_control_state *)ctl; + + _vmware_hardware_read( context, 0 ); + + for (i=0; inum_events; i++) { + + if (_vmware_native_table[ + _vmware_native_table[control->which_counter[i]].which_counter]. 
+ report_difference) { + control->value[i]=context->values[control->which_counter[i]]- + context->start_values[control->which_counter[i]]; + } else { + control->value[i]=context->values[control->which_counter[i]]; + } + // printf("%d %d %lld-%lld=%lld\n",i,control->which_counter[i], + // context->values[control->which_counter[i]], + // context->start_values[control->which_counter[i]], + // control->value[i]); + + } + + *events = control->value; + + return PAPI_OK; +} + +/** Triggered by PAPI_write(), but only if the counters are running */ +/* otherwise, the updated state is written to ESI->hw_start */ +int +_vmware_write( hwd_context_t * ctx, hwd_control_state_t * ctrl, long_long events[] ) +{ + (void) ctx; + (void) ctrl; + (void) events; + SUBDBG( "_vmware_write... %p %p", ctx, ctrl ); + /* FIXME... this should actually carry out the write, though */ + /* this is non-trivial as which counter being written has to be */ + /* determined somehow. */ + return PAPI_OK; +} + +/** Triggered by PAPI_reset */ +int +_vmware_reset( hwd_context_t *ctx, hwd_control_state_t *ctl ) +{ + (void) ctx; + (void) ctl; + + return PAPI_OK; +} + +/** Shutting down a context */ +int +_vmware_shutdown_thread( hwd_context_t *ctx ) +{ + (void) ctx; + +#ifdef VMGUESTLIB + VMGuestLibError glError; + struct _vmware_context *context; + + context=(struct _vmware_context *)ctx; + + if (use_guestlib) { + glError = GuestLib_CloseHandle(context->glHandle); + if (glError != VMGUESTLIB_ERROR_SUCCESS) { + fprintf(stderr, "Failed to CloseHandle: %s\n", + GuestLib_GetErrorText(glError)); + return PAPI_ECMP; + } + } +#endif + + return PAPI_OK; +} + +/** Triggered by PAPI_shutdown() */ +int +_vmware_shutdown_component( void ) +{ + +#ifdef VMGUESTLIB + if (dlclose(dlHandle)) { + fprintf(stderr, "dlclose failed\n"); + return EXIT_FAILURE; + } +#endif + + return PAPI_OK; +} + + +/** This function sets various options in the component + @param ctx + @param code valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, 
PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT
+    @param option
+ */
+int
+_vmware_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
+{
+
+    (void) ctx;
+    (void) code;
+    (void) option;
+
+    SUBDBG( "_vmware_ctl..." );
+
+    return PAPI_OK;    /* every option is accepted and ignored */
+}
+
+/** Validate the counting domain requested for this component.
+    Nothing is configured here; the routine only checks that the
+    request contains at least one known domain bit:
+    PAPI_DOM_USER   - only user context is counted
+    PAPI_DOM_KERNEL - only the Kernel/OS context is counted
+    PAPI_DOM_OTHER  - Exception/transient mode (like user TLB misses)
+    PAPI_DOM_ALL    - all of the domains
+    @param ctl    control state (unused)
+    @param domain bitmask of PAPI_DOM_* values
+    @return PAPI_OK if any of the bits above is set, PAPI_EINVAL otherwise
+ */
+int
+_vmware_set_domain( hwd_control_state_t *ctl, int domain )
+{
+    (void) ctl;
+
+    int found = 0;    /* set once any recognised domain bit is seen */
+    SUBDBG( "_vmware_set_domain..." );
+    if ( PAPI_DOM_USER & domain ) {
+        SUBDBG( " PAPI_DOM_USER " );
+        found = 1;
+    }
+    if ( PAPI_DOM_KERNEL & domain ) {
+        SUBDBG( " PAPI_DOM_KERNEL " );
+        found = 1;
+    }
+    if ( PAPI_DOM_OTHER & domain ) {
+        SUBDBG( " PAPI_DOM_OTHER " );
+        found = 1;
+    }
+    if ( PAPI_DOM_ALL & domain ) {
+        SUBDBG( " PAPI_DOM_ALL " );
+        found = 1;
+    }
+    if ( !found ) {
+        return ( PAPI_EINVAL );
+    }
+    return PAPI_OK;
+}
+
+/** Vector that points to entry points for our component.
+    Registers the component description, the sizes of its private
+    structures and the _vmware_* functions defined in this file.
+ */
+papi_vector_t _vmware_vector = {
+    .cmp_info = {
+        /* default component information (unspecified values are initialized to 0) */
+        .name = "vmware",
+        .short_name = "vmware",
+        .description = "Provide support for VMware vmguest and pseudo counters",
+        .version = "5.0",
+        .num_mpx_cntrs = VMWARE_MAX_COUNTERS,
+        .num_cntrs = VMWARE_MAX_COUNTERS,
+        .default_domain = PAPI_DOM_USER,
+        .available_domains = PAPI_DOM_USER,
+        .default_granularity = PAPI_GRN_THR,
+        .available_granularities = PAPI_GRN_THR,
+        .hardware_intr_sig = PAPI_INT_SIGNAL,
+
+        /* component specific cmp_info initializations */
+        .fast_real_timer = 0,
+        .fast_virtual_timer = 0,
+        .attach = 0,
+        .attach_must_ptrace = 0,
+    },
+    /* sizes of framework-opaque component-private structures */
+    .size = {
+        .context = sizeof ( struct _vmware_context ),
+        .control_state = sizeof ( struct _vmware_control_state ),
+        .reg_value = sizeof ( struct _vmware_register ),
+        .reg_alloc = sizeof ( struct _vmware_reg_alloc ),
+    }
+    ,
+    /* function pointers in this component */
+    .init_thread = _vmware_init_thread,
+    .init_component = _vmware_init_component,
+    .init_control_state = _vmware_init_control_state,
+    .start = _vmware_start,
+    .stop = _vmware_stop,
+    .read = _vmware_read,
+    .write = _vmware_write,
+    .shutdown_thread = _vmware_shutdown_thread,
+    .shutdown_component = _vmware_shutdown_component,
+    .ctl = _vmware_ctl,
+
+    .update_control_state = _vmware_update_control_state,
+    .set_domain = _vmware_set_domain,
+    .reset = _vmware_reset,
+
+    .ntv_enum_events = _vmware_ntv_enum_events,
+    .ntv_code_to_name = _vmware_ntv_code_to_name,
+    .ntv_code_to_descr = _vmware_ntv_code_to_descr,
+    .ntv_code_to_info = _vmware_ntv_code_to_info,
+
+};
+
diff --git a/src/config.h.in b/src/config.h.in
new file mode 100644
index 0000000..b538e9f
--- /dev/null
+++ b/src/config.h.in
@@ -0,0 +1,201 @@
+/* config.h.in. Generated from configure.in by autoheader. */
+
+/* cpu type */
+#undef CPU
+
+/* POSIX 1b clock */
+#undef HAVE_CLOCK_GETTIME
+
+/* POSIX 1b realtime clock */
+#undef HAVE_CLOCK_GETTIME_REALTIME
+
+/* POSIX 1b realtime HR clock */
+#undef HAVE_CLOCK_GETTIME_REALTIME_HR
+
+/* POSIX 1b per-thread clock */
+#undef HAVE_CLOCK_GETTIME_THREAD
+
+/* Native access to a hardware cycle counter */
+#undef HAVE_CYCLE
+
+/* Define to 1 if you have the <c_asm.h> header file. */
+#undef HAVE_C_ASM_H
+
+/* This platform has the ffsll() function */
+#undef HAVE_FFSLL
+
+/* Define to 1 if you have the `gethrtime' function.
*/
+#undef HAVE_GETHRTIME
+
+/* Full gettid function */
+#undef HAVE_GETTID
+
+/* Normal gettimeofday timer */
+#undef HAVE_GETTIMEOFDAY
+
+/* Define if hrtime_t is defined in <sys/time.h> */
+#undef HAVE_HRTIME_T
+
+/* Define to 1 if you have the <intrinsics.h> header file. */
+#undef HAVE_INTRINSICS_H
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the `cpc' library (-lcpc). */
+#undef HAVE_LIBCPC
+
+/* perfctr header file */
+#undef HAVE_LIBPERFCTR_H
+
+/* Define to 1 if you have the `mach_absolute_time' function. */
+#undef HAVE_MACH_ABSOLUTE_TIME
+
+/* Define to 1 if you have the <mach/mach_time.h> header file. */
+#undef HAVE_MACH_MACH_TIME_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Altix memory mapped global cycle counter */
+#undef HAVE_MMTIMER
+
+/* Define to 1 if you have the <perfmon/pfmlib.h> header file. */
+#undef HAVE_PERFMON_PFMLIB_H
+
+/* Montecito headers */
+#undef HAVE_PERFMON_PFMLIB_MONTECITO_H
+
+/* Working per thread getrusage */
+#undef HAVE_PER_THREAD_GETRUSAGE
+
+/* Working per thread timer */
+#undef HAVE_PER_THREAD_TIMES
+
+/* new pfmlib_output_param_t */
+#undef HAVE_PFMLIB_OUTPUT_PFP_PMD_COUNT
+
+/* event description function */
+#undef HAVE_PFM_GET_EVENT_DESCRIPTION
+
+/* new pfm_msg_t */
+#undef HAVE_PFM_MSG_TYPE
+
+/* old reg_evt_idx */
+#undef HAVE_PFM_REG_EVT_IDX
+
+/* Define to 1 if you have the `read_real_time' function. */
+#undef HAVE_READ_REAL_TIME
+
+/* Define to 1 if you have the `sched_getcpu' function. */
+#undef HAVE_SCHED_GETCPU
+
+/* Define to 1 if you have the <sched.h> header file. */
+#undef HAVE_SCHED_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* gettid syscall function */
+#undef HAVE_SYSCALL_GETTID
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Keyword for per-thread variables */
+#undef HAVE_THREAD_LOCAL_STORAGE
+
+/* Define to 1 if you have the `time_base_to_time' function. */
+#undef HAVE_TIME_BASE_TO_TIME
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define for _rtc() intrinsic. */
+#undef HAVE__RTC
+
+/* Define if _rtc() is not found. */
+#undef NO_RTC_INTRINSIC
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Use the perfctr virtual TSC for per-thread times */
+#undef USE_PERFCTR_PTTIMER
+
+/* Use /proc for per-thread times */
+#undef USE_PROC_PTTIMER
+
+/* Enable extensions on AIX 3, Interix. */
+#ifndef _ALL_SOURCE
+# undef _ALL_SOURCE
+#endif
+/* Enable GNU extensions on systems that have them. */
+#ifndef _GNU_SOURCE
+# undef _GNU_SOURCE
+#endif
+/* Enable threading extensions on Solaris. */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# undef _POSIX_PTHREAD_SEMANTICS
+#endif
+/* Enable extensions on HP NonStop. */
+#ifndef _TANDEM_SOURCE
+# undef _TANDEM_SOURCE
+#endif
+/* Enable general extensions on Solaris. */
+#ifndef __EXTENSIONS__
+# undef __EXTENSIONS__
+#endif
+
+
+/* Define to 1 if on MINIX.
*/ +#undef _MINIX + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +#undef _POSIX_1_SOURCE + +/* Define to 1 if you need to in order for `stat' and other things to work. */ +#undef _POSIX_SOURCE + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif diff --git a/src/configure b/src/configure new file mode 100755 index 0000000..3d05182 --- /dev/null +++ b/src/configure @@ -0,0 +1,8096 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for PAPI 5.6.0.0. +# +# Report bugs to . +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. 
+if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! 
-f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. 
Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. +fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. 
+ as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org and +$0: ptools-perfapi@icl.utk.edu about your system, including +$0: any error possibly output before this message. Then +$0: install a modern shell, or manually run the script +$0: under such a shell if you do have one." 
+ fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. 
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+# The probe runs in a subshell with stderr discarded, so a shell that rejects
+# `+=' only fails the test and gets the plain-assignment fallback below.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then :
+  # The definition is passed to eval as a string, so `+=' is only parsed by
+  # shells that already passed the probe.
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else
+  as_fn_append ()
+  {
+    # Expands to VAR=$VAR$2: re-assigns VAR to its old value followed by VALUE.
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then :
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else
+  as_fn_arith ()
+  {
+    # expr exits with status 1 when the result is null or zero; the
+    # `|| test $? -eq 1' turns that case into success, while other non-zero
+    # statuses still fail.
+    as_val=`expr "$@" || test $? -eq 1`
+  }
+fi # as_fn_arith
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  # A STATUS of 0 is promoted to 1 so that an error never exits successfully.
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  # $4 is the optional LOG_FD; when present the message is also written there.
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  $as_echo "$as_me: error: $2" >&2
+  as_fn_exit $as_status
+} # as_fn_error
+
+# Probe expr, basename and dirname. A tool that is missing or misbehaves is
+# replaced by `false', so the `A || B || C' chains below fall through to the
+# next alternative.
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+# as_me is the last path component of $0: try basename, then expr, then sed.
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+         X"$0" : 'X\(//\)$' \| \
+         X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+            s//\1/
+            q
+          }
+          /^X\/\(\/\/\)$/{
+            s//\1/
+            q
+          }
+          /^X\/\(\/\).*/{
+            s//\1/
+            q
+          }
+          s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+  # The two assignments below must stay on consecutive lines: the test that
+  # follows checks that the second recorded line number is the first plus one.
+  # When that fails, a copy of this script with the line numbers substituted
+  # in by sed is written to $as_me.lineno and sourced instead.
+  as_lineno_1=$LINENO as_lineno_1a=$LINENO
+  as_lineno_2=$LINENO as_lineno_2a=$LINENO
+  eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+  test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
+  # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
+    sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
+      N
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+      t loop
+      s/-\n.*//
+    ' >$as_me.lineno &&
+  chmod +x "$as_me.lineno" ||
+    { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+
+  # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
+  # already done that, so ensure we don't try to do so again and fall
+  # in an infinite loop. This has already happened in practice.
+  _as_can_reexec=no; export _as_can_reexec
+  # Don't try to exec as it changes $[0], causing all sort of problems
+  # (the dirname of $[0] is not the place where we might find the
+  # original and so on. Autoconf is especially sensitive to this).
+  . "./$as_me.lineno"
+  # Exit status is that of the last command.
+  exit
+}
+
+# Work out how this shell's echo suppresses the trailing newline: via a
+# leading -n (ECHO_N) or a trailing \c (ECHO_C); ECHO_T is the tab fallback.
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='	';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+# Pick the best available way to link a file: `ln -s', then `ln', then
+# `cp -pR'. Scratch names are made unique with the shell's process id.
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+# If `mkdir -p' is not understood, it may have created a directory literally
+# named `-p'; remove that and disable the shortcut.
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+# Unless DJDIR is set, keep the original stdin on fd 7 and read from
+# /dev/null from here on. fd 6 starts as a duplicate of stdout; it is
+# pointed at /dev/null later when --silent is given.
+test -n "$DJDIR" || exec 7<&0 </dev/null
+exec 6>&1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME='PAPI'
+PACKAGE_TARNAME='papi'
+PACKAGE_VERSION='5.6.0.0'
+PACKAGE_STRING='PAPI 5.6.0.0'
+PACKAGE_BUGREPORT='ptools-perfapi@icl.utk.edu'
+PACKAGE_URL=''
+
+ac_unique_file="papi.c"
+# Factoring default headers for most tests.
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='LTLIBOBJS +LIBOBJS +CC_COMMON_NAME +BGPM_INSTALL_DIR +HAVE_NO_OVERRIDE_INIT +FTEST_TARGETS +COMPONENTS +COMPONENT_RULES +BITFLAGS +BGP_SYSDIR +SHOW_CONF +tests +TESTS +TOPTFLAGS +SHLIBDEPS +MISCHDRS +FLAGS +ARG64 +cpu_option +CPU_MODEL +ARCH_EVENTS +POST_BUILD +MISCOBJS +MISCSRCS +NOOPT +OMPCFLGS +SMPCFLGS +CC_SHR +CC_R +CTEST_TARGETS +DESCR +OSCONTEXT +OSLOCK +OSFILESHDR +OSFILESOBJ +OSFILESSRC +CPUCOMPONENT_OBJ +CPUCOMPONENT_C +CPUCOMPONENT_NAME +OPTFLAGS +PAPICFLAGS +VLIB +SHLIB +LIBRARY +FILENAME +CPU +VERSION +LINKLIB +SETPATH +PAPI_EVENTS_CSV +PAPI_EVENTS +OS +pfm_libdir +pfm_incdir +pfm_prefix +old_pfmv2 +pfm_root +perfctr_libdir +perfctr_incdir +perfctr_prefix +perfctr_root +altix +STATIC +papiLIBS +AR +PMINIT +PMAPI +MAKEVER +arch +LDL +EGREP +GREP +RANLIB +SET_MAKE +LN_S +CPP +AWK +MPICC +ac_ct_F77 +FFLAGS +F77 +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +MIC +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +with_arch +with_bitmode +with_OS +with_OSVER +with_assumed_kernel +with_mic 
+with_bgpm_installdir +with_nativecc +with_tests +with_debug +with_CPU +with_pthread_mutexes +with_ffsll +with_walltimer +with_tls +with_virtualtimer +with_pmapi +with_static_user_events +with_static_papi_events +with_static_lib +with_shared_lib +with_static_tools +with_shlib_tools +with_perfctr +with_perfctr_root +with_perfctr_prefix +with_perfctr_incdir +with_perfctr_libdir +with_perfmon +with_pfm_root +with_pfm_prefix +with_pfm_incdir +with_pfm_libdir +with_perf_events +enable_perfevent_rdpmc +with_pe_incdir +enable_perf_event_uncore +with_components +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +F77 +FFLAGS +CPP' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) 
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+# Command-line parsing. `for ac_option' without an `in' list walks the
+# script's positional parameters. An option written without `=VALUE' stores
+# its variable name in ac_prev, and the next word is then assigned to that
+# variable. Once a bare `--' has been seen, ac_dashdash is `yes', so the
+# string being matched starts with `yes' and no `-option' pattern can match;
+# only the VAR=VALUE and catch-all arms remain reachable.
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval $ac_prev=\$ac_option
+    ac_prev=
+    continue
+  fi
+
+  # ac_optarg is the text after the first `=', empty for a trailing `=',
+  # and `yes' when the word contains no `=' at all.
+  case $ac_option in
+  *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *=)   ac_optarg= ;;
+  *)    ac_optarg=yes ;;
+  esac
+
+  # Accept the important Cygnus configure options, so we can diagnose typos.
+
+  case $ac_dashdash$ac_option in
+  --)
+    ac_dashdash=yes ;;
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
+    datadir=$ac_optarg ;;
+
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
+  # The --disable/--enable/--with/--without arms share one shape: extract the
+  # feature or package name, reject characters outside [-+._ and
+  # alphanumerics], map `-', `+' and `.' to `_', and set the matching
+  # enable_NAME or with_NAME variable. Names not listed in ac_user_opts are
+  # appended to ac_unrecognized_opts here and reported after the loop.
+  -disable-* | --disable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+         ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=no ;;
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
+
+  -enable-* | --enable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+         ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=\$ac_optarg ;;
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst | --locals)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --progr-tra | --program-tr | --program-t)
+    ac_prev=program_transform_name ;;
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --progr-tra=* | --program-tr=* | --program-t=*)
+    program_transform_name=$ac_optarg ;;
+
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  -with-* | --with-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+         ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=\$ac_optarg ;;
+
+  -without-* | --without-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: $ac_useropt"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+         ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=no ;;
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  # Any other word starting with `-' is a hard error.
+  -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
+    ;;
+
+  # VAR=VALUE: after the name is validated, the variable is set and exported.
+  *=*)
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+    # Reject names that are not valid shell variable names.
+    case $ac_envvar in #(
+      '' | [0-9]* | *[!_$as_cr_alnum]* )
+      as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
+    esac
+    eval $ac_envvar=\$ac_optarg
+    export $ac_envvar ;;
+
+  # A bare word is used as the build, host and target alias, but only for
+  # those aliases that are still unset (the `${var=word}' expansions).
+  *)
+    # FIXME: should be removed in autoconf 3.0.
+    $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
+    ;;
+
+  esac
+done
+
+# A non-empty ac_prev here means the last option was still waiting for its
+# argument when the command line ended.
+if test -n "$ac_prev"; then
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+  as_fn_error $? "missing argument to $ac_option"
+fi
+
+# Unknown --enable/--with options are ignored, fatal, or (by default) only
+# warned about, depending on the value of enable_option_checking.
+if test -n "$ac_unrecognized_opts"; then
+  case $enable_option_checking in
+    no) ;;
+    fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+    *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
+  esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures PAPI 5.6.0.0 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/papi] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of PAPI 5.6.0.0:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-perfevent-rdpmc + Enable userspace rdpmc instruction on perf_event, + default: yes + --disable-perf-event-uncore Disable perf_event uncore component + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-arch= Specify architecture (uname -m) + --with-bitmode=<32,64> Specify bit mode of library + --with-OS= Specify operating system + --with-OSVER= Specify operating system version + 
--with-assumed-kernel= Assume kernel version is for purposes of workarounds + --with-mic To compile for Intel MIC + --with-bgpm_installdir= Specify the installation path of BGPM + --with-nativecc= Specify native C compiler for header generation + --with-tests= Specify which tests to run on install + --with-debug= Build a debug version, debug version plus memory tracker or none + --with-CPU= Specify CPU type + --with-pthread-mutexes Specify use of pthread mutexes rather than custom PAPI locks + --with-ffsll Specify use of the ffsll() function + --with-walltimer= Specify realtime timer + --with-tls= This platform supports thread local storage with a keyword + --with-virtualtimer= Specify per-thread virtual timer + --with-pmapi= Specify path of pmapi on aix system + --with-static-user-events Build with a static user events file. + --with-static-papi-events Build with a static papi events file. + --with-static-lib= Build a static library + --with-shared-lib= Build a shared library + --with-static-tools Specify static compile of tests and utilities + --with-shlib-tools Specify linking with papi library of tests and utilities + --with-perfctr Specify perfctr as the performance interface + --with-perfctr-root= Specify path to source tree (for use by developers only) + --with-perfctr-prefix= Specify prefix to installed perfctr distribution + --with-perfctr-incdir= Specify directory of perfctr header files in non-standard location + --with-perfctr-libdir= Specify directory of perfctr library in non-standard location + --with-perfmon= Specify perfmon as the performance interface and specify version + --with-pfm-root= Specify path to source tree (for use by developers only) + --with-pfm-prefix= Specify prefix to installed pfm distribution + --with-pfm-incdir= Specify directory of pfm header files in non-standard location + --with-pfm-libdir= Specify directory of pfm library in non-standard location + --with-perf-events Specify use of Linux Performance Event (requires kernel 
2.6.32 or greater) + --with-pe-incdir= Specify path to the correct perf header file + --with-components=<"component1 component2"> Specify which components to build + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a + nonstandard directory <lib dir> + LIBS libraries to pass to the linker, e.g. -l<library> + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if + you have headers in a nonstandard directory <include dir> + F77 Fortran 77 compiler command + FFLAGS Fortran 77 compiler flags + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to <ptools-perfapi@icl.utk.edu>. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. 
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +PAPI configure 5.6.0.0 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_f77_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_f77_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_f77_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_f77_try_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... 
" >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" 
>&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} +( $as_echo "## ----------------------------------------- ## +## Report this to ptools-perfapi@icl.utk.edu ## +## ----------------------------------------- ##" + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case <limits.h> declares $2. 
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + <limits.h> exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. +ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type + +# ac_fn_c_check_member LINENO AGGR MEMBER VAR INCLUDES +# ---------------------------------------------------- +# Tries to find if the field MEMBER exists in type AGGR, after including +# INCLUDES, setting cache variable VAR accordingly. +ac_fn_c_check_member () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5 +$as_echo_n "checking for $2.$3... " >&6; } +if eval \${$4+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main () +{ +static $2 ac_aggr; +if (ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$4=yes" +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$5 +int +main () +{ +static $2 ac_aggr; +if (sizeof ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$4=yes" +else + eval "$4=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$4 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_member +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by PAPI $as_me 5.6.0.0, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. 
+trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. ## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_config_headers="$ac_config_headers config.h" + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for architecture" >&5 +$as_echo_n "checking for architecture... " >&6; } + +# Check whether --with-arch was given. +if test "${with_arch+set}" = set; then : + withval=$with_arch; arch=$withval +else + arch=`uname -m` +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $arch" >&5 +$as_echo "$arch" >&6; } + + +# Check whether --with-bitmode was given. +if test "${with_bitmode+set}" = set; then : + withval=$with_bitmode; bitmode=$withval +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for OS" >&5 +$as_echo_n "checking for OS... " >&6; } + +# Check whether --with-OS was given. +if test "${with_OS+set}" = set; then : + withval=$with_OS; OS=$withval +else + OS="`uname | tr 'A-Z' 'a-z'`" + if (test "$OS" = "SunOS" || test "$OS" = "sunos"); then + OS=solaris + fi +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $OS" >&5 +$as_echo "$OS" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for OS version" >&5 +$as_echo_n "checking for OS version... " >&6; } + +# Check whether --with-OSVER was given. 
+if test "${with_OSVER+set}" = set; then : + withval=$with_OSVER; OSVER=$withval +else + if test "$OS" != "bgp" && test "$OS" != "bgq"; then # query uname -r only when OS is neither bgp nor bgq (the former -o test was always true) + OSVER="`uname -r`" + fi +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $OSVER" >&5 +$as_echo "$OSVER" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for perf_event workaround level" >&5 +$as_echo_n "checking for perf_event workaround level... " >&6; } + +# Check whether --with-assumed_kernel was given. +if test "${with_assumed_kernel+set}" = set; then : + withval=$with_assumed_kernel; assumed_kernel=$withval; CFLAGS="$CFLAGS -DASSUME_KERNEL=\\\"$with_assumed_kernel\\\"" +else + assumed_kernel="autodetect" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $assumed_kernel" >&5 +$as_echo "$assumed_kernel" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for if MIC should be used" >&5 +$as_echo_n "checking for if MIC should be used... " >&6; } + +# Check whether --with-mic was given. +if test "${with_mic+set}" = set; then : + withval=$with_mic; MIC=yes + tls=__thread + virtualtimer=cputime_id + perf_events=yes + walltimer=clock_realtime_hr + ffsll=no + cross_compiling=yes + arch=k1om +else + MIC=no +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MIC" >&5 +$as_echo "$MIC" >&6; } + + +CFLAGS="$CFLAGS -g" +#If not set, set FFLAGS to null to prevent AC_PROG_F77 from defaulting it to -g -O2 +if test "x$FFLAGS" = "x"; then + FFLAGS="" +fi +OPTFLAGS="-O2" +TOPTFLAGS="-O1" +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in xlc icc gcc cc + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in xlc icc gcc cc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. 
+ break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... " >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... " >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=f +ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' +ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_f77_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in xlf ifort gfortran f95 f90 f77 + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$F77"; then + ac_cv_prog_F77="$F77" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_F77="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +F77=$ac_cv_prog_F77 +if test -n "$F77"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $F77" >&5 +$as_echo "$F77" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$F77" && break + done +fi +if test -z "$F77"; then + ac_ct_F77=$F77 + for ac_prog in xlf ifort gfortran f95 f90 f77 +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_F77+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_F77"; then + ac_cv_prog_ac_ct_F77="$ac_ct_F77" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_F77="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_F77=$ac_cv_prog_ac_ct_F77 +if test -n "$ac_ct_F77"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_F77" >&5 +$as_echo "$ac_ct_F77" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_F77" && break +done + + if test "x$ac_ct_F77" = x; then + F77="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + F77=$ac_ct_F77 + fi +fi + + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for Fortran 77 compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done +rm -f a.out + +# If we don't use `.F' as extension, the preprocessor is not run on the +# input file. (Note that this only needs to work for GNU compilers.) 
+ac_save_ext=$ac_ext +ac_ext=F +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU Fortran 77 compiler" >&5 +$as_echo_n "checking whether we are using the GNU Fortran 77 compiler... " >&6; } +if ${ac_cv_f77_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.$ac_ext <<_ACEOF + program main +#ifndef __GNUC__ + choke me +#endif + + end +_ACEOF +if ac_fn_f77_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_f77_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_f77_compiler_gnu" >&5 +$as_echo "$ac_cv_f77_compiler_gnu" >&6; } +ac_ext=$ac_save_ext +ac_test_FFLAGS=${FFLAGS+set} +ac_save_FFLAGS=$FFLAGS +FFLAGS= +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $F77 accepts -g" >&5 +$as_echo_n "checking whether $F77 accepts -g... " >&6; } +if ${ac_cv_prog_f77_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + FFLAGS=-g +cat > conftest.$ac_ext <<_ACEOF + program main + + end +_ACEOF +if ac_fn_f77_try_compile "$LINENO"; then : + ac_cv_prog_f77_g=yes +else + ac_cv_prog_f77_g=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_f77_g" >&5 +$as_echo "$ac_cv_prog_f77_g" >&6; } +if test "$ac_test_FFLAGS" = set; then + FFLAGS=$ac_save_FFLAGS +elif test $ac_cv_prog_f77_g = yes; then + if test "x$ac_cv_f77_compiler_gnu" = xyes; then + FFLAGS="-g -O2" + else + FFLAGS="-g" + fi +else + if test "x$ac_cv_f77_compiler_gnu" = xyes; then + FFLAGS="-O2" + else + FFLAGS= + fi +fi + +if test $ac_compiler_gnu = yes; then + G77=yes +else + G77= +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test "x$F77" = "x"; then 
+ F77= +fi +# Extract the first word of "mpicc", so it can be a program name with args. +set dummy mpicc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_MPICC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MPICC"; then + ac_cv_prog_MPICC="$MPICC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_MPICC="mpicc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MPICC=$ac_cv_prog_MPICC +if test -n "$MPICC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MPICC" >&5 +$as_echo "$MPICC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + +# Lets figure out what CC actually is... +# Used in later checks to set compiler specific options +if `$CC -V 2>&1 | grep '^Intel(R) C' >/dev/null 2>&1` ; then + CC_COMMON_NAME="icc" +elif `$CC -v 2>&1 | grep 'gcc version' >/dev/null 2>&1` ; then + CC_COMMON_NAME="gcc" +elif `$CC -qversion 2>&1 | grep 'IBM XL C' >/dev/null 2>&1`; then + CC_COMMON_NAME="xlc" +else + CC_COMMON_NAME="unknown" +fi + +#prevent icc warnings about overriding optimization settings set by AC_PROG_CC +# remark #869: parameter was never referenced +# remark #271: trailing comma is nonstandard +if test "$CC_COMMON_NAME" = "icc"; then + CFLAGS="$CFLAGS -diag-disable 188,869,271" + if test "$MIC" = "yes"; then + CC="$CC -mmic -fPIC" + fi +fi + +if test "$F77" = "ifort" -a "$MIC" = "yes"; then + F77="$F77 -mmic -fPIC" +fi + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. 
+set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. 
+ # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +$as_echo_n "checking whether ln -s works... " >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +$as_echo "no, using $LN_S" >&6; } +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
+case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... 
" >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + ac_fn_c_check_header_mongrel "$LINENO" "minix/config.h" "ac_cv_header_minix_config_h" "$ac_includes_default" +if test "x$ac_cv_header_minix_config_h" = xyes; then : + MINIX=yes +else + MINIX= +fi + + + if test "$MINIX" = yes; then + +$as_echo "#define _POSIX_SOURCE 1" >>confdefs.h + + +$as_echo "#define _POSIX_1_SOURCE 2" >>confdefs.h + + +$as_echo "#define _MINIX 1" >>confdefs.h + + fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether it is safe to define __EXTENSIONS__" >&5 +$as_echo_n "checking whether it is safe to define __EXTENSIONS__... 
" >&6; } +if ${ac_cv_safe_to_define___extensions__+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +# define __EXTENSIONS__ 1 + $ac_includes_default +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_safe_to_define___extensions__=yes +else + ac_cv_safe_to_define___extensions__=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_safe_to_define___extensions__" >&5 +$as_echo "$ac_cv_safe_to_define___extensions__" >&6; } + test $ac_cv_safe_to_define___extensions__ = yes && + $as_echo "#define __EXTENSIONS__ 1" >>confdefs.h + + $as_echo "#define _ALL_SOURCE 1" >>confdefs.h + + $as_echo "#define _GNU_SOURCE 1" >>confdefs.h + + $as_echo "#define _POSIX_PTHREAD_SEMANTICS 1" >>confdefs.h + + $as_echo "#define _TANDEM_SOURCE 1" >>confdefs.h + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5 +$as_echo_n "checking for inline... " >&6; } +if ${ac_cv_c_inline+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_c_inline=no +for ac_kw in inline __inline__ __inline; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#ifndef __cplusplus +typedef int foo_t; +static $ac_kw foo_t static_foo () {return 0; } +$ac_kw foo_t foo () {return 0; } +#endif + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_c_inline=$ac_kw +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$ac_cv_c_inline" != no && break +done + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5 +$as_echo "$ac_cv_c_inline" >&6; } + +case $ac_cv_c_inline in + inline | yes) ;; + *) + case $ac_cv_c_inline in + no) ac_val=;; + *) ac_val=$ac_cv_c_inline;; + esac + cat >>confdefs.h <<_ACEOF +#ifndef __cplusplus +#define inline $ac_val +#endif +_ACEOF + ;; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether time.h and sys/time.h may both be included" >&5 +$as_echo_n "checking whether time.h and sys/time.h may both be included... " >&6; } +if ${ac_cv_header_time+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include + +int +main () +{ +if ((struct tm *) 0) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_time=yes +else + ac_cv_header_time=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_time" >&5 +$as_echo "$ac_cv_header_time" >&6; } +if test $ac_cv_header_time = yes; then + +$as_echo "#define TIME_WITH_SYS_TIME 1" >>confdefs.h + +fi + +for ac_header in sys/time.h c_asm.h intrinsics.h mach/mach_time.h sched.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +for ac_func in gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time sched_getcpu +do : 
+ as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + +# +# Check if the system provides dl* symbols without -ldl, and if not, +# check for -ldl existance. +# + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen and dlerror symbols in base system" >&5 +$as_echo_n "checking for dlopen and dlerror symbols in base system... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +void *p = dlopen ("", 0); char *c = dlerror(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + dlsymbols_in_base="yes" +else + dlsymbols_in_base="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +if test "${dlsymbols_in_base}" = "yes"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: found" >&5 +$as_echo "found" >&6; } + LDL="" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5 +$as_echo "not found" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen and dlerror symbols in -ldl" >&5 +$as_echo_n "checking for dlopen and dlerror symbols in -ldl... " >&6; } + SAVED_LIBS=${LIBS} + LIBS="${LIBS} -ldl" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +void *p = dlopen ("", 0); char *c = dlerror(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + has_ldl="yes" +else + has_ldl="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LIBS=${SAVED_LIBS} + if test "${has_ldl}" = "yes" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: found" >&5 +$as_echo "found" >&6; } + LDL="-ldl" + else + as_fn_error $? 
"cannot find dlopen and dlerror symbols neither in the base system libraries nor in -ldl" "$LINENO" 5 + fi +fi + + + +if test "$OS" = "CLE"; then + virtualtimer=times + tls=__thread + walltimer=cycle + ffsll=yes + cross_compiling=yes + STATIC="-static" + # _rtc is only defined when using the Cray compiler + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _rtc intrinsic" >&5 +$as_echo_n "checking for _rtc intrinsic... " >&6; } + rtc_ok=yes + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef HAVE_INTRINSICS_H + #include + #endif +int +main () +{ +_rtc() + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + +$as_echo "#define HAVE__RTC 1" >>confdefs.h + +else + rtc_ok=no + +$as_echo "#define NO_RTC_INTRINSIC 1" >>confdefs.h + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $rtc_ok" >&5 +$as_echo "$rtc_ok" >&6; } +elif test "$OS" = "bgp"; then + CC=powerpc-bgp-linux-gcc + F77=powerpc-bgp-linux-gfortran + walltimer=cycle + virtualtimer=perfctr + tls=no + ffsll=yes + cross_compiling=yes +elif test "$OS" = "bgq"; then + +# Check whether --with-bgpm_installdir was given. +if test "${with_bgpm_installdir+set}" = set; then : + withval=$with_bgpm_installdir; BGPM_INSTALL_DIR=$withval + CFLAGS="$CFLAGS -I$withval" +else + as_fn_error $? 
"BGQ CPU component requires installation path of BGPM (see --with-bgpm_installdir)" "$LINENO" 5 +fi + + bitmode=64 + tls=no +elif test "$OS" = "linux"; then + if test "$arch" = "ppc64" -o "$arch" = "x86_64"; then + if test "$bitmode" = "64" -a "$libdir" = '${exec_prefix}/lib'; then + libdir='${exec_prefix}/lib64' + fi + fi +elif test "$OS" = "solaris"; then + ac_fn_c_check_type "$LINENO" "hrtime_t" "ac_cv_type_hrtime_t" "#if HAVE_SYS_TIME_H + #include + #endif +" +if test "x$ac_cv_type_hrtime_t" = xyes; then : + +$as_echo "#define HAVE_HRTIME_T 1" >>confdefs.h + +fi + + if test "x$AR" = "x"; then + AR=/usr/ccs/bin/ar + fi + +fi + +if test "x$AR" = "x"; then + AR=ar +fi + +if test "$cross_compiling" = "yes" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for native compiler for header generation" >&5 +$as_echo_n "checking for native compiler for header generation... " >&6; } + +# Check whether --with-nativecc was given. +if test "${with_nativecc+set}" = set; then : + withval=$with_nativecc; nativecc=$withval +else + nativecc=gcc +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $nativecc" >&5 +$as_echo "$nativecc" >&6; } +fi + + +# Check whether --with-tests was given. +if test "${with_tests+set}" = set; then : + withval=$with_tests; tests=$withval +else + tests="ctests ftests" +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for debug build" >&5 +$as_echo_n "checking for debug build... " >&6; } + +# Check whether --with-debug was given. 
+if test "${with_debug+set}" = set; then : + withval=$with_debug; debug=$withval +fi + +if test "$debug" = "yes"; then + if test "$CC_COMMON_NAME" = "gcc"; then + CFLAGS="$CFLAGS -g3" + fi + OPTFLAGS="-O0" + PAPICFLAGS+=" -DDEBUG -DPAPI_NO_MEMORY_MANAGEMENT" +elif test "$debug" = "memory"; then + if test "$CC_COMMON_NAME" = "gcc"; then + CFLAGS="$CFLAGS -g3" + fi + OPTFLAGS="-O0" + PAPICFLAGS+=" -DDEBUG" +else + PAPICFLAGS+="-DPAPI_NO_MEMORY_MANAGEMENT" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $debug" >&5 +$as_echo "$debug" >&6; } + +if test "$CC_COMMON_NAME" = "gcc"; then + gcc_version=`gcc -v 2>&1 | tail -n 1 | awk '{printf $3}'` + major=`echo $gcc_version | sed 's/\([^.][^.]*\).*/\1/'` + minor=`echo $gcc_version | sed 's/[^.][^.]*.\([^.][^.]*\).*/\1/'` + if (test "$major" -ge 4 || test "$major" = 3 -a "$minor" -ge 4); then + CFLAGS+=" -Wextra" + else + CFLAGS+=" -W" + fi +# -Wextra => -Woverride-init on gcc >= 4.2 +# This issues a warning (error under -Werror) for some libpfm4 code. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -Wno-override-init" >&5 +$as_echo_n "checking for -Wno-override-init... " >&6; } + oldcflags="$CFLAGS" + CFLAGS+=" -Wall -Wextra -Werror -Wno-override-init" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + struct A { int x; int y; }; + int main(void) { struct A a = {.x = 0, .y = 0, .y = 5 }; return a.x; } + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + HAVE_NO_OVERRIDE_INIT=1 +else + HAVE_NO_OVERRIDE_INIT=0 +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS="$oldcflags" +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $HAVE_NO_OVERRIDE_INIT" >&5 +$as_echo "$HAVE_NO_OVERRIDE_INIT" >&6; } + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for CPU type" >&5 +$as_echo_n "checking for CPU type... " >&6; } + +# Check whether --with-CPU was given. 
+if test "${with_CPU+set}" = set; then : + withval=$with_CPU; CPU=$withval + case "$CPU" in + core|core2|i7|atom|p4|p3|opteron|athlon) + MISCSRCS="$MISCSRCS x86_cpuid_info.c" + esac +else + case "$OS" in + aix) + CPU="`/usr/sbin/lsattr -E -l proc0 | grep type | cut -d '_' -f 2 | cut -d ' ' -f 1 | tr 'A-Z' 'a-z'`" + if test "$CPU" = ""; then + CPU="`/usr/sbin/lsattr -E -l proc1 | grep type | cut -d '_' -f 2 | cut -d ' ' -f 1 | tr 'A-Z' 'a-z'`" + fi ;; + freebsd) + family=`uname -m` + + if test "$family" = "amd64"; then + MISCSRCS="$MISCSRCS x86_cpuid_info.c" + + elif test "$family" = "i386"; then + MISCSRCS="$MISCSRCS x86_cpuid_info.c" + fi ;; + + darwin) + family=`uname -m` + + MISCSRCS="$MISCSRCS x86_cpuid_info.c" + + ;; + + linux) + family=`uname -m` + + if test "$family" = "x86_64"; then + MISCSRCS="$MISCSRCS x86_cpuid_info.c" + CPU="x86" + + elif test "$family" = "i686"; then + MISCSRCS="$MISCSRCS x86_cpuid_info.c" + CPU="x86" + + elif test "$family" = "ppc64"; then + CPU_info="`cat /proc/cpuinfo | grep cpu | cut -d: -f2 | cut -d' ' -f2 | sed '2,$d'`" + case "$CPU_info" in + PPC970*) CPU="PPC970";; + POWER5) CPU="POWER5";; + POWER5+) CPU="POWER5+";; + POWER6) CPU="POWER6";; + POWER7) CPU="POWER7";; + esac + fi ;; + solaris) + ac_fn_c_check_header_mongrel "$LINENO" "libcpc.h" "ac_cv_header_libcpc_h" "$ac_includes_default" +if test "x$ac_cv_header_libcpc_h" = xyes; then : + CFLAGS="$CFLAGS -lcpc" + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + #include + int main() { + // Check for libcpc 2 + if(CPC_VER_CURRENT == 2) + exit(0); + exit(1); + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + cpc_version=2 +else + cpc_version=0 +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +else + as_fn_error $? "libcpc is needed for running PAPI on Solaris" "$LINENO" 5 +fi + + + processor=`uname -p` + machinetype=`uname -m` + if test "$processor" = "sparc"; then + if test "$machinetype" = "sun4u"; then + CPU=ultra + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cpc_take_sample in -lcpc" >&5 +$as_echo_n "checking for cpc_take_sample in -lcpc... " >&6; } +if ${ac_cv_lib_cpc_cpc_take_sample+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcpc $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char cpc_take_sample (); +int +main () +{ +return cpc_take_sample (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_cpc_cpc_take_sample=yes +else + ac_cv_lib_cpc_cpc_take_sample=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cpc_cpc_take_sample" >&5 +$as_echo "$ac_cv_lib_cpc_cpc_take_sample" >&6; } +if test "x$ac_cv_lib_cpc_cpc_take_sample" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBCPC 1 +_ACEOF + + LIBS="-lcpc $LIBS" + +else + as_fn_error $? "libcpc.a is needed on Solaris, install SUNWcpc" "$LINENO" 5 +fi + + elif test "$machinetype" = "sun4v"; then + CPU=niagara2 + if test "$cpc_version" != "2"; then + as_fn_error $? 
"libcpc2 needed for Niagara 2" "$LINENO" 5 + fi + else + as_fn_error $? "$machinetype not supported" "$LINENO" 5 + fi + else + as_fn_error $? "Only SPARC processors are supported on Solaris" "$LINENO" 5 + fi ;; + bgp) + CPU=bgp ;; + bgq) + CPU=bgq ;; + esac +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPU" >&5 +$as_echo "$CPU" >&6; } + +cat >>confdefs.h <<_ACEOF +#define CPU $CPU +_ACEOF + + +# First set pthread-mutexes based on arch +case $arch in + aarch64|arm*) + pthread_mutexes=yes + CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" + echo "forcing use of pthread mutexes... " >&6 + ;; +esac + + + +# Check whether --with-pthread-mutexes was given. +if test "${with_pthread_mutexes+set}" = set; then : + withval=$with_pthread_mutexes; pthread_mutexes=yes + CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES" + +fi + + + +# Check whether --with-ffsll was given. +if test "${with_ffsll+set}" = set; then : + withval=$with_ffsll; ffsll=$withval +else + if test "$cross_compiling" = "yes" ; then + as_fn_error $? "ffsll must be specified for cross compile" "$LINENO" 5 + fi + didcheck=1 + ac_fn_c_check_func "$LINENO" "ffsll" "ac_cv_func_ffsll" +if test "x$ac_cv_func_ffsll" = xyes; then : + ffsll=yes +else + ffsll=no +fi + +fi + + if test "$ffsll" = "yes" ; then + +$as_echo "#define HAVE_FFSLL 1" >>confdefs.h + + fi +if test "$didcheck" != "1"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ffsll" >&5 +$as_echo_n "checking for ffsll... " >&6; } + if test "$ffsll" = "yes" ; then + +$as_echo "#define HAVE_FFSLL 1" >>confdefs.h + + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ffsll" >&5 +$as_echo "$ffsll" >&6; } +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working gettid" >&5 +$as_echo_n "checking for working gettid... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + main() { pid_t a = gettid(); } +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +$as_echo "#define HAVE_GETTID 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working syscall(SYS_gettid)" >&5 +$as_echo_n "checking for working syscall(SYS_gettid)... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + main() { pid_t a = syscall(SYS_gettid); } +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +$as_echo "#define HAVE_SYSCALL_GETTID 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + +# Check whether --with-walltimer was given. +if test "${with_walltimer+set}" = set; then : + withval=$with_walltimer; walltimer=$withval +else + if test "$cross_compiling" = "yes" ; then + as_fn_error $? "walltimer must be specified for cross compile" "$LINENO" 5 + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working MMTIMER" >&5 +$as_echo_n "checking for working MMTIMER... " >&6; } + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + #include + #include + #include + #include + #include + #ifndef MMTIMER_FULLNAME + #define MMTIMER_FULLNAME "/dev/mmtimer" + #endif + int main() { + int offset; + int fd; + if((fd = open(MMTIMER_FULLNAME, O_RDONLY)) == -1) + exit(1); + if ((offset = ioctl(fd, MMTIMER_GETOFFSET, 0)) < 0) + exit(1); + close(fd); + exit(0); + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + walltimer="mmtimer" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working CLOCK_REALTIME_HR POSIX 1b timer" >&5 +$as_echo_n "checking for working CLOCK_REALTIME_HR POSIX 1b timer... " >&6; } + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + #include + #include + #include + main() { + struct timespec t1, t2; + double seconds; + if (syscall(__NR_clock_gettime,CLOCK_REALTIME_HR,&t1) == -1) exit(1); + sleep(1); + if (syscall(__NR_clock_gettime,CLOCK_REALTIME_HR,&t2) == -1) exit(1); + seconds = ((double)t2.tv_sec + (double)t2.tv_nsec/1000000000.0) - ((double)t1.tv_sec + (double)t1.tv_nsec/1000000000.0); + if (seconds > 1.0) + exit(0); + else + exit(1); + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + walltimer="clock_realtime_hr" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working CLOCK_REALTIME POSIX 1b timer" >&5 +$as_echo_n "checking for working CLOCK_REALTIME POSIX 1b timer... 
" >&6; } + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + #include + #include + #include + main() { + struct timespec t1, t2; + double seconds; + if (syscall(__NR_clock_gettime,CLOCK_REALTIME,&t1) == -1) exit(1); + sleep(1); + if (syscall(__NR_clock_gettime,CLOCK_REALTIME,&t2) == -1) exit(1); + seconds = ((double)t2.tv_sec + (double)t2.tv_nsec/1000000000.0) - ((double)t1.tv_sec + (double)t1.tv_nsec/1000000000.0); + if (seconds > 1.0) + exit(0); + else + exit(1); + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + walltimer="clock_realtime" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + walltimer="cycle" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for which real time clock to use" >&5 +$as_echo_n "checking for which real time clock to use... 
" >&6; } +if test "$walltimer" = "gettimeofday"; then + +$as_echo "#define HAVE_GETTIMEOFDAY 1" >>confdefs.h + +elif test "$walltimer" = "mmtimer"; then + +$as_echo "#define HAVE_MMTIMER 1" >>confdefs.h + + altix="-DALTIX" +elif test "$walltimer" = "clock_realtime_hr"; then + +$as_echo "#define HAVE_CLOCK_GETTIME 1" >>confdefs.h + + +$as_echo "#define HAVE_CLOCK_GETTIME_REALTIME_HR 1" >>confdefs.h + +elif test "$walltimer" = "clock_realtime"; then + +$as_echo "#define HAVE_CLOCK_GETTIME 1" >>confdefs.h + + +$as_echo "#define HAVE_CLOCK_GETTIME_REALTIME 1" >>confdefs.h + +elif test "$walltimer" = "cycle"; then + +$as_echo "#define HAVE_CYCLE 1" >>confdefs.h + +else + as_fn_error $? "Unknown value for walltimer" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $walltimer" >&5 +$as_echo "$walltimer" >&6; } + +SAVED_LIBS=$LIBS +SAVED_LDFLAGS=$LDFLAGS +SAVED_CFLAGS=$CFLAGS +LIBS="" +LDFLAGS="" +CFLAGS="-pthread" + + +# Check whether --with-tls was given. +if test "${with_tls+set}" = set; then : + withval=$with_tls; tls=$withval +else + if test "$cross_compiling" = "yes" ; then + as_fn_error $? "tls must be specified for cross compile" "$LINENO" 5 + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working __thread" >&5 +$as_echo_n "checking for working __thread... " >&6; } + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + #include + extern __thread int i; + static int res1, res2; + void thread_main (void *arg) { + i = (int)arg; + sleep (1); + if ((int)arg == 1) + res1 = (i == (int)arg); + else + res2 = (i == (int)arg); + } + __thread int i; + int main () { + pthread_t t1, t2; + i = 5; + pthread_create (&t1, NULL, thread_main, (void *)1); + pthread_create (&t2, NULL, thread_main, (void *)2); + pthread_join (t1, NULL); + pthread_join (t2, NULL); + return !(res1 + res2 == 2); + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + tls="__thread" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + tls="no" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + if test "$OS" = "linux"; then + if test "x$tls" = "x__thread"; then + # On some linux distributions, TLS works in executables, but linking against + # a shared library containing TLS fails with: undefined reference to `__tls_get_addr' + rm -f conftest.c conftest.so conftest + echo "static __thread int foo; void main () { foo = 5; }" > conftest.c + gcc -fPIC --shared -o conftest.so conftest.c > /dev/null 2>&1 + gcc -o conftest conftest.so > /dev/null 2>&1 + if test ! -f conftest; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Disabling usage of __thread." >&5 +$as_echo "$as_me: WARNING: Disabling usage of __thread." >&2;}; + tls="no" + fi + rm -f conftest.c conftest.so conftest + fi + fi +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for high performance thread local storage" >&5 +$as_echo_n "checking for high performance thread local storage... 
" >&6; } +if test "$tls" = "no"; then + NOTLS="-DNO_TLS" +elif test "x$tls" != "x"; then + if test "$tls" = "yes"; then + tls="__thread" + fi + NOTLS="-DUSE_COMPILER_TLS" + +cat >>confdefs.h <<_ACEOF +#define HAVE_THREAD_LOCAL_STORAGE $tls +_ACEOF + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $tls" >&5 +$as_echo "$tls" >&6; } + + +# Check whether --with-virtualtimer was given. +if test "${with_virtualtimer+set}" = set; then : + withval=$with_virtualtimer; virtualtimer=$withval +else + if test "$cross_compiling" = "yes" ; then + as_fn_error $? "virtualtimer must be specified for cross compile" "$LINENO" 5 + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working CLOCK_THREAD_CPUTIME_ID POSIX 1b timer" >&5 +$as_echo_n "checking for working CLOCK_THREAD_CPUTIME_ID POSIX 1b timer... " >&6; } + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #if !defined( SYS_gettid ) + #define SYS_gettid 1105 + #endif + struct timespec threadone = { 0, 0 }; + struct timespec threadtwo = { 0, 0 }; + pthread_t threadOne, threadTwo; + volatile int done = 0; + + int gettid() { + return syscall( SYS_gettid ); + } + + void *doThreadOne( void * v ) { + while (!done) + sleep(1); + if (syscall(__NR_clock_gettime,CLOCK_THREAD_CPUTIME_ID,&threadone) == -1) { + perror("clock_gettime(CLOCK_THREAD_CPUTIME_ID)"); + exit(1); + } + return 0; + } + + void *doThreadTwo( void * v ) { + long i, j = 0xdeadbeef; + for( i = 0; i < 0xFFFFFFF; ++i ) { j = j ^ i; } + + if (syscall(__NR_clock_gettime,CLOCK_THREAD_CPUTIME_ID,&threadtwo) == -1) { + perror("clock_gettime(CLOCK_THREAD_CPUTIME_ID)"); + exit(1); + } + done = 1; + return j; + } + + int main( int argc, char ** argv ) { + int status = pthread_create( & threadOne, NULL, doThreadOne, NULL ); + assert( status == 0 ); + status = pthread_create( & threadTwo, NULL, doThreadTwo, NULL ); + assert( status == 0 ); + status = pthread_join( threadTwo, NULL ); + assert( status == 0 ); + status = pthread_join( threadOne, NULL ); + assert( status == 0 ); + if ((threadone.tv_sec != threadtwo.tv_sec) || (threadone.tv_nsec != threadtwo.tv_nsec)) + exit(0); + else { + fprintf(stderr,"T1 %ld %ld T2 %ld %ld\n",threadone.tv_sec,threadone.tv_nsec,threadtwo.tv_sec,threadtwo.tv_nsec); + exit(1); + } + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + virtualtimer="clock_thread_cputime_id" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + # *** Checks for working per thread timer*** + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working per-thread times() timer" >&5 +$as_echo_n "checking for working per-thread times() timer... 
" >&6; } + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #if !defined( SYS_gettid ) + #define SYS_gettid 1105 + #endif + long threadone = 0, threadtwo = 0; + pthread_t threadOne, threadTwo; + volatile int done = 0; + + int gettid() { + return syscall( SYS_gettid ); + } + int doThreadOne( void * v ) { + struct tms tm; + int status; + while (!done) + sleep(1); + status = times( & tm ); + assert( status != -1 ); + threadone = tm.tms_utime; + return 0; + } + int doThreadTwo( void * v ) { + struct tms tm; + long i, j = 0xdeadbeef; + int status; + for( i = 0; i < 0xFFFFFFF; ++i ) { j = j ^ i; } + status = times( & tm ); + assert( status != -1 ); + threadtwo = tm.tms_utime; + done = 1; + return j; + } + int main( int argc, char ** argv ) { + int status = pthread_create( & threadOne, NULL, doThreadOne, NULL ); + assert( status == 0 ); + status = pthread_create( & threadTwo, NULL, doThreadTwo, NULL ); + assert( status == 0 ); + status = pthread_join( threadTwo, NULL ); + assert( status == 0 ); + status = pthread_join( threadOne, NULL ); + assert( status == 0 ); + return (threadone == threadtwo); + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + virtualtimer="times" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + virtualtimer="default" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ 
+ conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi + +LDFLAGS=$SAVED_LDFLAGS +CFLAGS=$SAVED_CFLAGS +LIBS=$SAVED_LIBS + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for which virtual timer to use" >&5 +$as_echo_n "checking for which virtual timer to use... " >&6; } +case "$virtualtimer" in + times) + +$as_echo "#define HAVE_PER_THREAD_TIMES 1" >>confdefs.h + ;; + getrusage) + +$as_echo "#define HAVE_PER_THREAD_GETRUSAGE 1" >>confdefs.h + ;; + clock_thread_cputime_id) + +$as_echo "#define HAVE_CLOCK_GETTIME_THREAD CLOCK_THREAD_CPUTIME_ID" >>confdefs.h + ;; + proc|default|perfctr) + +$as_echo "#define USE_PROC_PTTIMER 1" >>confdefs.h + + +$as_echo "#define USE_PERFCTR_PTTIMER 1" >>confdefs.h + ;; +esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $virtualtimer" >&5 +$as_echo "$virtualtimer" >&6; } + +if test "$OS" = "aix"; then + +# Check whether --with-pmapi was given. +if test "${with_pmapi+set}" = set; then : + withval=$with_pmapi; PMAPI=$withval +else + PMAPI="/usr/pmapi" +fi + + LIBS="-L$PMAPI/lib -lpmapi" + CPPFLAGS="$CPPFLAGS -I$PMAPI/include" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pm_initialize in -lpmapi" >&5 +$as_echo_n "checking for pm_initialize in -lpmapi... " >&6; } +if ${ac_cv_lib_pmapi_pm_initialize+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpmapi $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pm_initialize (); +int +main () +{ +return pm_initialize (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pmapi_pm_initialize=yes +else + ac_cv_lib_pmapi_pm_initialize=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pmapi_pm_initialize" >&5 +$as_echo "$ac_cv_lib_pmapi_pm_initialize" >&6; } +if test "x$ac_cv_lib_pmapi_pm_initialize" = xyes; then : + PMINIT="-DPM_INITIALIZE" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pm_init in -lpmapi" >&5 +$as_echo_n "checking for pm_init in -lpmapi... " >&6; } +if ${ac_cv_lib_pmapi_pm_init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpmapi $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pm_init (); +int +main () +{ +return pm_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pmapi_pm_init=yes +else + ac_cv_lib_pmapi_pm_init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pmapi_pm_init" >&5 +$as_echo "$ac_cv_lib_pmapi_pm_init" >&6; } +if test "x$ac_cv_lib_pmapi_pm_init" = xyes; then : + PMINIT="-DPM_INIT" +else + as_fn_error $? "libpmapi.a not found, rerun configure with different flags" "$LINENO" 5 +fi + +fi + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for static user preset events" >&5 +$as_echo_n "checking for static user preset events... " >&6; } + +# Check whether --with-static_user_events was given. 
+if test "${with_static_user_events+set}" = set; then : + withval=$with_static_user_events; STATIC_USER_EVENTS=$withval +else + STATIC_USER_EVENTS=no +fi + +if test "$STATIC_USER_EVENTS" = "yes"; then + PAPICFLAGS+=" -DSTATIC_USER_EVENTS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $STATIC_USER_EVENTS" >&5 +$as_echo "$STATIC_USER_EVENTS" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for static PAPI preset events" >&5 +$as_echo_n "checking for static PAPI preset events... " >&6; } + +# Check whether --with-static_papi_events was given. +if test "${with_static_papi_events+set}" = set; then : + withval=$with_static_papi_events; STATIC_PAPI_EVENTS=$withval +else + STATIC_PAPI_EVENTS=yes +fi + +if test "$STATIC_PAPI_EVENTS" = "yes"; then + PAPICFLAGS+=" -DSTATIC_PAPI_EVENTS_TABLE" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $STATIC_PAPI_EVENTS" >&5 +$as_echo "$STATIC_PAPI_EVENTS" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for whether to build static library" >&5 +$as_echo_n "checking for whether to build static library... " >&6; } + +# Check whether --with-static_lib was given. +if test "${with_static_lib+set}" = set; then : + withval=$with_static_lib; static_lib=$withval +else + static_lib=yes +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $static_lib" >&5 +$as_echo "$static_lib" >&6; } + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for whether to build shared library" >&5 +$as_echo_n "checking for whether to build shared library... " >&6; } + +# Check whether --with-shared_lib was given. +if test "${with_shared_lib+set}" = set; then : + withval=$with_shared_lib; shared_lib=$withval +else + shared_lib=yes +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $shared_lib" >&5 +$as_echo "$shared_lib" >&6; } + +if test "$shared_lib" = "no" -a "$static_lib" = "no"; then + as_fn_error $? 
"Both shared and static libs are disabled" "$LINENO" 5 +fi + +if test "$shared_lib" = "yes"; then + papiLIBS="shared" +fi +if test "$static_lib" = "yes"; then + papiLIBS="$papiLIBS static" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for static compile of tests and utilities" >&5 +$as_echo_n "checking for static compile of tests and utilities... " >&6; } + +# Check whether --with-static_tools was given. +if test "${with_static_tools+set}" = set; then : + withval=$with_static_tools; STATIC="-static" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +if test "$static_lib" = "no"; then + as_fn_error $? "Building tests and utilities static but no static papi library to be built" "$LINENO" 5 +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for linking with papi shared library of tests and utilities" >&5 +$as_echo_n "checking for linking with papi shared library of tests and utilities... " >&6; } + +# Check whether --with-shlib_tools was given. +if test "${with_shlib_tools+set}" = set; then : + withval=$with_shlib_tools; shlib_tools=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + shlib_tools=no + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +if test "$shlib_tools" = "yes"; then + if test "$shared_lib" != "yes"; then + as_fn_error $? "Building static but specified shared linking for tests and utilities" "$LINENO" 5 + fi + if test "$STATIC" = "-static"; then + as_fn_error $? "Building shared but specified static linking" "$LINENO" 5 + fi + LINKLIB='$(SHLIB)' +elif test "$shlib_tools" = "no"; then + if test "$static_lib" != "yes"; then + as_fn_error $? 
"Building shared but specified static linking for tests and utilities" "$LINENO" 5 + fi + LINKLIB='$(LIBRARY)' +fi + +################################################## +# perfctr +################################################## +perfctr=0 +force_perfctr=no + + +# Check whether --with-perfctr was given. +if test "${with_perfctr+set}" = set; then : + withval=$with_perfctr; force_perfctr=yes + user_specified_interface=perfctr + if test "$arch" != "ppc64"; then + perfctr=6 + else + perfctr=7 + fi +else + perfctr=0 +fi + + +if test "$cross_compiling" = "no" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for /sys/class/perfctr" >&5 +$as_echo_n "checking for /sys/class/perfctr... " >&6; } +if ${ac_cv_file__sys_class_perfctr+:} false; then : + $as_echo_n "(cached) " >&6 +else + test "$cross_compiling" = yes && + as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "/sys/class/perfctr"; then + ac_cv_file__sys_class_perfctr=yes +else + ac_cv_file__sys_class_perfctr=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_file__sys_class_perfctr" >&5 +$as_echo "$ac_cv_file__sys_class_perfctr" >&6; } +if test "x$ac_cv_file__sys_class_perfctr" = xyes; then : + perfctr=7 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for /dev/perfctr" >&5 +$as_echo_n "checking for /dev/perfctr... " >&6; } +if ${ac_cv_file__dev_perfctr+:} false; then : + $as_echo_n "(cached) " >&6 +else + test "$cross_compiling" = yes && + as_fn_error $? 
"cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "/dev/perfctr"; then + ac_cv_file__dev_perfctr=yes +else + ac_cv_file__dev_perfctr=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_file__dev_perfctr" >&5 +$as_echo "$ac_cv_file__dev_perfctr" >&6; } +if test "x$ac_cv_file__dev_perfctr" = xyes; then : + perfctr=6 +fi + +fi + +fi + +if test "$perfctr" != 0; then + pfm_incdir="libpfm-3.y/include/" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for perfctr version" >&5 +$as_echo_n "checking for perfctr version... " >&6; } + if test "$perfctr" = 7 -a "$arch" != "ppc64"; then + as_fn_error $? "Perfctr 2.7.x only works on PPC64 machines. Patch kernel with 2.6.x." "$LINENO" 5 + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: 2.$perfctr" >&5 +$as_echo "2.$perfctr" >&6; } +fi + + +# Check whether --with-perfctr_root was given. +if test "${with_perfctr_root+set}" = set; then : + withval=$with_perfctr_root; perfctr_root=$withval + user_specified_interface=perfctr +fi + + +# Check whether --with-perfctr_prefix was given. +if test "${with_perfctr_prefix+set}" = set; then : + withval=$with_perfctr_prefix; perfctr_prefix=$withval + user_specified_interface=perfctr +fi + + +# Check whether --with-perfctr_incdir was given. +if test "${with_perfctr_incdir+set}" = set; then : + withval=$with_perfctr_incdir; perfctr_incdir=$withval + user_specified_interface=perfctr +fi + + +# Check whether --with-perfctr_libdir was given. 
+if test "${with_perfctr_libdir+set}" = set; then : + withval=$with_perfctr_libdir; perfctr_libdir=$withval + user_specified_interface=perfctr +fi + + +if test "$perfctr" != 0; then + + dotest=0 + if test "x$perfctr_root" != "x"; then + LIBS="-L$perfctr_root/usr.lib -lperfctr" + CPPFLAGS="$CPPFLAGS -I$perfctr_root/usr.lib -I$perfctr_root/linux/include" + dotest=1 + elif test "x$perfctr_prefix" != "x"; then + LIBS="-L$perfctr_prefix/lib -lperfctr" + CPPFLAGS="$CPPFLAGS -I$perfctr_prefix/include" + perfctr_libdir="$perfctr_prefix/lib" + perfctr_incdir="$perfctr_prefix/include" + dotest=1 + else + if test "x$perfctr_libdir" != "x"; then + LIBS="-L$perfctr_libdir -lperfctr" + dotest=1 + fi + if test "x$perfctr_incdir" != "x"; then + CPPFLAGS="-I$perfctr_incdir" + dotest=1 + fi + fi + + if test "$dotest" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for vperfctr_open in -lperfctr" >&5 +$as_echo_n "checking for vperfctr_open in -lperfctr... " >&6; } +if ${ac_cv_lib_perfctr_vperfctr_open+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lperfctr $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char vperfctr_open (); +int +main () +{ +return vperfctr_open (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_perfctr_vperfctr_open=yes +else + ac_cv_lib_perfctr_vperfctr_open=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_perfctr_vperfctr_open" >&5 +$as_echo "$ac_cv_lib_perfctr_vperfctr_open" >&6; } +if test "x$ac_cv_lib_perfctr_vperfctr_open" = xyes; then : + for ac_header in libperfctr.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "libperfctr.h" "ac_cv_header_libperfctr_h" "$ac_includes_default" +if test "x$ac_cv_header_libperfctr_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBPERFCTR_H 1 +_ACEOF + oCFLAGS=$CFLAGS + CFLAGS="$CFLAGS -static" + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include "libperfctr.h" + int main() { + if ((PERFCTR_ABI_VERSION >> 24) != 5) + exit(1); + exit(0); + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + perfctr=6 +else + perfctr=7 +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + CFLAGS=$oCFLAGS +else + as_fn_error $? "libperfctr.h not found, rerun configure with different flags" "$LINENO" 5 +fi + +done + +else + as_fn_error $? 
"libperfctr.a not found, rerun configure with different flags" "$LINENO" 5 +fi + + else + +$as_echo "#define HAVE_LIBPERFCTR_H 1" >>confdefs.h + + fi +fi + +user_specified_interface=no +################################################## +# perfmon +################################################## +old_pfmv2=n +perfmon=0 +perfmon2=no +force_perfmon2=no + + +# Check whether --with-perfmon was given. +if test "${with_perfmon+set}" = set; then : + withval=$with_perfmon; perfmon=$withval + user_specified_interface=perfmon + force_perfmon2=yes + pfm_incdir="libpfm-3.y/include" + perfmon=`echo ${perfmon} | sed 's/^ \t*//;s/ \t*$//'` + perfmon=`echo ${perfmon} | grep -e '[1-9]\.[0-9][0-9]*'` + if test "x$perfmon" = "x"; then + as_fn_error $? "\"Badly formed perfmon version string\"" "$LINENO" 5 + fi + perfmon=`echo ${perfmon} | sed 's/\.//'` + if test $perfmon -gt 20; then + perfmon2=yes + fi + if test $perfmon -lt 25; then + old_pfmv2=y + PFMCFLAGS="-DPFMLIB_OLD_PFMV2" + fi +else + perfmon=0 + if test "$cross_compiling" = "no" ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for /sys/kernel/perfmon/version" >&5 +$as_echo_n "checking for /sys/kernel/perfmon/version... " >&6; } +if ${ac_cv_file__sys_kernel_perfmon_version+:} false; then : + $as_echo_n "(cached) " >&6 +else + test "$cross_compiling" = yes && + as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "/sys/kernel/perfmon/version"; then + ac_cv_file__sys_kernel_perfmon_version=yes +else + ac_cv_file__sys_kernel_perfmon_version=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_file__sys_kernel_perfmon_version" >&5 +$as_echo "$ac_cv_file__sys_kernel_perfmon_version" >&6; } +if test "x$ac_cv_file__sys_kernel_perfmon_version" = xyes; then : + perfmon=`cat /sys/kernel/perfmon/version` +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for /proc/perfmon" >&5 +$as_echo_n "checking for /proc/perfmon... 
" >&6; } +if ${ac_cv_file__proc_perfmon+:} false; then : + $as_echo_n "(cached) " >&6 +else + test "$cross_compiling" = yes && + as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "/proc/perfmon"; then + ac_cv_file__proc_perfmon=yes +else + ac_cv_file__proc_perfmon=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_file__proc_perfmon" >&5 +$as_echo "$ac_cv_file__proc_perfmon" >&6; } +if test "x$ac_cv_file__proc_perfmon" = xyes; then : + perfmon=`cat /proc/perfmon | grep version | cut -d: -f2` +else + perfmon=0 +fi + +fi + + if test "$perfmon" != 0; then + pfm_incdir="libpfm-3.y/include" + perfmon=`echo ${perfmon} | sed 's/^ \t*//;s/ \t*$//'` + perfmon=`echo ${perfmon} | grep -e '[1-9]\.[0-9][0-9]*'` + perfmon=`echo ${perfmon} | sed 's/\.//'` + if test $perfmon -gt 20; then + perfmon2=yes + fi + if test $perfmon -lt 25; then +# must be y, not yes, or libpfm breaks + old_pfmv2="y" + PFMCFLAGS="-DPFMLIB_OLD_PFMV2" + fi + fi + fi +fi + + + +force_pfm_incdir=no +# default + + +# Check whether --with-pfm_root was given. +if test "${with_pfm_root+set}" = set; then : + withval=$with_pfm_root; pfm_root=$withval + pfm_incdir=$withval/include + pfm_libdir=$withval/lib +fi + + +# Check whether --with-pfm_prefix was given. +if test "${with_pfm_prefix+set}" = set; then : + withval=$with_pfm_prefix; pfm_prefix=$withval + pfm_incdir=$pfm_prefix/include + pfm_libdir=$pfm_prefix/lib +fi + + +# Check whether --with-pfm_incdir was given. +if test "${with_pfm_incdir+set}" = set; then : + withval=$with_pfm_incdir; pfm_incdir=$withval +fi + + +# Check whether --with-pfm_libdir was given. +if test "${with_pfm_libdir+set}" = set; then : + withval=$with_pfm_libdir; pfm_libdir=$withval +fi + + +# if these are both empty, it means we haven't set either pfm_prefix or pfm_root +# which would have set them. Thus it means that we set this to our included +# libpfm4 library. Shame on the person that sets one but not the other. 
+ +if test "x$pfm_incdir" = "x" -a "x$pfm_libdir" = "x"; then + pfm_root="libpfm4" + pfm_incdir="libpfm4/include" + pfm_libdir="libpfm4/lib" +fi + +################################################## +# Linux perf_event/perf_counter +################################################## + +if test "x$mic" = "xno"; then + perf_events=no +fi + +force_perf_events=no +disable_uncore=yes + + +# Check whether --with-perf_events was given. +if test "${with_perf_events+set}" = set; then : + withval=$with_perf_events; force_perf_events=yes + user_specified_interface=pe +fi + + +# RDPMC support + +# Check whether --enable-perfevent_rdpmc was given. +if test "${enable_perfevent_rdpmc+set}" = set; then : + enableval=$enable_perfevent_rdpmc; case "${enableval}" in + yes) enable_perfevent_rdpmc=true ;; + no) enable_perfevent_rpdmc=false ;; + *) as_fn_error $? "bad value ${enableval} for --enable-perfevent-rdpmc" "$LINENO" 5 ;; +esac +else + enable_perfevent_rdpmc=true +fi + + +if test "$enable_perfevent_rdpmc" = "true"; then + PECFLAGS="$PECFLAGS -DUSE_PERFEVENT_RDPMC=1" +fi + +# Uncore support + +# Check whether --with-pe_incdir was given. +if test "${with_pe_incdir+set}" = set; then : + withval=$with_pe_incdir; pe_incdir=$withval + force_perf_events=yes + user_specified_interface=pe +else + pe_incdir=$pfm_incdir/perfmon +fi + + +# Check whether --enable-perf_event_uncore was given. +if test "${enable_perf_event_uncore+set}" = set; then : + enableval=$enable_perf_event_uncore; +fi + + +if test "x$enable_perf_event_uncore" != "xno"; then : + + disable_uncore=no + +fi + +# Check for perf_event.h + if test "$force_perf_events" = "yes"; then + perf_events="yes" + fi + if test "$cross_compiling" = "no"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for /proc/sys/kernel/perf_event_paranoid" >&5 +$as_echo_n "checking for /proc/sys/kernel/perf_event_paranoid... 
" >&6; } +if ${ac_cv_file__proc_sys_kernel_perf_event_paranoid+:} false; then : + $as_echo_n "(cached) " >&6 +else + test "$cross_compiling" = yes && + as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "/proc/sys/kernel/perf_event_paranoid"; then + ac_cv_file__proc_sys_kernel_perf_event_paranoid=yes +else + ac_cv_file__proc_sys_kernel_perf_event_paranoid=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_file__proc_sys_kernel_perf_event_paranoid" >&5 +$as_echo "$ac_cv_file__proc_sys_kernel_perf_event_paranoid" >&6; } +if test "x$ac_cv_file__proc_sys_kernel_perf_event_paranoid" = xyes; then : + + as_ac_File=`$as_echo "ac_cv_file_$pe_incdir/perf_event.h" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $pe_incdir/perf_event.h" >&5 +$as_echo_n "checking for $pe_incdir/perf_event.h... " >&6; } +if eval \${$as_ac_File+:} false; then : + $as_echo_n "(cached) " >&6 +else + test "$cross_compiling" = yes && + as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "$pe_incdir/perf_event.h"; then + eval "$as_ac_File=yes" +else + eval "$as_ac_File=no" +fi +fi +eval ac_res=\$$as_ac_File + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +if eval test \"x\$"$as_ac_File"\" = x"yes"; then : + perf_events="yes" +fi + +fi + + fi + if test "$perf_events" = "yes"; then + PECFLAGS="$PECFLAGS -DPEINCLUDE=\\\"$pe_incdir/perf_event.h\\\"" + fi + +# +# Sort out the choice of the user vs. what we detected +# +# MESSING WITH CFLAGS IS STUPID! +# +if test "$user_specified_interface" != "no"; then + if test "$user_specified_interface" = "perfctr"; then + perfmon=0 + perf_events="no" + else + if test "$user_specified_interface" = "perfmon"; then + perfctr=0 + perf_events="no" + PAPICFLAGS+=" $PFMCFLAGS" + else + if test "$user_specified_interface" = "pe"; then + perfctr=0 + perfmon=0 + PAPICFLAGS+=" $PECFLAGS" + else + as_fn_error $? 
"\"Unknown user_specified_interface=$user_specified_interface perfctr=$perfctr perfmon=$perfmon perfmon2=$perfmon2 perf-events=$perf_events\"" "$LINENO" 5 + fi + fi + fi +else + if test "$perfmon" != 0; then + PAPICFLAGS+=" $PFMCFLAGS" + fi + if test "$perf_events" = "yes"; then + PAPICFLAGS+=" $PECFLAGS" + fi +fi + + +# +# User has made no choice, so we default to the ordering below in the platform section, if +# we detect more than one. +# + +# +# What does this next section do? It determines whether or not to run the tests for libpfm +# based on the settings of pfm_root, pfm_prefix, pfm_incdir, pfm_libdir +# + +if test "$perfmon" != 0 -o "$perf_events" = "yes"; then + # if prefix set, then yes + if test "x$pfm_prefix" != "x"; then + dotest=1 + # if root not set and libdir set, then yes + elif test "x$pfm_root" = "x" -a "x$pfm_libdir" != "x"; then + dotest=1 + else + dotest=0 + fi + if test "$dotest" = 1; then + LIBS="-L$pfm_libdir -lpfm" + CPPFLAGS="$CPPFLAGS -I$pfm_incdir" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pfm_initialize in -lpfm" >&5 +$as_echo_n "checking for pfm_initialize in -lpfm... " >&6; } +if ${ac_cv_lib_pfm_pfm_initialize+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpfm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pfm_initialize (); +int +main () +{ +return pfm_initialize (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pfm_pfm_initialize=yes +else + ac_cv_lib_pfm_pfm_initialize=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pfm_pfm_initialize" >&5 +$as_echo "$ac_cv_lib_pfm_pfm_initialize" >&6; } +if test "x$ac_cv_lib_pfm_pfm_initialize" = xyes; then : + for ac_header in perfmon/pfmlib.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "perfmon/pfmlib.h" "ac_cv_header_perfmon_pfmlib_h" "$ac_includes_default" +if test "x$ac_cv_header_perfmon_pfmlib_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_PERFMON_PFMLIB_H 1 +_ACEOF + if test "$arch" = "ia64"; then + for ac_header in perfmon/pfmlib_montecito.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "perfmon/pfmlib_montecito.h" "ac_cv_header_perfmon_pfmlib_montecito_h" "$ac_includes_default" +if test "x$ac_cv_header_perfmon_pfmlib_montecito_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_PERFMON_PFMLIB_MONTECITO_H 1 +_ACEOF + +fi + +done + + fi + ac_fn_c_check_func "$LINENO" "pfm_get_event_description" "ac_cv_func_pfm_get_event_description" +if test "x$ac_cv_func_pfm_get_event_description" = xyes; then : + +$as_echo "#define HAVE_PFM_GET_EVENT_DESCRIPTION 1" >>confdefs.h + +fi + + ac_fn_c_check_member "$LINENO" "pfmlib_reg_t" "reg_evt_idx" "ac_cv_member_pfmlib_reg_t_reg_evt_idx" "#include \"perfmon/pfmlib.h\" +" +if test "x$ac_cv_member_pfmlib_reg_t_reg_evt_idx" = xyes; then : + +$as_echo "#define HAVE_PFM_REG_EVT_IDX 1" >>confdefs.h + +fi + + ac_fn_c_check_member "$LINENO" "pfmlib_output_param_t" "pfp_pmd_count" "ac_cv_member_pfmlib_output_param_t_pfp_pmd_count" "#include \"perfmon/pfmlib.h\" +" +if test "x$ac_cv_member_pfmlib_output_param_t_pfp_pmd_count" = xyes; then : + +$as_echo 
"#define HAVE_PFMLIB_OUTPUT_PFP_PMD_COUNT 1" >>confdefs.h + +fi + + ac_fn_c_check_member "$LINENO" "pfm_msg_t" "type" "ac_cv_member_pfm_msg_t_type" "#include \"perfmon/perfmon.h\" +" +if test "x$ac_cv_member_pfm_msg_t_type" = xyes; then : + +$as_echo "#define HAVE_PFM_MSG_TYPE 1" >>confdefs.h + +fi + +else + as_fn_error $? "perfmon/pfmlib.h not found, rerun configure with different flags" "$LINENO" 5 +fi + +done + +else + as_fn_error $? "libpfm.a not found, rerun configure with different flags" "$LINENO" 5 +fi + + else + +$as_echo "#define HAVE_PERFMON_PFMLIB_MONTECITO_H 1" >>confdefs.h + + +$as_echo "#define HAVE_PFM_GET_EVENT_DESCRIPTION 1" >>confdefs.h + + +$as_echo "#define HAVE_PFMLIB_OUTPUT_PFP_PMD_COUNT 1" >>confdefs.h + + fi +fi + +################################################## +# Checking platform +################################################## +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking platform" >&5 +$as_echo_n "checking platform... " >&6; } +case "$OS" in + aix) + MAKEVER="$OS"-"$CPU" ;; + bgp) + MAKEVER=bgp ;; + bgq) + MAKEVER=bgq ;; + CLE) + if test "$perfmon2" = "yes"; then +# major_version=`echo $OSVER | sed 's/\([[^.]][[^.]]*\).*/\1/'` +# minor_version=`echo $OSVER | sed 's/[[^.]][[^.]]*.\([[^.]][[^.]]*\).*/\1/'` +# point_version=`echo $OSVER | sed -e 's/[[^.]][[^.]]*.[[^.]][[^.]]*.\(.*\)/\1/' -e 's/[[^0-9]].*//'` +# if (test "$major_version" = 2 -a "$minor_version" = 6 -a "$point_version" -lt 31 -a "$perfmon2" != "yes" ); then + MAKEVER="$OS"-perfmon2 + else + MAKEVER="$OS"-pe + fi ;; + freebsd) + MAKEVER="freebsd" + LDFLAGS="-lpmc" + # HWPMC driver is available for FreeBSD >= 6 + FREEBSD_VERSION=`uname -r | cut -d'.' -f1` + if test "${FREEBSD_VERSION}" -lt 6 ; then + as_fn_error $? "PAPI requires FreeBSD 6 or greater" "$LINENO" 5 + fi + # Determine if HWPMC module is on the kernel + dmesg | grep hwpmc 2> /dev/null > /dev/null + if test "$?" != "0" ; then + as_fn_error $? "HWPMC module not found. 
(see INSTALL.TXT)" "$LINENO" 5 + fi + # Determine the number of counters + echo "/* Automatically generated file by configure */" > freebsd-config.h + echo "#ifndef _FREEBSD_CONFIG_H_" >> freebsd-config.h + echo "#define _FREEBSD_CONFIG_H_" >> freebsd-config.h + echo "" >> freebsd-config.h + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include +int +main () +{ +int i = pmc_init(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pmc_pmc_init_linked="yes" +else + pmc_pmc_init_linked="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test "${pmc_init_linked}" = "no" ; then + as_fn_error $? "Failed to link hwpmc example" "$LINENO" 5 + fi + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + int main() { + const struct pmc_cpuinfo *info; + if (pmc_init() < 0) return 0; + if (pmc_cpuinfo (&info) < 0) return 0; + return info->pm_npmc-1; + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + num_counters="0" +else + num_counters="$?" +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + if test "${num_counters}" = "0" ; then + as_fn_error $? "pmc_npmc info returned 0. 
Determine if the HWPMC module is loaded (see hwpmc(4))" "$LINENO" 5 + fi + echo "#define HWPMC_NUM_COUNTERS ${num_counters}" >> freebsd-config.h + echo "" >> freebsd-config.h + echo "#endif" >> freebsd-config.h ;; + linux) + if test "$force_perf_events" = "yes" ; then + MAKEVER="$OS"-pe + elif test "$force_perfmon2" = "yes" ; then + MAKEVER="$OS"-perfmon2 + elif test "$force_perfctr" = "yes" ; then + MAKEVER="$OS"-perfctr-x86 + case "$CPU" in + itanium2|montecito) + if test "$bitmode" = "32"; then + as_fn_error $? "The bitmode you specified is not supported" "$LINENO" 5 + fi + MAKEVER="$OS"-pfm-"$CPU" + ;; + POWER5|POWER5+|POWER6|POWER7|PPC970) + MAKEVER="$OS"-perfctr-"$CPU" ;; + esac + elif test "$perf_events" = "yes" ; then + MAKEVER="$OS"-pe + elif test "$perfmon2" = "yes" ; then + MAKEVER="$OS"-perfmon2 + elif test "$old_pfmv2" = "y" ; then + MAKEVER="$OS"-pfm-"$CPU" + elif test "$perfctr" != 0 ; then + case "$CPU" in + itanium2|montecito) + if test "$bitmode" = "32"; then + as_fn_error $? "The bitmode you specified is not supported" "$LINENO" 5 + fi + MAKEVER="$OS"-pfm-"$CPU" + ;; + x86) + MAKEVER="$OS"-perfctr-x86 ;; + POWER5|POWER5+|POWER6|POWER7|PPC970) + MAKEVER="$OS"-perfctr-"$CPU" ;; + *) + MAKEVER="$OS"-generic ;; + esac + else + MAKEVER="$OS"-generic + fi ;; + solaris) + if test "$bitmode" = "64" -a "`isainfo -v | grep "64"`" = ""; then + as_fn_error $? 
"The bitmode you specified is not supported" "$LINENO" 5
+    fi
+    MAKEVER="$OS"-"$CPU" ;;
+  darwin)
+    MAKEVER="$OS" ;;
+esac
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAKEVER" >&5
+$as_echo "$MAKEVER" >&6; }
+if test "x$MAKEVER" = "x"; then   # no case arm above matched $OS
+  { $as_echo "$as_me:${as_lineno-$LINENO}: This platform is not supported so a generic build without CPU counters will be used" >&5
+$as_echo "$as_me: This platform is not supported so a generic build without CPU counters will be used" >&6;}
+  MAKEVER="generic_platform"
+fi
+
+##################################################
+# Set build macros
+##################################################
+FILENAME=Makefile.inc
+SHOW_CONF=showconf
+CTEST_TARGETS="all"
+FTEST_TARGETS="all"
+LIBRARY=libpapi.a
+SHLIB='libpapi.so.5.6.0.0'
+VLIB='libpapi.so.$(PAPIVER)'
+OMPCFLGS=-fopenmp
+CC_R='$(CC) -pthread'
+CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(VLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"'
+if test "$CC_COMMON_NAME" = "gcc"; then
+  if test "$bitmode" = "32"; then
+    BITFLAGS=-m32
+  elif test "$bitmode" = "64"; then
+    BITFLAGS=-m64
+  fi
+fi
+
+OPTFLAGS="$OPTFLAGS"
+PAPICFLAGS="$PAPICFLAGS -D_REENTRANT -D_GNU_SOURCE $NOTLS"   # plain assignment: '+=' is not POSIX sh (fails under dash, FreeBSD sh, ksh88)
+CFLAGS="$CFLAGS $BITFLAGS"
+if test "$CC_COMMON_NAME" = "gcc"; then
+  CFLAGS="$CFLAGS -Wall"
+fi
+FFLAGS="$CFLAGS $BITFLAGS $FFLAGS -Dlinux"
+
+# OS Support
+
+if (test "$OS" = "aix"); then
+  OSFILESSRC=aix-memory.c
+  OSLOCK=aix-lock.h
+  OSCONTEXT=aix-context.h
+elif (test "$OS" = "bgp"); then
+  OSFILESSRC=linux-bgp-memory.c
+  OSLOCK=linux-bgp-lock.h
+  OSCONTEXT=linux-bgp-context.h
+elif (test "$OS" = "bgq"); then
+  OSFILESSRC=linux-bgq-memory.c
+  OSLOCK=linux-bgq-lock.h
+  OSCONTEXT=linux-context.h
+elif (test "$OS" = "freebsd"); then
+  OSFILESSRC=freebsd-memory.c
+  OSLOCK="freebsd-lock.h"
+  OSCONTEXT="freebsd-context.h"
+elif (test "$OS" = "linux"); then
+  OSFILESSRC="linux-memory.c linux-timer.c linux-common.c"
+  OSFILESHDR="linux-memory.h linux-timer.h linux-common.h"
+  OSLOCK="linux-lock.h"
+  
OSCONTEXT="linux-context.h" +elif (test "$OS" = "solaris"); then + OSFILESSRC="solaris-memory.c solaris-common.c" + OSFILESHDR="solaris-memory.h solaris-common.h" + OSLOCK="solaris-lock.h" + OSCONTEXT="solaris-context.h" +elif (test "$OS" = "darwin"); then + OSFILESSRC="darwin-memory.c darwin-common.c" + OSFILESHDR="darwin-memory.h darwin-common.h" + OSLOCK="darwin-lock.h" + OSCONTEXT="darwin-context.h" +fi + +OSFILESOBJ='$(OSFILESSRC:.c=.o)' + + +if (test "$MAKEVER" = "aix-power5" || test "$MAKEVER" = "aix-power6" || test "$MAKEVER" = "aix-power7"); then + if test "$bitmode" = "64"; then + LIBRARY=libpapi64.a + SHLIB=libpapi64.so +# By default AIX enforces a limit on heap space +#( limiting the heap to share the same 256MB memory segment as stack ) +# changing the max data paramater moves the heap off the stack's memory segment + BITFLAGS='-q64 -bmaxdata:0x07000000000000' + ARG64=-X64 + else +# If the issue ever comes up, /dsa requires AIX v5.1 or higher +# and the Large address-space model (-bmaxdata) requires v4.3 or later +# see http://publib.boulder.ibm.com/infocenter/pseries/v5r3/topic/com.ibm.aix.genprogc/doc/genprogc/lrg_prg_support.htm#a179c11c5d + SHLIB=libpapi.so + BITFLAGS="-bmaxdata:0x80000000/dsa" + fi + + CPUCOMPONENT_NAME=aix + CPUCOMPONENT_C=aix.c + CPUCOMPONENT_OBJ=aix.o + VECTOR=_aix_vector + PAPI_EVENTS_CSV="papi_events.csv" + MISCHDRS="aix.h components/perfctr_ppc/ppc64_events.h papi_events_table.h" + MISCSRCS="aix.c" + CFLAGS+='-qenum=4 -Icomponents/perfctr_ppc -DNO_VARARG_MACRO -D_AIX -D_$(CPU_MODEL) -DNEED_FFSLL -DARCH_EVTS=\"$(ARCH_EVENTS).h\" -DCOMP_VECTOR=_ppc64_vectors -DSTATIC_PAPI_EVENTS_TABLE' + FFLAGS+='-WF,-D_$(CPU_MODEL) -WF,-DARCH_EVTS=\"$(ARCH_EVENTS).h\"' + CFLAGS+='-I$(PMAPI)/include -Icomponents/perfctr_ppc -qmaxmem=-1 -qarch=$(cpu_option) -qtune=$(cpu_option) -qlanglvl=extended $(BITFLAGS)' + if test $debug != "yes"; then + OPTFLAGS='-O3 -qstrict $(PMINIT)' + else + OPTFLAGS='$(PMINIT)' + fi + SMPCFLGS=-qsmp + 
OMPCFLGS='-qsmp=omp' + LDFLAGS='-L$(PMAPI)/lib -lpmapi' + CC_R=xlc_r + CC=xlc + CC_SHR="xlc -G -bnoentry" + for ac_prog in mpicc mpcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_MPICC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MPICC"; then + ac_cv_prog_MPICC="$MPICC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_MPICC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MPICC=$ac_cv_prog_MPICC +if test -n "$MPICC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MPICC" >&5 +$as_echo "$MPICC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$MPICC" && break +done + + F77=xlf + CPP='xlc -E $(CPPFLAGS)' + if test "$MAKEVER" = "aix-power5"; then + ARCH_EVENTS=power5_events + CPU_MODEL=POWER5 + cpu_option=pwr5 + DESCR="AIX 5.1.0 or greater with POWER5" + if test "$bitmode" = "64"; then + DESCR="$DESCR 64 bit build" + fi + elif test "$MAKEVER" = "aix-power6"; then + ARCH_EVENTS=power6_events + CPU_MODEL=POWER6 + cpu_option=pwr6 + DESCR="AIX 5.1.0 or greater with POWER6" + CPPFLAGS="-qlanglvl=extended" + if test "$bitmode" = "64"; then + DESCR="$DESCR 64 bit build" + fi + elif test "$MAKEVER" = "aix-power7"; then + ARCH_EVENTS=power7_events + CPU_MODEL=POWER7 + cpu_option=pwr7 + DESCR="AIX 5.1.0 or greater with POWER7" + CPPFLAGS="-qlanglvl=extended" + if test "$bitmode" = "64"; then + DESCR="$DESCR 64 bit build" + fi + fi +elif test "$MAKEVER" = 
"bgp"; then + CPP="$CC -E" + CPUCOMPONENT_NAME=linux-bgp + CPUCOMPONENT_C=linux-bgp.c + CPUCOMPONENT_OBJ=linux-bgp.o + VECTOR=_bgp_vectors + PAPI_EVENTS_CSV="papi_events.csv" + MISCSRCS= + CFLAGS='-g -gdwarf-2 -O2 -Wall -I. -I$(BGP_SYSDIR)/arch/include -DCOMP_VECTOR=_bgp_vectors' + tests="$tests bgp_tests" + SHOW_CONF=show_bgp_conf + BGP_SYSDIR=/bgsys/drivers/ppcfloor + BGP_GNU_LINUX_PATH='${BGP_SYSDIR}/gnu-linux' + LDFLAGS='-L$(BGP_SYSDIR)/runtime/SPI -lSPI.cna' + FFLAGS='-g -gdwarf-2 -O2 -Wall -I. -Dlinux' + OPTFLAGS="-g -Wall -O3" + TOPTFLAGS="-g -Wall -O0" + SHLIB=libpapi.so + DESCR="Linux for BlueGene/P" + LIBS=static + CC_SHR='$(CC) -shared -Xlinker "-soname" -Xlinker "$(SHLIB)" -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' + OMPCFLGS="" + +elif test "$MAKEVER" = "bgq"; then + FILENAME=Rules.bgpm + VECTOR=_bgq_vectors + CPUCOMPONENT_NAME=linux-bgq + CPUCOMPONENT_C=linux-bgq.c + CPUCOMPONENT_OBJ=linux-bgq.o + PAPI_EVENTS_CSV="papi_events.csv" + MISCSRCS="linux-bgq-common.c" + OPTFLAGS="-g -Wall -O3" + TOPTFLAGS="-g -Wall -O0" + SHLIB=libpapi.so + DESCR="Linux for Blue Gene/Q" + CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(SHLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' + OMPCFLGS="" + +elif test "$MAKEVER" = "CLE-perfmon2"; then + FILENAME=Rules.perfmon2 + CPUCOMPONENT_NAME=perfmon + CPUCOMPONENT_C=perfmon.c + CPUCOMPONENT_OBJ=perfmon.o + VECTOR=_papi_pfm_vector + PAPI_EVENTS_CSV="papi_events.csv" + F77=gfortran + CFLAGS="$CFLAGS -D__crayxt" + FFLAGS="" + +elif test "$MAKEVER" = "freebsd"; then + CPUCOMPONENT_NAME=freebsd + CPUCOMPONENT_C=freebsd.c + CPUCOMPONENT_OBJ=freebsd.o + VECTOR=_papi_freebsd_vector + PAPI_EVENTS_CSV="freebsd_events.csv" + MISCHDRS="freebsd/map-unknown.h freebsd/map.h freebsd/map-p6.h freebsd/map-p6-m.h freebsd/map-p6-3.h freebsd/map-p6-2.h freebsd/map-p6-c.h freebsd/map-k7.h freebsd/map-k8.h freebsd/map-p4.h freebsd/map-atom.h freebsd/map-core.h freebsd/map-core2.h freebsd/map-core2-extreme.h freebsd/map-i7.h freebsd/map-westme\ +re.h" 
+  MISCSRCS="$MISCSRCS freebsd/map-unknown.c freebsd/map.c freebsd/map-p6.c freebsd/map-p6-m.c freebsd/map-p6-3.c freebsd/map-p6-2.c freebsd/map-p6-c.c freebsd/map-k7.c freebsd/map-k8.c freebsd/map-p4.c freebsd/map-atom.c freebsd/map-core.c freebsd/map-core2.c freebsd/map-core2-extreme.c freebsd/map-i7.c freebsd/map-westme\
+re.c"
+  DESCR="FreeBSD -over libpmc- "
+  CFLAGS="$CFLAGS -I. -Ifreebsd -DPIC -fPIC"   # plain assignment: '+=' is not POSIX sh and is not accepted by FreeBSD /bin/sh
+  CC_SHR='$(CC) -shared -Xlinker "-soname" -Xlinker "libpapi.so" -Xlinker "-rpath" -Xlinker "$(LIBDIR)" -DPIC -fPIC -I. -Ifreebsd'
+
+elif test "$MAKEVER" = "linux-generic"; then
+  CPUCOMPONENT_NAME=linux-generic
+  CPUCOMPONENT_C=linux-generic.c
+  CPUCOMPONENT_OBJ=linux-generic.o
+  PAPI_EVENTS_CSV="papi_events.csv"
+  VECTOR=_papi_dummy_vector
+
+elif test "$MAKEVER" = "linux-pe"; then
+  FILENAME=Rules.pfm4_pe
+  CPUCOMPONENT_NAME=perf_event
+  components="perf_event"
+  if test "$disable_uncore" = "no"; then   # the uncore component is added only when disable_uncore is exactly "no"
+    components="$components perf_event_uncore"
+  fi
+
+elif test "$MAKEVER" = "linux-perfctr-x86"; then
+  FILENAME=Rules.perfctr-pfm
+  CPUCOMPONENT_NAME=perfctr-x86
+  VERSION=2.6.x
+  components="perfctr"
+
+elif (test "$MAKEVER" = "linux-perfctr-POWER5" || test "$MAKEVER" = "linux-perfctr-POWER5+" || test "$MAKEVER" = "linux-perfctr-POWER6" || test "$MAKEVER" = "linux-perfctr-POWER7" || test "$MAKEVER" = "linux-perfctr-PPC970"); then
+  FILENAME=Rules.perfctr
+  CPUCOMPONENT_NAME=perfctr-ppc
+  VERSION=2.7.x
+  components="perfctr_ppc"
+  CPU=ppc64
+  if test "$MAKEVER" = "linux-perfctr-POWER5"; then
+    CPU_MODEL=POWER5
+    ARCH_EVENTS=power5_events
+    ARCH_SPEC_EVTS=power5_events_map.c
+  elif test "$MAKEVER" = "linux-perfctr-POWER5+"; then
+    CPU_MODEL=POWER5p
+    ARCH_EVENTS=power5+_events
+    ARCH_SPEC_EVTS=power5+_events_map.c
+  elif test "$MAKEVER" = "linux-perfctr-POWER6"; then
+    CPU_MODEL=POWER6
+    ARCH_EVENTS=power6_events
+    ARCH_SPEC_EVTS=power6_events_map.c
+  elif test "$MAKEVER" = "linux-perfctr-POWER7"; then
+    CPU_MODEL=POWER7
+    ARCH_EVENTS=power7_events
+    
ARCH_SPEC_EVTS=power7_events_map.c
+  elif test "$MAKEVER" = "linux-perfctr-PPC970"; then
+    CPU_MODEL=PPC970
+    ARCH_EVENTS=ppc970_events
+    ARCH_SPEC_EVTS=ppc970_events_map.c
+  fi
+  CFLAGS="$CFLAGS "'-DPPC64 -D_$(CPU_MODEL) -D__perfctr__ -DARCH_EVTS=\"$(ARCH_EVENTS).h\" -DCOMP_VECTOR=_ppc64_vectors'   # single quotes: $(CPU_MODEL)/$(ARCH_EVENTS) are make variables; in double quotes the shell ran them as commands
+  FFLAGS='-D_$(CPU_MODEL)'
+
+elif test "$MAKEVER" = "linux-perfmon2"; then
+  FILENAME=Rules.perfmon2
+  CPUCOMPONENT_NAME=perfmon2
+  components="perfmon2"
+
+elif (test "$MAKEVER" = "linux-pfm-ia64" || test "$MAKEVER" = "linux-pfm-itanium2" || test "$MAKEVER" = "linux-pfm-montecito"); then
+  FILENAME=Rules.pfm
+  CPUCOMPONENT_NAME=perfmon-ia64
+  components="perfmon_ia64"
+  VERSION=3.y
+  if test "$MAKEVER" = "linux-pfm-itanium2"; then
+    CPU=2
+  else
+    CPU=3
+  fi
+  CFLAGS="$CFLAGS -DITANIUM$CPU"   # yields -DITANIUM2 for itanium2, -DITANIUM3 otherwise
+  FFLAGS="$FFLAGS -DITANIUM$CPU"
+
+  CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(SHLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"'
+
+elif test "$MAKEVER" = "solaris-ultra"; then
+  CPUCOMPONENT_NAME=solaris-ultra
+  CPUCOMPONENT_C=solaris-ultra.c
+  CPUCOMPONENT_OBJ=solaris-ultra.obj
+  VECTOR=_solaris_vector
+  PAPI_EVENTS_CSV="papi_events.csv"
+  DESCR="Solaris 5.8 or greater with UltraSPARC I, II or III"
+  if test "$CC" = "gcc"; then
+    F77=g77
+    CPP="$CC -E"
+    CC_R="$CC"
+    CC_SHR="$CC -shared -fpic"
+    OPTFLAGS=-O3
+    CFLAGS="$CFLAGS -DNEED_FFSLL"
+    FFLAGS=$CFLAGS
+  else
+    # Sun Workshop compilers: V5.0 and V6.0 R2
+    CPP="$CC -E"
+    CC_R="$CC -mt"
+    CC_SHR="$CC -ztext -G -Kpic"
+    CFLAGS="-xtarget=ultra3 -xarch=v8plusa -DNO_VARARG_MACRO -D__EXTENSIONS__ -DPAPI_NO_MEMORY_MANAGEMENT -DCOMP_VECTOR=_solaris_vectors"
+    SMPCFLGS=-xexplicitpar
+    OMPCFLGS=-xopenmp
+    F77=f90
+    FFLAGS=$CFLAGS
+    NOOPT=-xO0
+    OPTFLAGS="-g -fast -xtarget=ultra3 -xarch=v8plusa"
+  fi
+  LDFLAGS="$LDFLAGS -lcpc"
+  if test "$bitmode" = "64"; then
+    LIBRARY=libpapi64.a
+    SHLIB=libpapi64.so
+    CFLAGS="-xtarget=ultra3 -xarch=v9a -DNO_VARARG_MACRO -D__EXTENSIONS__ -DPAPI_NO_MEMORY_MANAGEMENT -DCOMP_VECTOR=_solaris_vectors"
+    
OPTFLAGS="-g -fast -xtarget=ultra3 -xarch=v9a"
+  fi
+
+elif test "$MAKEVER" = "solaris-niagara2"; then
+  CPUCOMPONENT_NAME=solaris-niagara2
+  CPUCOMPONENT_C=solaris-niagara2.c
+  CPUCOMPONENT_OBJ=solaris-niagara2.obj
+  VECTOR=_niagara2_vector
+  PAPI_EVENTS_CSV="papi_events.csv"
+  CFLAGS="-xtarget=native -xarch=native -DNO_VARARG_MACRO -D__EXTENSIONS__ -DCOMP_VECTOR=_niagara2_vector"
+  ORY_MANAGEMENT="-DCOMP_VECTOR=_solaris_vector"   # NOTE(review): looks like the truncated tail of a -DPAPI_NO_MEMORY_MANAGEMENT flag line; nothing in view reads ORY_MANAGEMENT -- confirm and remove
+  DESCR="Solaris 10 with libcpc2 and UltraSPARC T2 (Niagara 2)"
+  CPP="$CC -E"
+  CC_R="$CC -mt"
+  CC_SHR="$CC -ztext -G -Kpic"
+  SMPCFLGS=-xexplicitpar
+  OMPCFLGS=-xopenmp
+  F77=f90
+  FFLAGS=$CFLAGS
+  NOOPT=-xO0
+  OPTFLAGS="-fast"
+  FOPTFLAGS=$OPTFLAGS
+  LDFLAGS="$LDFLAGS -lcpc"
+  if test "$bitmode" = "64"; then
+    LIBRARY=libpapi64.a
+    SHLIB=libpapi64.so
+    CFLAGS="$CFLAGS -m64"
+    FFLAGS="$FFLAGS -m64"
+  fi
+elif test "$MAKEVER" = "darwin"; then
+  DESCR="Darwin"
+  CPUCOMPONENT_NAME=darwin
+  CPUCOMPONENT=linux-generic.c   # NOTE(review): other branches set CPUCOMPONENT_C; this value is overwritten by the next line
+  CPUCOMPONENT=linux-generic.obj   # NOTE(review): other branches set CPUCOMPONENT_OBJ; confirm intent before renaming, since it changes what the Makefile receives
+  CFLAGS="-DNEED_FFSLL"
+  CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-dylib -Xlinker "-rpath" -Xlinker "$(LIBDIR)"'
+  SHLIB=libpapi.dylib
+elif test "$MAKEVER" = "generic_platform"; then
+  DESCR="Generic platform"
+fi
+
+MISCOBJS='$(MISCSRCS:.c=.o)'
+
+
+
+if test "$F77" = "pgf77"; then
+  FFLAGS="$FFLAGS -Wall -Mextend"
+elif test "$F77" = "ifort"; then
+  FFLAGS="$FFLAGS -warn all"
+elif test "$F77" != "xlf"; then   # every compiler other than pgf77, ifort and xlf gets the fixed-line-length flag
+  FFLAGS="$FFLAGS -ffixed-line-length-132"
+fi
+
+if test "$CC_COMMON_NAME" = "icc"; then
+  OMPCFLGS=-openmp
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for components to build" >&5
+$as_echo_n "checking for components to build... " >&6; }
+COMPONENT_RULES=components/Rules.components
+echo "/* Automatically generated by configure */" > components_config.h
+echo "#ifndef COMPONENTS_CONFIG_H" >> components_config.h
+echo "#define COMPONENTS_CONFIG_H" >> components_config.h
+echo "" >> components_config.h
+
+
+# Check whether --with-components was given.
+if test "${with_components+set}" = set; then :
+  withval=$with_components; components="$components $withval"
+fi
+
+
+# This is an ugly hack to keep building on configurations covered by any-null in the past.
+if test "$VECTOR" = "_papi_dummy_vector"; then
+  if test "x$components" = "x"; then
+    echo "papi_vector_t ${VECTOR} = {" >> components_config.h
+    echo "    .size = {
+        .context = sizeof ( int ),
+        .control_state = sizeof ( int ),
+        .reg_value = sizeof ( int ),
+        .reg_alloc = sizeof ( int ),
+    },
+    .cmp_info = {
+        .num_native_events = 0,
+        .num_preset_events = 0,
+        .num_cntrs = 0,
+        .name = \"Your system is unsupported! \",
+        .short_name = \"UNSUPPORTED!\"
+    },
+    .dispatch_timer = NULL,
+    .get_overflow_address = NULL,
+    .start = NULL,
+    .stop = NULL,
+    .read = NULL,
+    .reset = NULL,
+    .write = NULL,
+    .cleanup_eventset = NULL,
+    .stop_profiling = NULL,
+    .init_component = NULL,
+    .init_thread = NULL,
+    .init_control_state = NULL,
+    .update_control_state = NULL,
+    .ctl = NULL,
+    .set_overflow = NULL,
+    .set_profile = NULL,
+    .set_domain = NULL,
+    .ntv_enum_events = NULL,
+    .ntv_name_to_code = NULL,
+    .ntv_code_to_name = NULL,
+    .ntv_code_to_descr = NULL,
+    .ntv_code_to_bits = NULL,
+    .ntv_code_to_info = NULL,
+    .allocate_registers = NULL,
+    .shutdown_thread = NULL,
+    .shutdown_component = NULL,
+    .user = NULL,
+};" >> components_config.h
+  # but in the face of actual components, we don't have to do hacky size games
+  else
+    VECTOR=""
+  fi
+elif test "x$VECTOR" != "x"; then
+  echo "extern papi_vector_t ${VECTOR};" >> components_config.h
+fi
+
+
+
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c | tr -d ' '`   # bytes up to and including the first '/', 0 when there is none; tr strips the padding BSD/macOS wc emits
+  if test "$idx" = 0; then
+    subcomp=$comp
+  else
+    subcomp=`echo "$comp" | sed "s/^.\{${idx}\}//"`   # drop the leading "dir/" part; BRE interval avoids the non-portable 'sed -E'
+  fi
+  COMPONENT_RULES="$COMPONENT_RULES components/$comp/Rules.$subcomp"
+  echo "extern papi_vector_t _${subcomp}_vector;" >> components_config.h
+done
+echo "" >> components_config.h
+echo "struct papi_vectors *_papi_hwd[] = {" >> 
components_config.h
+
+if test "x$VECTOR" != "x"; then
+  echo "   &${VECTOR}," >> components_config.h
+fi
+
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c | tr -d ' '`   # bytes up to and including the first '/', 0 when there is none; tr strips the padding BSD/macOS wc emits
+  if test "$idx" = 0; then
+    subcomp=$comp
+  else
+    subcomp=`echo "$comp" | sed "s/^.\{${idx}\}//"`   # drop the leading "dir/" part; BRE interval avoids the non-portable 'sed -E'
+  fi
+  echo "   &_${subcomp}_vector," >> components_config.h
+done
+echo "   NULL" >> components_config.h
+echo "};" >> components_config.h
+echo "" >> components_config.h
+echo "#endif" >> components_config.h
+
+# check for component tests
+for comp in $components; do
+  if test "`find components/$comp -name "tests"`" != "" ; then
+    COMPONENTS="$COMPONENTS $comp"
+  fi
+done
+tests="$tests comp_tests"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $components" >&5
+$as_echo "$components" >&6; }
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for PAPI event CSV filename to use" >&5
+$as_echo_n "checking for PAPI event CSV filename to use... " >&6; }
+if test "x$PAPI_EVENTS_CSV" = "x"; then   # '=' is the POSIX string comparison; '==' is rejected by dash, which skipped this default
+  PAPI_EVENTS_CSV="papi_events.csv"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $PAPI_EVENTS_CSV" >&5
+$as_echo "$PAPI_EVENTS_CSV" >&6; }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+if test "$cross_compiling" = "yes" ; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling genpapifdef with $nativecc because cross compiling" >&5
+$as_echo "$as_me: Compiling genpapifdef with $nativecc because cross compiling" >&6;}
+  $nativecc -I. genpapifdef.c -o genpapifdef
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling genpapifdef with $CC" >&5
+$as_echo "$as_me: Compiling genpapifdef with $CC" >&6;}
+  $CC -I. 
genpapifdef.c -o genpapifdef +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: Generating fpapi.h" >&5 +$as_echo "$as_me: Generating fpapi.h" >&6;} +./genpapifdef -c > fpapi.h +{ $as_echo "$as_me:${as_lineno-$LINENO}: Generating f77papi.h" >&5 +$as_echo "$as_me: Generating f77papi.h" >&6;} +./genpapifdef -f77 > f77papi.h +{ $as_echo "$as_me:${as_lineno-$LINENO}: Generating f90papi.h" >&5 +$as_echo "$as_me: Generating f90papi.h" >&6;} +./genpapifdef -f90 > f90papi.h + +{ $as_echo "$as_me:${as_lineno-$LINENO}: $FILENAME will be included in the generated Makefile" >&5 +$as_echo "$as_me: $FILENAME will be included in the generated Makefile" >&6;} +ac_config_files="$ac_config_files Makefile papi.pc" + +ac_config_files="$ac_config_files components/Makefile_comp_tests.target testlib/Makefile.target utils/Makefile.target ctests/Makefile.target ftests/Makefile.target validation_tests/Makefile.target" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. 
+( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. 
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. 
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. 
+as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by PAPI $as_me 5.6.0.0, which was +generated by GNU Autoconf 2.69. 
Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +PAPI config.status 5.6.0.0 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." 
+ +ac_pwd='$ac_pwd' +srcdir='$srcdir' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." 
;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "papi.pc") CONFIG_FILES="$CONFIG_FILES papi.pc" ;; + "components/Makefile_comp_tests.target") CONFIG_FILES="$CONFIG_FILES components/Makefile_comp_tests.target" ;; + "testlib/Makefile.target") CONFIG_FILES="$CONFIG_FILES testlib/Makefile.target" ;; + "utils/Makefile.target") CONFIG_FILES="$CONFIG_FILES utils/Makefile.target" ;; + "ctests/Makefile.target") CONFIG_FILES="$CONFIG_FILES ctests/Makefile.target" ;; + "ftests/Makefile.target") CONFIG_FILES="$CONFIG_FILES ftests/Makefile.target" ;; + "validation_tests/Makefile.target") CONFIG_FILES="$CONFIG_FILES validation_tests/Makefile.target" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. 
+# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? 
"could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? 
"could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS " +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi + ;; + + + esac + +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. 
+# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/src/configure.in b/src/configure.in new file mode 100644 index 0000000..cff36bc --- /dev/null +++ b/src/configure.in @@ -0,0 +1,1833 @@ +# Process this file with autoconf to produce a configure script. +# File: configure.in + +# cross compile sample +# ARCH=mips CC=scgcc ./configure --with-arch=mips --host=mips64el-gentoo-linux-gnu- --with-ffsll --with-libpfm4 --with-perf-events --with-virtualtimer=times --with-walltimer=gettimeofday --with-tls=__thread --with-CPU=mips +# cross compiling should work differently... 
+ +AC_PREREQ(2.59) +AC_INIT(PAPI, 5.6.0.0, ptools-perfapi@icl.utk.edu) +AC_CONFIG_SRCDIR([papi.c]) +AC_CONFIG_HEADER([config.h]) + +AC_DEFUN([AS_AC_EXPAND], + [EXP_VAR=[$1] + FROM_VAR=[$2] + prefix_save=$prefix + exec_prefix_save=$exec_prefix + if test "x$prefix" = "xNONE"; then + prefix="$ac_default_prefix" + fi + if test "x$exec_prefix" = "xNONE"; then + exec_prefix=$prefix + fi + full_var="$FROM_VAR" + while true; + do + new_full_var="`eval echo $full_var`" + if test "x$new_full_var" = "x$full_var"; then + break; + fi + full_var=$new_full_var + done + full_var=$new_full_var + AC_DEFINE_UNQUOTED([$1], "$full_var") + prefix=$prefix_save + exec_prefix=$exec_prefix_save ]) + +AC_MSG_CHECKING(for architecture) +AC_ARG_WITH(arch, + [ --with-arch=<arch> Specify architecture (uname -m)], + [arch=$withval], + [arch=`uname -m`]) +AC_MSG_RESULT($arch) + +AC_ARG_WITH(bitmode, + [ --with-bitmode=<32,64> Specify bit mode of library], + [bitmode=$withval]) + +AC_MSG_CHECKING(for OS) +AC_ARG_WITH(OS, + [ --with-OS=<os> Specify operating system], + [OS=$withval], + [OS="`uname | tr '[A-Z]' '[a-z]'`" + if (test "$OS" = "SunOS" || test "$OS" = "sunos"); then + OS=solaris + fi ]) +AC_MSG_RESULT($OS) + +AC_MSG_CHECKING(for OS version) +AC_ARG_WITH(OSVER, + [ --with-OSVER=<ver> Specify operating system version], + [OSVER=$withval], + [if test "$OS" != "bgp" -a "$OS" != "bgq"; then + OSVER="`uname -r`" + fi ]) +AC_MSG_RESULT($OSVER) + +AC_MSG_CHECKING(for perf_event workaround level) +AC_ARG_WITH(assumed_kernel, + [ --with-assumed-kernel=<kernel version> Assume kernel version is <kernel version> for purposes of workarounds], + [assumed_kernel=$withval; CFLAGS="$CFLAGS -DASSUME_KERNEL=\\\"$with_assumed_kernel\\\""], + [assumed_kernel="autodetect"] + ) +AC_MSG_RESULT($assumed_kernel) + +AC_MSG_CHECKING([for if MIC should be used]) + AC_ARG_WITH(mic, + [ --with-mic To compile for Intel MIC ], + [MIC=yes + tls=__thread + virtualtimer=cputime_id + perf_events=yes + walltimer=clock_realtime_hr + ffsll=no + cross_compiling=yes +
arch=k1om], + [MIC=no]) +AC_MSG_RESULT($MIC) +AC_SUBST(MIC) + +CFLAGS="$CFLAGS -g" +#If not set, set FFLAGS to null to prevent AC_PROG_F77 from defaulting it to -g -O2 +if test "x$FFLAGS" = "x"; then + FFLAGS="" +fi +OPTFLAGS="-O2" +TOPTFLAGS="-O1" +AC_PROG_CC([xlc icc gcc cc]) +AC_PROG_F77([xlf ifort gfortran f95 f90 f77]) +if test "x$F77" = "x"; then + F77= +fi +AC_CHECK_PROG( [MPICC], mpicc, [mpicc], []) + +# Let's figure out what CC actually is... +# Used in later checks to set compiler specific options +if $CC -V 2>&1 | grep '^Intel(R) C' >/dev/null 2>&1 ; then + CC_COMMON_NAME="icc" +elif $CC -v 2>&1 | grep 'gcc version' >/dev/null 2>&1 ; then + CC_COMMON_NAME="gcc" +elif $CC -qversion 2>&1 | grep 'IBM XL C' >/dev/null 2>&1; then + CC_COMMON_NAME="xlc" +else + CC_COMMON_NAME="unknown" +fi + +#prevent icc warnings about overriding optimization settings set by AC_PROG_CC +# remark #869: parameter was never referenced +# remark #271: trailing comma is nonstandard +if test "$CC_COMMON_NAME" = "icc"; then + CFLAGS="$CFLAGS -diag-disable 188,869,271" + if test "$MIC" = "yes"; then + CC="$CC -mmic -fPIC" + fi +fi + +if test "$F77" = "ifort" -a "$MIC" = "yes"; then + F77="$F77 -mmic -fPIC" +fi + +AC_PROG_AWK +AC_PROG_CPP +AC_PROG_LN_S +AC_PROG_MAKE_SET +AC_PROG_RANLIB +AC_GNU_SOURCE +AC_HEADER_STDC +AC_C_INLINE +AC_HEADER_TIME +AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h sched.h]) +AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time sched_getcpu]) + +# +# Check if the system provides dl* symbols without -ldl, and if not, +# check for -ldl existence.
+#
+
+AC_MSG_CHECKING([for dlopen and dlerror symbols in base system])
+AC_TRY_LINK([#include <dlfcn.h>],
+  [void *p = dlopen ("", 0); char *c = dlerror();],
+  [dlsymbols_in_base="yes"], [dlsymbols_in_base="no"])
+if test "${dlsymbols_in_base}" = "yes"; then
+  AC_MSG_RESULT([found])
+  LDL=""
+else
+  AC_MSG_RESULT([not found])
+  AC_MSG_CHECKING([for dlopen and dlerror symbols in -ldl])
+  SAVED_LIBS=${LIBS}
+  LIBS="${LIBS} -ldl"
+  AC_TRY_LINK([#include <dlfcn.h>],
+    [void *p = dlopen ("", 0); char *c = dlerror();],
+    [has_ldl="yes"], [has_ldl="no"])
+  LIBS=${SAVED_LIBS}
+  if test "${has_ldl}" = "yes" ; then
+    AC_MSG_RESULT([found])
+    LDL="-ldl"
+  else
+    AC_MSG_ERROR([cannot find dlopen and dlerror symbols neither in the base system libraries nor in -ldl])
+  fi
+fi
+AC_SUBST(LDL)
+
+
+# Per-OS defaults. The cross-compiled targets (CLE, bgp) pre-answer the
+# timer, TLS and ffsll questions because the run-time probes further down
+# refuse to run when cross_compiling is yes.
+if test "$OS" = "CLE"; then
+  virtualtimer=times
+  tls=__thread
+  walltimer=cycle
+  ffsll=yes
+  cross_compiling=yes
+  STATIC="-static"
+  # _rtc is only defined when using the Cray compiler
+  AC_MSG_CHECKING([for _rtc intrinsic])
+  rtc_ok=yes
+  AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
+               #include <intrinsics.h>
+               #endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define for _rtc() intrinsic.])],
+    [rtc_ok=no
+     AC_DEFINE(NO_RTC_INTRINSIC,1,[Define if _rtc() is not found.])])
+  AC_MSG_RESULT($rtc_ok)
+elif test "$OS" = "bgp"; then
+  CC=powerpc-bgp-linux-gcc
+  F77=powerpc-bgp-linux-gfortran
+  walltimer=cycle
+  virtualtimer=perfctr
+  tls=no
+  ffsll=yes
+  cross_compiling=yes
+elif test "$OS" = "bgq"; then
+  AC_ARG_WITH(bgpm_installdir,
+    [  --with-bgpm_installdir=<path>  Specify the installation path of BGPM],
+    [BGPM_INSTALL_DIR=$withval
+     CFLAGS="$CFLAGS -I$withval"],
+    [AC_MSG_ERROR([BGQ CPU component requires installation path of BGPM (see --with-bgpm_installdir)])])
+  bitmode=64
+  tls=no
+elif test "$OS" = "linux"; then
+  # A 64-bit build on ppc64/x86_64 installs into lib64, but only when the
+  # user left libdir at its default.
+  if test "$arch" = "ppc64" -o "$arch" = "x86_64"; then
+    if test "$bitmode" = "64" -a "$libdir" = '${exec_prefix}/lib'; then
+      libdir='${exec_prefix}/lib64'
+    fi
+  fi
+elif test "$OS" = "solaris"; then
+  AC_CHECK_TYPE([hrtime_t],
+    [AC_DEFINE(HAVE_HRTIME_T, 1, [Define if hrtime_t is defined in <sys/time.h>])],[],
+    [#if HAVE_SYS_TIME_H
+     #include <sys/time.h>
+     #endif])
+  if test "x$AR" = "x"; then
+    AR=/usr/ccs/bin/ar
+  fi
+
+fi
+
+if test "x$AR" = "x"; then
+  AR=ar
+fi
+
+if test "$cross_compiling" = "yes" ; then
+  AC_MSG_CHECKING(for native compiler for header generation)
+  AC_ARG_WITH(nativecc,
+    [  --with-nativecc=<path>  Specify native C compiler for header generation ],
+    [nativecc=$withval],
+    [nativecc=gcc])
+  AC_MSG_RESULT($nativecc)
+fi
+
+AC_ARG_WITH(tests,
+  [  --with-tests=<tests>    Specify which tests to run on install ],
+  [tests=$withval],
+  [tests="ctests ftests"])
+
+# Debug selection: "yes" is a debug build without the memory tracker,
+# "memory" is a debug build with it, anything else is a normal build
+# without the memory tracker.
+AC_MSG_CHECKING(for debug build)
+AC_ARG_WITH(debug,
+  [  --with-debug=<yes|memory|no>  Build a debug version, debug version plus memory tracker or none ],
+  [debug=$withval])
+if test "$debug" = "yes"; then
+  if test "$CC_COMMON_NAME" = "gcc"; then
+    CFLAGS="$CFLAGS -g3"
+  fi
+  OPTFLAGS="-O0"
+  PAPICFLAGS+=" -DDEBUG -DPAPI_NO_MEMORY_MANAGEMENT"
+elif test "$debug" = "memory"; then
+  if test "$CC_COMMON_NAME" = "gcc"; then
+    CFLAGS="$CFLAGS -g3"
+  fi
+  OPTFLAGS="-O0"
+  PAPICFLAGS+=" -DDEBUG"
+else
+  PAPICFLAGS+=" -DPAPI_NO_MEMORY_MANAGEMENT"
+fi
+AC_MSG_RESULT($debug)
+
+if test "$CC_COMMON_NAME" = "gcc"; then
+  # Ask the compiler that will actually be used, not whichever gcc happens
+  # to be first in PATH.
+  gcc_version=`$CC -v 2>&1 | tail -n 1 | awk '{printf $3}'`
+  major=`echo $gcc_version | sed 's/\([[^.]][[^.]]*\).*/\1/'`
+  minor=`echo $gcc_version | sed 's/[[^.]][[^.]]*.\([[^.]][[^.]]*\).*/\1/'`
+  if (test "$major" -ge 4 || test "$major" = 3 -a "$minor" -ge 4); then
+    CFLAGS+=" -Wextra"
+  else
+    CFLAGS+=" -W"
+  fi
+# -Wextra => -Woverride-init on gcc >= 4.2
+# This issues a warning (error under -Werror) for some libpfm4 code.
+  AC_MSG_CHECKING( for -Wno-override-init)
+  oldcflags="$CFLAGS"
+  CFLAGS+=" -Wall -Wextra -Werror -Wno-override-init"
+  AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+    [ struct A { int x; int y; };
+      int main(void) { struct A a = {.x = 0, .y = 0, .y = 5 }; return a.x; }
+    ])],
+    [HAVE_NO_OVERRIDE_INIT=1],
+    [HAVE_NO_OVERRIDE_INIT=0] )
+  CFLAGS="$oldcflags"
+  AC_MSG_RESULT($HAVE_NO_OVERRIDE_INIT)
+
+fi
+AC_MSG_CHECKING(for CPU type)
+AC_ARG_WITH(CPU,
+  [  --with-CPU=<cpu>        Specify CPU type],
+  [CPU=$withval
+   case "$CPU" in
+     core|core2|i7|atom|p4|p3|opteron|athlon)
+       MISCSRCS="$MISCSRCS x86_cpuid_info.c"
+   esac],
+  [case "$OS" in
+     aix)
+       CPU="`/usr/sbin/lsattr -E -l proc0 | grep type | cut -d '_' -f 2 | cut -d ' ' -f 1 | tr '[A-Z]' '[a-z]'`"
+       if test "$CPU" = ""; then
+         CPU="`/usr/sbin/lsattr -E -l proc1 | grep type | cut -d '_' -f 2 | cut -d ' ' -f 1 | tr '[A-Z]' '[a-z]'`"
+       fi ;;
+     freebsd)
+       family=`uname -m`
+
+       if test "$family" = "amd64"; then
+         MISCSRCS="$MISCSRCS x86_cpuid_info.c"
+
+       elif test "$family" = "i386"; then
+         MISCSRCS="$MISCSRCS x86_cpuid_info.c"
+       fi ;;
+
+     darwin)
+       family=`uname -m`
+
+       MISCSRCS="$MISCSRCS x86_cpuid_info.c"
+
+       ;;
+
+     linux)
+       family=`uname -m`
+
+       if test "$family" = "x86_64"; then
+         MISCSRCS="$MISCSRCS x86_cpuid_info.c"
+         CPU="x86"
+
+       elif test "$family" = "i686"; then
+         MISCSRCS="$MISCSRCS x86_cpuid_info.c"
+         CPU="x86"
+
+       elif test "$family" = "ppc64"; then
+         CPU_info="`cat /proc/cpuinfo | grep cpu | cut -d: -f2 | cut -d' ' -f2 | sed '2,$d'`"
+         case "$CPU_info" in
+           PPC970*) CPU="PPC970";;
+           POWER5) CPU="POWER5";;
+           POWER5+) CPU="POWER5+";;
+           POWER6) CPU="POWER6";;
+           POWER7) CPU="POWER7";;
+         esac
+       fi ;;
+     solaris)
+       AC_CHECK_HEADER([libcpc.h],
+         [CFLAGS="$CFLAGS -lcpc"
+          AC_TRY_RUN([#include <stdlib.h>
+                      #include <libcpc.h>
+                      int main() {
+                        // Check for libcpc 2
+                        if(CPC_VER_CURRENT == 2)
+                          exit(0);
+                        exit(1);
+                      } ],
+            [cpc_version=2],
+            [cpc_version=0])],
+         [AC_MSG_ERROR([libcpc is needed for running PAPI on Solaris]) ])
+       processor=`uname -p`
+       machinetype=`uname -m`
+       if test "$processor" = "sparc"; then
+         if test "$machinetype" = "sun4u"; then
+           CPU=ultra
+           AC_CHECK_LIB([cpc], [cpc_take_sample], [],
+             [AC_MSG_ERROR([libcpc.a is needed on Solaris, install SUNWcpc]) ])
+         elif test "$machinetype" = "sun4v"; then
+           CPU=niagara2
+           if test "$cpc_version" != "2"; then
+             AC_MSG_ERROR([libcpc2 needed for Niagara 2])
+           fi
+         else
+           AC_MSG_ERROR([$machinetype not supported])
+         fi
+       else
+         AC_MSG_ERROR([Only SPARC processors are supported on Solaris])
+       fi ;;
+     bgp)
+       CPU=bgp ;;
+     bgq)
+       CPU=bgq ;;
+   esac ])
+AC_MSG_RESULT($CPU)
+AC_DEFINE_UNQUOTED(CPU,$CPU,[cpu type])
+
+# First set pthread-mutexes based on arch
+case $arch in
+  aarch64|arm*)
+    pthread_mutexes=yes
+    CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES"
+    echo "forcing use of pthread mutexes... " >&6
+    ;;
+esac
+
+
+AC_ARG_WITH(pthread-mutexes,
+  [  --with-pthread-mutexes  Specify use of pthread mutexes rather than custom PAPI locks],
+  [pthread_mutexes=yes
+   CFLAGS="$CFLAGS -DUSE_PTHREAD_MUTEXES"
+  ])
+
+# ffsll: either taken from the command line / platform defaults, or probed.
+# didcheck records that the probe ran, so its result is not reported twice.
+ AC_ARG_WITH(ffsll,
+   [  --with-ffsll            Specify use of the ffsll() function ],
+   [ffsll=$withval],
+   [if test "$cross_compiling" = "yes" ; then
+      AC_MSG_ERROR([ffsll must be specified for cross compile])
+    fi
+    didcheck=1
+    AC_CHECK_FUNC(ffsll,[ffsll=yes],[ffsll=no]) ])
+ if test "$ffsll" = "yes" ; then
+   AC_DEFINE(HAVE_FFSLL, 1, This platform has the ffsll() function)
+ fi
+if test "$didcheck" != "1"; then
+  AC_MSG_CHECKING(for ffsll)
+  if test "$ffsll" = "yes" ; then
+    AC_DEFINE(HAVE_FFSLL, 1, This platform has the ffsll() function)
+  fi
+  AC_MSG_RESULT($ffsll)
+fi
+
+# Prefer a real gettid(); fall back to syscall(SYS_gettid).
+AC_MSG_CHECKING(for working gettid)
+AC_LINK_IFELSE([AC_LANG_SOURCE([#include <unistd.h>
+  main() { pid_t a = gettid(); }])],
+  [AC_MSG_RESULT(yes)
+   AC_DEFINE(HAVE_GETTID, 1, [Full gettid function])],
+  [AC_MSG_RESULT(no)
+   AC_MSG_CHECKING(for working syscall(SYS_gettid))
+   AC_LINK_IFELSE([AC_LANG_SOURCE([#include <unistd.h>
+     #include <sys/syscall.h>
+     main() { pid_t a = syscall(SYS_gettid); }])],
+     [AC_MSG_RESULT(yes)
+      AC_DEFINE(HAVE_SYSCALL_GETTID, 1, [gettid syscall function])],
+     [AC_MSG_RESULT(no)]) ])
+
+# Wall-clock timer. Without an explicit choice the probes are tried in
+# order: mmtimer, CLOCK_REALTIME_HR, CLOCK_REALTIME, and finally the raw
+# cycle counter.
+AC_ARG_WITH(walltimer,
+  [  --with-walltimer=<gettimeofday|mmtimer|clock_realtime_hr|clock_realtime|cycle>  Specify realtime timer ],
+  [walltimer=$withval],
+  [if test "$cross_compiling" = "yes" ; then
+     AC_MSG_ERROR([walltimer must be specified for cross compile])
+   fi
+   AC_MSG_CHECKING(for working MMTIMER)
+   AC_TRY_RUN([#include <stdio.h>
+               #include <stdlib.h>
+               #include <unistd.h>
+               #include <fcntl.h>
+               #include <sys/ioctl.h>
+               #include <linux/mmtimer.h>
+               #ifndef MMTIMER_FULLNAME
+               #define MMTIMER_FULLNAME "/dev/mmtimer"
+               #endif
+               int main() {
+                 int offset;
+                 int fd;
+                 if((fd = open(MMTIMER_FULLNAME, O_RDONLY)) == -1)
+                   exit(1);
+                 if ((offset = ioctl(fd, MMTIMER_GETOFFSET, 0)) < 0)
+                   exit(1);
+                 close(fd);
+                 exit(0);
+               } ],
+     [walltimer="mmtimer"
+      AC_MSG_RESULT(yes)],
+     [AC_MSG_RESULT(no)
+      AC_MSG_CHECKING(for working CLOCK_REALTIME_HR POSIX 1b timer)
+      AC_TRY_RUN([#include <stdio.h>
+                  #include <stdlib.h>
+                  #include <time.h>
+                  #include <unistd.h>
+                  #include <sys/syscall.h>
+                  main() {
+                    struct timespec t1, t2;
+                    double seconds;
+                    if (syscall(__NR_clock_gettime,CLOCK_REALTIME_HR,&t1) == -1) exit(1);
+                    sleep(1);
+                    if (syscall(__NR_clock_gettime,CLOCK_REALTIME_HR,&t2) == -1) exit(1);
+                    seconds = ((double)t2.tv_sec + (double)t2.tv_nsec/1000000000.0) - ((double)t1.tv_sec + (double)t1.tv_nsec/1000000000.0);
+                    if (seconds > 1.0)
+                      exit(0);
+                    else
+                      exit(1);
+                  } ],
+        [walltimer="clock_realtime_hr"
+         AC_MSG_RESULT(yes)],
+        [AC_MSG_RESULT(no)
+         AC_MSG_CHECKING(for working CLOCK_REALTIME POSIX 1b timer)
+         AC_TRY_RUN([#include <stdio.h>
+                     #include <stdlib.h>
+                     #include <time.h>
+                     #include <unistd.h>
+                     #include <sys/syscall.h>
+                     main() {
+                       struct timespec t1, t2;
+                       double seconds;
+                       if (syscall(__NR_clock_gettime,CLOCK_REALTIME,&t1) == -1) exit(1);
+                       sleep(1);
+                       if (syscall(__NR_clock_gettime,CLOCK_REALTIME,&t2) == -1) exit(1);
+                       seconds = ((double)t2.tv_sec + (double)t2.tv_nsec/1000000000.0) - ((double)t1.tv_sec + (double)t1.tv_nsec/1000000000.0);
+                       if (seconds > 1.0)
+                         exit(0);
+                       else
+                         exit(1);
+                     } ],
+           [walltimer="clock_realtime"
+            AC_MSG_RESULT(yes) ],
+           [walltimer="cycle"
+            AC_MSG_RESULT(no)]) ]) ]) ])
+AC_MSG_CHECKING(for which real time clock to use)
+if test "$walltimer" = "gettimeofday"; then
+  AC_DEFINE(HAVE_GETTIMEOFDAY, 1, [Normal gettimeofday timer])
+elif test "$walltimer" = "mmtimer"; then
+  AC_DEFINE(HAVE_MMTIMER, 1, [Altix memory mapped global cycle counter])
+  altix="-DALTIX"
+elif test "$walltimer" = "clock_realtime_hr"; then
+  AC_DEFINE(HAVE_CLOCK_GETTIME, 1, [POSIX 1b clock])
+  AC_DEFINE(HAVE_CLOCK_GETTIME_REALTIME_HR, 1, [POSIX 1b realtime HR clock])
+elif test "$walltimer" = "clock_realtime"; then
+  AC_DEFINE(HAVE_CLOCK_GETTIME, 1, [POSIX 1b clock])
+  AC_DEFINE(HAVE_CLOCK_GETTIME_REALTIME, 1, [POSIX 1b realtime clock])
+elif test "$walltimer" = "cycle"; then
+  AC_DEFINE(HAVE_CYCLE, 1, [Native access to a hardware cycle counter])
+else
+  AC_MSG_ERROR([Unknown value for walltimer])
+fi
+AC_MSG_RESULT($walltimer)
+
+# The thread probes below are built with -pthread only; the caller's flags
+# are stashed here and restored once the virtual timer probe has finished.
+SAVED_LIBS=$LIBS
+SAVED_LDFLAGS=$LDFLAGS
+SAVED_CFLAGS=$CFLAGS
+LIBS=""
+LDFLAGS=""
+CFLAGS="-pthread"
+
+AC_ARG_WITH(tls,
+  [  --with-tls=<keyword>    This platform supports thread local storage with a keyword ],
+  [tls=$withval],
+  [if test "$cross_compiling" = "yes" ; then
+     AC_MSG_ERROR([tls must be specified for cross compile])
+   fi
+   AC_MSG_CHECKING(for working __thread)
+   AC_TRY_RUN([#include <pthread.h>
+               #include <unistd.h>
+               extern __thread int i;
+               static int res1, res2;
+               void thread_main (void *arg) {
+                 i = (int)arg;
+                 sleep (1);
+                 if ((int)arg == 1)
+                   res1 = (i == (int)arg);
+                 else
+                   res2 = (i == (int)arg);
+               }
+               __thread int i;
+               int main () {
+                 pthread_t t1, t2;
+                 i = 5;
+                 pthread_create (&t1, NULL, thread_main, (void *)1);
+                 pthread_create (&t2, NULL, thread_main, (void *)2);
+                 pthread_join (t1, NULL);
+                 pthread_join (t2, NULL);
+                 return !(res1 + res2 == 2);
+               } ],
+     [AC_MSG_RESULT(yes)
+      tls="__thread"],
+     [AC_MSG_RESULT(no)
+      tls="no" ])
+   if test "$OS" = "linux"; then
+     if test "x$tls" = "x__thread"; then
+       # On some linux distributions, TLS works in executables, but linking against
+       # a shared library containing TLS fails with: undefined reference to `__tls_get_addr'
+       # NOTE(review): this probe invokes gcc by name rather than the selected
+       # compiler - confirm whether that is intended before changing it.
+       rm -f conftest.c conftest.so conftest
+       echo "static __thread int foo; void main () { foo = 5; }" > conftest.c
+       gcc -fPIC --shared -o conftest.so conftest.c > /dev/null 2>&1
+       gcc -o conftest conftest.so > /dev/null 2>&1
+       if test ! -f conftest; then
+         AC_MSG_WARN([Disabling usage of __thread.]);
+         tls="no"
+       fi
+       rm -f conftest.c conftest.so conftest
+     fi
+   fi])
+AC_MSG_CHECKING(for high performance thread local storage)
+if test "$tls" = "no"; then
+  NOTLS="-DNO_TLS"
+elif test "x$tls" != "x"; then
+  if test "$tls" = "yes"; then
+    tls="__thread"
+  fi
+  NOTLS="-DUSE_COMPILER_TLS"
+  AC_DEFINE_UNQUOTED(HAVE_THREAD_LOCAL_STORAGE,$tls,[Keyword for per-thread variables])
+fi
+AC_MSG_RESULT($tls)
+
+# Per-thread virtual timer. Both probes start an idle thread and a busy
+# thread and succeed only if the two threads report different CPU times.
+AC_ARG_WITH(virtualtimer,
+  [  --with-virtualtimer=<times|getrusage|clock_thread_cputime_id|proc|perfctr>  Specify per-thread virtual timer ],
+  [virtualtimer=$withval],
+  [if test "$cross_compiling" = "yes" ; then
+     AC_MSG_ERROR([virtualtimer must be specified for cross compile])
+   fi
+   AC_MSG_CHECKING(for working CLOCK_THREAD_CPUTIME_ID POSIX 1b timer)
+   AC_TRY_RUN([#include <pthread.h>
+               #include <signal.h>
+               #include <sys/times.h>
+               #include <assert.h>
+               #include <stdio.h>
+               #include <stdlib.h>
+               #include <sys/types.h>
+               #include <unistd.h>
+               #include <sys/syscall.h>
+               #include <time.h>
+               #if !defined( SYS_gettid )
+               #define SYS_gettid 1105
+               #endif
+               struct timespec threadone = { 0, 0 };
+               struct timespec threadtwo = { 0, 0 };
+               pthread_t threadOne, threadTwo;
+               volatile int done = 0;
+
+               int gettid() {
+                 return syscall( SYS_gettid );
+               }
+
+               void *doThreadOne( void * v ) {
+                 while (!done)
+                   sleep(1);
+                 if (syscall(__NR_clock_gettime,CLOCK_THREAD_CPUTIME_ID,&threadone) == -1) {
+                   perror("clock_gettime(CLOCK_THREAD_CPUTIME_ID)");
+                   exit(1);
+                 }
+                 return 0;
+               }
+
+               void *doThreadTwo( void * v ) {
+                 long i, j = 0xdeadbeef;
+                 for( i = 0; i < 0xFFFFFFF; ++i ) { j = j ^ i; }
+
+                 if (syscall(__NR_clock_gettime,CLOCK_THREAD_CPUTIME_ID,&threadtwo) == -1) {
+                   perror("clock_gettime(CLOCK_THREAD_CPUTIME_ID)");
+                   exit(1);
+                 }
+                 done = 1;
+                 return j;
+               }
+
+               int main( int argc, char ** argv ) {
+                 int status = pthread_create( & threadOne, NULL, doThreadOne, NULL );
+                 assert( status == 0 );
+                 status = pthread_create( & threadTwo, NULL, doThreadTwo, NULL );
+                 assert( status == 0 );
+                 status = pthread_join( threadTwo, NULL );
+                 assert( status == 0 );
+                 status = pthread_join( threadOne, NULL );
+                 assert( status == 0 );
+                 if ((threadone.tv_sec != threadtwo.tv_sec) || (threadone.tv_nsec != threadtwo.tv_nsec))
+                   exit(0);
+                 else {
+                   fprintf(stderr,"T1 %ld %ld T2 %ld %ld\n",threadone.tv_sec,threadone.tv_nsec,threadtwo.tv_sec,threadtwo.tv_nsec);
+                   exit(1);
+                 }
+               } ],
+     [AC_MSG_RESULT(yes)
+      virtualtimer="clock_thread_cputime_id"],
+     [AC_MSG_RESULT(no)
+      # *** Checks for working per thread timer***
+      AC_MSG_CHECKING(for working per-thread times() timer)
+      AC_TRY_RUN([#include <pthread.h>
+                  #include <signal.h>
+                  #include <sys/times.h>
+                  #include <assert.h>
+                  #include <stdio.h>
+                  #include <stdlib.h>
+                  #include <sys/types.h>
+                  #include <unistd.h>
+                  #include <sys/syscall.h>
+                  #include <time.h>
+                  #if !defined( SYS_gettid )
+                  #define SYS_gettid 1105
+                  #endif
+                  long threadone = 0, threadtwo = 0;
+                  pthread_t threadOne, threadTwo;
+                  volatile int done = 0;
+
+                  int gettid() {
+                    return syscall( SYS_gettid );
+                  }
+                  int doThreadOne( void * v ) {
+                    struct tms tm;
+                    int status;
+                    while (!done)
+                      sleep(1);
+                    status = times( & tm );
+                    assert( status != -1 );
+                    threadone = tm.tms_utime;
+                    return 0;
+                  }
+                  int doThreadTwo( void * v ) {
+                    struct tms tm;
+                    long i, j = 0xdeadbeef;
+                    int status;
+                    for( i = 0; i < 0xFFFFFFF; ++i ) { j = j ^ i; }
+                    status = times( & tm );
+                    assert( status != -1 );
+                    threadtwo = tm.tms_utime;
+                    done = 1;
+                    return j;
+                  }
+                  int main( int argc, char ** argv ) {
+                    int status = pthread_create( & threadOne, NULL, doThreadOne, NULL );
+                    assert( status == 0 );
+                    status = pthread_create( & threadTwo, NULL, doThreadTwo, NULL );
+                    assert( status == 0 );
+                    status = pthread_join( threadTwo, NULL );
+                    assert( status == 0 );
+                    status = pthread_join( threadOne, NULL );
+                    assert( status == 0 );
+                    return (threadone == threadtwo);
+                  } ],
+        [AC_MSG_RESULT(yes)
+         virtualtimer="times"],
+        [AC_MSG_RESULT(no)
+         virtualtimer="default"]) ]) ])
+LDFLAGS=$SAVED_LDFLAGS
+CFLAGS=$SAVED_CFLAGS
+LIBS=$SAVED_LIBS
+
+AC_MSG_CHECKING(for which virtual timer to use)
+case "$virtualtimer" in
+  times)
+    AC_DEFINE(HAVE_PER_THREAD_TIMES, 1, [Working per thread timer]) ;;
+  getrusage)
+    AC_DEFINE(HAVE_PER_THREAD_GETRUSAGE, 1, [Working per thread getrusage]) ;;
+  clock_thread_cputime_id)
+    AC_DEFINE(HAVE_CLOCK_GETTIME_THREAD, CLOCK_THREAD_CPUTIME_ID, [POSIX 1b per-thread clock]) ;;
+  proc|default|perfctr)
+    AC_DEFINE(USE_PROC_PTTIMER, 1, [Use /proc for per-thread times])
+    AC_DEFINE(USE_PERFCTR_PTTIMER, 1, [Use the perfctr virtual TSC for per-thread times]) ;;
+esac
+AC_MSG_RESULT($virtualtimer)
+
+if test "$OS" = "aix"; then
+  AC_ARG_WITH(pmapi,
+    [  --with-pmapi=<path>     Specify path of pmapi on aix system ],
+    [PMAPI=$withval],
+    [PMAPI="/usr/pmapi"])
+  LIBS="-L$PMAPI/lib -lpmapi"
+  CPPFLAGS="$CPPFLAGS -I$PMAPI/include"
+  AC_CHECK_LIB([pmapi], [pm_initialize],
+    [PMINIT="-DPM_INITIALIZE"],
+    [AC_CHECK_LIB([pmapi], [pm_init],
+       [PMINIT="-DPM_INIT"],
+       [AC_MSG_ERROR([libpmapi.a not found, rerun configure with different flags]) ]) ])
+fi
+
+AC_MSG_CHECKING(for static user preset events)
+AC_ARG_WITH(static_user_events,
+  [  --with-static-user-events  Build with a static user events file.],
+  [STATIC_USER_EVENTS=$withval],
+  [STATIC_USER_EVENTS=no])
+if test "$STATIC_USER_EVENTS" = "yes"; then
+  PAPICFLAGS+=" -DSTATIC_USER_EVENTS"
+fi
+AC_MSG_RESULT($STATIC_USER_EVENTS)
+
+AC_MSG_CHECKING(for static PAPI preset events)
+AC_ARG_WITH(static_papi_events,
+  [  --with-static-papi-events  Build with a static papi events file.],
+  [STATIC_PAPI_EVENTS=$withval],
+  [STATIC_PAPI_EVENTS=yes])
+if test "$STATIC_PAPI_EVENTS" = "yes"; then
+  PAPICFLAGS+=" -DSTATIC_PAPI_EVENTS_TABLE"
+fi
+AC_MSG_RESULT($STATIC_PAPI_EVENTS)
+
+AC_MSG_CHECKING(for whether to build static library)
+AC_ARG_WITH(static_lib,
+  [  --with-static-lib=<yes|no>  Build a static library],
+  [static_lib=$withval],
+  [static_lib=yes])
+AC_MSG_RESULT($static_lib)
+
+AC_MSG_CHECKING(for whether to build shared library)
+AC_ARG_WITH(shared_lib,
+  [  --with-shared-lib=<yes|no>  Build a shared library],
+  [shared_lib=$withval],
+  [shared_lib=yes])
+AC_MSG_RESULT($shared_lib)
+
+if test "$shared_lib" = "no" -a "$static_lib" = "no"; then
+  AC_MSG_ERROR(Both shared and static libs are disabled)
+fi
+
+if test "$shared_lib" = "yes"; then
+  papiLIBS="shared"
+fi
+if test "$static_lib" = "yes"; then
+  papiLIBS="$papiLIBS static"
+fi
+
+AC_MSG_CHECKING(for static compile of tests and utilities)
+AC_ARG_WITH(static_tools,
+  [  --with-static-tools     Specify static compile of tests and utilities],
+  [STATIC="-static"
+   AC_MSG_RESULT(yes)],
+  [AC_MSG_RESULT(no)])
+
+# Only a conflict when static tools were actually requested; without this
+# guard a shared-only build was rejected even when the tools link shared.
+if test "$STATIC" = "-static" && test "$static_lib" = "no"; then
+  AC_MSG_ERROR(Building tests and utilities static but no static papi library to be built)
+fi
+
+AC_MSG_CHECKING(for linking with papi shared library of tests and utilities)
+AC_ARG_WITH(shlib_tools,
+  [  --with-shlib-tools      Specify linking with papi library of tests and utilities],
+  [shlib_tools=yes
+   AC_MSG_RESULT(yes)],
+  [shlib_tools=no
+   AC_MSG_RESULT(no)])
+
+if test "$shlib_tools" = "yes"; then
+  if test "$shared_lib" != "yes"; then
+    AC_MSG_ERROR(Building static but specified shared linking for tests and utilities)
+  fi
+  if test "$STATIC" = "-static"; then
+    AC_MSG_ERROR([Building shared but specified static linking])
+  fi
+  LINKLIB='$(SHLIB)'
+elif test "$shlib_tools" = "no"; then
+  if test "$static_lib" != "yes"; then
+    AC_MSG_ERROR([Building shared but specified static linking for tests and utilities])
+  fi
+  LINKLIB='$(LIBRARY)'
+fi
+
+##################################################
+# perfctr
+##################################################
+perfctr=0
+force_perfctr=no
+
+AC_ARG_WITH(perfctr,
+  [  --with-perfctr          Specify perfctr as the performance interface ],
+  [force_perfctr=yes
+   user_specified_interface=perfctr
+   if test "$arch" != "ppc64"; then
+     perfctr=6
+   else
+     perfctr=7
+   fi],
+  [perfctr=0])
+
+if test "$cross_compiling" = "no" ; then
+  AC_CHECK_FILE(/sys/class/perfctr,[perfctr=7],[AC_CHECK_FILE(/dev/perfctr,[perfctr=6])])
+fi
+
+if test "$perfctr" != 0; then
+  pfm_incdir="libpfm-3.y/include/"
+  AC_MSG_CHECKING(for perfctr version)
+  if test "$perfctr" = 7 -a "$arch" != "ppc64"; then
+    AC_MSG_ERROR([Perfctr 2.7.x only works on PPC64 machines. Patch kernel with 2.6.x.])
+  fi
+  AC_MSG_RESULT(2.$perfctr)
+fi
+
+AC_ARG_WITH(perfctr_root,
+  [  --with-perfctr-root=<path>  Specify path to source tree (for use by developers only) ],
+  [perfctr_root=$withval
+   user_specified_interface=perfctr])
+AC_ARG_WITH(perfctr_prefix,
+  [  --with-perfctr-prefix=<path>  Specify prefix to installed perfctr distribution ],
+  [perfctr_prefix=$withval
+   user_specified_interface=perfctr])
+AC_ARG_WITH(perfctr_incdir,
+  [  --with-perfctr-incdir=<path>  Specify directory of perfctr header files in non-standard location ],
+  [perfctr_incdir=$withval
+   user_specified_interface=perfctr])
+AC_ARG_WITH(perfctr_libdir,
+  [  --with-perfctr-libdir=<path>  Specify directory of perfctr library in non-standard location ],
+  [perfctr_libdir=$withval
+   user_specified_interface=perfctr])
+
+if test "$perfctr" != 0; then
+
+  # dotest is set when the user pointed at an external perfctr; only then
+  # are the library and header probed.
+  dotest=0
+  if test "x$perfctr_root" != "x"; then
+    LIBS="-L$perfctr_root/usr.lib -lperfctr"
+    CPPFLAGS="$CPPFLAGS -I$perfctr_root/usr.lib -I$perfctr_root/linux/include"
+    dotest=1
+  elif test "x$perfctr_prefix" != "x"; then
+    LIBS="-L$perfctr_prefix/lib -lperfctr"
+    CPPFLAGS="$CPPFLAGS -I$perfctr_prefix/include"
+    perfctr_libdir="$perfctr_prefix/lib"
+    perfctr_incdir="$perfctr_prefix/include"
+    dotest=1
+  else
+    if test "x$perfctr_libdir" != "x"; then
+      LIBS="-L$perfctr_libdir -lperfctr"
+      dotest=1
+    fi
+    if test "x$perfctr_incdir" != "x"; then
+      # Append, as the other branches do, so earlier include paths survive.
+      CPPFLAGS="$CPPFLAGS -I$perfctr_incdir"
+      dotest=1
+    fi
+  fi
+
+  if test "$dotest" = 1; then
+    AC_CHECK_LIB([perfctr], [vperfctr_open],
+      [AC_CHECK_HEADERS([libperfctr.h],
+         [oCFLAGS=$CFLAGS
+          CFLAGS="$CFLAGS -static"
+          AC_TRY_RUN([#include <stdlib.h>
+                      #include "libperfctr.h"
+                      int main() {
+                        if ((PERFCTR_ABI_VERSION >> 24) != 5)
+                          exit(1);
+                        exit(0);
+                      } ],
+            [perfctr=6],
+            [perfctr=7])
+          CFLAGS=$oCFLAGS],
+         [AC_MSG_ERROR([libperfctr.h not found, rerun configure with different flags])])],
+      [AC_MSG_ERROR([libperfctr.a not found, rerun configure with different flags]) ])
+  else
+    AC_DEFINE(HAVE_LIBPERFCTR_H,1,[perfctr header file])
+  fi
+fi
+
+# NOTE(review): this reset runs after the perfctr options above, so a
+# perfctr choice never reaches the "sort out" section below as the user
+# specified interface - confirm that is intended.
+user_specified_interface=no
+##################################################
+# perfmon
+##################################################
+old_pfmv2=n
+perfmon=0
+perfmon2=no
+force_perfmon2=no
+
+# The perfmon version "x.y" is squeezed to the integer xy: above 20 selects
+# perfmon2, below 25 additionally selects the old pfm v2 interface.
+AC_ARG_WITH(perfmon,
+  [  --with-perfmon=<x.y>    Specify perfmon as the performance interface and specify version],
+  [perfmon=$withval
+   user_specified_interface=perfmon
+   force_perfmon2=yes
+   pfm_incdir="libpfm-3.y/include"
+   perfmon=`echo ${perfmon} | sed 's/^[[ \t]]*//;s/[[ \t]]*$//'`
+   perfmon=`echo ${perfmon} | grep -e '[[1-9]]\.[[0-9]][[0-9]]*'`
+   if test "x$perfmon" = "x"; then
+     AC_MSG_ERROR("Badly formed perfmon version string")
+   fi
+   perfmon=`echo ${perfmon} | sed 's/\.//'`
+   if test $perfmon -gt 20; then
+     perfmon2=yes
+   fi
+   if test $perfmon -lt 25; then
+     old_pfmv2=y
+     PFMCFLAGS="-DPFMLIB_OLD_PFMV2"
+   fi],
+  [perfmon=0
+   if test "$cross_compiling" = "no" ; then
+     AC_CHECK_FILE(/sys/kernel/perfmon/version,
+       [perfmon=`cat /sys/kernel/perfmon/version`],
+       [AC_CHECK_FILE(/proc/perfmon,
+          [perfmon=`cat /proc/perfmon | grep version | cut -d: -f2`],
+          [perfmon=0])])
+     if test "$perfmon" != 0; then
+       pfm_incdir="libpfm-3.y/include"
+       perfmon=`echo ${perfmon} | sed 's/^[[ \t]]*//;s/[[ \t]]*$//'`
+       perfmon=`echo ${perfmon} | grep -e '[[1-9]]\.[[0-9]][[0-9]]*'`
+       perfmon=`echo ${perfmon} | sed 's/\.//'`
+       if test $perfmon -gt 20; then
+         perfmon2=yes
+       fi
+       if test $perfmon -lt 25; then
+# must be y, not yes, or libpfm breaks
+         old_pfmv2="y"
+         PFMCFLAGS="-DPFMLIB_OLD_PFMV2"
+       fi
+     fi
+   fi])
+
+
+force_pfm_incdir=no
+# default
+
+AC_ARG_WITH(pfm_root,
+  [  --with-pfm-root=<path>  Specify path to source tree (for use by developers only) ],
+  [pfm_root=$withval
+   pfm_incdir=$withval/include
+   pfm_libdir=$withval/lib])
+AC_ARG_WITH(pfm_prefix,
+  [  --with-pfm-prefix=<path>  Specify prefix to installed pfm distribution ],
+  [pfm_prefix=$withval
+   pfm_incdir=$pfm_prefix/include
+   pfm_libdir=$pfm_prefix/lib])
+AC_ARG_WITH(pfm_incdir,
+  [  --with-pfm-incdir=<path>  Specify directory of pfm header files in non-standard location ],
+  [pfm_incdir=$withval])
+AC_ARG_WITH(pfm_libdir,
+  [  --with-pfm-libdir=<path>  Specify directory of pfm library in non-standard location ],
+  [pfm_libdir=$withval])
+
+# if these are both empty, it means we haven't set either pfm_prefix or pfm_root
+# which would have set them. Thus it means that we set this to our included
+# libpfm4 library. Shame on the person that sets one but not the other.
+
+if test "x$pfm_incdir" = "x" -a "x$pfm_libdir" = "x"; then
+  pfm_root="libpfm4"
+  pfm_incdir="libpfm4/include"
+  pfm_libdir="libpfm4/lib"
+fi
+
+##################################################
+# Linux perf_event/perf_counter
+##################################################
+
+# The MIC option stores its answer in MIC (upper case); the lower-case
+# spelling was never set, so this default could not take effect.
+if test "x$MIC" = "xno"; then
+  perf_events=no
+fi
+
+force_perf_events=no
+disable_uncore=yes
+
+AC_ARG_WITH(perf_events,
+  [  --with-perf-events      Specify use of Linux Performance Event (requires kernel 2.6.32 or greater)],
+  [force_perf_events=yes
+   user_specified_interface=pe])
+
+# RDPMC support
+
+AC_ARG_ENABLE(perfevent_rdpmc,
+AS_HELP_STRING([--enable-perfevent-rdpmc],
+  [Enable userspace rdpmc instruction on perf_event, default: yes]),
+[case "${enableval}" in
+   yes) enable_perfevent_rdpmc=true ;;
+   no) enable_perfevent_rdpmc=false ;;
+   *) AC_MSG_ERROR([bad value ${enableval} for --enable-perfevent-rdpmc]) ;;
+ esac],
+[enable_perfevent_rdpmc=true])
+
+if test "$enable_perfevent_rdpmc" = "true"; then
+  PECFLAGS="$PECFLAGS -DUSE_PERFEVENT_RDPMC=1"
+fi
+
+# Uncore support
+AC_ARG_WITH(pe_incdir,
+  [  --with-pe-incdir=<path>  Specify path to the correct perf header file],
+  [pe_incdir=$withval
+   force_perf_events=yes
+   user_specified_interface=pe],
+  [pe_incdir=$pfm_incdir/perfmon])
+
+AC_ARG_ENABLE(perf_event_uncore,
+  [  --disable-perf-event-uncore  Disable perf_event uncore component])
+
+AS_IF([test "x$enable_perf_event_uncore" != "xno"],[
+  disable_uncore=no
+])
+
+# Check for perf_event.h
+ if test "$force_perf_events" = "yes"; then
+   perf_events="yes"
+ fi
+ if test "$cross_compiling" = "no"; then
+   AC_CHECK_FILE(/proc/sys/kernel/perf_event_paranoid,[
+     AC_CHECK_FILE($pe_incdir/perf_event.h,perf_events="yes")])
+ fi
+ if test "$perf_events" = "yes"; then
+   PECFLAGS="$PECFLAGS -DPEINCLUDE=\\\"$pe_incdir/perf_event.h\\\""
+ fi
+
+#
+# Sort out the choice of the user vs. what we detected
+#
+# MESSING WITH CFLAGS IS STUPID!
+#
+if test "$user_specified_interface" != "no"; then
+  if test "$user_specified_interface" = "perfctr"; then
+    perfmon=0
+    perf_events="no"
+  else
+    if test "$user_specified_interface" = "perfmon"; then
+      perfctr=0
+      perf_events="no"
+      PAPICFLAGS+=" $PFMCFLAGS"
+    else
+      if test "$user_specified_interface" = "pe"; then
+        perfctr=0
+        perfmon=0
+        PAPICFLAGS+=" $PECFLAGS"
+      else
+        AC_MSG_ERROR("Unknown user_specified_interface=$user_specified_interface perfctr=$perfctr perfmon=$perfmon perfmon2=$perfmon2 perf-events=$perf_events")
+      fi
+    fi
+  fi
+else
+  if test "$perfmon" != 0; then
+    PAPICFLAGS+=" $PFMCFLAGS"
+  fi
+  if test "$perf_events" = "yes"; then
+    PAPICFLAGS+=" $PECFLAGS"
+  fi
+fi
+
+
+#
+# User has made no choice, so we default to the ordering below in the platform section, if
+# we detect more than one.
+#
+
+#
+# What does this next section do? It determines whether or not to run the tests for libpfm
+# based on the settings of pfm_root, pfm_prefix, pfm_incdir, pfm_libdir
+#
+
+if test "$perfmon" != 0 -o "$perf_events" = "yes"; then
+  # if prefix set, then yes
+  if test "x$pfm_prefix" != "x"; then
+    dotest=1
+  # if root not set and libdir set, then yes
+  elif test "x$pfm_root" = "x" -a "x$pfm_libdir" != "x"; then
+    dotest=1
+  else
+    dotest=0
+  fi
+  if test "$dotest" = 1; then
+    LIBS="-L$pfm_libdir -lpfm"
+    CPPFLAGS="$CPPFLAGS -I$pfm_incdir"
+    AC_CHECK_LIB([pfm], [pfm_initialize],
+      [AC_CHECK_HEADERS([perfmon/pfmlib.h],
+         [if test "$arch" = "ia64"; then
+            AC_CHECK_HEADERS([perfmon/pfmlib_montecito.h])
+          fi
+          AC_CHECK_FUNC(pfm_get_event_description,
+            [AC_DEFINE(HAVE_PFM_GET_EVENT_DESCRIPTION,1,[event description function])],[])
+          AC_CHECK_MEMBER(pfmlib_reg_t.reg_evt_idx,
+            [AC_DEFINE(HAVE_PFM_REG_EVT_IDX,1,[old reg_evt_idx])],[],[#include "perfmon/pfmlib.h"])
+          AC_CHECK_MEMBER(pfmlib_output_param_t.pfp_pmd_count,
+            [AC_DEFINE(HAVE_PFMLIB_OUTPUT_PFP_PMD_COUNT,1,[new pfmlib_output_param_t])],[],[#include "perfmon/pfmlib.h"])
+          AC_CHECK_MEMBER(pfm_msg_t.type,
+            [AC_DEFINE(HAVE_PFM_MSG_TYPE,1,[new pfm_msg_t])],[],[#include "perfmon/perfmon.h"]) ],
+         [AC_MSG_ERROR([perfmon/pfmlib.h not found, rerun configure with different flags]) ]) ],
+      [AC_MSG_ERROR([libpfm.a not found, rerun configure with different flags]) ])
+  else
+    AC_DEFINE(HAVE_PERFMON_PFMLIB_MONTECITO_H,1,[Montecito headers])
+    AC_DEFINE(HAVE_PFM_GET_EVENT_DESCRIPTION,1,[event description function])
+    AC_DEFINE(HAVE_PFMLIB_OUTPUT_PFP_PMD_COUNT,1,[new pfmlib_output_param_t])
+  fi
+fi
+
+##################################################
+# Checking platform
+##################################################
+AC_MSG_CHECKING(platform)
+case "$OS" in
+  aix)
+    MAKEVER="$OS"-"$CPU" ;;
+  bgp)
+    MAKEVER=bgp ;;
+  bgq)
+    MAKEVER=bgq ;;
+  CLE)
+    if test "$perfmon2" = "yes"; then
+#      major_version=`echo $OSVER | sed 's/\([[^.]][[^.]]*\).*/\1/'`
+#      minor_version=`echo $OSVER | sed 's/[[^.]][[^.]]*.\([[^.]][[^.]]*\).*/\1/'`
+#      point_version=`echo $OSVER | sed -e 's/[[^.]][[^.]]*.[[^.]][[^.]]*.\(.*\)/\1/' -e 's/[[^0-9]].*//'`
+#      if (test "$major_version" = 2 -a "$minor_version" = 6 -a "$point_version" -lt 31 -a "$perfmon2" != "yes" ); then
+      MAKEVER="$OS"-perfmon2
+    else
+      MAKEVER="$OS"-pe
+    fi ;;
+  freebsd)
+    MAKEVER="freebsd"
+    LDFLAGS="-lpmc"
+    # HWPMC driver is available for FreeBSD >= 6
+    FREEBSD_VERSION=`uname -r | cut -d'.' -f1`
+    if test "${FREEBSD_VERSION}" -lt 6 ; then
+      AC_MSG_ERROR([PAPI requires FreeBSD 6 or greater])
+    fi
+    # Determine if HWPMC module is on the kernel
+    dmesg | grep hwpmc 2> /dev/null > /dev/null
+    if test "$?" != "0" ; then
+      AC_MSG_ERROR([HWPMC module not found. (see INSTALL.TXT)])
+    fi
+    # Determine the number of counters
+    echo "/* Automatically generated file by configure */" > freebsd-config.h
+    echo "#ifndef _FREEBSD_CONFIG_H_" >> freebsd-config.h
+    echo "#define _FREEBSD_CONFIG_H_" >> freebsd-config.h
+    echo "" >> freebsd-config.h
+    # The link result is stored in, and tested from, the same variable; the
+    # two names used to differ, so a link failure was never reported.
+    AC_TRY_LINK([#include <sys/types.h>
+                 #include <pmc.h>],
+      [int i = pmc_init();],
+      [pmc_init_linked="yes"], [pmc_init_linked="no"])
+    if test "${pmc_init_linked}" = "no" ; then
+      AC_MSG_ERROR([Failed to link hwpmc example])
+    fi
+    # The probe exits with the counter count minus one, so a zero exit
+    # status is treated as failure and a non-zero status is the answer.
+    AC_TRY_RUN([#include <sys/types.h>
+                #include <pmc.h>
+                int main() {
+                  const struct pmc_cpuinfo *info;
+                  if (pmc_init() < 0) return 0;
+                  if (pmc_cpuinfo (&info) < 0) return 0;
+                  return info->pm_npmc-1;
+                } ],
+      [ num_counters="0" ],
+      [ num_counters="$?"])
+    if test "${num_counters}" = "0" ; then
+      AC_MSG_ERROR([pmc_npmc info returned 0. Determine if the HWPMC module is loaded (see hwpmc(4))])
+    fi
+    echo "#define HWPMC_NUM_COUNTERS ${num_counters}" >> freebsd-config.h
+    echo "" >> freebsd-config.h
+    echo "#endif" >> freebsd-config.h ;;
+  linux)
+    # Explicit user requests win, in the order perf_events, perfmon2,
+    # perfctr; otherwise the detected interfaces are tried in that order.
+    if test "$force_perf_events" = "yes" ; then
+      MAKEVER="$OS"-pe
+    elif test "$force_perfmon2" = "yes" ; then
+      MAKEVER="$OS"-perfmon2
+    elif test "$force_perfctr" = "yes" ; then
+      MAKEVER="$OS"-perfctr-x86
+      case "$CPU" in
+        itanium2|montecito)
+          if test "$bitmode" = "32"; then
+            AC_MSG_ERROR([The bitmode you specified is not supported])
+          fi
+          MAKEVER="$OS"-pfm-"$CPU"
+          ;;
+        POWER5|POWER5+|POWER6|POWER7|PPC970)
+          MAKEVER="$OS"-perfctr-"$CPU" ;;
+      esac
+    elif test "$perf_events" = "yes" ; then
+      MAKEVER="$OS"-pe
+    elif test "$perfmon2" = "yes" ; then
+      MAKEVER="$OS"-perfmon2
+    elif test "$old_pfmv2" = "y" ; then
+      MAKEVER="$OS"-pfm-"$CPU"
+    elif test "$perfctr" != 0 ; then
+      case "$CPU" in
+        itanium2|montecito)
+          if test "$bitmode" = "32"; then
+            AC_MSG_ERROR([The bitmode you specified is not supported])
+          fi
+          MAKEVER="$OS"-pfm-"$CPU"
+          ;;
+        x86)
+          MAKEVER="$OS"-perfctr-x86 ;;
+        POWER5|POWER5+|POWER6|POWER7|PPC970)
+          MAKEVER="$OS"-perfctr-"$CPU" ;;
+        *)
+          MAKEVER="$OS"-generic ;;
+      esac
+    else
+      MAKEVER="$OS"-generic
+    fi ;;
+  solaris)
+    if test "$bitmode" = "64" -a "`isainfo -v | grep "64"`" = ""; then
+      AC_MSG_ERROR([The bitmode you specified is not supported])
+    fi
+    MAKEVER="$OS"-"$CPU" ;;
+  darwin)
+    MAKEVER="$OS" ;;
+esac
+
+AC_MSG_RESULT($MAKEVER)
+if test "x$MAKEVER" = "x"; then
+  AC_MSG_NOTICE(This platform is not supported so a generic build without CPU counters will be used)
+  MAKEVER="generic_platform"
+fi
+
+##################################################
+# Set build macros
+##################################################
+FILENAME=Makefile.inc
+SHOW_CONF=showconf
+CTEST_TARGETS="all"
+FTEST_TARGETS="all"
+LIBRARY=libpapi.a
+SHLIB='libpapi.so.AC_PACKAGE_VERSION'
+VLIB='libpapi.so.$(PAPIVER)'
+OMPCFLGS=-fopenmp
+CC_R='$(CC) -pthread'
+CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(VLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' +if test "$CC_COMMON_NAME" = "gcc"; then + if test "$bitmode" = "32"; then + BITFLAGS=-m32 + elif test "$bitmode" = "64"; then + BITFLAGS=-m64 + fi +fi + +OPTFLAGS="$OPTFLAGS" +PAPICFLAGS+=" -D_REENTRANT -D_GNU_SOURCE $NOTLS" +CFLAGS="$CFLAGS $BITFLAGS" +if test "$CC_COMMON_NAME" = "gcc"; then + CFLAGS="$CFLAGS -Wall" +fi +FFLAGS="$CFLAGS $BITFLAGS $FFLAGS -Dlinux" + +# OS Support + +if (test "$OS" = "aix"); then + OSFILESSRC=aix-memory.c + OSLOCK=aix-lock.h + OSCONTEXT=aix-context.h +elif (test "$OS" = "bgp"); then + OSFILESSRC=linux-bgp-memory.c + OSLOCK=linux-bgp-lock.h + OSCONTEXT=linux-bgp-context.h +elif (test "$OS" = "bgq"); then + OSFILESSRC=linux-bgq-memory.c + OSLOCK=linux-bgq-lock.h + OSCONTEXT=linux-context.h +elif (test "$OS" = "freebsd"); then + OSFILESSRC=freebsd-memory.c + OSLOCK="freebsd-lock.h" + OSCONTEXT="freebsd-context.h" +elif (test "$OS" = "linux"); then + OSFILESSRC="linux-memory.c linux-timer.c linux-common.c" + OSFILESHDR="linux-memory.h linux-timer.h linux-common.h" + OSLOCK="linux-lock.h" + OSCONTEXT="linux-context.h" +elif (test "$OS" = "solaris"); then + OSFILESSRC="solaris-memory.c solaris-common.c" + OSFILESHDR="solaris-memory.h solaris-common.h" + OSLOCK="solaris-lock.h" + OSCONTEXT="solaris-context.h" +elif (test "$OS" = "darwin"); then + OSFILESSRC="darwin-memory.c darwin-common.c" + OSFILESHDR="darwin-memory.h darwin-common.h" + OSLOCK="darwin-lock.h" + OSCONTEXT="darwin-context.h" +fi + +OSFILESOBJ='$(OSFILESSRC:.c=.o)' + + +if (test "$MAKEVER" = "aix-power5" || test "$MAKEVER" = "aix-power6" || test "$MAKEVER" = "aix-power7"); then + if test "$bitmode" = "64"; then + LIBRARY=libpapi64.a + SHLIB=libpapi64.so +# By default AIX enforces a limit on heap space +#( limiting the heap to share the same 256MB memory segment as stack ) +# changing the max data paramater moves the heap off the stack's memory segment + BITFLAGS='-q64 
-bmaxdata:0x07000000000000' + ARG64=-X64 + else +# If the issue ever comes up, /dsa requires AIX v5.1 or higher +# and the Large address-space model (-bmaxdata) requires v4.3 or later +# see http://publib.boulder.ibm.com/infocenter/pseries/v5r3/topic/com.ibm.aix.genprogc/doc/genprogc/lrg_prg_support.htm#a179c11c5d + SHLIB=libpapi.so + BITFLAGS="-bmaxdata:0x80000000/dsa" + fi + + CPUCOMPONENT_NAME=aix + CPUCOMPONENT_C=aix.c + CPUCOMPONENT_OBJ=aix.o + VECTOR=_aix_vector + PAPI_EVENTS_CSV="papi_events.csv" + MISCHDRS="aix.h components/perfctr_ppc/ppc64_events.h papi_events_table.h" + MISCSRCS="aix.c" + CFLAGS+='-qenum=4 -Icomponents/perfctr_ppc -DNO_VARARG_MACRO -D_AIX -D_$(CPU_MODEL) -DNEED_FFSLL -DARCH_EVTS=\"$(ARCH_EVENTS).h\" -DCOMP_VECTOR=_ppc64_vectors -DSTATIC_PAPI_EVENTS_TABLE' + FFLAGS+='-WF,-D_$(CPU_MODEL) -WF,-DARCH_EVTS=\"$(ARCH_EVENTS).h\"' + CFLAGS+='-I$(PMAPI)/include -Icomponents/perfctr_ppc -qmaxmem=-1 -qarch=$(cpu_option) -qtune=$(cpu_option) -qlanglvl=extended $(BITFLAGS)' + if test $debug != "yes"; then + OPTFLAGS='-O3 -qstrict $(PMINIT)' + else + OPTFLAGS='$(PMINIT)' + fi + SMPCFLGS=-qsmp + OMPCFLGS='-qsmp=omp' + LDFLAGS='-L$(PMAPI)/lib -lpmapi' + CC_R=xlc_r + CC=xlc + CC_SHR="xlc -G -bnoentry" + AC_CHECK_PROGS( [MPICC], [mpicc mpcc], []) + F77=xlf + CPP='xlc -E $(CPPFLAGS)' + if test "$MAKEVER" = "aix-power5"; then + ARCH_EVENTS=power5_events + CPU_MODEL=POWER5 + cpu_option=pwr5 + DESCR="AIX 5.1.0 or greater with POWER5" + if test "$bitmode" = "64"; then + DESCR="$DESCR 64 bit build" + fi + elif test "$MAKEVER" = "aix-power6"; then + ARCH_EVENTS=power6_events + CPU_MODEL=POWER6 + cpu_option=pwr6 + DESCR="AIX 5.1.0 or greater with POWER6" + CPPFLAGS="-qlanglvl=extended" + if test "$bitmode" = "64"; then + DESCR="$DESCR 64 bit build" + fi + elif test "$MAKEVER" = "aix-power7"; then + ARCH_EVENTS=power7_events + CPU_MODEL=POWER7 + cpu_option=pwr7 + DESCR="AIX 5.1.0 or greater with POWER7" + CPPFLAGS="-qlanglvl=extended" + if test "$bitmode" = "64"; 
then + DESCR="$DESCR 64 bit build" + fi + fi +elif test "$MAKEVER" = "bgp"; then + CPP="$CC -E" + CPUCOMPONENT_NAME=linux-bgp + CPUCOMPONENT_C=linux-bgp.c + CPUCOMPONENT_OBJ=linux-bgp.o + VECTOR=_bgp_vectors + PAPI_EVENTS_CSV="papi_events.csv" + MISCSRCS= + CFLAGS='-g -gdwarf-2 -O2 -Wall -I. -I$(BGP_SYSDIR)/arch/include -DCOMP_VECTOR=_bgp_vectors' + tests="$tests bgp_tests" + SHOW_CONF=show_bgp_conf + BGP_SYSDIR=/bgsys/drivers/ppcfloor + BGP_GNU_LINUX_PATH='${BGP_SYSDIR}/gnu-linux' + LDFLAGS='-L$(BGP_SYSDIR)/runtime/SPI -lSPI.cna' + FFLAGS='-g -gdwarf-2 -O2 -Wall -I. -Dlinux' + OPTFLAGS="-g -Wall -O3" + TOPTFLAGS="-g -Wall -O0" + SHLIB=libpapi.so + DESCR="Linux for BlueGene/P" + LIBS=static + CC_SHR='$(CC) -shared -Xlinker "-soname" -Xlinker "$(SHLIB)" -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' + OMPCFLGS="" + +elif test "$MAKEVER" = "bgq"; then + FILENAME=Rules.bgpm + VECTOR=_bgq_vectors + CPUCOMPONENT_NAME=linux-bgq + CPUCOMPONENT_C=linux-bgq.c + CPUCOMPONENT_OBJ=linux-bgq.o + PAPI_EVENTS_CSV="papi_events.csv" + MISCSRCS="linux-bgq-common.c" + OPTFLAGS="-g -Wall -O3" + TOPTFLAGS="-g -Wall -O0" + SHLIB=libpapi.so + DESCR="Linux for Blue Gene/Q" + CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(SHLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' + OMPCFLGS="" + +elif test "$MAKEVER" = "CLE-perfmon2"; then + FILENAME=Rules.perfmon2 + CPUCOMPONENT_NAME=perfmon + CPUCOMPONENT_C=perfmon.c + CPUCOMPONENT_OBJ=perfmon.o + VECTOR=_papi_pfm_vector + PAPI_EVENTS_CSV="papi_events.csv" + F77=gfortran + CFLAGS="$CFLAGS -D__crayxt" + FFLAGS="" + +elif test "$MAKEVER" = "freebsd"; then + CPUCOMPONENT_NAME=freebsd + CPUCOMPONENT_C=freebsd.c + CPUCOMPONENT_OBJ=freebsd.o + VECTOR=_papi_freebsd_vector + PAPI_EVENTS_CSV="freebsd_events.csv" + MISCHDRS="freebsd/map-unknown.h freebsd/map.h freebsd/map-p6.h freebsd/map-p6-m.h freebsd/map-p6-3.h freebsd/map-p6-2.h freebsd/map-p6-c.h freebsd/map-k7.h freebsd/map-k8.h freebsd/map-p4.h freebsd/map-atom.h freebsd/map-core.h freebsd/map-core2.h 
freebsd/map-core2-extreme.h freebsd/map-i7.h freebsd/map-westme\ +re.h" + MISCSRCS="$MISCSRCS freebsd/map-unknown.c freebsd/map.c freebsd/map-p6.c freebsd/map-p6-m.c freebsd/map-p6-3.c freebsd/map-p6-2.c freebsd/map-p6-c.c freebsd/map-k7.c freebsd/map-k8.c freebsd/map-p4.c freebsd/map-atom.c freebsd/map-core.c freebsd/map-core2.c freebsd/map-core2-extreme.c freebsd/map-i7.c freebsd/map-westme\ +re.c" + DESCR="FreeBSD -over libpmc- " + CFLAGS+=" -I. -Ifreebsd -DPIC -fPIC" + CC_SHR='$(CC) -shared -Xlinker "-soname" -Xlinker "libpapi.so" -Xlinker "-rpath" -Xlinker "$(LIBDIR)" -DPIC -fPIC -I. -Ifreebsd' + +elif test "$MAKEVER" = "linux-generic"; then + CPUCOMPONENT_NAME=linux-generic + CPUCOMPONENT_C=linux-generic.c + CPUCOMPONENT_OBJ=linux-generic.o + PAPI_EVENTS_CSV="papi_events.csv" + VECTOR=_papi_dummy_vector + +elif test "$MAKEVER" = "linux-pe"; then + FILENAME=Rules.pfm4_pe + CPUCOMPONENT_NAME=perf_event + components="perf_event" + if test "$disable_uncore" = "no"; then + components="$components perf_event_uncore" + fi + +elif test "$MAKEVER" = "linux-perfctr-x86"; then + FILENAME=Rules.perfctr-pfm + CPUCOMPONENT_NAME=perfctr-x86 + VERSION=2.6.x + components="perfctr" + +elif (test "$MAKEVER" = "linux-perfctr-POWER5" || test "$MAKEVER" = "linux-perfctr-POWER5+" || test "$MAKEVER" = "linux-perfctr-POWER6" || test "$MAKEVER" = "linux-perfctr-POWER7" || test "$MAKEVER" = "linux-perfctr-PPC970"); then + FILENAME=Rules.perfctr + CPUCOMPONENT_NAME=perfctr-ppc + VERSION=2.7.x + components="perfctr_ppc" + CPU=ppc64 + if test "$MAKEVER" = "linux-perfctr-POWER5"; then + CPU_MODEL=POWER5 + ARCH_EVENTS=power5_events + ARCH_SPEC_EVTS=power5_events_map.c + elif test "$MAKEVER" = "linux-perfctr-POWER5+"; then + CPU_MODEL=POWER5p + ARCH_EVENTS=power5+_events + ARCH_SPEC_EVTS=power5+_events_map.c + elif test "$MAKEVER" = "linux-perfctr-POWER6"; then + CPU_MODEL=POWER6 + ARCH_EVENTS=power6_events + ARCH_SPEC_EVTS=power6_events_map.c + elif test "$MAKEVER" = "linux-perfctr-POWER7"; 
then + CPU_MODEL=POWER7 + ARCH_EVENTS=power7_events + ARCH_SPEC_EVTS=power7_events_map.c + elif test "$MAKEVER" = "linux-perfctr-PPC970"; then + CPU_MODEL=PPC970 + ARCH_EVENTS=ppc970_events + ARCH_SPEC_EVTS=ppc970_events_map.c + fi + CFLAGS="$CFLAGS -DPPC64 -D_$(CPU_MODEL) -D__perfctr__ -DARCH_EVTS=\"$(ARCH_EVENTS).h\" -DCOMP_VECTOR=_ppc64_vectors" + FFLAGS='-D_$(CPU_MODEL)' + +elif test "$MAKEVER" = "linux-perfmon2"; then + FILENAME=Rules.perfmon2 + CPUCOMPONENT_NAME=perfmon2 + components="perfmon2" + +elif (test "$MAKEVER" = "linux-pfm-ia64" || test "$MAKEVER" = "linux-pfm-itanium2" || test "$MAKEVER" = "linux-pfm-montecito"); then + FILENAME=Rules.pfm + CPUCOMPONENT_NAME=perfmon-ia64 + components="perfmon_ia64" + VERSION=3.y + if test "$MAKEVER" = "linux-pfm-itanium2"; then + CPU=2 + else + CPU=3 + fi + CFLAGS="$CFLAGS -DITANIUM$CPU" + FFLAGS="$FFLAGS -DITANIUM$CPU" + + CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-soname -Wl,$(SHLIB) -Xlinker "-rpath" -Xlinker "$(LIBDIR)"' + +elif test "$MAKEVER" = "solaris-ultra"; then + CPUCOMPONENT_NAME=solaris-ultra + CPUCOMPONENT_C=solaris-ultra.c + CPUCOMPONENT_OBJ=solaris-ultra.obj + VECTOR=_solaris_vector + PAPI_EVENTS_CSV="papi_events.csv" + DESCR="Solaris 5.8 or greater with UltraSPARC I, II or III" + if test "$CC" = "gcc"; then + F77=g77 + CPP="$CC -E" + CC_R="$CC" + CC_SHR="$CC -shared -fpic" + OPTFLAGS=-O3 + CFLAGS="$CFLAGS -DNEED_FFSLL" + FFLAGS=$CFLAGS + else + # Sun Workshop compilers: V5.0 and V6.0 R2 + CPP="$CC -E" + CC_R="$CC -mt" + CC_SHR="$CC -ztext -G -Kpic" + CFLAGS="-xtarget=ultra3 -xarch=v8plusa -DNO_VARARG_MACRO -D__EXTENSIONS__ -DPAPI_NO_MEMORY_MANAGEMENT -DCOMP_VECTOR=_solaris_vectors" + SMPCFLGS=-xexplicitpar + OMPCFLGS=-xopenmp + F77=f90 + FFLAGS=$CFLAGS + NOOPT=-xO0 + OPTFLAGS="-g -fast -xtarget=ultra3 -xarch=v8plusa" + fi + LDFLAGS="$LDFLAGS -lcpc" + if test "$bitmode" = "64"; then + LIBRARY=libpapi64.a + SHLIB=libpapi64.so + CFLAGS="-xtarget=ultra3 -xarch=v9a -DNO_VARARG_MACRO -D__EXTENSIONS__ 
elif test "$MAKEVER" = "solaris-niagara2"; then
  # Solaris on UltraSPARC T2: Sun compiler flags, libcpc for counter access.
  CPUCOMPONENT_NAME=solaris-niagara2
  CPUCOMPONENT_C=solaris-niagara2.c
  CPUCOMPONENT_OBJ=solaris-niagara2.obj
  VECTOR=_niagara2_vector
  PAPI_EVENTS_CSV="papi_events.csv"
  CFLAGS="-xtarget=native -xarch=native -DNO_VARARG_MACRO -D__EXTENSIONS__ -DCOMP_VECTOR=_niagara2_vector"
  # NOTE(review): ORY_MANAGEMENT is not referenced anywhere else in the
  # visible part of this file; it looks like the remains of a mangled
  # -DPAPI_NO_MEMORY_MANAGEMENT flag. Confirm and remove or repair.
  ORY_MANAGEMENT="-DCOMP_VECTOR=_solaris_vector"
  DESCR="Solaris 10 with libcpc2 and UltraSPARC T2 (Niagara 2)"
  CPP="$CC -E"
  CC_R="$CC -mt"
  CC_SHR="$CC -ztext -G -Kpic"
  SMPCFLGS=-xexplicitpar
  OMPCFLGS=-xopenmp
  F77=f90
  # Fortran flags start as a copy of the C flags set above.
  FFLAGS=$CFLAGS
  NOOPT=-xO0
  OPTFLAGS="-fast"
  FOPTFLAGS=$OPTFLAGS
  LDFLAGS="$LDFLAGS -lcpc"
  if test "$bitmode" = "64"; then
    # 64-bit builds use separate library names and add -m64 to both flag sets.
    LIBRARY=libpapi64.a
    SHLIB=libpapi64.so
    CFLAGS="$CFLAGS -m64"
    FFLAGS="$FFLAGS -m64"
  fi
elif test "$MAKEVER" = "darwin"; then
  DESCR="Darwin"
  CPUCOMPONENT_NAME=darwin
  # NOTE(review): CPUCOMPONENT is assigned twice (the second value replaces
  # the first), while the variables exported with AC_SUBST elsewhere in this
  # file are CPUCOMPONENT_C and CPUCOMPONENT_OBJ. Presumably those two names
  # were intended; verify before changing, since it alters what Darwin builds.
  CPUCOMPONENT=linux-generic.c
  CPUCOMPONENT=linux-generic.obj
  # CFLAGS is replaced here, not extended.
  CFLAGS="-DNEED_FFSLL"
  CC_SHR='$(CC) -fPIC -DPIC -shared -Wl,-dylib -Xlinker "-rpath" -Xlinker "$(LIBDIR)"'
  SHLIB=libpapi.dylib
elif test "$MAKEVER" = "generic_platform"; then
  # Fallback chosen earlier when MAKEVER was empty; only a description is set.
  DESCR="Generic platform"
fi
], + [components="$components $withval"]) + +# This is an ugly hack to keep building on configurations covered by any-null in the past. +if test "$VECTOR" = "_papi_dummy_vector"; then + if test "x$components" = "x"; then + echo "papi_vector_t ${VECTOR} = {" >> components_config.h + echo " .size = { + .context = sizeof ( int ), + .control_state = sizeof ( int ), + .reg_value = sizeof ( int ), + .reg_alloc = sizeof ( int ), + }, + .cmp_info = { + .num_native_events = 0, + .num_preset_events = 0, + .num_cntrs = 0, + .name = \"Your system is unsupported! \", + .short_name = \"UNSUPPORTED!\" + }, + .dispatch_timer = NULL, + .get_overflow_address = NULL, + .start = NULL, + .stop = NULL, + .read = NULL, + .reset = NULL, + .write = NULL, + .cleanup_eventset = NULL, + .stop_profiling = NULL, + .init_component = NULL, + .init_thread = NULL, + .init_control_state = NULL, + .update_control_state = NULL, + .ctl = NULL, + .set_overflow = NULL, + .set_profile = NULL, + .set_domain = NULL, + .ntv_enum_events = NULL, + .ntv_name_to_code = NULL, + .ntv_code_to_name = NULL, + .ntv_code_to_descr = NULL, + .ntv_code_to_bits = NULL, + .ntv_code_to_info = NULL, + .allocate_registers = NULL, + .shutdown_thread = NULL, + .shutdown_component = NULL, + .user = NULL, +};" >> components_config.h + # but in the face of actual components, we don't have to do hacky size games + else + VECTOR="" + fi +elif test "x$VECTOR" != "x"; then + echo "extern papi_vector_t ${VECTOR};" >> components_config.h +fi + + + +for comp in $components; do + idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c` + if test "$idx" = 0; then + subcomp=$comp + else + subcomp=`echo $comp | sed -E "s/^.{${idx}}//"` + fi + COMPONENT_RULES="$COMPONENT_RULES components/$comp/Rules.$subcomp" + echo "extern papi_vector_t _${subcomp}_vector;" >> components_config.h +done +echo "" >> components_config.h +echo "struct papi_vectors *_papi_hwd[[]] = {" >> components_config.h + +if test "x$VECTOR" != "x"; then + echo " &${VECTOR}," >> 
components_config.h +fi + +for comp in $components; do + idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c` + if test "$idx" = 0; then + subcomp=$comp + else + subcomp=`echo $comp | sed -E "s/^.{${idx}}//"` + fi + echo " &_${subcomp}_vector," >> components_config.h +done +echo " NULL" >> components_config.h +echo "};" >> components_config.h +echo "" >> components_config.h +echo "#endif" >> components_config.h + +# check for component tests +for comp in $components; do + if test "`find components/$comp -name "tests"`" != "" ; then + COMPONENTS="$COMPONENTS $comp" + fi +done +tests="$tests comp_tests" + +AC_MSG_RESULT($components) + +AC_MSG_CHECKING(for PAPI event CSV filename to use) +if test "x$PAPI_EVENTS_CSV" == "x"; then + PAPI_EVENTS_CSV="papi_events.csv" +fi +AC_MSG_RESULT($PAPI_EVENTS_CSV) + +AC_SUBST(prefix) +AC_SUBST(exec_prefix) +AC_SUBST(libdir) +AC_SUBST(includedir) +AC_SUBST(mandir) +AC_SUBST(bindir) +AC_SUBST(datadir) +AC_SUBST(datarootdir) +AC_SUBST(docdir) +AC_SUBST(PACKAGE_TARNAME) +AC_SUBST(arch) +AC_SUBST(MAKEVER) +AC_SUBST(PMAPI) +AC_SUBST(PMINIT) +AC_SUBST(F77) +AC_SUBST(CPP) +AC_SUBST(CC) +AC_SUBST(AR) +AC_SUBST(papiLIBS) +AC_SUBST(STATIC) +AC_SUBST(LDFLAGS) +AC_SUBST(altix) +AC_SUBST(perfctr_root) +AC_SUBST(perfctr_prefix) +AC_SUBST(perfctr_incdir) +AC_SUBST(perfctr_libdir) +AC_SUBST(pfm_root) +AC_SUBST(old_pfmv2) +AC_SUBST(pfm_prefix) +AC_SUBST(pfm_incdir) +AC_SUBST(pfm_libdir) +AC_SUBST(OS) +AC_SUBST(CFLAGS) +AC_SUBST(FFLAGS) +AC_SUBST(CPPFLAGS) +AC_SUBST(PAPI_EVENTS) +AC_SUBST(PAPI_EVENTS_CSV) +AC_SUBST(SETPATH) +AC_SUBST(LINKLIB) +AC_SUBST(VERSION) +AC_SUBST(CPU) +AC_SUBST(FILENAME) +AC_SUBST(LIBRARY) +AC_SUBST(SHLIB) +AC_SUBST(VLIB) +AC_SUBST(PAPICFLAGS) +AC_SUBST(OPTFLAGS) +AC_SUBST(CPUCOMPONENT_NAME) +AC_SUBST(CPUCOMPONENT_C) +AC_SUBST(CPUCOMPONENT_OBJ) +AC_SUBST(OSFILESSRC) +AC_SUBST(OSFILESOBJ) +AC_SUBST(OSFILESHDR) +AC_SUBST(OSLOCK) +AC_SUBST(OSCONTEXT) +AC_SUBST(DESCR) +AC_SUBST(LIBS) +AC_SUBST(CTEST_TARGETS) +AC_SUBST(CC_R) 
+AC_SUBST(CC_SHR) +AC_SUBST(SMPCFLGS) +AC_SUBST(OMPCFLGS) +AC_SUBST(NOOPT) +AC_SUBST(MISCSRCS) +AC_SUBST(MISCOBJS) +AC_SUBST(POST_BUILD) +AC_SUBST(ARCH_EVENTS) +AC_SUBST(CPU_MODEL) +AC_SUBST(cpu_option) +AC_SUBST(ARG64) +AC_SUBST(FLAGS) +AC_SUBST(MPICC) +AC_SUBST(MISCHDRS) +AC_SUBST(SHLIBDEPS) +AC_SUBST(TOPTFLAGS) +AC_SUBST(TESTS) +AC_SUBST(tests) +AC_SUBST(SHOW_CONF) +AC_SUBST(BGP_SYSDIR) +AC_SUBST(BITFLAGS) +AC_SUBST(COMPONENT_RULES) +AC_SUBST(COMPONENTS) +AC_SUBST(FTEST_TARGETS) +AC_SUBST(HAVE_NO_OVERRIDE_INIT) +AC_SUBST(BGPM_INSTALL_DIR) +AC_SUBST(CC_COMMON_NAME) + +if test "$cross_compiling" = "yes" ; then + AC_MSG_NOTICE(Compiling genpapifdef with $nativecc because cross compiling) + $nativecc -I. genpapifdef.c -o genpapifdef +else + AC_MSG_NOTICE(Compiling genpapifdef with $CC) + $CC -I. genpapifdef.c -o genpapifdef +fi + +AC_MSG_NOTICE(Generating fpapi.h) +./genpapifdef -c > fpapi.h +AC_MSG_NOTICE(Generating f77papi.h) +./genpapifdef -f77 > f77papi.h +AC_MSG_NOTICE(Generating f90papi.h) +./genpapifdef -f90 > f90papi.h + +AC_MSG_NOTICE($FILENAME will be included in the generated Makefile) +AC_CONFIG_FILES([Makefile papi.pc]) +AC_CONFIG_FILES([components/Makefile_comp_tests.target testlib/Makefile.target utils/Makefile.target ctests/Makefile.target ftests/Makefile.target validation_tests/Makefile.target]) +AC_OUTPUT diff --git a/src/cpus.c b/src/cpus.c new file mode 100644 index 0000000..cef4f31 --- /dev/null +++ b/src/cpus.c @@ -0,0 +1,313 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: cpus.c +* Author: Gary Mohr +* gary.mohr@bull.com +* - based on threads.c by Philip Mucci - +*/ + +/* This file contains cpu allocation and bookkeeping functions */ + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" +#include "cpus.h" +#include +#include + +/* The list of cpus; this gets built as user apps set the cpu papi */ +/* option on an event set */ + 
+static CpuInfo_t *_papi_hwi_cpu_head; + + +static CpuInfo_t * +_papi_hwi_lookup_cpu( unsigned int cpu_num ) +{ + APIDBG("Entry:\n"); + + CpuInfo_t *tmp; + + tmp = ( CpuInfo_t * ) _papi_hwi_cpu_head; + while ( tmp != NULL ) { + THRDBG( "Examining cpu %#x at %p\n", tmp->cpu_num, tmp ); + if ( tmp->cpu_num == cpu_num ) { + break; + } + tmp = tmp->next; + if ( tmp == _papi_hwi_cpu_head ) { + tmp = NULL; + break; + } + } + + if ( tmp ) { + _papi_hwi_cpu_head = tmp; + THRDBG( "Found cpu %#x at %p\n", cpu_num, tmp ); + } else { + THRDBG( "Did not find cpu %#x\n", cpu_num ); + } + + return tmp; +} + +int +_papi_hwi_lookup_or_create_cpu( CpuInfo_t **here, unsigned int cpu_num ) +{ + APIDBG("Entry: here: %p\n", here); + + CpuInfo_t *tmp = NULL; + int retval = PAPI_OK; + + _papi_hwi_lock( CPUS_LOCK ); + + tmp = _papi_hwi_lookup_cpu(cpu_num); + if ( tmp == NULL ) { + retval = _papi_hwi_initialize_cpu( &tmp, cpu_num ); + } + + /* Increment use count */ + tmp->num_users++; + + if ( retval == PAPI_OK ) { + *here = tmp; + } + + _papi_hwi_unlock( CPUS_LOCK ); + + return retval; +} + + +static CpuInfo_t * +allocate_cpu( unsigned int cpu_num ) +{ + APIDBG("Entry: cpu_num: %d\n", cpu_num); + + CpuInfo_t *cpu; + int i; + + /* Allocate new CpuInfo structure */ + cpu = ( CpuInfo_t * ) papi_calloc( 1, sizeof ( CpuInfo_t ) ); + if ( cpu == NULL ) { + goto allocate_error; + } + + /* identify the cpu this info structure represents */ + cpu->cpu_num = cpu_num; + cpu->context = ( hwd_context_t ** ) + papi_calloc( ( size_t ) papi_num_components , + sizeof ( hwd_context_t * ) ); + if ( !cpu->context ) { + goto error_free_cpu; + } + + /* Allocate an eventset per component per cpu? Why? 
*/ + + cpu->running_eventset = ( EventSetInfo_t ** ) + papi_calloc(( size_t ) papi_num_components, + sizeof ( EventSetInfo_t * ) ); + if ( !cpu->running_eventset ) { + goto error_free_context; + } + + for ( i = 0; i < papi_num_components; i++ ) { + cpu->context[i] = + ( void * ) papi_calloc( 1, ( size_t ) _papi_hwd[i]->size.context ); + cpu->running_eventset[i] = NULL; + if ( cpu->context[i] == NULL ) { + goto error_free_contexts; + } + } + + cpu->num_users=0; + + THRDBG( "Allocated CpuInfo: %p\n", cpu ); + + return cpu; + +error_free_contexts: + for ( i--; i >= 0; i-- ) papi_free( cpu->context[i] ); +error_free_context: + papi_free( cpu->context ); +error_free_cpu: + papi_free( cpu ); +allocate_error: + return NULL; +} + +/* Must be called with CPUS_LOCK held! */ +static int +remove_cpu( CpuInfo_t * entry ) +{ + APIDBG("Entry: entry: %p\n", entry); + + CpuInfo_t *tmp = NULL, *prev = NULL; + + THRDBG( "_papi_hwi_cpu_head was cpu %d at %p\n", + _papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head ); + + /* Find the preceding element and the matched element, + short circuit if we've seen the head twice */ + + for ( tmp = ( CpuInfo_t * ) _papi_hwi_cpu_head; + ( entry != tmp ) || ( prev == NULL ); tmp = tmp->next ) { + prev = tmp; + } + + if ( tmp != entry ) { + THRDBG( "Cpu %d at %p was not found in the cpu list!\n", + entry->cpu_num, entry ); + return PAPI_EBUG; + } + + /* Only 1 element in list */ + + if ( prev == tmp ) { + _papi_hwi_cpu_head = NULL; + tmp->next = NULL; + THRDBG( "_papi_hwi_cpu_head now NULL\n" ); + } else { + prev->next = tmp->next; + /* If we're removing the head, better advance it! 
*/ + if ( _papi_hwi_cpu_head == tmp ) { + _papi_hwi_cpu_head = tmp->next; + THRDBG( "_papi_hwi_cpu_head now cpu %d at %p\n", + _papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head ); + } + THRDBG( "Removed cpu %p from list\n", tmp ); + } + + return PAPI_OK; +} + + +static void +free_cpu( CpuInfo_t **cpu ) +{ + APIDBG( "Entry: *cpu: %p, cpu_num: %d, cpu_users: %d\n", + *cpu, ( *cpu )->cpu_num, (*cpu)->num_users); + + int i,users,retval; + + _papi_hwi_lock( CPUS_LOCK ); + + (*cpu)->num_users--; + + users=(*cpu)->num_users; + + /* Remove from linked list if no users */ + if (!users) remove_cpu( *cpu ); + + _papi_hwi_unlock( CPUS_LOCK ); + + /* Exit early if still users of this CPU */ + if (users!=0) return; + + THRDBG( "Shutting down cpu %d at %p\n", (*cpu)->cpu_num, cpu ); + + for ( i = 0; i < papi_num_components; i++ ) { + if (_papi_hwd[i]->cmp_info.disabled) continue; + retval = _papi_hwd[i]->shutdown_thread( (*cpu)->context[i] ); + if ( retval != PAPI_OK ) { + // failure = retval; + } + } + + for ( i = 0; i < papi_num_components; i++ ) { + if ( ( *cpu )->context[i] ) { + papi_free( ( *cpu )->context[i] ); + } + } + + if ( ( *cpu )->context ) { + papi_free( ( *cpu )->context ); + } + + if ( ( *cpu )->running_eventset ) { + papi_free( ( *cpu )->running_eventset ); + } + + /* why do we clear this? */ + memset( *cpu, 0x00, sizeof ( CpuInfo_t ) ); + papi_free( *cpu ); + *cpu = NULL; +} + +/* Must be called with CPUS_LOCK held! 
*/ +static void +insert_cpu( CpuInfo_t * entry ) +{ + APIDBG("Entry: entry: %p\n", entry); + + if ( _papi_hwi_cpu_head == NULL ) { + /* 0 elements */ + THRDBG( "_papi_hwi_cpu_head is NULL\n" ); + entry->next = entry; + } else if ( _papi_hwi_cpu_head->next == _papi_hwi_cpu_head ) { + /* 1 element */ + THRDBG( "_papi_hwi_cpu_head was cpu %d at %p\n", + _papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head ); + _papi_hwi_cpu_head->next = entry; + entry->next = ( CpuInfo_t * ) _papi_hwi_cpu_head; + } else { + /* 2+ elements */ + THRDBG( "_papi_hwi_cpu_head was cpu %d at %p\n", + _papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head ); + entry->next = _papi_hwi_cpu_head->next; + _papi_hwi_cpu_head->next = entry; + } + + _papi_hwi_cpu_head = entry; + + THRDBG( "_papi_hwi_cpu_head now cpu %d at %p\n", + _papi_hwi_cpu_head->cpu_num, _papi_hwi_cpu_head ); +} + + + +/* Must be called with CPUS_LOCK held! */ +int +_papi_hwi_initialize_cpu( CpuInfo_t **dest, unsigned int cpu_num ) +{ + APIDBG("Entry: dest: %p, *dest: %p, cpu_num: %d\n", dest, *dest, cpu_num); + + int retval; + CpuInfo_t *cpu; + int i; + + if ( ( cpu = allocate_cpu(cpu_num) ) == NULL ) { + *dest = NULL; + return PAPI_ENOMEM; + } + + /* Call the component to fill in anything special. 
*/ + for ( i = 0; i < papi_num_components; i++ ) { + if (_papi_hwd[i]->cmp_info.disabled) continue; + retval = _papi_hwd[i]->init_thread( cpu->context[i] ); + if ( retval ) { + free_cpu( &cpu ); + *dest = NULL; + return retval; + } + } + + insert_cpu( cpu ); + + *dest = cpu; + return PAPI_OK; +} + +int +_papi_hwi_shutdown_cpu( CpuInfo_t *cpu ) +{ + APIDBG("Entry: cpu: %p, cpu_num: %d\n", cpu, cpu->cpu_num); + + free_cpu( &cpu ); + + return PAPI_OK; +} diff --git a/src/cpus.h b/src/cpus.h new file mode 100644 index 0000000..70c78f8 --- /dev/null +++ b/src/cpus.h @@ -0,0 +1,24 @@ +/** @file cpus.h + * Author: Gary Mohr + * gary.mohr@bull.com + * - based on threads.h by unknown author - + */ + +#ifndef PAPI_CPUS_H +#define PAPI_CPUS_H + +typedef struct _CpuInfo +{ + unsigned int cpu_num; + struct _CpuInfo *next; + hwd_context_t **context; + EventSetInfo_t **running_eventset; + EventSetInfo_t *from_esi; /* ESI used for last update this control state */ + int num_users; +} CpuInfo_t; + +int _papi_hwi_initialize_cpu( CpuInfo_t **dest, unsigned int cpu_num ); +int _papi_hwi_shutdown_cpu( CpuInfo_t *cpu ); +int _papi_hwi_lookup_or_create_cpu( CpuInfo_t ** here, unsigned int cpu_num ); + +#endif diff --git a/src/ctests/Makefile b/src/ctests/Makefile new file mode 100644 index 0000000..20e4c1f --- /dev/null +++ b/src/ctests/Makefile @@ -0,0 +1,31 @@ +# File: ctests/Makefile + +include Makefile.target + +INCLUDE = -I../testlib -I../validation_tests -I.. -I. 
# Objects and the archive provided by ../testlib.
testlibdir= ../testlib
TESTLIB= $(testlibdir)/libtestlib.a
DOLOOPS= $(testlibdir)/do_loops.o
CLOCKCORE= $(testlibdir)/clockcore.o

# Workload objects provided by ../validation_tests.
validationlibdir= ../validation_tests
TESTFLOPS= $(validationlibdir)/flops_testcode.o
TESTINS= $(validationlibdir)/instructions_testcode.o
TESTCYCLES = $(validationlibdir)/busy_work.o
DISPLAYERROR= $(validationlibdir)/display_error.o

# The test lists and build rules live in a separate file so that they can
# also be appended to the installed Makefile (see the last install step).
include Makefile.recipies

.PHONY : install

# Copy the built tests and their sources to $(DATADIR)/ctests.
# Every command carries a leading '-', so make keeps going when a step fails.
install: default
	@echo "C tests (DATADIR) being installed in: \"$(DATADIR)\"";
	-mkdir -p $(DATADIR)/ctests
	-chmod go+rx $(DATADIR)
	-chmod go+rx $(DATADIR)/ctests
	# regular files with the owner-execute bit set
	-find . -perm -100 -type f -exec cp {} $(DATADIR)/ctests \;
	-chmod go+rx $(DATADIR)/ctests/*
	# C sources and headers
	-find . -name "*.[ch]" -type f -exec cp {} $(DATADIR)/ctests \;
	# installed Makefile = Makefile.target followed by Makefile.recipies
	-cp Makefile.target $(DATADIR)/ctests/Makefile
	-cat Makefile.recipies >> $(DATADIR)/ctests/Makefile
# Non-multiplexed build of sdsc2.c (sdsc2-mpx builds the same source with
# -DMPX).  The recipe used to compile sdsc.c although the rule depends on
# sdsc2.c; it now compiles the file it is declared to depend on.
sdsc2: sdsc2.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS)
	$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) sdsc2.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -lm -o $@
branches.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) branches.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) $(LDFLAGS) -lm -o $@ + +sdsc2-mpx-noreset: sdsc2.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) + $(CC) $(INCLUDE) $(CFLAGS) -DMPX -DSTARTSTOP $(TOPTFLAGS) sdsc2.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) -lm $(LDFLAGS) -o $@ + +sdsc-mpx: sdsc-mpx.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) + $(CC) $(INCLUDE) $(CFLAGS) -DMPX $(TOPTFLAGS) sdsc-mpx.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -o $@ + +sdsc4-mpx: sdsc4-mpx.c $(TESTLIB) $(PAPILIB) $(TESTFLOPS) + $(CC) $(INCLUDE) $(CFLAGS) -DMPX $(TOPTFLAGS) sdsc4-mpx.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) $(LDFLAGS) -lm -o $@ + +calibrate: calibrate.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) calibrate.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o calibrate + +data_range: data_range.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) data_range.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o data_range + +p4_lst_ins: p4_lst_ins.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) p4_lst_ins.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o p4_lst_ins + +acpi: acpi.c dummy.o $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) acpi.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o acpi + +timer_overflow: timer_overflow.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) timer_overflow.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ + +mendes-alt: mendes-alt.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DMULTIPLEX mendes-alt.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ + +max_multiplex: max_multiplex.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) max_multiplex.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ + +multiplex1: multiplex1.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiplex1.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ + +multiplex2: multiplex2.c $(TESTLIB) 
# Tests that use POSIX threads: compiled with $(CC_R) and linked with
# -lpthread.  Every recipe names its output through $@ (the rule target).

multiplex1_pthreads: multiplex1_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiplex1_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread

kufrin: kufrin.c $(TESTLIB) $(DOLOOPS) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) kufrin.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread

multiplex3_pthreads: multiplex3_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiplex3_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread

overflow3_pthreads: overflow3_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow3_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread

thrspecific: thrspecific.c $(TESTLIB) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) thrspecific.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread

pthrtough: pthrtough.c $(TESTLIB) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) pthrtough.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread

pthrtough2: pthrtough2.c $(TESTLIB) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) pthrtough2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread

profile_pthreads: profile_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) profile_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread

# locks_pthreads additionally needs the math library.
locks_pthreads: locks_pthreads.c $(TESTLIB) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) locks_pthreads.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread -lm

krentel_pthreads: krentel_pthreads.c $(TESTLIB) $(PAPILIB)
	$(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) krentel_pthreads.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o $@ -lpthread
+overflow_pthreads: overflow_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_pthreads -lpthread + +version: version.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) version.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o version + +zero_pthreads: zero_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_pthreads.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_pthreads -lpthread + +zero_smp: zero_smp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC_R) $(INCLUDE) $(SMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_smp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_smp $(SMPLIBS) + +zero_shmem: zero_shmem.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC_R) $(INCLUDE) $(SMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_shmem.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_shmem $(SMPLIBS) + +zero_omp: zero_omp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) zero_omp.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_omp $(OMPLIBS) + +omptough: omptough.c $(TESTLIB) $(PAPILIB) + -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) omptough.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o omptough $(OMPLIBS) + +val_omp: val_omp.c $(TESTLIB) $(PAPILIB) + -$(CC_R) $(INCLUDE) $(OMPCFLGS) $(CFLAGS) $(TOPTFLAGS) val_omp.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o val_omp $(OMPLIBS) + +clockres_pthreads: clockres_pthreads.c $(TESTLIB) $(CLOCKCORE) $(PAPILIB) + $(CC_R) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) clockres_pthreads.c $(TESTLIB) $(CLOCKCORE) $(PAPILIB) $(LDFLAGS) -o clockres_pthreads -lpthread -lm + +inherit: inherit.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) inherit.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o inherit + +johnmay2: johnmay2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) johnmay2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) 
$(LDFLAGS) -o johnmay2 + +describe: describe.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) describe.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o describe + +derived: derived.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) derived.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o derived + +zero: zero.c $(TESTLIB) $(TESTINS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero.c $(TESTLIB) $(TESTINS) $(PAPILIB) $(LDFLAGS) -o zero + +zero_named: zero_named.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_named.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_named + +remove_events: remove_events.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) remove_events.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o remove_events + +zero_fork: zero_fork.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_fork.c $(DOLOOPS) $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o zero_fork + +try: try.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) try.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o try + +zero_flip: zero_flip.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_flip.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_flip + +realtime: realtime.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) realtime.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o realtime + +virttime: virttime.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) virttime.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o virttime + +first: first.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) first.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o first + +mpifirst: mpifirst.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(MPICC) $(INCLUDE) $(MPFLAGS) $(CFLAGS) $(TOPTFLAGS) first.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o mpifirst + +first-twice: first-twice.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) 
$(CFLAGS) $(TOPTFLAGS) first-twice.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o first-twice + +second: second.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) second.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o second + +flops: flops.c $(TESTLIB) $(TESTFLOPS) $(DISPLAYERROR) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) flops.c $(TESTLIB) $(TESTFLOPS) $(DISPLAYERROR) $(PAPILIB) $(LDFLAGS) -o flops + +ipc: ipc.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) ipc.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o ipc + +overflow: overflow.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow + +overflow_allcounters: overflow_allcounters.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_allcounters.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_allcounters + +overflow_twoevents: overflow_twoevents.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_twoevents.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_twoevents + +overflow_one_and_read: overflow_one_and_read.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_one_and_read.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_one_and_read + +overflow_index: overflow_index.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_index.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_index + +overflow_values: overflow_values.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_values.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o overflow_values + +overflow2: overflow2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow2 + +overflow_single_event: overflow_single_event.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + 
$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_single_event.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_single_event + +overflow_force_software: overflow_force_software.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) overflow_force_software.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o overflow_force_software + +sprofile: sprofile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) sprofile.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o sprofile + +profile: profile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) profile.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o profile + +profile_force_software: profile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DSWPROFILE profile.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o profile_force_software + +profile_twoevents: profile_twoevents.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) profile_twoevents.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o profile_twoevents + +earprofile: earprofile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) earprofile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) $(LDFLAGS) -o earprofile + +byte_profile: byte_profile.c $(TESTLIB) $(DOLOOPS) prof_utils.o $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) byte_profile.c prof_utils.o $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o byte_profile + +pernode: pernode.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) pernode.c $(LDFLAGS) -o pernode + +dmem_info: dmem_info.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) dmem_info.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o dmem_info + +all_events: all_events.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) all_events.c 
$(TESTLIB) $(PAPILIB) $(LDFLAGS) -o all_events + +all_native_events: all_native_events.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) all_native_events.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o all_native_events + +failed_events: failed_events.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) failed_events.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o failed_events + +get_event_component: get_event_component.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) get_event_component.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o get_event_component + +disable_component: disable_component.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) disable_component.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o disable_component + +memory: memory.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) memory.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o memory + +tenth: tenth.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) tenth.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o tenth + +eventname: eventname.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) eventname.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o eventname + +case1: case1.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) case1.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o case1 + +case2: case2.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) case2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o case2 + +low-level: low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) low-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o low-level + +matrix-hl: matrix-hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) matrix-hl.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o matrix-hl + +hl_rates: hl_rates.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) hl_rates.c $(TESTLIB) $(TESTFLOPS) $(PAPILIB) 
$(LDFLAGS) -o hl_rates + +high-level: high-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) high-level.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o high-level + +high-level2: high-level2.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) high-level2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o high-level2 + +shlib: shlib.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) shlib.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o shlib $(LDL) + +exeinfo: exeinfo.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) exeinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o exeinfo + +cmpinfo: cmpinfo.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) cmpinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o cmpinfo + +hwinfo: hwinfo.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) hwinfo.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o hwinfo + +code2name: code2name.c $(TESTLIB) $(PAPILIB) + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) code2name.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o code2name + +attach_target: attach_target.c $(DOLOOPS) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) attach_target.c -o attach_target $(DOLOOPS) $(TESTLIB) + +zero_attach: zero_attach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) zero_attach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o zero_attach + +multiattach: multiattach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiattach.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o multiattach + +multiattach2: multiattach2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) multiattach2.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o multiattach2 + +attach3: attach3.c attach_target $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) attach3.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o attach3 + +attach2: attach2.c attach_target $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) 
attach2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o attach2 + +attach_cpu: attach_cpu.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) attach_cpu.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o attach_cpu + +reset: reset.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) reset.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o reset + +reset_multiplex: reset_multiplex.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) reset_multiplex.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o reset_multiplex + +fork_overflow: fork_overflow.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) fork_overflow.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o fork_overflow + +exec_overflow: exec_overflow.c $(TESTLIB) $(PAPILIB) $(TESTCYCLES) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DPEXEC exec_overflow.c $(TESTLIB) $(PAPILIB) $(TESTCYCLES) $(LDFLAGS) -o exec_overflow + +child_overflow: child_overflow.c $(TESTLIB) $(PAPILIB) $(TESTCYCLES) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DPCHILD child_overflow.c $(TESTLIB) $(PAPILIB) $(TESTCYCLES) $(LDFLAGS) -o child_overflow + +system_child_overflow: system_child_overflow.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DSYSTEM system_child_overflow.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o system_child_overflow + +system_overflow: system_overflow.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -DSYSTEM2 system_overflow.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o system_overflow + +burn: burn.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) burn.c $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o burn + +fork: fork.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) fork.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o fork + +exec: exec.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) exec.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o exec + +exec2: exec2.c $(TESTLIB) $(PAPILIB) + -$(CC) 
$(INCLUDE) $(CFLAGS) $(TOPTFLAGS) exec2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o exec2 + +fork2: fork2.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) fork2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o fork2 + +forkexec: forkexec.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) forkexec.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o forkexec + +forkexec2: forkexec2.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) forkexec2.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o forkexec2 + +forkexec3: forkexec3.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) forkexec3.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o forkexec3 + +forkexec4: forkexec4.c $(TESTLIB) $(PAPILIB) + -$(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) forkexec4.c $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o forkexec4 + +prof_utils.o: prof_utils.c $(testlibdir)/papi_test.h prof_utils.h + $(CC) $(INCLUDE) $(CFLAGS) $(TOPTFLAGS) -c prof_utils.c + +.PHONY : all default ctests ctest clean + +clean: + rm -f *.o *.stderr *.stdout core *~ $(ALL) unregister_pthreads + +distclean clobber: clean + rm -f Makefile.target diff --git a/src/ctests/Makefile.target.in b/src/ctests/Makefile.target.in new file mode 100644 index 0000000..bb51c35 --- /dev/null +++ b/src/ctests/Makefile.target.in @@ -0,0 +1,21 @@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ +datarootdir = @datarootdir@ +datadir = @datadir@/${PACKAGE_TARNAME} +testlibdir = $(datadir)/testlib +validationlibdir = $(datadir)/validation_tests +DATADIR = $(DESTDIR)$(datadir) +INCLUDE = -I. 
-I@includedir@ -I$(testlibdir) -I$(validationlibdir) +LIBDIR = @libdir@ +LIBRARY=@LIBRARY@ +SHLIB=@SHLIB@ +PAPILIB = ../@LINKLIB@ +TESTLIB = $(testlibdir)/libtestlib.a +LDFLAGS = @LDL@ @STATIC@ +CC = @CC@ +MPICC = @MPICC@ +F77 = @F77@ +CC_R = @CC_R@ +CFLAGS = @CFLAGS@ @TOPTFLAGS@ +OMPCFLGS = @OMPCFLGS@ diff --git a/src/ctests/all_events.c b/src/ctests/all_events.c new file mode 100644 index 0000000..571ebf7 --- /dev/null +++ b/src/ctests/all_events.c @@ -0,0 +1,112 @@ +/* This file tries to add,start,stop, and remove all pre-defined events. + * It is meant not to test the accuracy of the mapping but to make sure + * that all events in the component will at least start (Helps to + * catch typos). + * + * Author: Kevin London + * london@cs.utk.edu + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval, i; + int EventSet = PAPI_NULL, count = 0, err_count = 0; + long long values; + PAPI_event_info_t info; + int quiet=0; + char error_message[BUFSIZ]; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + if (!quiet) { + printf("\nTrying all pre-defined events:\n"); + } + + /* Initialize PAPI */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Create an EventSet */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + /* Add all preset events */ + for ( i = 0; i < PAPI_MAX_PRESET_EVENTS; i++ ) { + + if ( PAPI_get_event_info( PAPI_PRESET_MASK | i, &info ) != PAPI_OK ) + continue; + + if ( !( info.count ) ) + continue; + + if (!quiet) printf( "Adding %-14s", info.symbol ); + + retval = PAPI_add_event( EventSet, ( int ) info.event_code ); + if ( retval != PAPI_OK ) { + if (!quiet) { + printf("Error adding event %s\n",info.symbol); + if (retval==PAPI_ECNFLCT) { + printf("Probably 
NMI watchdog related\n"); + } + } + if (retval==PAPI_ECNFLCT) { + sprintf(error_message,"Problem adding %s (probably NMI Watchdog related)",info.symbol); + } + else { + sprintf(error_message,"Problem adding %s",info.symbol); + } + test_warn( __FILE__, __LINE__, error_message, retval ); + err_count++; + } else { + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + PAPI_perror( "PAPI_start" ); + err_count++; + } else { + retval = PAPI_stop( EventSet, &values ); + if ( retval != PAPI_OK ) { + PAPI_perror( "PAPI_stop" ); + err_count++; + } else { + if (!quiet) printf( "successful\n" ); + count++; + } + } + retval = PAPI_remove_event( EventSet, ( int ) info.event_code ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event", retval ); + } + } + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + if (!quiet) { + printf( "Successfully added, started and stopped %d events.\n", count ); + } + + if ( err_count ) { + if (!quiet) printf( "Failed to add, start or stop %d events.\n", err_count ); + } + + if (count<=0) { + test_fail( __FILE__, __LINE__, "No events added", 1 ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/all_native_events.c b/src/ctests/all_native_events.c new file mode 100644 index 0000000..9df011e --- /dev/null +++ b/src/ctests/all_native_events.c @@ -0,0 +1,185 @@ +/* + * File: all_native_events.c + * Author: Haihang You + */ + +/* This test tries to add all native events from all components */ + +/* This file reads hardware info and performs the following test: + - Start and stop all native events. + This is a good preliminary way to validate native event tables. + In its current form this test also stresses the number of + event sets the library can handle outstanding. 
+*/ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +static int +check_event( int event_code, char *name, int quiet ) +{ + int retval; + long long values; + int EventSet = PAPI_NULL; + + /* Possibly there was an older issue with the */ + /* REPLAY_EVENT:BR_MSP on Pentium4 ??? */ + + /* Create an eventset */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + /* Add the event */ + retval = PAPI_add_event( EventSet, event_code ); + if ( retval != PAPI_OK ) { + if (!quiet) printf( "Error adding %s %d\n", name, retval ); + return retval; + } + + /* Start the event */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + PAPI_perror( "PAPI_start" ); + } else { + retval = PAPI_stop( EventSet, &values ); + if ( retval != PAPI_OK ) { + PAPI_perror( "PAPI_stop" ); + return retval; + } else { + if (!quiet) printf( "Added and Stopped %s successfully.\n", name ); + } + } + + /* Cleanup the eventset */ + retval=PAPI_cleanup_eventset( EventSet ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval); + } + + /* Destroy the eventset */ + retval=PAPI_destroy_eventset( &EventSet ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval); + } + + return PAPI_OK; +} + +int +main( int argc, char **argv ) +{ + + int i, k, add_count = 0, err_count = 0; + int retval; + PAPI_event_info_t info, info1; + const PAPI_hw_info_t *hwinfo = NULL; + const PAPI_component_info_t* cmpinfo; + int event_code; + int numcmp, cid; + int quiet; + + /* Set quiet variable */ + quiet=tests_quiet( argc, argv ); + + /* Init PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if (!quiet) { + printf("Test case ALL_NATIVE_EVENTS: Available " + "native events and hardware " + 
"information.\n"); + } + + hwinfo=PAPI_get_hardware_info(); + if ( hwinfo == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + } + + numcmp = PAPI_num_components( ); + + /* Loop through all components */ + for( cid = 0; cid < numcmp; cid++ ) { + + + cmpinfo = PAPI_get_component_info( cid ); + if (cmpinfo == NULL) { + test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 2 ); + } + + /* Skip disabled components */ + if (cmpinfo->disabled) { + if (!quiet) { + printf( "Name: %-23s %s\n", + cmpinfo->name ,cmpinfo->description); + printf(" \\-> Disabled: %s\n", + cmpinfo->disabled_reason); + } + continue; + } + + /* For platform independence, always ASK FOR the first event */ + /* Don't just assume it'll be the first numeric value */ + i = 0 | PAPI_NATIVE_MASK; + retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_FIRST, cid ); + + do { + retval = PAPI_get_event_info( i, &info ); + + /* We used to skip OFFCORE and UNCORE events */ + /* Why? */ + + /* Enumerate all umasks */ + k = i; + if ( PAPI_enum_cmp_event(&k, PAPI_NTV_ENUM_UMASKS, cid )==PAPI_OK ) { + do { + retval = PAPI_get_event_info( k, &info1 ); + event_code = ( int ) info1.event_code; + if ( check_event( event_code, info1.symbol, quiet ) == PAPI_OK ) { + add_count++; + } + else { + err_count++; + } + } while ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cid ) == PAPI_OK ); + } else { + /* Event didn't have any umasks */ + event_code = ( int ) info.event_code; + if ( check_event( event_code, info.symbol, quiet ) == PAPI_OK) { + add_count++; + } + else { + err_count++; + } + } + + } while ( PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cid ) == PAPI_OK ); + + } + + if (!quiet) { + printf( "\n\nSuccessfully found and added %d events " + "(in %d eventsets).\n", + add_count , add_count); + } + + if ( err_count ) { + if (!quiet) printf( "Failed to add %d events.\n", err_count ); + } + + if ( add_count <= 0 ) { + test_fail( __FILE__, __LINE__, "No events added", 1 ); + } + + test_pass( __FILE__ 
); + + return 0; +} diff --git a/src/ctests/api.c b/src/ctests/api.c new file mode 100644 index 0000000..62ad009 --- /dev/null +++ b/src/ctests/api.c @@ -0,0 +1,361 @@ +/* + * File: api.c + * Author: Brian Sheely + * bsheely@eecs.utk.edu + * + * Description: This test is designed to provide unit testing and complete + * coverage for all functions which comprise the "Low Level API" + * and the "High Level API" as defined in papi.h. + * + * Currently only the "high level" API is actually tested. + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + const int NUM_COUNTERS = 1; + int Events[] = { PAPI_TOT_INS }; + long long values[NUM_COUNTERS]; + float rtime, ptime, ipc, mflips, mflops; + long long ins, flpins, flpops; + int retval; + int quiet=0; + + quiet=tests_quiet( argc, argv ); + + /* Initialize PAPI */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /****** High Level API ******/ + + if ( !quiet ) printf( "Testing PAPI_num_components... " ); + + /* get the number of components available on the system */ + retval = PAPI_num_components( ); + if ( !quiet ) printf( "%d\n", retval ); + + if ( retval == 0) { + if ( !quiet ) printf( "No components found, skipping high level tests\n"); + test_skip(__FILE__, __LINE__, "No components found", 0); + } + + if ( !quiet ) printf( "Testing PAPI_num_counters... " ); + + /* get the number of hardware counters available on the system */ + retval = PAPI_num_counters( ); + if ( retval != PAPI_get_cmp_opt( PAPI_MAX_HWCTRS, NULL, 0 ) ) { + test_fail( __FILE__, __LINE__, "PAPI_num_counters", retval ); + } + else if ( !quiet ) printf( "%d\n", retval ); + + + /* Test PAPI_start_counters() */ + if ( !quiet ) printf( "Testing PAPI_start_counters... 
" ); + // pass invalid 1st argument + retval = PAPI_start_counters( NULL, NUM_COUNTERS ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); + } + // pass invalid 2nd argument + retval = PAPI_start_counters( Events, 0 ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); + } + /* Try PAPI_TOT_INS */ + retval = PAPI_start_counters( Events, NUM_COUNTERS ); + if ( retval != PAPI_OK ) { + if (!quiet) printf("\nCould not start PAPI_TOT_INS\n"); + test_skip( __FILE__, __LINE__, "PAPI_TOT_INS not available.", retval ); + } + else { + if ( !quiet ) printf( "started PAPI_TOT_INS\n" ); + } + + /* Test PAPI_stop_counters() */ + if ( !quiet ) printf( "Testing PAPI_stop_counters... " ); + // pass invalid 1st argument + retval = PAPI_stop_counters( NULL, NUM_COUNTERS ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); + } + // pass invalid 2nd argument + retval = PAPI_stop_counters( values, 0 ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); + } + // stop counters and return current counts + retval = PAPI_stop_counters( values, NUM_COUNTERS ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); + } + else if ( !quiet ) printf( "stopped counting PAPI_TOT_INS\n" ); + //NOTE: There are currently no checks on whether or not counter values are correct + + + // start counting hardware events again + retval = PAPI_start_counters( Events, NUM_COUNTERS ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); + } + + /* Test PAPI_read_counters() */ + if ( !quiet ) printf( "Testing PAPI_read_counters... 
" ); + // pass invalid 1st argument + retval = PAPI_read_counters( NULL, NUM_COUNTERS ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); + } + // pass invalid 2nd argument + retval = PAPI_read_counters( values, 0 ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); + } + // copy current counts to array and reset counters + retval = PAPI_read_counters( values, NUM_COUNTERS ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); + } + else { + if ( !quiet ) printf( "read PAPI_TOT_INS counts and reset counter\n" ); + } + //NOTE: There are currently no checks on whether or not counter values are correct + + + /* Test PAPI_accum_counters() */ + if ( !quiet ) printf( "Testing PAPI_accum_counters... " ); + // pass invalid 1st argument + retval = PAPI_accum_counters( NULL, NUM_COUNTERS ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); + } + // pass invalid 2nd argument + retval = PAPI_accum_counters( values, 0 ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); + } + // add current counts to array and reset counters + retval = PAPI_accum_counters( values, NUM_COUNTERS ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); + } + else { + if ( !quiet ) printf( "added PAPI_TOT_INS counts and reset counter\n" ); + } + //NOTE: There are currently no checks on whether or not counter values are correct + + + // stop counting hardware events + retval = PAPI_stop_counters( values, NUM_COUNTERS ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); + } + + + /* Test PAPI_ipc() */ + if ( !quiet ) printf( "Testing PAPI_ipc... 
" ); + // pass invalid 1st argument + retval = PAPI_ipc( NULL, &ptime, &ins, &ipc ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); + } + // pass invalid 2nd argument + retval = PAPI_ipc( &rtime, NULL, &ins, &ipc ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); + } + // pass invalid 3rd argument + retval = PAPI_ipc( &rtime, &ptime, NULL, &ipc ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); + } + // pass invalid 4th argument + retval = PAPI_ipc( &rtime, &ptime, &ins, NULL ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); + } + // get instructions per cycle, real and processor time + retval = PAPI_ipc( &rtime, &ptime, &ins, &ipc ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); + } + else { + if ( !quiet ) printf( "got instructions per cycle, real and processor time\n" ); + } + //NOTE: There are currently no checks on whether or not returned values are correct + + + //NOTE: PAPI_flips and PAPI_flops fail if any other low-level calls have been made! + PAPI_shutdown( ); + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Test PAPI_flips() */ + if ( !quiet ) printf( "Testing PAPI_flips... 
" ); + // pass invalid 1st argument + retval = PAPI_flips( NULL, &ptime, &flpins, &mflips ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + // pass invalid 2nd argument + retval = PAPI_flips( &rtime, NULL, &flpins, &mflips ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + // pass invalid 3rd argument + retval = PAPI_flips( &rtime, &ptime, NULL, &mflips ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + // pass invalid 4th argument + retval = PAPI_flips( &rtime, &ptime, &flpins, NULL ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + // get Mflips/s, real and processor time + retval = PAPI_flips( &rtime, &ptime, &flpins, &mflips ); + if ( retval == PAPI_ENOEVNT ) { + test_warn( __FILE__, __LINE__, "PAPI_flips", retval); + } + else if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + else { + if ( !quiet ) printf( "got Mflips/s, real and processor time\n" ); + } + //NOTE: There are currently no checks on whether or not returned values are correct + + + PAPI_shutdown( ); + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Test PAPI_flops() */ + if ( !quiet ) printf( "Testing PAPI_flops... 
" ); + // pass invalid 1st argument + retval = PAPI_flops( NULL, &ptime, &flpops, &mflops ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + // pass invalid 2nd argument + retval = PAPI_flops( &rtime, NULL, &flpops, &mflops ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + // pass invalid 3rd argument + retval = PAPI_flops( &rtime, &ptime, NULL, &mflops ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + // pass invalid 4th argument + retval = PAPI_flops( &rtime, &ptime, &flpops, NULL ); + if ( retval != PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + // get Mflops/s, real and processor time + retval = PAPI_flops( &rtime, &ptime, &flpops, &mflops ); + if ( retval == PAPI_ENOEVNT ) { + test_warn( __FILE__, __LINE__, "PAPI_flops", retval); + } + else if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + else if ( !quiet ) { + printf( "got Mflops/s, real and processor time\n" ); + } + //NOTE: There are currently no checks on whether or not returned values are correct + + + /***************************/ + /****** Low Level API ******/ + /***************************/ +/* + int PAPI_accum(int EventSet, long long * values); // accumulate and reset hardware events from an event set + int PAPI_add_event(int EventSet, int Event); // add single PAPI preset or native hardware event to an event set + int PAPI_add_events(int EventSet, int *Events, int number); // add array of PAPI preset or native hardware events to an event set + int PAPI_assign_eventset_component(int EventSet, int cidx); // assign a component index to an existing but empty eventset + int PAPI_attach(int EventSet, unsigned long tid); // attach specified event set to a specific process or thread id + int PAPI_cleanup_eventset(int EventSet); // remove all PAPI events from an event set + int 
PAPI_create_eventset(int *EventSet); // create a new empty PAPI event set + int PAPI_detach(int EventSet); // detach specified event set from a previously specified process or thread id + int PAPI_destroy_eventset(int *EventSet); // deallocates memory associated with an empty PAPI event set + int PAPI_enum_event(int *EventCode, int modifier); // return the event code for the next available preset or native event + int PAPI_event_code_to_name(int EventCode, char *out); // translate an integer PAPI event code into an ASCII PAPI preset or native name + int PAPI_event_name_to_code(char *in, int *out); // translate an ASCII PAPI preset or native name into an integer PAPI event code + int PAPI_get_dmem_info(PAPI_dmem_info_t *dest); // get dynamic memory usage information + int PAPI_get_event_info(int EventCode, PAPI_event_info_t * info); // get the name and descriptions for a given preset or native event code + const PAPI_exe_info_t *PAPI_get_executable_info(void); // get the executable's address space information + const PAPI_hw_info_t *PAPI_get_hardware_info(void); // get information about the system hardware + const PAPI_component_info_t *PAPI_get_component_info(int cidx); // get information about the component features + int PAPI_get_multiplex(int EventSet); // get the multiplexing status of specified event set + int PAPI_get_opt(int option, PAPI_option_t * ptr); // query the option settings of the PAPI library or a specific event set + int PAPI_get_cmp_opt(int option, PAPI_option_t * ptr,int cidx); // query the component specific option settings of a specific event set + long long PAPI_get_real_cyc(void); // return the total number of cycles since some arbitrary starting point + long long PAPI_get_real_nsec(void); // return the total number of nanoseconds since some arbitrary starting point + long long PAPI_get_real_usec(void); // return the total number of microseconds since some arbitrary starting point + const PAPI_shlib_info_t *PAPI_get_shared_lib_info(void); // 
get information about the shared libraries used by the process + int PAPI_get_thr_specific(int tag, void **ptr); // return a pointer to a thread specific stored data structure + int PAPI_get_overflow_event_index(int Eventset, long long overflow_vector, int *array, int *number); // # decomposes an overflow_vector into an event index array + long long PAPI_get_virt_cyc(void); // return the process cycles since some arbitrary starting point + long long PAPI_get_virt_nsec(void); // return the process nanoseconds since some arbitrary starting point + long long PAPI_get_virt_usec(void); // return the process microseconds since some arbitrary starting point + int PAPI_is_initialized(void); // return the initialized state of the PAPI library + int PAPI_library_init(int version); // initialize the PAPI library + int PAPI_list_events(int EventSet, int *Events, int *number); // list the events that are members of an event set + int PAPI_list_threads(unsigned long *tids, int *number); // list the thread ids currently known to PAPI + int PAPI_lock(int); // lock one of two PAPI internal user mutex variables + int PAPI_multiplex_init(void); // initialize multiplex support in the PAPI library + int PAPI_num_hwctrs(void); // return the number of hardware counters for the cpu + int PAPI_num_cmp_hwctrs(int cidx); // return the number of hardware counters for a specified component + int PAPI_num_hwctrs(void); // for backward compatibility + int PAPI_num_events(int EventSet); // return the number of events in an event set + int PAPI_overflow(int EventSet, int EventCode, int threshold, + int flags, PAPI_overflow_handler_t handler); // set up an event set to begin registering overflows + int PAPI_perror( char *msg); // convert PAPI error codes to strings + int PAPI_profil(void *buf, unsigned bufsiz, caddr_t offset, + unsigned scale, int EventSet, int EventCode, + int threshold, int flags); // generate PC histogram data where hardware counter overflow occurs + int PAPI_query_event(int 
EventCode); // query if a PAPI event exists + int PAPI_read(int EventSet, long long * values); // read hardware events from an event set with no reset + int PAPI_read_ts(int EventSet, long long * values, long long *cyc); + int PAPI_register_thread(void); // inform PAPI of the existence of a new thread + int PAPI_remove_event(int EventSet, int EventCode); // remove a hardware event from a PAPI event set + int PAPI_remove_events(int EventSet, int *Events, int number); // remove an array of hardware events from a PAPI event set + int PAPI_reset(int EventSet); // reset the hardware event counts in an event set + int PAPI_set_debug(int level); // set the current debug level for PAPI + int PAPI_set_cmp_domain(int domain, int cidx); // set the component specific default execution domain for new event sets + int PAPI_set_domain(int domain); // set the default execution domain for new event sets + int PAPI_set_cmp_granularity(int granularity, int cidx); // set the component specific default granularity for new event sets + int PAPI_set_granularity(int granularity); //set the default granularity for new event sets + int PAPI_set_multiplex(int EventSet); // convert a standard event set to a multiplexed event set + int PAPI_set_opt(int option, PAPI_option_t * ptr); // change the option settings of the PAPI library or a specific event set + int PAPI_set_thr_specific(int tag, void *ptr); // save a pointer as a thread specific stored data structure + void PAPI_shutdown(void); // finish using PAPI and free all related resources + int PAPI_sprofil(PAPI_sprofil_t * prof, int profcnt, int EventSet, int EventCode, int threshold, int flags); // generate hardware counter profiles from multiple code regions + int PAPI_start(int EventSet); // start counting hardware events in an event set + int PAPI_state(int EventSet, int *status); // return the counting state of an event set + int PAPI_stop(int EventSet, long long * values); // stop counting hardware events in an event set and return 
current events + char *PAPI_strerror(int); // return a pointer to the error message corresponding to a specified error code + unsigned long PAPI_thread_id(void); // get the thread identifier of the current thread + int PAPI_thread_init(unsigned long (*id_fn) (void)); // initialize thread support in the PAPI library + int PAPI_unlock(int); // unlock one of two PAPI internal user mutex variables + int PAPI_unregister_thread(void); // inform PAPI that a previously registered thread is disappearing + int PAPI_write(int EventSet, long long * values); // write counter values into counters +*/ + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/attach2.c b/src/ctests/attach2.c new file mode 100644 index 0000000..37098d3 --- /dev/null +++ b/src/ctests/attach2.c @@ -0,0 +1,247 @@ +/* This file performs the following test: start, stop and timer functionality for + attached processes. + + - It attempts to use the following two counters. It may use less depending on + hardware counter resource limitations. These are counted in the default counting + domain and default granularity, depending on the platform. Usually this is + the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). + + PAPI_FP_INS + + PAPI_TOT_CYC + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. 
+*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + + +#ifdef _AIX +#define _LINUX_SOURCE_COMPAT +#endif + +#if defined(__FreeBSD__) +# define PTRACE_ATTACH PT_ATTACH +# define PTRACE_TRACEME PT_TRACE_ME +#endif + +static int +wait_for_attach_and_loop( void ) +{ + char *path; + char newpath[PATH_MAX]; + path = getenv("PATH"); + + sprintf(newpath, "PATH=./:%s", (path)?path:"\0" ); + putenv(newpath); + + if (ptrace(PTRACE_TRACEME, 0, 0, 0) == 0) { + execlp("attach_target","attach_target","100000000",NULL); + perror("execl(attach_target) failed"); + } + perror("PTRACE_TRACEME"); + return ( 1 ); +} + +int +main( int argc, char **argv ) +{ + int status, retval, tmp; + int EventSet1 = PAPI_NULL; + long long **values; + long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; + char event_name[PAPI_MAX_STR_LEN];; + const PAPI_hw_info_t *hw_info; + const PAPI_component_info_t *cmpinfo; + pid_t pid; + int quiet; + + /* Fork before doing anything with the PMU */ + + setbuf(stdout,NULL); + pid = fork( ); + if ( pid < 0 ) + test_fail( __FILE__, __LINE__, "fork()", PAPI_ESYS ); + if ( pid == 0 ) + exit( wait_for_attach_and_loop( ) ); + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + + /* Master only process below here */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( cmpinfo = PAPI_get_component_info( 0 ) ) == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 0 ); + + if ( cmpinfo->attach == 0 ) + test_skip( __FILE__, __LINE__, "Platform does not support attaching", + 0 ); + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 0 ); + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depending on the 
availability of the event on the + platform */ + retval = PAPI_create_eventset(&EventSet1); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + /* Here we are testing that this does not cause a fail */ + + retval = PAPI_assign_eventset_component( EventSet1, 0 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", + retval ); + + retval = PAPI_attach( EventSet1, ( unsigned long ) pid ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_attach", retval ); + + retval = PAPI_add_event(EventSet1, PAPI_TOT_CYC); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Problem adding PAPI_TOT_CYC\n"); + test_skip( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + strcpy(event_name,"PAPI_FP_INS"); + + retval = PAPI_add_named_event(EventSet1, event_name); + if ( retval == PAPI_ENOEVNT ) { + strcpy(event_name,"PAPI_TOT_INS"); + retval = PAPI_add_named_event(EventSet1, event_name); + } + + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + values = allocate_test_space( 1, 2); + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + elapsed_virt_us = PAPI_get_virt_usec( ); + + elapsed_virt_cyc = PAPI_get_virt_cyc( ); + + if (!quiet) printf("must_ptrace is %d\n",cmpinfo->attach_must_ptrace); + pid_t child = wait( &status ); + if (!quiet) printf( "Debugger exited wait() with %d\n",child ); + if (WIFSTOPPED( status )) + { + if (!quiet) printf( "Child has stopped due to signal %d (%s)\n", + WSTOPSIG( status ), strsignal(WSTOPSIG( status )) ); + } + if (WIFSIGNALED( status )) + { + if (!quiet) printf( "Child %ld received signal %d (%s)\n", + (long)child, + WTERMSIG(status) , strsignal(WTERMSIG( status )) ); + } + if (!quiet) printf("After %d\n",retval); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + if (!quiet) 
printf("Continuing\n"); +#if defined(__FreeBSD__) + if ( ptrace( PT_CONTINUE, pid, (caddr_t) 1, 0 ) == -1 ) { +#else + if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { +#endif + perror( "ptrace(PTRACE_CONT)" ); + return 1; + } + + + do { + child = wait( &status ); + if (!quiet) printf( "Debugger exited wait() with %d\n", child); + if (WIFSTOPPED( status )) + { + if (!quiet) printf( "Child has stopped due to signal %d (%s)\n", + WSTOPSIG( status ), strsignal(WSTOPSIG( status )) ); + } + if (WIFSIGNALED( status )) + { + if (!quiet) printf( "Child %ld received signal %d (%s)\n", + (long)child, + WTERMSIG(status) , strsignal(WTERMSIG( status )) ); + } + } while (!WIFEXITED( status )); + + if (!quiet) printf("Child exited with value %d\n",WEXITSTATUS(status)); + if (WEXITSTATUS(status) != 0) { + test_fail( __FILE__, __LINE__, "Exit status of child to attach to", PAPI_EMISC); + } + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; + + elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + retval = PAPI_cleanup_eventset(EventSet1); + if (retval != PAPI_OK) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset(&EventSet1); + if (retval != PAPI_OK) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + if (!quiet) { + printf( "Test case: 3rd party attach start, stop.\n" ); + printf( "-----------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( 
"-------------------------------------------------------------------------\n" ); + + printf( "Test type : \t 1\n" ); + + printf( TAB1, "PAPI_TOT_CYC : \t", ( values[0] )[0] ); + printf( "%s : \t %12lld\n",event_name, ( values[0] )[1]); + printf( TAB1, "Real usec : \t", elapsed_us ); + printf( TAB1, "Real cycles : \t", elapsed_cyc ); + printf( TAB1, "Virt usec : \t", elapsed_virt_us ); + printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc ); + + printf( "-------------------------------------------------------------------------\n" ); + + printf( "Verification: none\n" ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/attach3.c b/src/ctests/attach3.c new file mode 100644 index 0000000..a153a3e --- /dev/null +++ b/src/ctests/attach3.c @@ -0,0 +1,250 @@ +/* This file performs the following test: start, stop and timer functionality for + attached processes. + + - It attempts to use the following two counters. It may use less depending on + hardware counter resource limitations. These are counted in the default counting + domain and default granularity, depending on the platform. Usually this is + the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). + + PAPI_FP_INS + + PAPI_TOT_CYC + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. 
+*/ + +#include +#include +#include +#include + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + + +#ifdef _AIX +#define _LINUX_SOURCE_COMPAT +#endif + +#if defined(__FreeBSD__) +# define PTRACE_ATTACH PT_ATTACH +# define PTRACE_TRACEME PT_TRACE_ME +#endif + +static int +wait_for_attach_and_loop( void ) +{ + char *path; + char newpath[PATH_MAX]; + path = getenv("PATH"); + + sprintf(newpath, "PATH=./:%s", (path)?path:"\0" ); + putenv(newpath); + + if (ptrace(PTRACE_TRACEME, 0, 0, 0) == 0) { + execlp("attach_target","attach_target","100000000",NULL); + perror("execl(attach_target) failed"); + } + perror("PTRACE_TRACEME"); + return ( 1 ); +} + +int +main( int argc, char **argv ) +{ + int status, retval, tmp; + int EventSet1 = PAPI_NULL; + long long **values; + long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; + char event_name[PAPI_MAX_STR_LEN];; + const PAPI_hw_info_t *hw_info; + const PAPI_component_info_t *cmpinfo; + pid_t pid; + int quiet; + + /* Fork before doing anything with the PMU */ + + setbuf(stdout,NULL); + pid = fork( ); + if ( pid < 0 ) + test_fail( __FILE__, __LINE__, "fork()", PAPI_ESYS ); + if ( pid == 0 ) + exit( wait_for_attach_and_loop( ) ); + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + + /* Master only process below here */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( cmpinfo = PAPI_get_component_info( 0 ) ) == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 0 ); + + if ( cmpinfo->attach == 0 ) + test_skip( __FILE__, __LINE__, "Platform does not support attaching", + 0 ); + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 0 ); + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depending on the 
availability of the event on the + platform */ + retval = PAPI_create_eventset(&EventSet1); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_attach", retval ); + + /* Force addition of component */ + + retval = PAPI_assign_eventset_component( EventSet1, 0 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", + retval ); + + /* The following call causes this test to fail for perf_events */ + + retval = PAPI_attach( EventSet1, ( unsigned long ) pid ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_attach", retval ); + + + + retval = PAPI_add_event(EventSet1, PAPI_TOT_CYC); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Could not add PAPI_TOT_CYC\n"); + test_skip( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + strcpy(event_name,"PAPI_FP_INS"); + retval = PAPI_add_named_event(EventSet1, event_name); + if ( retval == PAPI_ENOEVNT ) { + strcpy(event_name,"PAPI_TOT_INS"); + retval = PAPI_add_named_event(EventSet1, event_name); + } + + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + values = allocate_test_space( 1, 2); + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + elapsed_virt_us = PAPI_get_virt_usec( ); + + elapsed_virt_cyc = PAPI_get_virt_cyc( ); + + if (!quiet) printf("must_ptrace is %d\n",cmpinfo->attach_must_ptrace); + pid_t child = wait( &status ); + if (!quiet) printf( "Debugger exited wait() with %d\n",child ); + if (WIFSTOPPED( status )) + { + if (!quiet) printf( "Child has stopped due to signal %d (%s)\n", + WSTOPSIG( status ), strsignal(WSTOPSIG( status )) ); + } + if (WIFSIGNALED( status )) + { + if (!quiet) printf( "Child %ld received signal %d (%s)\n", + (long)child, + WTERMSIG(status) , strsignal(WTERMSIG( status )) ); + } + if (!quiet) printf("After %d\n",retval); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval 
); + + if (!quiet) printf("Continuing\n"); +#if defined(__FreeBSD__) + if ( ptrace( PT_CONTINUE, pid, (caddr_t) 1, 0 ) == -1 ) { +#else + if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { +#endif + perror( "ptrace(PTRACE_CONT)" ); + return 1; + } + + + do { + child = wait( &status ); + if (!quiet) printf( "Debugger exited wait() with %d\n", child); + if (WIFSTOPPED( status )) + { + if (!quiet) printf( "Child has stopped due to signal %d (%s)\n", + WSTOPSIG( status ), strsignal(WSTOPSIG( status )) ); + } + if (WIFSIGNALED( status )) + { + if (!quiet) printf( "Child %ld received signal %d (%s)\n", + (long)child, + WTERMSIG(status) , strsignal(WTERMSIG( status )) ); + } + } while (!WIFEXITED( status )); + + if (!quiet) printf("Child exited with value %d\n",WEXITSTATUS(status)); + if (WEXITSTATUS(status) != 0) { + test_fail( __FILE__, __LINE__, "Exit status of child to attach to", PAPI_EMISC); + } + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; + + elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + retval = PAPI_cleanup_eventset(EventSet1); + if (retval != PAPI_OK) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset(&EventSet1); + if (retval != PAPI_OK) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + if (!quiet) { + printf( "Test case: 3rd party attach start, stop.\n" ); + printf( "-----------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", 
NUM_FLOPS ); + printf( "-------------------------------------------------------------------------\n" ); + + printf( "Test type : \t 1\n" ); + + printf( TAB1, "PAPI_TOT_CYC : \t", ( values[0] )[0] ); + printf( "%s : \t %12lld\n", event_name, ( values[0] )[1] ); + printf( TAB1, "Real usec : \t", elapsed_us ); + printf( TAB1, "Real cycles : \t", elapsed_cyc ); + printf( TAB1, "Virt usec : \t", elapsed_virt_us ); + printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc ); + + printf( "-------------------------------------------------------------------------\n" ); + + printf( "Verification: none\n" ); + } + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/attach_cpu.c b/src/ctests/attach_cpu.c new file mode 100644 index 0000000..ff4c257 --- /dev/null +++ b/src/ctests/attach_cpu.c @@ -0,0 +1,99 @@ +/* + * This test case creates an event set and attaches it to a cpu. This causes only activity + * on that cpu to get counted. The test case then starts the event set does a little work and + * then stops the event set. It then prints out the event, count and cpu number which was used + * during the test case. + * + * Since this test case does not try to force its own execution to the cpu which it is using to + * count events, it is fairly normal to get zero counts printed at the end of the test. But every + * now and then it will count the cpu where the test case is running and then the counts will be non-zero. + * + * The test case allows the user to specify which cpu should be counted by providing an argument to the + * test case (ie: ./attach_cpu 3). Sometimes by trying different cpu numbers with the test case, you + * can find the cpu used to run the test (because counts will look like cycle counts). 
+ * + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int num_tests=1; + int num_events=1; + int retval; + int cpu_num = 1; + int EventSet1 = PAPI_NULL; + long long **values; + char event_name[PAPI_MAX_STR_LEN] = "PAPI_TOT_CYC"; + PAPI_option_t opts; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + // user can provide cpu number on which to count events as arg 1 + if (argc > 1) { + retval = atoi(argv[1]); + if (retval >= 0) { + cpu_num = retval; + } + } + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + retval = PAPI_create_eventset(&EventSet1); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_attach", retval ); + + // Force event set to be associated with component 0 (perf_events component provides all core events) + retval = PAPI_assign_eventset_component( EventSet1, 0 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", retval ); + + // Attach this event set to cpu 1 + opts.cpu.eventset = EventSet1; + opts.cpu.cpu_num = cpu_num; + + retval = PAPI_set_opt( PAPI_CPU_ATTACH, &opts ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + + retval = PAPI_add_named_event(EventSet1, event_name); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Trouble adding event %s\n",event_name); + test_skip( __FILE__, __LINE__, "PAPI_add_named_event", retval ); + } + + // get space for counter values (this needs to do this call because it malloc's space that test_pass and friends free) + values = allocate_test_space( num_tests, num_events); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + // do some work + do_flops(NUM_FLOPS); + + retval = PAPI_stop( EventSet1, values[0] ); + 
if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if (!quiet) printf ("Event: %s: %8lld on Cpu: %d\n", event_name, values[0][0], cpu_num); + + PAPI_shutdown( ); + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/attach_target.c b/src/ctests/attach_target.c new file mode 100644 index 0000000..678c3fa --- /dev/null +++ b/src/ctests/attach_target.c @@ -0,0 +1,21 @@ +#include +#include + +#include "do_loops.h" + + +int main(int argc, char **argv) +{ + int c, i = NUM_FLOPS; + + if (argc > 1) { + c = atoi(argv[1]); + if (c >= 0) { + i = c; + } + } + + do_flops(i); + + return 0; +} diff --git a/src/ctests/bgp/Makefile b/src/ctests/bgp/Makefile new file mode 100644 index 0000000..a7c159b --- /dev/null +++ b/src/ctests/bgp/Makefile @@ -0,0 +1,9 @@ +TESTS = papi_1 + +bgp_tests: $(TESTS) + +papi%: + $(CC) $(INCLUDE) $(CFLAGS) -o $@ $@.c $(LIBRARY) $(LDFLAGS) + +clean: + rm -f core $(TESTS) *~ *.o diff --git a/src/ctests/bgp/papi_1.c b/src/ctests/bgp/papi_1.c new file mode 100644 index 0000000..880217f --- /dev/null +++ b/src/ctests/bgp/papi_1.c @@ -0,0 +1,2092 @@ +/* + * Basic PAPI Test for BG/P + * + * NOTE: If a PAPI function is not listed below, the function is + * untested and user beware... + * + * The following high level functions are called... 
+ * PAPI_num_counters - get the number of hardware counters available on the system + * PAPI_flips - simplified call to get Mflips/s (floating point instruction rate), real and processor time + * PAPI_flops - simplified call to get Mflops/s (floating point operation rate), real and processor time + * PAPI_ipc - gets instructions per cycle, real and processor time + * PAPI_accum_counters - add current counts to array and reset counters + * PAPI_read_counters - copy current counts to array and reset counters + * PAPI_start_counters - start counting hardware events + * PAPI_stop_counters - stop counters and return current counts + * + * The following low level functions are called... + * PAPI_accum - accumulate and reset hardware events from an event set + * PAPI_add_event - add single PAPI preset or native hardware event to an event set + * PAPI_cleanup_eventset - remove all PAPI events from an event set + * PAPI_create_eventset - create a new empty PAPI event set + * PAPI_destroy_eventset - deallocates memory associated with an empty PAPI event set + * PAPI_enum_event - return the event code for the next available preset or native event + * PAPI_event_code_to_name - translate an integer PAPI event code into an ASCII PAPI preset or native name + * PAPI_event_name_to_code - translate an ASCII PAPI preset or native name into an integer PAPI event code + * PAPI_get_dmem_info - get dynamic memory usage information + * PAPI_get_event_info - get the name and descriptions for a given preset or native event code + * PAPI_get_executable_info - get the executable's address space information + * PAPIF_get_exe_info - Fortran version of PAPI_get_executable_info with different calling semantics + * PAPI_get_hardware_info - get information about the system hardware + * PAPI_get_multiplex - get the multiplexing status of specified event set + * PAPI_get_real_cyc - return the total number of cycles since some arbitrary starting point + * PAPI_get_real_usec - return the total number 
of microseconds since some arbitrary starting point + * PAPI_get_shared_lib_info - get information about the shared libraries used by the process + * PAPI_get_virt_cyc - return the process cycles since some arbitrary starting point + * PAPI_get_virt_usec - return the process microseconds since some arbitrary starting point + * PAPI_is_initialized - return the initialized state of the PAPI library + * PAPI_library_init - initialize the PAPI library + * PAPI_list_events - list the events that are members of an event set + * PAPI_num_hwctrs - return the number of hardware counters + * PAPI_num_events - return the number of events in an event set + * PAPI_query_event - query if a PAPI event exists + * PAPI_read - read hardware events from an event set with no reset + * PAPI_remove_event - remove a hardware event from a PAPI event set + * PAPI_reset - reset the hardware event counts in an event set + * PAPI_shutdown - finish using PAPI and free all related resources + * PAPI_start - start counting hardware events in an event set + * PAPI_state - return the counting state of an event set + * PAPI_stop - stop counting hardware events in an event set and return current events + * PAPI_write - write counter values into counters + * NOTE: Not supported when UPC is running, and when not running, only changes local PAPI memory. + */ + +#include +#include +#include + +#include + +#include +#include "papiStdEventDefs.h" +#include "papi.h" +#include "linux-bgp-native-events.h" + +#define MAX_COUNTERS 256 +#define NUMBER_COUNTERS_PER_ROW 8 +/* + * Prototypes... 
+ */ +void Do_Tests(void); +void Do_Low_Level_Tests(void); +void Do_High_Level_Tests(void); +void Do_Multiplex_Tests(void); +void Run_Cycle(const int pNumEvents); +void Zero_Local_Counters(long long* pCounters); +void FPUArith(void); +void List_PAPI_Events(const int pEventSet, int* pEvents, int* xNumEvents); +void Print_Native_Counters(); +void Print_Native_Counters_via_Buffer(const BGP_UPC_Read_Counters_Struct_t* pBuffer); +void Print_Native_Counters_for_PAPI_Counters(const int pEventSet); +void Print_Native_Counters_for_PAPI_Counters_From_List(const int* pEvents, const int pNumEvents); +void Print_PAPI_Counters(const int pEventSet, const long long* pCounters); +void Print_PAPI_Counters_From_List(const int* pEventList, const int pNumEvents, const long long* pCounters); +void Print_Counters(const int pEventSet); +void Print_Node_Info(void); +void Read_Native_Counters(const int pLength); +void Print_PAPI_Events(const int pEventSet); +void Print_Counter_Values(const long long* pCounters, const int pNumCounters); +void DumpInHex(const char* pBuffer, int pSize); + + +/* + * Global variables... + */ +int PAPI_Events[MAX_COUNTERS]; +long long PAPI_Counters[MAX_COUNTERS]; +char Native_Buffer[BGP_UPC_MAXIMUM_LENGTH_READ_COUNTERS_STRUCTURE]; +double x[32] ALIGN_L3_CACHE; + + +const int NumEventsPerSet = MAX_COUNTERS; +const int MaxPresetEventId = 104; +const int MaxNativeEventId = 511; + +int main(int argc, char * argv[]) { + _BGP_Personality_t personality; + int pRank=0, pMode=-2, pCore=0, pEdge=1, xActiveCore=0, xActiveRank=0, xRC; + + /* + * Check args, print test inputs. + */ + + if ( argc > 1 ) + sscanf(argv[1], "%d", &pRank); + if ( argc > 2 ) + sscanf(argv[2], "%d", &pMode); + if ( argc > 3 ) + sscanf(argv[3], "%d", &pCore); + if ( argc > 4 ) + sscanf(argv[4], "%d", &pEdge); + +/* + * Check for valid rank... + */ + if ( pRank < 0 || pRank > 31 ) { + printf("Invalid rank (%d) specified\n", pRank); + exit(1); + } +/* + * Check for valid mode... 
+ * Mode = -2 means use what was initialized by CNK + * Mode = -1 means to initialize with the default + * Mode = 0-3 means to initialize with mode 0-3 + */ + if ( pMode < -2 || pMode > 3 ) { + printf("Invalid mode (%d) specified\n", pMode); + exit(1); + } +/* + * Check for valid core... + */ + if ( pCore < 0 || pCore > 3 ) { + printf("Invalid core (%d) specified\n", pCore); + exit(1); + } +/* + * Check for valid edge... + * Edge = 1 means initialize with the default edge + * Edge = 0 means initialize with level high + * Edge = 4 means initialize with edge rise + * Edge = 8 means initialize with edge fall + * Edge = 12 means initialize with level low + */ + if ( pEdge != 0 && pEdge != 1 && pEdge != 4 && pEdge != 8 && pEdge != 12 ) { + printf("Invalid edge (%d) specified\n", pEdge); + exit(1); + } + +/* + * Initialize the UPC environment... + * NOTE: Must do this from all 'ranks'... + */ +// BGP_UPC_Initialize(); + xRC = PAPI_library_init(PAPI_VER_CURRENT); + if (xRC != 50921472) { + printf("PAPI_library_init failed: xRC=%d, ending...\n", xRC); + exit(1); + } + +/* + * Only run if this is specified rank... + */ + + xRC = Kernel_GetPersonality(&personality, sizeof(_BGP_Personality_t)); + if (xRC !=0) { + printf(" Kernel_GetPersonality returned %d\n",xRC) ; + exit(xRC); + } + xActiveRank = personality.Network_Config.Rank; + xActiveCore = Kernel_PhysicalProcessorID(); + + printf("Rank %d, core %d reporting...\n", xActiveRank, xActiveCore); + + if (xActiveRank != pRank) { + printf("Rank %d is not to run... Exiting...\n", xActiveRank); + exit(0); + } + + if ( xActiveCore == pCore ) { + printf("Program is to run on rank %d core %d, using mode= %d, edge= %d\n", pRank, xActiveCore, pMode, pEdge); + } + else { + printf("Program is NOT to run on rank %d core %d... Exiting...\n", pRank, xActiveCore); + exit(0); + } + +/* + * Main processing... 
+ */ + printf("************************************************************\n"); + printf("* Configuration parameters used: *\n"); + printf("* Rank = %d *\n", pRank); + printf("* Mode = %d *\n", pMode); + printf("* Core = %d *\n", pCore); + printf("* Edge = %d *\n", pEdge); + printf("************************************************************\n\n"); + + printf("Print config after PAPI_library_init...\n"); + BGP_UPC_Print_Config(); + +/* + * If we are to initialize, do so with user mode and edge... + * Otherwise, use what was initialized by CNK... + */ + if (pMode > -2) { + BGP_UPC_Initialize_Counter_Config(pMode, pEdge); + printf("UPC unit(s) initialized with mode=%d, edge=%d...\n", pMode, pEdge); + } + + printf("Before running the main test procedure...\n"); + BGP_UPC_Print_Config(); + BGP_UPC_Print_Counter_Values(BGP_UPC_READ_EXCLUSIVE); + +/* + * Perform the main test procedure... + */ + Do_Tests(); + +/* + * Print out final configuration and results... + */ + printf("After running the main test procedure...\n"); + BGP_UPC_Print_Config(); + BGP_UPC_Print_Counter_Values(BGP_UPC_READ_EXCLUSIVE); + + exit(0); +} + + +/* + * Do_Tests + */ + +void Do_Tests(void) { + printf("==> Do_Tests(): Beginning of the main body...\n"); + + // NOTE: PAPI_library_init() has already been done for each participating node + // prior to calling this routine... + + Do_Low_Level_Tests(); + Do_High_Level_Tests(); + Do_Multiplex_Tests(); // NOTE: Not supported... + PAPI_shutdown(); + + printf("==> Do_Tests(): End of the main body...\n"); + fflush(stdout); + + return; +} + +/* + * Do_Low_Level_Tests + */ + +void Do_Low_Level_Tests(void) { + int xRC, xEventSet, xEventCode, xState; + long long xLLValue; + char xName[256]; + + printf("==> Do_Low_Level_Tests(): Beginning of the main body...\n"); + + /* + * Low-level API tests... 
+ */ + + xRC = PAPI_is_initialized(); + if (xRC == 1) + printf("SUCCESS: PAPI has been low-level initialized by main()...\n"); + else { + printf("FAILURE: PAPI has not been properly initialized by main(), xRC=%d, ending...\n", xRC); + return; + } + + /* + * Print out the node information with respect to UPC units... + */ + Print_Node_Info(); + + /* + * Zero the buffers for counters... + */ + Zero_Local_Counters(PAPI_Counters); + BGP_UPC_Read_Counters_Struct_t* xTemp; + xTemp = (BGP_UPC_Read_Counters_Struct_t*)(void*)Native_Buffer; + Zero_Local_Counters(xTemp->counter); + + /* + * Start of real tests... + */ + xLLValue = -1; + xLLValue = PAPI_get_real_cyc(); + printf("PAPI_get_real_cyc: xLLValue=%lld...\n", xLLValue); + + xLLValue = -1; + xLLValue = PAPI_get_virt_cyc(); + printf("PAPI_get_virt_cyc: xLLValue=%lld...\n", xLLValue); + + xLLValue = -1; + xLLValue = PAPI_get_real_usec(); + printf("PAPI_get_real_usec: xLLValue=%lld...\n", xLLValue); + + xLLValue = -1; + xLLValue = PAPI_get_virt_usec(); + printf("PAPI_get_virt_usec: xLLValue=%lld...\n", xLLValue); + + xRC = PAPI_num_hwctrs(); + if (xRC == 256) + printf("SUCCESS: PAPI_num_hwctrs returned 256 hardware counters...\n"); + else + printf("FAILURE: PAPI_num_hwctrs failed, returned xRC=%d...\n", xRC); + + *xName = 0; + char* xEventName_1 = "PAPI_L3_LDM"; + xRC = PAPI_event_code_to_name(PAPI_L3_LDM, xName); + if (xRC == PAPI_OK) { + xRC = strcmp(xName,xEventName_1); + if (!xRC) + printf("SUCCESS: PAPI_event_code_to_name for PAPI_L3_LDM...\n"); + else + printf("FAILURE: PAPI_event_code_to_name returned incorrect name, xName=%s\n", xName); + } + else + printf("FAILURE: PAPI_event_code_to_name failed, xRC=%d...\n", xRC); + + *xName = 0; + char* xEventName_2 = "PNE_BGP_PU1_IPIPE_INSTRUCTIONS"; + xRC = PAPI_event_code_to_name(PNE_BGP_PU1_IPIPE_INSTRUCTIONS, xName); + if (xRC == PAPI_OK) { + xRC = strcmp(xName,xEventName_2); + if (!xRC) + printf("SUCCESS: PAPI_event_code_to_name for PNE_BGP_PU1_IPIPE_INSTRUCTIONS...\n"); 
+ else + printf("FAILURE: PAPI_event_code_to_name returned incorrect name, xName=%s\n", xName); + } + else + printf("FAILURE: PAPI_event_code_to_name failed, xRC=%d...\n", xRC); + + strcpy(xName,"PAPI_L3_LDM"); + xRC = PAPI_event_name_to_code(xName, &xEventCode); + if (xRC == PAPI_OK) + if (xEventCode == 0x8000000E) + printf("SUCCESS: PAPI_event_name_to_code for PAPI_L3_LDM...\n"); + else + printf("FAILURE: PAPI_event_name_to_code returned incorrect code, xEventCode=%d\n", xEventCode); + else + printf("FAILURE: PAPI_event_name_to_code failed, xRC=%d...\n", xRC); + + strcpy(xName,"PNE_BGP_PU1_IPIPE_INSTRUCTIONS"); + xRC = PAPI_event_name_to_code(xName, &xEventCode); + if (xRC == PAPI_OK) + if (xEventCode == 0x40000027) + printf("SUCCESS: PAPI_event_name_to_code for PNE_BGP_PU1_IPIPE_INSTRUCTIONS...\n"); + else + printf("FAILURE: PAPI_event_name_to_code returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_event_name_to_code failed, xRC=%d...\n", xRC); + + xEventCode = 0x80000000; + xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); + if (xRC == PAPI_OK) + if (xEventCode == 0x80000001) + printf("SUCCESS: PAPI_enum_event for 0x80000000 PAPI_PRESET_ENUM_ALL, returned 0x80000001...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x80000000 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x80000000 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); + + xEventCode = 0x80000002; + xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); + if (xRC == PAPI_OK) + if (xEventCode == 0x80000003) + printf("SUCCESS: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_ALL, returned 0x80000003...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); + + xEventCode = 0x80000067; + xRC = 
PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); + if (xRC == PAPI_OK) + if (xEventCode == 0x80000068) + printf("SUCCESS: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_ALL, returned 0x80000068...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); + + xEventCode = 0x80000068; + xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); + if (xRC == PAPI_ENOEVNT) + printf("SUCCESS: PAPI_enum_event for 0x80000068 PAPI_PRESET_ENUM_ALL, no next event...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x80000068 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); + + xEventCode = 0x40000000; + xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); + if (xRC == PAPI_OK) + if (xEventCode == 0x40000001) + printf("SUCCESS: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_ALL, returned 0x40000001...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); + + xEventCode = 0x40000001; + xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); + if (xRC == PAPI_OK) + if (xEventCode == 0x40000002) + printf("SUCCESS: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_ALL, returned 0x40000002...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); + + xEventCode = 0x400000FC; + xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); + if (xRC == PAPI_OK) + if (xEventCode == 0x400000FF) + printf("SUCCESS: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_ALL, returned 0x400000FF...\n"); + else + printf("FAILURE: PAPI_enum_event for 
0x400000FC PAPI_PRESET_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); + + xEventCode = 0x400001FD; + xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); + if (xRC == PAPI_OK) + if (xEventCode == 0x400001FF) + printf("SUCCESS: PAPI_enum_event for 0x400001FD PAPI_ENUM_ALL, returned 0x400001FF...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x400001FD PAPI_ENUM_ALL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x400001FD PAPI_ENUM_ALL failed, xRC=%d...\n", xRC); + + xEventCode = 0x400001FF; + xRC = PAPI_enum_event(&xEventCode, PAPI_ENUM_ALL); + if (xRC == PAPI_ENOEVNT) + printf("SUCCESS: PAPI_enum_event for 0x400001FF PAPI_PRESET_ENUM_ALL, no next event...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x400001FF PAPI_PRESET_ENUM_ALL failed, xRC=%d...\n", xRC); + + xEventCode = 0x80000000; + xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); + if (xRC == PAPI_OK) + if (xEventCode == 0x80000001) + printf("SUCCESS: PAPI_enum_event for 0x80000000 PAPI_PRESET_ENUM_AVAIL, returned 0x80000001...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x80000000PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x80000000PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); + + xEventCode = 0x80000002; + xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); + if (xRC == PAPI_OK) + if (xEventCode == 0x80000006) + printf("SUCCESS: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_AVAIL, returned 0x80000006...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x80000002 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); + + xEventCode = 0x80000067; + xRC = 
PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); + if (xRC == PAPI_OK) + if (xEventCode == 0x80000068) + printf("SUCCESS: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_AVAIL, returned 0x80000068...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x80000067 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); + + xEventCode = 0x80000068; + xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); + if (xRC == PAPI_ENOEVNT) + printf("SUCCESS: PAPI_enum_event for 0x80000068 PAPI_PRESET_ENUM_AVAIL, no next event...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x80000068 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); + + xEventCode = 0x40000000; + xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); + if (xRC == PAPI_OK) + if (xEventCode == 0x40000001) + printf("SUCCESS: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_AVAIL, returned 0x40000001...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x40000000 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); + + xEventCode = 0x40000001; + xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); + if (xRC == PAPI_OK) + if (xEventCode == 0x40000002) + printf("SUCCESS: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_AVAIL, returned 0x40000002...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x40000001 PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); + + printf("NOTE: Might get two messages indicating invalid event id specified for 253 and 254. 
These are OK...\n"); + xEventCode = 0x400000FC; + xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); + if (xRC == PAPI_OK) + if (xEventCode == 0x400000FF) + printf("SUCCESS: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_AVAIL, returned 0x400000FF...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x400000FC PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); + + printf("NOTE: Might get one message indicating invalid event id specified for 510. This is OK...\n"); + xEventCode = 0x400001FD; + xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); + if (xRC == PAPI_OK) + if (xEventCode == 0x400001FF) + printf("SUCCESS: PAPI_enum_event for 0x400001FD PAPI_PRESET_ENUM_AVAIL, returned 0x400001FF...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x400001FD PAPI_PRESET_ENUM_AVAIL returned incorrect code, xEventCode=%8.8x\n", xEventCode); + else + printf("FAILURE: PAPI_enum_event for 0x400001FD PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); + + xEventCode = 0x400001FF; + xRC = PAPI_enum_event(&xEventCode, PAPI_PRESET_ENUM_AVAIL); + if (xRC == PAPI_ENOEVNT) + printf("SUCCESS: PAPI_enum_event for 0x400001FF PAPI_PRESET_ENUM_AVAIL, no next event...\n"); + else + printf("FAILURE: PAPI_enum_event for 0x400001FF PAPI_PRESET_ENUM_AVAIL failed, xRC=%d...\n", xRC); + + PAPI_dmem_info_t xDmemSpace; + xRC = PAPI_get_dmem_info(&xDmemSpace); + if (xRC == PAPI_OK) { + DumpInHex((char*)&xDmemSpace, sizeof( PAPI_dmem_info_t)); + printf("SUCCESS: PAPI_get_dmem_info...\n"); + } + else + printf("FAILURE: PAPI_get_dmem_info failed, xRC=%d...\n", xRC); + + PAPI_event_info_t xInfoSpace; + xRC = PAPI_get_event_info(PAPI_L3_LDM, &xInfoSpace); + if (xRC == PAPI_OK) { + DumpInHex((char*)&xInfoSpace, sizeof( PAPI_event_info_t)); + printf("SUCCESS: PAPI_get_event_info for PAPI_L3_LDM...\n"); + } + else + printf("FAILURE: 
PAPI_get_event_info failed for PAPI_L3_LDM, xRC=%d...\n", xRC); + + const PAPI_exe_info_t* xExeInfo = NULL; + if ((xExeInfo = PAPI_get_executable_info()) != NULL) { + DumpInHex((char*)xExeInfo, sizeof( PAPI_exe_info_t)); + printf("SUCCESS: PAPI_get_executable_info...\n"); + } + else + printf("FAILURE: PAPI_get_executable_info failed, returned null pointer...\n"); + + const PAPI_hw_info_t* xHwInfo = NULL; + if ((xHwInfo = PAPI_get_hardware_info()) != NULL) { + DumpInHex((char*)xHwInfo, sizeof( PAPI_hw_info_t)); + printf("SUCCESS: PAPI_get_hardware_info...\n"); + } + else + printf("FAILURE: PAPI_get_hardware_info failed, returned null pointer...\n"); + + const PAPI_shlib_info_t* xShLibInfo = NULL; + if ((xShLibInfo = PAPI_get_shared_lib_info()) != NULL) { + DumpInHex((char*)xShLibInfo, sizeof( PAPI_shlib_info_t)); + printf("SUCCESS: PAPI_get_shared_lib_info...\n"); + } + else + printf("FAILURE: PAPI_get_shared_lib_info failed, returned null pointer...\n"); + + xEventSet = PAPI_NULL; + xRC = PAPI_create_eventset(&xEventSet); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_create_eventset created...\n"); + else { + printf("FAILURE: PAPI_create_eventset failed, xRC=%d...\n", xRC); + return; + } + + printf("==> No events should be in the event set...\n"); + Print_Counters(xEventSet); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 0) + printf("SUCCESS: PAPI_num_events returned 0...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_add_event(xEventSet, PAPI_L1_DCM); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_add_event PAPI_L1_DCM...\n"); + else + printf("FAILURE: PAPI_add_event PAPI_L1_DCM failed, xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 1) + printf("SUCCESS: PAPI_num_events returned 1...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_add_event(xEventSet, PNE_BGP_PU3_L2_MEMORY_WRITES); + if (xRC == PAPI_OK) + printf("SUCCESS: 
PAPI_add_event PNE_BGP_PU3_L2_MEMORY_WRITES...\n"); + else + printf("FAILURE: PAPI_add_event PNE_BGP_PU3_L2_MEMORY_WRITES failed, xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 2) + printf("SUCCESS: PAPI_num_events returned 2...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_add_event(xEventSet, BGP_PU3_L2_MEMORY_WRITES); + if (xRC == PAPI_EINVAL) + printf("SUCCESS: PAPI_add_event BGP_PU3_L2_MEMORY_WRITES not allowed...\n"); + else + printf("FAILURE: PAPI_add_event BGP_PU3_L2_MEMORY_WRITES allowed, or failed incorrectly..., xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 2) + printf("SUCCESS: PAPI_num_events returned 2...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_add_event(xEventSet, 0x40000208); + if (xRC == PAPI_ENOEVNT) + printf("SUCCESS: PAPI_add_event 0x40000208 not allowed...\n"); + else + printf("FAILURE: PAPI_add_event 0x40000208 allowed, or failed incorrectly..., xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 2) + printf("SUCCESS: PAPI_num_events returned 2...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_add_event(xEventSet, PAPI_L1_ICM); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_add_event PAPI_L1_ICM...\n"); + else + printf("FAILURE: PAPI_add_event PAPI_L1_ICM failed, xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 3) + printf("SUCCESS: PAPI_num_events returned 3...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_add_event(xEventSet, PAPI_L1_TCM); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_add_event PAPI_L1_TCM...\n"); + else + printf("FAILURE: PAPI_add_event PAPI_L1_TCM failed, xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 4) + printf("SUCCESS: PAPI_num_events returned 4...\n"); + else + printf("FAILURE: PAPI_num_events 
failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_add_event(xEventSet, PAPI_L1_DCM); + if (xRC == PAPI_ECNFLCT) + printf("SUCCESS: PAPI_add_event, redundantly adding PAPI_L1_DCM not allowed...\n"); + else + printf("FAILURE: PAPI_add_event PAPI_L1_DCM failed incorrectly, xRC=%d...\n", xRC); + + xRC = PAPI_add_event(xEventSet, PNE_BGP_PU3_L2_MEMORY_WRITES); + if (xRC == PAPI_ECNFLCT) + printf("SUCCESS: PAPI_add_event, redundantly adding PNE_BGP_PU3_L2_MEMORY_WRITES not allowed...\n"); + else + printf("FAILURE: PAPI_add_event PNE_BGP_PU3_L2_MEMORY_WRITES failed incorectly, xRC=%d...\n", xRC); + + printf("\n==> All events added... Perform a read now...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read...\n"); + else + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + + printf("\n==> Perform a reset now...\n"); + xRC = PAPI_reset(xEventSet); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_reset...\n"); + else + printf("FAILURE: PAPI_reset failed, xRC=%d...\n", xRC); + + printf("\n==> Perform another read now...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read...\n"); + else + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + + printf("\n==> Should be 4 counters below, preset, native, preset, and preset. 
All counter values should be zero.\n"); + Print_Counters(xEventSet); + + printf("\n==> Stop the UPC now...\n"); + xRC = PAPI_stop(xEventSet, PAPI_Counters); + if (xRC == PAPI_ENOTRUN) + printf("SUCCESS: PAPI_stop, but not running...\n"); + else + printf("FAILURE: PAPI_stop failed incorectly, xRC=%d...\n", xRC); + + printf("\n==> Start the UPC now...\n"); + xRC = PAPI_start(xEventSet); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_start...\n"); + else { + printf("FAILURE: PAPI_start failed, xRC=%d...\n", xRC); + return; + } + + printf("\n==> Try to start it again...\n"); + xRC = PAPI_start(xEventSet); + if (xRC == PAPI_EISRUN) + printf("SUCCESS: PAPI_start, but already running...\n"); + else + printf("FAILURE: PAPI_start failed incorectly, xRC=%d...\n", xRC); + + FPUArith(); + + printf("\n==> Stop the UPC after the arithmetic was performed... The individual native counter values will be greater than the PAPI counters because the PAPI counters are read prior to the UPC(s) being stopped...\n"); + xRC = PAPI_stop(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_stop...\n"); + else { + printf("FAILURE: PAPI_stop failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform a read of the counters after performing arithmetic, UPC is stopped... Values should be the same as right after the prior PAPI_Stop()...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read...\n"); + else + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + Print_Counters(xEventSet); + + printf("\n==> Zero local counters. Perform a PAPI_accum, UPC is stopped... 
Native values should be zero, and the local PAPI counters the same as the previous read...\n"); + Zero_Local_Counters(PAPI_Counters); + xRC = PAPI_accum(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_accum...\n"); + } + else { + printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform a PAPI_read, UPC is stopped... All values should be zero...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_read...\n"); + } + else { + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform a reset after performing arithmetic, UPC is stopped... All values should be zero...\n"); + xRC = PAPI_reset(xEventSet); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_reset...\n"); + } + else { + printf("FAILURE: PAPI_reset failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform another read of the counters after resetting the counters, UPC is stopped... All values should be zero...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read...\n"); + else + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + Print_Counters(xEventSet); + + printf("\n==> Perform another PAPI_accum after resetting the counters, UPC is stopped... All values should be zero...\n"); + Zero_Local_Counters(PAPI_Counters); + xRC = PAPI_accum(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_accum...\n"); + } + else { + printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform another PAPI_read after accumulating and resetting the UPC, UPC is stopped... 
All values should be zero...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_read...\n"); + } + else { + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Start the UPC again...\n"); + xRC = PAPI_start(xEventSet); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_start...\n"); + else { + printf("FAILURE: PAPI_start failed, xRC=%d...\n", xRC); + return; + } + + FPUArith(); + + printf("\n==> Get the state of the event set...\n"); + xRC = PAPI_state(xEventSet, &xState); + if (xRC == PAPI_OK) { + if (xState == PAPI_RUNNING) { + printf("SUCCESS: PAPI_state is RUNNING...\n"); + } + else { + printf("FAILURE: PAPI_state failed, incorrect state, xState=%d...\n", xState); + } + } + else { + printf("FAILURE: PAPI_state failed, xRC=%d...\n", xRC); + return; + } + + printf("\n==> Perform a read of the counters, UPC is running... The individual native counter values will be greater than the PAPI counters because the PAPI counters are read prior to the reads for the individual counter values...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read...\n"); + else + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + Print_Counters(xEventSet); + + FPUArith(); + + printf("\n==> Perform another read of the counters, UPC is running... Values should be increasing...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read...\n"); + else + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + Print_Counters(xEventSet); + + FPUArith(); + + printf("\n==> Perform another read of the counters, UPC is running... 
Values should continue increasing...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read...\n"); + else + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + Print_Counters(xEventSet); + + printf("\n==> Perform a reset after performing arithmetic, UPC is still running... Native counter values should be less than prior read, but PAPI counter values should be identical to the prior read (local buffer was not changed)...\n"); + xRC = PAPI_reset(xEventSet); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_reset...\n"); + } + else { + printf("FAILURE: PAPI_reset failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Zero local counters. Perform a PAPI_accum, UPC is still running...\n"); + Zero_Local_Counters(PAPI_Counters); + xRC = PAPI_accum(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_accum...\n"); + } + else { + printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + FPUArith(); + + printf("\n==> Accumulate local counters. Perform a PAPI_accum, UPC is still running... PAPI counters should show an increase from prior accumulate...\n"); + xRC = PAPI_accum(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_accum...\n"); + } + else { + printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + FPUArith(); + + printf("\n==> Accumulate local counters. Perform another PAPI_accum, UPC is still running... PAPI counters should show an increase from prior accumulate...\n"); + xRC = PAPI_accum(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_accum...\n"); + } + else { + printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Zero local counters. Perform a PAPI_accum, UPC is still running... 
PAPI counters should be less than the prior accumulate...\n"); + Zero_Local_Counters(PAPI_Counters); + xRC = PAPI_accum(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_accum...\n"); + } + else { + printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform a PAPI_read, UPC is still running... Native counters and PAPI counters should have both increased from prior accumulate...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_read...\n"); + } + else { + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform a PAPI_write (not supported when UPC is running)...\n"); + xRC = PAPI_write(xEventSet, PAPI_Counters); + if (xRC == PAPI_ECMP) { + printf("SUCCESS: PAPI_write, not allowed...\n"); + } + else { + printf("FAILURE: PAPI_write failed, xRC=%d...\n", xRC); + return; + } + + printf("\n==> Stop the UPC... The individual native counter values will be greater than the PAPI counters because the PAPI counters are read prior to the UPC(s) being stopped...\n"); + xRC = PAPI_stop(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_stop...\n"); + else { + printf("FAILURE: PAPI_stop failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform a PAPI_read with the UPC stopped...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read...\n"); + else + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + + printf("\n==> Should be same 4 counters below, with the same native and PAPI counters as after the PAPI_stop...\n"); + Print_Counters(xEventSet); + + printf("\n==> Perform a PAPI_accum with the UPC stopped... 
Native counters sould be zeroed, with the PAPI counters unchanged from prior read (with the UPC already stopped, the accumulate does not add any counter values to the local buffer)...\n"); + Zero_Local_Counters(PAPI_Counters); + xRC = PAPI_accum(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_accum...\n"); + } + else { + printf("FAILURE: PAPI_accum failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform a PAPI_read with the UPC stopped... Native and PAPI counters are zero...\n"); + xRC = PAPI_read(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read...\n"); + else + printf("FAILURE: PAPI_read failed, xRC=%d...\n", xRC); + Print_Counters(xEventSet); + + printf("\n==> Perform a reset, UPC is stopped... Native and PAPI counters are zero...\n"); + xRC = PAPI_reset(xEventSet); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_reset...\n"); + } + else { + printf("FAILURE: PAPI_reset failed, xRC=%d...\n", xRC); + return; + } + Print_Counters(xEventSet); + + printf("\n==> Perform a PAPI_write, but only to local memory...\n"); + xRC = PAPI_write(xEventSet, PAPI_Counters); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_write, but only to local memory...\n"); + } + else { + printf("FAILURE: PAPI_write failed, xRC=%d...\n", xRC); + return; + } + + printf("\n==> Get the state of the event set...\n"); + xRC = PAPI_state(xEventSet, &xState); + if (xRC == PAPI_OK) { + if (xState == PAPI_STOPPED) { + printf("SUCCESS: PAPI_state is STOPPED...\n"); + } + else { + printf("FAILURE: PAPI_state failed, incorrect state, xState=%d...\n", xState); + } + } + else { + printf("FAILURE: PAPI_state failed, xRC=%d...\n", xRC); + return; + } + + printf("\n==> Get the multiplex status of the eventset...\n"); + xRC = PAPI_get_multiplex(xEventSet); + if (xRC == PAPI_OK) { + printf("SUCCESS: PAPI_get_multiplex (NOTE: The rest of the multiplex path is untested)...\n"); + } + else { + printf("FAILURE: 
PAPI_get_multiplex failed, xRC=%d...\n", xRC); + return; + } + + printf("\n==> Remove the events, and clean up the event set...\n"); + xRC = PAPI_remove_event(xEventSet, PNE_BGP_PU1_IPIPE_INSTRUCTIONS); + if (xRC == PAPI_EINVAL) + printf("SUCCESS: PAPI_remove_event could not find PNE_BGP_PU1_IPIPE_INSTRUCTIONS...\n"); + else + printf("FAILURE: PAPI_remove_event PNE_BGP_PU1_IPIPE_INSTRUCTIONS failed, xRC=%d...\n", xRC); + + xRC = PAPI_remove_event(xEventSet, PAPI_L3_LDM); + if (xRC == PAPI_EINVAL) + printf("SUCCESS: PAPI_remove_event could not find PAPI_L3_LDM...\n"); + else + printf("FAILURE: PAPI_remove_event PAPI_L3_LDM failed, xRC=%d...\n", xRC); + + xRC = PAPI_remove_event(xEventSet, PAPI_L1_TCM); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_remove_event PAPI_L1_TCM...\n"); + else + printf("FAILURE: PAPI_remove_event PAPI_L1_TCM failed, xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 3) + printf("SUCCESS: PAPI_num_events returned 3...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_remove_event(xEventSet, PAPI_L1_ICM); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_remove_event PAPI_L1_ICM...\n"); + else + printf("FAILURE: PAPI_remove_event PAPI_L1_ICM failed, xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 2) + printf("SUCCESS: PAPI_num_events returned 2...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_remove_event(xEventSet, PNE_BGP_PU3_L2_MEMORY_WRITES); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_remove_event PNE_BGP_PU3_L2_MEMORY_WRITES...\n"); + else + printf("FAILURE: PAPI_remove_event PNE_BGP_PU3_L2_MEMORY_WRITES failed, xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 1) + printf("SUCCESS: PAPI_num_events returned 1...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_remove_event(xEventSet, PAPI_L1_DCM); + if (xRC == PAPI_OK) + 
printf("SUCCESS: PAPI_remove_event PAPI_L1_DCM...\n"); + else + printf("FAILURE: PAPI_remove_event PAPI_L1_DCM failed, xRC=%d...\n", xRC); + + xRC = PAPI_num_events(xEventSet); + if (xRC == 0) + printf("SUCCESS: PAPI_num_events returned 0...\n"); + else + printf("FAILURE: PAPI_num_events failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_cleanup_eventset(xEventSet); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_cleanup_eventset...\n"); + else + printf("FAILURE: PAPI_cleanup_eventset failed, xRC=%d...\n", xRC); + + xRC = PAPI_destroy_eventset(&xEventSet); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_destroy_eventset...\n"); + else + printf("FAILURE: PAPI_destroy_eventset failed, xRC=%d...\n", xRC); + + printf("==> Do_Low_Level_Tests(): End of the main body...\n"); + + return; +} + +/* + * Do_High_Level_Tests + */ + +void Do_High_Level_Tests(void) { + uint xEventId, xEventCode; + int xRC, xNumEvents; + + printf("==> Do_High_Level_Tests(): Beginning of the main body...\n"); + + xRC = PAPI_num_counters(); + if (xRC == 256) + printf("SUCCESS: PAPI_num_counters returned 256 hardware counters...\n"); + else + printf("FAILURE: PAPI_num_counters failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_num_components(); + if (xRC == 1) + printf("SUCCESS: PAPI_num_components returned 256 hardware counters...\n"); + else + printf("FAILURE: PAPI_num_components failed, returned xRC=%d...\n", xRC); + + xEventId = 0; + while (xEventId < MaxPresetEventId) { + xNumEvents = 0; + while (xEventId <= MaxPresetEventId && xNumEvents < NumEventsPerSet) { + xEventCode = xEventId | 0x80000000; + xRC = PAPI_query_event(xEventCode); + if (xRC == PAPI_OK) { + switch(xEventCode) { + case 0x80000003: + case 0x80000004: + case 0x80000005: + case 0x80000007: + case 0x80000008: + case 0x8000000A: + case 0x8000000B: + case 0x8000000C: + case 0x8000000D: + case 0x8000000F: + case 0x80000010: + case 0x80000011: + case 0x80000012: + case 0x80000013: + case 0x80000014: + case 0x80000015: + case 0x80000016: 
+ case 0x80000017: + case 0x80000018: + case 0x80000019: + case 0x8000001A: + case 0x8000001B: + case 0x8000001D: + case 0x8000001E: + case 0x8000001F: + case 0x80000020: + case 0x80000021: + case 0x80000022: + case 0x80000023: + case 0x80000024: + case 0x80000025: + case 0x80000026: + case 0x80000027: + case 0x80000028: + case 0x80000029: + case 0x8000002A: + case 0x8000002B: + case 0x8000002C: + case 0x8000002D: + case 0x8000002E: + case 0x8000002F: + case 0x80000031: + case 0x80000032: + case 0x80000033: + case 0x80000037: + case 0x80000038: + case 0x80000039: + case 0x8000003A: + case 0x8000003D: + case 0x80000042: + case 0x80000045: + case 0x80000046: + case 0x80000048: + case 0x8000004A: + case 0x8000004B: + case 0x8000004D: + case 0x8000004E: + case 0x80000050: + case 0x80000051: + case 0x80000053: + case 0x80000054: + case 0x80000056: + case 0x80000057: + case 0x80000059: + case 0x8000005c: + case 0x8000005f: + case 0x80000061: + case 0x80000062: + case 0x80000063: + case 0x80000064: + case 0x80000065: + printf("FAILURE: Do_High_Level_Tests, preset event code %#8.8x added to list of events to be started, but should not be allowed...\n", xEventCode); + break; + default: + printf("SUCCESS: Do_High_Level_Tests, preset event code %#8.8x added to list of events to be started...\n", xEventCode); + } + PAPI_Events[xNumEvents] = xEventCode; + xNumEvents++; + } + else { + switch(xEventCode) { + case 0x80000003: + case 0x80000004: + case 0x80000005: + case 0x80000007: + case 0x80000008: + case 0x8000000A: + case 0x8000000B: + case 0x8000000C: + case 0x8000000D: + case 0x8000000F: + case 0x80000010: + case 0x80000011: + case 0x80000012: + case 0x80000013: + case 0x80000014: + case 0x80000015: + case 0x80000016: + case 0x80000017: + case 0x80000018: + case 0x80000019: + case 0x8000001A: + case 0x8000001B: + case 0x8000001D: + case 0x8000001E: + case 0x8000001F: + case 0x80000020: + case 0x80000021: + case 0x80000022: + case 0x80000023: + case 0x80000024: + case 
0x80000025: + case 0x80000026: + case 0x80000027: + case 0x80000028: + case 0x80000029: + case 0x8000002A: + case 0x8000002B: + case 0x8000002C: + case 0x8000002D: + case 0x8000002E: + case 0x8000002F: + case 0x80000031: + case 0x80000032: + case 0x80000033: + case 0x80000037: + case 0x80000038: + case 0x80000039: + case 0x8000003A: + case 0x8000003D: + case 0x80000042: + case 0x80000045: + case 0x80000046: + case 0x80000048: + case 0x8000004A: + case 0x8000004B: + case 0x8000004D: + case 0x8000004E: + case 0x80000050: + case 0x80000051: + case 0x80000053: + case 0x80000054: + case 0x80000056: + case 0x80000057: + case 0x80000059: + case 0x8000005c: + case 0x8000005f: + case 0x80000061: + case 0x80000062: + case 0x80000063: + case 0x80000064: + case 0x80000065: + printf("SUCCESS: Do_High_Level_Tests, preset event code %#8.8x cannot be added to list of events to be started, xRC = %d...\n", xEventCode, xRC); + break; + default: + printf("FAILURE: Do_High_Level_Tests, preset event code %#8.8x cannot be added to list of events to be started, xRC = %d...\n", xEventCode, xRC); + } + } + xEventId++; + } + if (xNumEvents) + Run_Cycle(xNumEvents); + } + + xEventId = 0; + while (xEventId < MaxNativeEventId) { + xNumEvents = 0; + while (xEventId <= MaxNativeEventId && xNumEvents < NumEventsPerSet) { + xEventCode = xEventId | 0x40000000; + xRC = PAPI_query_event(xEventCode); + if (xRC == PAPI_OK) { + switch(xEventCode) { + case 0x4000005C: + case 0x4000005D: + case 0x4000005E: + case 0x4000005F: + case 0x40000060: + case 0x40000061: + case 0x40000062: + case 0x40000063: + case 0x40000064: + case 0x4000007C: + case 0x4000007D: + case 0x4000007E: + case 0x4000007F: + case 0x40000080: + case 0x40000081: + case 0x40000082: + case 0x40000083: + case 0x40000084: + case 0x400000D8: + case 0x400000D9: + case 0x400000DA: + case 0x400000DB: + case 0x400000DC: + case 0x400000DD: + case 0x400000FD: + case 0x400000FE: + case 0x40000198: + case 0x40000199: + case 0x4000019A: + case 
0x4000019B: + case 0x4000019C: + case 0x4000019D: + case 0x4000019E: + case 0x4000019F: + case 0x400001A0: + case 0x400001B8: + case 0x400001B9: + case 0x400001BA: + case 0x400001BB: + case 0x400001BC: + case 0x400001BD: + case 0x400001BE: + case 0x400001BF: + case 0x400001C0: + case 0x400001D2: + case 0x400001D3: + case 0x400001D4: + case 0x400001D5: + case 0x400001D6: + case 0x400001D7: + case 0x400001E6: + case 0x400001E7: + case 0x400001E8: + case 0x400001E9: + case 0x400001EA: + case 0x400001EB: + case 0x400001FE: + printf("FAILURE: Do_High_Level_Tests, native event code %#8.8x added to list of events to be started, but should not be allowed...\n", xEventCode); + break; + default: + printf("SUCCESS: Do_High_Level_Tests, native event code %#8.8x added to list of events to be started...\n", xEventCode); + } + PAPI_Events[xNumEvents] = xEventCode; + xNumEvents++; + } + else { + switch(xEventCode) { + case 0x4000005C: + case 0x4000005D: + case 0x4000005E: + case 0x4000005F: + case 0x40000060: + case 0x40000061: + case 0x40000062: + case 0x40000063: + case 0x40000064: + case 0x4000007C: + case 0x4000007D: + case 0x4000007E: + case 0x4000007F: + case 0x40000080: + case 0x40000081: + case 0x40000082: + case 0x40000083: + case 0x40000084: + case 0x400000D8: + case 0x400000D9: + case 0x400000DA: + case 0x400000DB: + case 0x400000DC: + case 0x400000DD: + case 0x400000FD: + case 0x400000FE: + case 0x40000198: + case 0x40000199: + case 0x4000019A: + case 0x4000019B: + case 0x4000019C: + case 0x4000019D: + case 0x4000019E: + case 0x4000019F: + case 0x400001A0: + case 0x400001B8: + case 0x400001B9: + case 0x400001BA: + case 0x400001BB: + case 0x400001BC: + case 0x400001BD: + case 0x400001BE: + case 0x400001BF: + case 0x400001C0: + case 0x400001D2: + case 0x400001D3: + case 0x400001D4: + case 0x400001D5: + case 0x400001D6: + case 0x400001D7: + case 0x400001E6: + case 0x400001E7: + case 0x400001E8: + case 0x400001E9: + case 0x400001EA: + case 0x400001EB: + case 0x400001FE: + 
printf("SUCCESS: Do_High_Level_Tests, native event code %#8.8x cannot be added to list of events to be started, xRC = %d...\n", xEventCode, xRC); + break; + default: + printf("FAILURE: Do_High_Level_Tests, native event code %#8.8x cannot be added to list of events to be started, xRC = %d...\n", xEventCode, xRC); + } + } + xEventId++; + } + if (xNumEvents) + Run_Cycle(xNumEvents); + } + + float xRtime, xPtime, xMflips, xMflops, xIpc; + long long xFlpins, xFlpops, xIns; + long long values[3] = {PAPI_FP_INS, PAPI_FP_OPS, PAPI_TOT_CYC}; + + xRC = PAPI_flips(&xRtime, &xPtime, &xFlpins, &xMflips); + + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_flips started.\n"); + else + printf("FAILURE: PAPI_flips failed, returned xRC=%d...\n", xRC); + + FPUArith(); + + xRC = PAPI_flips(&xRtime, &xPtime, &xFlpins, &xMflips); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_flips Rtime=%e Ptime=%e, Flpins=%lld, Mflips=%e\n", xRtime, xPtime, xFlpins, xMflips); + else + printf("FAILURE: PAPI_flips failed, returned xRC=%d...\n", xRC); + + FPUArith(); + FPUArith(); + + xRC = PAPI_flips(&xRtime, &xPtime, &xFlpins, &xMflips); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_flips Rtime=%e Ptime=%e, Flpins=%lld, Mflips=%e\n", xRtime, xPtime, xFlpins, xMflips); + else + printf("FAILURE: PAPI_flips failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_stop_counters(values, 3); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_stop_counters stopped counters.\n"); + else + printf("FAILURE: PAPI_stop_counters failed, returned xRC=%d...\n", xRC); + + + xRC = PAPI_flops(&xRtime, &xPtime, &xFlpops, &xMflops); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_flops started.\n"); + else + printf("FAILURE: PAPI_flops failed, returned xRC=%d...\n", xRC); + + FPUArith(); + + xRC = PAPI_flops(&xRtime, &xPtime, &xFlpops, &xMflops); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_flops Rtime=%e Ptime=%e Flpops=%lld Mflops=%e\n", xRtime, xPtime, xFlpops, xMflops); + else + printf("FAILURE: PAPI_flops failed, returned 
xRC=%d...\n", xRC); + + FPUArith(); + FPUArith(); + + xRC = PAPI_flops(&xRtime, &xPtime, &xFlpops, &xMflops); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_flops Rtime=%e Ptime=%e Flpops=%lld Mflops=%e\n", xRtime, xPtime, xFlpops, xMflops); + else + printf("FAILURE: PAPI_flops failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_stop_counters(values, 3); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_stop_counters stopped counters.\n"); + else + printf("FAILURE: PAPI_stop_counters failed, returned xRC=%d...\n", xRC); + + xRC = PAPI_ipc(&xRtime, &xPtime, &xIns, &xIpc); + if (xRC == PAPI_ENOEVNT) + printf("SUCCESS: PAPI_ipc, no event found...\n"); + else + printf("FAILURE: PAPI_ipc failed, returned xRC=%d...\n", xRC); + + printf("==> Do_High_Level_Tests(): End of the main body...\n"); + + return; +} + + +/* + * Do_Multiplex_Tests + */ + +void Do_Multiplex_Tests(void) { + int xRC; + + printf("==> Do_Multiplex_Tests(): Beginning of the main body...\n"); + + xRC = PAPI_multiplex_init(); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_multiplex_init...\n"); + else + printf("FAILURE: PAPI_multiplex_init failed, returned xRC=%d...\n", xRC); + + printf("==> Do_Multiplex_Tests(): End of the main body...\n"); + + return; +} + + +void Run_Cycle(const int pNumEvents) { + int xRC; + +// BGP_UPC_Zero_Counter_Values(); + Zero_Local_Counters(PAPI_Counters); + xRC = PAPI_start_counters(PAPI_Events, pNumEvents); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_start_counters...\n"); + else + printf("FAILURE: PAPI_start_counters failed, returned xRC=%d...\n", xRC); + + Print_Native_Counters(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + FPUArith(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); + + FPUArith(); + + xRC = PAPI_read_counters(PAPI_Counters, pNumEvents); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read_counters...\n"); + else + 
printf("FAILURE: PAPI_read_counters failed, returned xRC=%d...\n", xRC); + + Print_Native_Counters(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + FPUArith(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); + + FPUArith(); + + Zero_Local_Counters(PAPI_Counters); + xRC = PAPI_accum_counters(PAPI_Counters, pNumEvents); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_accum_counters...\n"); + else + printf("FAILURE: PAPI_accum_counters failed, returned xRC=%d...\n", xRC); + + Print_Native_Counters(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + FPUArith(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); + + FPUArith(); + + xRC = PAPI_read_counters(PAPI_Counters, pNumEvents); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_read_counters...\n"); + else + printf("FAILURE: PAPI_read_counters failed, returned xRC=%d...\n", xRC); + + Print_Native_Counters(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + FPUArith(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); + + FPUArith(); + + xRC = PAPI_stop_counters(PAPI_Counters, pNumEvents); + if (xRC == PAPI_OK) + printf("SUCCESS: PAPI_stop_counters...\n"); + else + printf("FAILURE: PAPI_stop_counters failed, returned xRC=%d...\n", xRC); + + Print_Native_Counters(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + FPUArith(); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, pNumEvents); + Print_PAPI_Counters_From_List(PAPI_Events, pNumEvents, PAPI_Counters); + + FPUArith(); + + return; +} + + +/* + * Zero_Local_Counters + */ + +void Zero_Local_Counters(long long* pCounters) { + int i; 
+ for (i=0; i<255; i++) + pCounters[i] = 0; + + return; +} + + +/* + * FPU Arithmetic... + */ +void FPUArith(void) { + int i; + + printf("\n==> Start: Performing arithmetic...\n"); + register unsigned int zero = 0; + register double *x_p = &x[0]; + + for ( i = 0; i < 32; i++ ) + x[i] = 1.0; + + // Single Hummer Instructions: + + #if 1 + + asm volatile ("fabs 1,2"); + asm volatile ("fmr 1,2"); + asm volatile ("fnabs 1,2"); + asm volatile ("fneg 1,2"); + + asm volatile ("fadd 1,2,3"); + asm volatile ("fadds 1,2,3"); + asm volatile ("fdiv 1,2,3"); + asm volatile ("fdivs 1,2,3"); + asm volatile ("fmul 1,2,3"); + asm volatile ("fmuls 1,2,3"); + asm volatile ("fres 1,2"); + asm volatile ("frsqrte 1,2"); + //asm volatile ("fsqrt 1,2"); // gives exception + //asm volatile ("fsqrts 1,2"); // gives exception + asm volatile ("fsub 1,2,3"); + asm volatile ("fsubs 1,2,3"); + + asm volatile ("fmadd 3,4,5,6"); + asm volatile ("fmadds 3,4,5,6"); + asm volatile ("fmsub 3,4,5,6"); + asm volatile ("fmsubs 3,4,5,6"); + asm volatile ("fnmadd 3,4,5,6"); + asm volatile ("fnmadds 3,4,5,6"); + asm volatile ("fnmsub 3,4,5,6"); + asm volatile ("fnmsubs 3,4,5,6"); + + //asm volatile ("fcfid 5,6"); // invalid instruction + //asm volatile ("fctid 5,6"); // invalid instruction + //asm volatile ("fctidz 5,6"); // invalid instruction + asm volatile ("fctiw 5,6"); + asm volatile ("fctiwz 5,6"); + asm volatile ("frsp 5,6"); + + asm volatile ("fcmpo 0,1,2"); + asm volatile ("fcmpu 0,1,2"); + asm volatile ("fsel 0,1,2,3"); + + #endif + + #if 1 + + asm volatile("fpadd 9,10,11"); + asm volatile("fpsub 9,10,11"); + + #endif + + + #if 1 + + asm volatile("fpmul 23,24,25"); + asm volatile("fxmul 26, 27, 28"); + asm volatile("fxpmul 28, 29, 30"); + asm volatile("fxsmul 2, 3, 4"); + #endif + + #if 1 + + asm volatile("fpmadd 10,11,12,13"); + asm volatile("fpmsub 18, 19, 20, 21"); + asm volatile("fpnmadd 26, 27, 28, 29"); + asm volatile("fpnmsub 16,17,18,19"); + + asm volatile("fxmadd 10,11,12,13"); + asm 
volatile("fxmsub 18, 19, 20, 21"); + asm volatile("fxnmadd 26, 27, 28, 29"); + asm volatile("fxnmsub 16,17,18,19"); + + asm volatile("fxcpmadd 10,11,12,13"); + asm volatile("fxcpmsub 18, 19, 20, 21"); + asm volatile("fxcpnmadd 26, 27, 28, 29"); + asm volatile("fxcpnmsub 16,17,18,19"); + + asm volatile("fxcsmadd 10,11,12,13"); + asm volatile("fxcsmsub 18, 19, 20, 21"); + asm volatile("fxcsnmadd 26, 27, 28, 29"); + asm volatile("fxcsnmsub 16,17,18,19"); + + asm volatile("fxcpnpma 1,2,3,4"); + asm volatile("fxcsnpma 5,6,7,8"); + asm volatile("fxcpnsma 9,10,11,12"); + asm volatile("fxcsnsma 3,4,5,6"); + + asm volatile("fxcxnpma 9,10,11,12"); + asm volatile("fxcxnsma 8,9,10,11"); + asm volatile("fxcxma 3,4,5,6"); + asm volatile("fxcxnms 8,9,10,11"); + + #endif + + + #if 1 + + asm volatile("fpre 12, 13"); + asm volatile("fprsqrte 15, 16"); + asm volatile("fpsel 17, 18, 19, 20"); + asm volatile("fpctiw 1,2"); + asm volatile("fpctiwz 3,4"); + asm volatile("fprsp 5,6"); + asm volatile("fscmp 1,2,3"); + asm volatile("fpmr 1,2"); + asm volatile("fpneg 1,2"); + asm volatile("fpabs 1,2"); + asm volatile("fpnabs 1,2"); + asm volatile("fsmr 1,2"); + asm volatile("fsneg 1,2"); + asm volatile("fsabs 1,2"); + asm volatile("fsnabs 1,2"); + asm volatile("fxmr 1,2"); + asm volatile("fsmfp 1,2"); + asm volatile("fsmtp 1,2"); + + #endif + + #if 1 + asm volatile("lfdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + + asm volatile("lfsdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfsdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfssx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfssux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + + asm volatile("lfpsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfpsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfxsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm 
volatile("lfxsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + #endif + + #if 1 + asm volatile("lfpdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfpdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfxdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("lfxdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + #endif + + #if 1 + asm volatile("stfdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + + asm volatile("stfsdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfsdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfssx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + //asm volatile("stfssux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + + asm volatile("stfpsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfpsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfxsx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfxsux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + #endif + + #if 1 + asm volatile("stfpdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfpdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfxdx 16,%0,%1" : "+b"(x_p) : "b"(zero)); + asm volatile("stfxdux 16,%0,%1" : "+b"(x_p) : "b"(zero)); + #endif + printf("==> End: Performing arithmetic...\n"); + + return; +} + + +/* + * Print_Counters + */ +void Print_Counters(const int pEventSet) { + printf("\n***** Start Print Counter Values *****\n"); +// Print_Native_Counters_via_Buffer((BGP_UPC_Read_Counters_Struct_t*)Native_Buffer); +// Print_Native_Counters(); + Print_Native_Counters_for_PAPI_Counters(pEventSet); + Print_PAPI_Counters(pEventSet, PAPI_Counters); + printf("\n***** End Print Counter Values *****\n"); + + return; +} + + +/* + * Print_Native_Counters + */ + +void Print_Native_Counters() { + printf("\n***** Start Print of Native Counter Values *****\n"); + BGP_UPC_Print_Counter_Values(BGP_UPC_READ_EXCLUSIVE); + 
printf("***** End Print of Native Counter Values *****\n"); + + return; +} + + +/* + * Print_Native_Counters_for_PAPI_Counters + */ + +void Print_Native_Counters_for_PAPI_Counters(const int pEventSet) { + printf("\n***** Start Print of Native Counter Values for PAPI Counters *****\n"); + int xNumEvents = PAPI_num_events(pEventSet); + if (xNumEvents) { + List_PAPI_Events(pEventSet, PAPI_Events, &xNumEvents); + Print_Native_Counters_for_PAPI_Counters_From_List(PAPI_Events, xNumEvents); + } + else { + printf("No events are present in the event set.\n"); + } + printf("***** End Print of Native Counter Values for PAPI Counters *****\n"); + + return; +} + + +/* + * Print_Native_Counters_for_PAPI_Counters_From_List + */ +void Print_Native_Counters_for_PAPI_Counters_From_List(const int* pEvents, const int pNumEvents) { + int i, j, xRC; + char xName[256]; + BGP_UPC_Event_Id_t xNativeEventId; + PAPI_event_info_t xEventInfo; + +// BGP_UPC_Print_Counter_Values(); // DLH + for (i=0; inumber_of_counters); + printf("***** End Print of Native Counter Values *****\n"); + + return; +} + + +/* + * Print_PAPI_Counters + */ + +void Print_PAPI_Counters(const int pEventSet, const long long* pCounters) { + int i; + char xName[256]; + printf("\n***** Start Print of PAPI Counter Values *****\n"); +// printf("Print_PAPI_Counters: PAPI_Counters*=%p, pCounters*=%p\n", PAPI_Counters, pCounters); + int pNumEvents = PAPI_num_events(pEventSet); + printf("Number of Counters = %d\n", pNumEvents); + if (pNumEvents) { + printf(" Calculated Value Location Event Number Event Name\n"); + printf("-------------------- -------- ------------ --------------------------------------------\n"); + List_PAPI_Events(pEventSet, PAPI_Events, &pNumEvents); + for (i=0; irank); + printf("Core = %d\n", xTemp->core); + printf("UPC Number = %d\n", xTemp->upc_number); + printf("Number of Processes per UPC = %d\n", xTemp->number_processes_per_upc); + printf("User Mode = %d\n", (int) xTemp->mode); + printf("Location = %s\n", 
xTemp->location); + printf("\n***** End Print of Node Information *****\n\n"); + + return; +} + + +/* + * Read_Native_Counters + */ + +void Read_Native_Counters(const int pLength) { + + int xRC = BGP_UPC_Read_Counter_Values(Native_Buffer, pLength, BGP_UPC_READ_EXCLUSIVE); + if (xRC < 0) { + printf("FAILURE: BGP_UPC_Read_Counter_Values failed, xRC=%d...\n", xRC); + exit(1); + } + + return; +} + +/* + * Print_PAPI_Events + */ + +void Print_PAPI_Events(const int pEventSet) { + int i; + char xName[256]; + int pNumEvents = PAPI_num_events(pEventSet); + List_PAPI_Events(pEventSet, PAPI_Events, &pNumEvents); + for (i=0; i. + * and Phil Mucci + * This example verifies the accuracy of branch events + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "testcode.h" + +#define MAXEVENTS 4 +#define SLEEPTIME 100 +#define MINCOUNTS 100000 +#define MPX_TOLERANCE .20 + +int +main( int argc, char **argv ) +{ + PAPI_event_info_t info; + int i, j, retval; + int iters = 10000000; + double x = 1.1, y; + long long t1, t2; + long long values[MAXEVENTS], refvalues[MAXEVENTS]; + int sleep_time = SLEEPTIME; + double spread[MAXEVENTS]; + int nevents = MAXEVENTS; + int eventset = PAPI_NULL; + int events[MAXEVENTS]; + int quiet; + + /* Set quiet variable */ + quiet = tests_quiet( argc, argv ); + + /* Parse command line args */ + if ( argc > 1 ) { + if ( !strcmp( argv[1], "TESTS_QUIET" ) ) { + + } + else { + sleep_time = atoi( argv[1] ); + if ( sleep_time <= 0 ) + sleep_time = SLEEPTIME; + } + } + + events[0] = PAPI_BR_NTK; + events[1] = PAPI_BR_PRC; + events[2] = PAPI_BR_INS; + events[3] = PAPI_BR_MSP; + + /* Why were these disabled? 
+ events[3]=PAPI_BR_CN; + events[4]=PAPI_BR_UCN; + events[5]=PAPI_BR_TKN; */ + + + for ( i = 0; i < MAXEVENTS; i++ ) { + values[i] = 0; + } + + if ( !quiet ) { + printf( "\nAccuracy check of branch presets.\n" ); + printf( "Comparing a measurement with separate measurements.\n\n" ); + } + + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( retval = PAPI_create_eventset( &eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + +#ifdef MPX + if ( ( retval = PAPI_multiplex_init( ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_multiplex_init", retval ); + + if ( ( retval = PAPI_set_multiplex( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); +#endif + + nevents = 0; + + for ( i = 0; i < MAXEVENTS; i++ ) { + if ( PAPI_query_event( events[i] ) != PAPI_OK ) + continue; + if ( PAPI_add_event( eventset, events[i] ) == PAPI_OK ) { + events[nevents] = events[i]; + nevents++; + } + } + + if ( nevents < 1 ) + test_skip( __FILE__, __LINE__, "Not enough events left...", 0 ); + + /* Find a reasonable number of iterations (each + * event active 20 times) during the measurement + */ + t2 = (long long)(10000 * 20) * nevents; /* Target: 10000 usec/multiplex, 20 repeats */ + if ( t2 > 30e6 ) + test_skip( __FILE__, __LINE__, "This test takes too much time", + retval ); + + /* Measure one run */ + t1 = PAPI_get_real_usec( ); + y = do_flops3( x, iters, 1 ); + t1 = PAPI_get_real_usec( ) - t1; + + if ( t2 > t1 ) /* Scale up execution time to match t2 */ + iters = iters * ( int ) ( t2 / t1 ); + else if ( t1 > 30e6 ) /* Make sure execution time is < 30s per repeated test */ + test_skip( __FILE__, __LINE__, "This test takes too much time", + retval ); + + x = 1.0; + + if ( !quiet ) + printf( "\nFirst run: Together.\n" ); + + t1 = PAPI_get_real_usec( ); + if ( ( retval = PAPI_start( eventset ) ) ) + test_fail( __FILE__, __LINE__, 
"PAPI_start", retval ); + y = do_flops3( x, iters, 1 ); + if ( ( retval = PAPI_stop( eventset, values ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + t2 = PAPI_get_real_usec( ); + + if ( !quiet ) { + printf( "\tOperations= %.1f Mflop", y * 1e-6 ); + printf( "\t(%g Mflop/s)\n\n", ( y / ( double ) ( t2 - t1 ) ) ); + printf( "PAPI grouped measurement:\n" ); + } + for ( j = 0; j < nevents; j++ ) { + PAPI_get_event_info( events[j], &info ); + if ( !quiet ) { + printf( "%20s = ", info.short_descr ); + printf( LLDFMT, values[j] ); + printf( "\n" ); + } + } + if ( !quiet ) + printf( "\n" ); + + + if ( ( retval = PAPI_remove_events( eventset, events, nevents ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_remove_events", retval ); + if ( ( retval = PAPI_destroy_eventset( &eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + eventset = PAPI_NULL; + if ( ( retval = PAPI_create_eventset( &eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + for ( i = 0; i < nevents; i++ ) { + + if ( ( retval = PAPI_cleanup_eventset( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + if ( ( retval = PAPI_add_event( eventset, events[i] ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + x = 1.0; + + if ( !quiet ) + printf( "\nReference measurement %d (of %d):\n", i + 1, nevents ); + + t1 = PAPI_get_real_usec( ); + if ( ( retval = PAPI_start( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + y = do_flops3( x, iters, 1 ); + if ( ( retval = PAPI_stop( eventset, &refvalues[i] ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + t2 = PAPI_get_real_usec( ); + + if ( !quiet ) { + printf( "\tOperations= %.1f Mflop", y * 1e-6 ); + printf( "\t(%g Mflop/s)\n\n", ( y / ( double ) ( t2 - t1 ) ) ); + } + PAPI_get_event_info( events[i], &info ); + if ( !quiet ) { + printf( "PAPI results:\n%20s = ", info.short_descr ); + printf( LLDFMT, 
refvalues[i] ); + printf( "\n" ); + } + } + if ( !quiet ) + printf( "\n" ); + + + if ( !quiet ) { + printf( "\n\nRelative accuracy:\n" ); + for ( j = 0; j < nevents; j++ ) + printf( " Event %.2d", j ); + printf( "\n" ); + } + + for ( j = 0; j < nevents; j++ ) { + spread[j] = abs( ( int ) ( refvalues[j] - values[j] ) ); + if ( values[j] ) + spread[j] /= ( double ) values[j]; + if ( !quiet ) + printf( "%10.3g ", spread[j] ); + /* Make sure that NaN get counted as errors */ + if ( spread[j] < MPX_TOLERANCE ) + i--; + else if ( refvalues[j] < MINCOUNTS ) /* Neglect inprecise results with low counts */ + i--; + } + if ( !quiet ) { + printf( "\n\n" ); + } + + if ( i ) { + test_fail( __FILE__, __LINE__, "Values outside threshold", i ); + } + + test_pass( __FILE__ ); + + return 0; +} + + diff --git a/src/ctests/burn.c b/src/ctests/burn.c new file mode 100644 index 0000000..6afcc49 --- /dev/null +++ b/src/ctests/burn.c @@ -0,0 +1,13 @@ +#include + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + (void)argc; + (void)argv; + + do_stuff( ); + return 0; +} diff --git a/src/ctests/byte_profile.c b/src/ctests/byte_profile.c new file mode 100644 index 0000000..cb86460 --- /dev/null +++ b/src/ctests/byte_profile.c @@ -0,0 +1,279 @@ +/* +* File: byte_profile.c +* Author: Dan Terpstra +* terpstra@cs.utk.edu +* Mods: Maynard Johnson +* maynardj@us.ibm.com +*/ + +/* This file profiles multiple events with byte level address resolution. + It's patterned after code suggested by John Mellor-Crummey, Rob Fowler, + and Nathan Tallent. + It is intended to illustrate the use of Multiprofiling on a very tight + block of code at byte level resolution of the instruction addresses. 
+*/ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" +#include "prof_utils.h" + +#include "do_loops.h" + +#define PROFILE_ALL + +static const PAPI_hw_info_t *hw_info; + +static int num_events = 0; + +#define N (1 << 23) +#define T (10) + +double aa[N], bb[N]; +double s = 0, s2 = 0; + +static void +cleara( double a[N] ) +{ + int i; + + for ( i = 0; i < N; i++ ) { + a[i] = 0; + } +} + +static int +my_dummy( int i ) +{ + return ( i + 1 ); +} + +static void +my_main( void ) +{ + int i, j; + + for ( j = 0; j < T; j++ ) { + for ( i = 0; i < N; i++ ) { + bb[i] = 0; + } + cleara( aa ); + memset( aa, 0, sizeof ( aa ) ); + for ( i = 0; i < N; i++ ) { + s += aa[i] * bb[i]; + s2 += aa[i] * aa[i] + bb[i] * bb[i]; + } + } +} + +static int +do_profile( caddr_t start, unsigned long plength, unsigned scale, int thresh, + int bucket, unsigned int mask ) { + + int i, retval; + unsigned long blength; + int num_buckets,j=0; + + int num_bufs = num_events; + int event = num_events; + + int events[MAX_TEST_EVENTS]; + char header[BUFSIZ]; + + strncpy(header,"address\t\t",BUFSIZ); + + //= "address\t\t\tcyc\tins\tfp_ins\n"; + + for(i=0;imodel_string, "POWER6" ) != 0 ) { + printf( TAB1, "PAPI_TOT_INS:", ( values[0] )[--event] ); + } +#if defined(__powerpc__) + printf( TAB1, "PAPI_FP_INS", ( values[0] )[--event] ); +#else + if ( strcmp( hw_info->model_string, "Intel Pentium III" ) != 0 ) { + printf( TAB1, "PAPI_FP_OPS:", ( values[0] )[--event] ); + printf( TAB1, "PAPI_L2_TCM:", ( values[0] )[--event] ); + } +#endif + } + + for ( i = 0; i < num_events; i++ ) { + if ( ( retval = + PAPI_profil( profbuf[i], ( unsigned int ) blength, start, scale, + EventSet, events[i], 0, + PAPI_PROFIL_POSIX ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + } + + if (!TESTS_QUIET) { + prof_head( blength, bucket, num_buckets, header ); + prof_out( start, num_events, bucket, num_buckets, scale ); + } + retval = prof_check( num_bufs, bucket, num_buckets ); + for 
( i = 0; i < num_bufs; i++ ) { + free( profbuf[i] ); + } + return retval; +} + + + +int +main( int argc, char **argv ) +{ + long length; + int mask; + int retval; + const PAPI_exe_info_t *prginfo; + caddr_t start, end; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if ( ( prginfo = PAPI_get_executable_info( ) ) == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); + } + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + } + + mask = MASK_TOT_CYC | MASK_TOT_INS | MASK_FP_OPS | MASK_L2_TCM; + +#if defined(__powerpc__) + if ( strcmp( hw_info->model_string, "POWER6" ) == 0 ) + mask = MASK_TOT_CYC | MASK_FP_INS; + else + mask = MASK_TOT_CYC | MASK_TOT_INS | MASK_FP_INS; +#endif + +#if defined(ITANIUM2) + mask = MASK_TOT_CYC | MASK_FP_OPS | MASK_L2_TCM | MASK_L1_DCM; +#endif + EventSet = add_test_events( &num_events, &mask, 0 ); + if (num_events==0) { + if (!quiet) printf("Trouble adding events\n"); + test_skip(__FILE__,__LINE__,"add_test_events",2); + } + values = allocate_test_space( 1, num_events ); + +/* profile the cleara and my_main address space */ + start = ( caddr_t ) cleara; + end = ( caddr_t ) my_dummy; + +/* Itanium and PowerPC64 processors return function descriptors instead + * of function addresses. You must dereference the descriptor to get the address. +*/ +#if defined(ITANIUM1) || defined(ITANIUM2) \ + || (defined(__powerpc64__) && (_CALL_ELF != 2)) + start = ( caddr_t ) ( ( ( struct fdesc * ) start )->ip ); + end = ( caddr_t ) ( ( ( struct fdesc * ) end )->ip ); + /* PPC64 Big Endian is ELF version 1 which uses function descriptors. 
+ * PPC64 Little Endian is ELF version 2 which does not use + * function descriptors + */ +#endif + + /* call dummy so it doesn't get optimized away */ + retval = my_dummy( 1 ); + + length = end - start; + if ( length < 0 ) + test_fail( __FILE__, __LINE__, "Profile length < 0!", ( int ) length ); + + if (!quiet) { + prof_print_address( "Test case byte_profile: " + "Multi-event profiling at byte resolution.\n", + prginfo ); + prof_print_prof_info( start, end, THRESHOLD, event_name ); + } + + retval = do_profile( start, ( unsigned ) length, + FULL_SCALE * 2, THRESHOLD, + PAPI_PROFIL_BUCKET_32, mask ); + + remove_test_events( &EventSet, mask ); + + if (retval == 0) { + test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); + } + + test_pass( __FILE__ ); + + return 0; +} + + + + diff --git a/src/ctests/calibrate.c b/src/ctests/calibrate.c new file mode 100644 index 0000000..462f40b --- /dev/null +++ b/src/ctests/calibrate.c @@ -0,0 +1,485 @@ +/* + Calibrate.c + A program to perform one or all of three tests to count flops. + Test 1. Inner Product: 2*n operations + for i = 1:n; a = a + x(i)*y(i); end + Test 2. Matrix Vector Product: 2*n^2 operations + for i = 1:n; for j = 1:n; x(i) = x(i) + a(i,j)*y(j); end; end; + Test 3. Matrix Matrix Multiply: 2*n^3 operations + for i = 1:n; for j = 1:n; for k = 1:n; c(i,j) = c(i,j) + a(i,k)*b(k,j); end; end; end; + + Supply a command line argument of 1, 2, or 3 to perform each test, or + no argument to perform all three. + + Each test initializes PAPI and presents a header with processor information. + Then it performs 500 iterations, printing result lines containing: + n, measured counts, theoretical counts, (measured - theory), % error + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define INDEX1 100 +#define INDEX5 500 + +#define MAX_WARN 10 +#define MAX_ERROR 80 +#define MAX_DIFF 14 + +/* + Extract and display hardware information for this processor. 
+ (Re)Initialize PAPI_flops() and begin counting floating ops. +*/ +static void +headerlines( const char *title, int quiet ) +{ + + if ( !quiet ) { + printf( "\n%s:\n%8s %12s %12s %8s %8s\n", title, "i", "papi", "theory", + "diff", "%error" ); + printf( "-------------------------------------------------------------------------\n" ); + } +} + +/* + Read PAPI_flops. + Format and display results. + Compute error without using floating ops. +*/ +#if defined(mips) +#define FMA 1 +#elif (defined(sparc) && defined(sun)) +#define FMA 1 +#else +#define FMA 0 +#endif + +static void +resultline( int i, int j, int EventSet, int fail, int quiet ) +{ + float ferror = 0; + long long flpins = 0; + long long papi, theory; + int diff, retval; + char err_str[PAPI_MAX_STR_LEN]; + + retval = PAPI_stop( EventSet, &flpins ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + i++; /* convert to 1s base */ + theory = 2; + while ( j-- ) + theory *= i; /* theoretical ops */ + papi = flpins << FMA; + + diff = ( int ) ( papi - theory ); + + ferror = ( ( float ) abs( diff ) ) / ( ( float ) theory ) * 100; + + if (!quiet) { + printf( "%8d %12lld %12lld %8d %10.4f\n", i, papi, theory, diff, ferror ); + } + + if ( ferror > MAX_WARN && abs( diff ) > MAX_DIFF && i > 20 ) { + sprintf( err_str, "Calibrate: difference exceeds %d percent", MAX_WARN ); + test_warn( __FILE__, __LINE__, err_str, 0 ); + } + if (fail) { + if ( ferror > MAX_ERROR && abs( diff ) > MAX_DIFF && i > 20 ) { + sprintf( err_str, "Calibrate: error exceeds %d percent", MAX_ERROR ); + test_fail( __FILE__, __LINE__, err_str, PAPI_EMISC ); + } + } +} + + +static void +print_help( char **argv ) +{ + printf( "Usage: %s [-ivmdh] [-e event]\n", argv[0] ); + printf( "Options:\n\n" ); + printf( "\t-i Inner Product test.\n" ); + printf( "\t-v Matrix-Vector multiply test.\n" ); + printf( "\t-m Matrix-Matrix multiply test.\n" ); + printf( "\t-d Double precision data. 
Default is float.\n" ); + printf( "\t-e event Use as PAPI event instead of PAPI_FP_OPS\n" ); + printf( "\t-f Suppress failures\n" ); + printf( "\t-h Print this help message\n" ); + printf( "\n" ); + printf( "This test measures floating point operations for the specified test.\n" ); + printf( "Operations can be performed in single or double precision.\n" ); + printf( "Default operation is all three tests in single precision.\n" ); +} + +static float +inner_single( int n, float *x, float *y ) +{ + float aa = 0.0; + int i; + + for ( i = 0; i <= n; i++ ) + aa = aa + x[i] * y[i]; + return ( aa ); +} + +static double +inner_double( int n, double *x, double *y ) +{ + double aa = 0.0; + int i; + + for ( i = 0; i <= n; i++ ) + aa = aa + x[i] * y[i]; + return ( aa ); +} + +static void +vector_single( int n, float *a, float *x, float *y ) +{ + int i, j; + + for ( i = 0; i <= n; i++ ) + for ( j = 0; j <= n; j++ ) + y[i] = y[i] + a[i * n + j] * x[i]; +} + +static void +vector_double( int n, double *a, double *x, double *y ) +{ + int i, j; + + for ( i = 0; i <= n; i++ ) + for ( j = 0; j <= n; j++ ) + y[i] = y[i] + a[i * n + j] * x[i]; +} + +static void +matrix_single( int n, float *c, float *a, float *b ) +{ + int i, j, k; + + for ( i = 0; i <= n; i++ ) + for ( j = 0; j <= n; j++ ) + for ( k = 0; k <= n; k++ ) + c[i * n + j] = c[i * n + j] + a[i * n + k] * b[k * n + j]; +} + +static void +matrix_double( int n, double *c, double *a, double *b ) +{ + int i, j, k; + + for ( i = 0; i <= n; i++ ) + for ( j = 0; j <= n; j++ ) + for ( k = 0; k <= n; k++ ) + c[i * n + j] = c[i * n + j] + a[i * n + k] * b[k * n + j]; +} + +static void +reset_flops( const char *title, int EventSet ) +{ + int retval; + char err_str[PAPI_MAX_STR_LEN]; + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + sprintf( err_str, "%s: PAPI_start", title ); + test_fail( __FILE__, __LINE__, err_str, retval ); + } +} + +int +main( int argc, char *argv[] ) +{ + extern void dummy( void * ); + + float aa, 
*a=NULL, *b=NULL, *c=NULL, *x=NULL, *y=NULL; + double aad, *ad=NULL, *bd=NULL, *cd=NULL, *xd=NULL, *yd=NULL; + int i, j, n; + int inner = 0; + int vector = 0; + int matrix = 0; + int double_precision = 0; + int fail = 1; + int retval = PAPI_OK; + char papi_event_str[PAPI_MIN_STR_LEN] = "PAPI_FP_OPS"; + int papi_event; + int EventSet = PAPI_NULL; + int quiet; + + /* Parse the input arguments */ + for ( i = 0; i < argc; i++ ) { + if ( strstr( argv[i], "-i" ) ) + inner = 1; + else if ( strstr( argv[i], "-f" ) ) + fail = 0; + else if ( strstr( argv[i], "-v" ) ) + vector = 1; + else if ( strstr( argv[i], "-m" ) ) + matrix = 1; + else if ( strstr( argv[i], "-e" ) ) { + if ( ( argv[i + 1] == NULL ) || ( strlen( argv[i + 1] ) == 0 ) ) { + print_help( argv ); + exit( 1 ); + } + strncpy( papi_event_str, argv[i + 1], sizeof ( papi_event_str ) - 1); + papi_event_str[sizeof ( papi_event_str )-1] = '\0'; + i++; + } else if ( strstr( argv[i], "-d" ) ) + double_precision = 1; + else if ( strstr( argv[i], "-h" ) ) { + print_help( argv ); + exit( 1 ); + } + } + + /* if no options specified, set all tests to TRUE */ + if ( inner + vector + matrix == 0 ) + inner = vector = matrix = 1; + + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if ( !quiet ) { + printf( "Initializing..." 
); + } + + /* Initialize PAPI */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Translate name */ + retval = PAPI_event_name_to_code( papi_event_str, &papi_event ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); + } + + if ( PAPI_query_event( papi_event ) != PAPI_OK ) { + test_skip( __FILE__, __LINE__, "PAPI_query_event", PAPI_ENOEVNT ); + } + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + if ( ( retval = PAPI_add_event( EventSet, papi_event ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + if (!quiet) printf( "\n" ); + + retval = PAPI_OK; + + /* Inner Product test */ + if ( inner ) { + /* Allocate the linear arrays */ + if (double_precision) { + xd = malloc( INDEX5 * sizeof(double) ); + yd = malloc( INDEX5 * sizeof(double) ); + if ( !( xd && yd ) ) + retval = PAPI_ENOMEM; + } + else { + x = malloc( INDEX5 * sizeof(float) ); + y = malloc( INDEX5 * sizeof(float) ); + if ( !( x && y ) ) + retval = PAPI_ENOMEM; + } + + if ( retval == PAPI_OK ) { + headerlines( "Inner Product Test", quiet ); + + /* step through the different array sizes */ + for ( n = 0; n < INDEX5; n++ ) { + if ( n < INDEX1 || ( ( n + 1 ) % 50 ) == 0 ) { + + /* Initialize the needed arrays at this size */ + if ( double_precision ) { + for ( i = 0; i <= n; i++ ) { + xd[i] = ( double ) rand( ) * ( double ) 1.1; + yd[i] = ( double ) rand( ) * ( double ) 1.1; + } + } else { + for ( i = 0; i <= n; i++ ) { + x[i] = ( float ) rand( ) * ( float ) 1.1; + y[i] = ( float ) rand( ) * ( float ) 1.1; + } + } + + /* reset PAPI flops count */ + reset_flops( "Inner Product Test", EventSet ); + + /* do the multiplication */ + if ( double_precision ) { + aad = inner_double( n, xd, yd ); + dummy( ( void * ) &aad ); + } else 
{ + aa = inner_single( n, x, y ); + dummy( ( void * ) &aa ); + } + resultline( n, 1, EventSet, fail, quiet ); + } + } + } + if (double_precision) { + free( xd ); + free( yd ); + } else { + free( x ); + free( y ); + } + } + + /* Matrix Vector test */ + if ( vector && retval != PAPI_ENOMEM ) { + /* Allocate the needed arrays */ + if (double_precision) { + ad = malloc( INDEX5 * INDEX5 * sizeof(double) ); + xd = malloc( INDEX5 * sizeof(double) ); + yd = malloc( INDEX5 * sizeof(double) ); + if ( !( ad && xd && yd ) ) + retval = PAPI_ENOMEM; + } else { + a = malloc( INDEX5 * INDEX5 * sizeof(float) ); + x = malloc( INDEX5 * sizeof(float) ); + y = malloc( INDEX5 * sizeof(float) ); + if ( !( a && x && y ) ) + retval = PAPI_ENOMEM; + } + + if ( retval == PAPI_OK ) { + headerlines( "Matrix Vector Test", quiet ); + + /* step through the different array sizes */ + for ( n = 0; n < INDEX5; n++ ) { + if ( n < INDEX1 || ( ( n + 1 ) % 50 ) == 0 ) { + + /* Initialize the needed arrays at this size */ + if ( double_precision ) { + for ( i = 0; i <= n; i++ ) { + yd[i] = 0.0; + xd[i] = ( double ) rand( ) * ( double ) 1.1; + for ( j = 0; j <= n; j++ ) + ad[i * n + j] = + ( double ) rand( ) * ( double ) 1.1; + } + } else { + for ( i = 0; i <= n; i++ ) { + y[i] = 0.0; + x[i] = ( float ) rand( ) * ( float ) 1.1; + for ( j = 0; j <= n; j++ ) + a[i * n + j] = + ( float ) rand( ) * ( float ) 1.1; + } + } + + /* reset PAPI flops count */ + reset_flops( "Matrix Vector Test", EventSet ); + + /* compute the resultant vector */ + if ( double_precision ) { + vector_double( n, ad, xd, yd ); + dummy( ( void * ) yd ); + } else { + vector_single( n, a, x, y ); + dummy( ( void * ) y ); + } + resultline( n, 2, EventSet, fail, quiet ); + } + } + } + if (double_precision) { + free( ad ); + free( xd ); + free( yd ); + } else { + free( a ); + free( x ); + free( y ); + } + } + + /* Matrix Multiply test */ + if ( matrix && retval != PAPI_ENOMEM ) { + /* Allocate the needed arrays */ + if (double_precision) { + 
ad = malloc( INDEX5 * INDEX5 * sizeof(double) ); + bd = malloc( INDEX5 * INDEX5 * sizeof(double) ); + cd = malloc( INDEX5 * INDEX5 * sizeof(double) ); + if ( !( ad && bd && cd ) ) + retval = PAPI_ENOMEM; + } else { + a = malloc( INDEX5 * INDEX5 * sizeof(float) ); + b = malloc( INDEX5 * INDEX5 * sizeof(float) ); + c = malloc( INDEX5 * INDEX5 * sizeof(float) ); + if ( !( a && b && c ) ) + retval = PAPI_ENOMEM; + } + + + if ( retval == PAPI_OK ) { + headerlines( "Matrix Multiply Test", quiet ); + + /* step through the different array sizes */ + for ( n = 0; n < INDEX5; n++ ) { + if ( n < INDEX1 || ( ( n + 1 ) % 50 ) == 0 ) { + + /* Initialize the needed arrays at this size */ + if ( double_precision ) { + for ( i = 0; i <= n * n + n; i++ ) { + cd[i] = 0.0; + ad[i] = ( double ) rand( ) * ( double ) 1.1; + bd[i] = ( double ) rand( ) * ( double ) 1.1; + } + } else { + for ( i = 0; i <= n * n + n; i++ ) { + c[i] = 0.0; + a[i] = ( float ) rand( ) * ( float ) 1.1; + b[i] = ( float ) rand( ) * ( float ) 1.1; + } + } + + /* reset PAPI flops count */ + reset_flops( "Matrix Multiply Test", EventSet ); + + /* compute the resultant matrix */ + if ( double_precision ) { + matrix_double( n, cd, ad, bd ); + dummy( ( void * ) c ); + } else { + matrix_single( n, c, a, b ); + dummy( ( void * ) c ); + } + resultline( n, 3, EventSet, fail, quiet ); + } + } + } + if (double_precision) { + free( ad ); + free( bd ); + free( cd ); + } else { + free( a ); + free( b ); + free( c ); + } + } + + /* exit with status code */ + if ( retval == PAPI_ENOMEM ) { + test_fail( __FILE__, __LINE__, "malloc", retval ); + } + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/ctests/case1.c b/src/ctests/case1.c new file mode 100644 index 0000000..02fcbd7 --- /dev/null +++ b/src/ctests/case1.c @@ -0,0 +1,73 @@ +/* From Dave McNamara at PSRV. Thanks! */ + +/* If you try to add an event that doesn't exist, you get the correct error +message, yet you get subsequent Seg. 
Faults when you try to do PAPI_start and +PAPI_stop. I would expect some bizarre behavior if I had no events added to the +event set and then tried to PAPI_start but if I had successfully added one +event, then the 2nd one get an error when I tried to add it, is it possible for +PAPI_start to work but just count the first event? +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + double c, a = 0.999, b = 1.001; + int n = 1000; + int EventSet = PAPI_NULL; + int retval; + int i, j = 0; + long long g1[2]; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + if ( PAPI_query_event( PAPI_L2_TCM ) == PAPI_OK ) + j++; + + if ( j == 1 && + ( retval = PAPI_add_event( EventSet, PAPI_L2_TCM ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + j--; /* The event was not added */ + } + + i = j; + if ( PAPI_query_event( PAPI_L2_DCM ) == PAPI_OK ) + j++; + + if ( j == ( i + 1 ) && + ( retval = PAPI_add_event( EventSet, PAPI_L2_DCM ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + j--; /* The event was not added */ + } + + if ( j ) { + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + for ( i = 0; i < n; i++ ) { + c = a * b; + } + if (!TESTS_QUIET) fprintf(stdout,"c=%lf\n",c); + + if ( ( retval = PAPI_stop( EventSet, g1 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/case2.c b/src/ctests/case2.c new file mode 100644 index 0000000..ee3f8bc --- 
/dev/null +++ b/src/ctests/case2.c @@ -0,0 +1,82 @@ +/* From Dave McNamara at PSRV. Thanks! */ + +/* If an event is countable but you've exhausted the counter resources +and you try to add an event, it seems subsequent PAPI_start and/or +PAPI_stop will causes a Seg. Violation. + + I got around this by calling PAPI to get the # of countable events, +then making sure that I didn't try to add more than these number of +events. I still have a problem if someone adds Level 2 cache misses +and then adds FLOPS 'cause I didn't count FLOPS as actually requiring +2 counters. */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + double c, a = 0.999, b = 1.001; + int n = 1000; + int EventSet = PAPI_NULL; + int retval; + int j = 0, i; + long long g1[3]; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + if ( PAPI_query_event( PAPI_BR_CN ) == PAPI_OK ) + j++; + + if ( j == 1 && + ( retval = PAPI_add_event( EventSet, PAPI_BR_CN ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + i = j; + if ( PAPI_query_event( PAPI_TOT_CYC ) == PAPI_OK ) + j++; + + if ( j == ( i + 1 ) && + ( retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + i = j; + if ( PAPI_query_event( PAPI_TOT_INS ) == PAPI_OK ) + j++; + + if ( j == ( i + 1 ) && + ( retval = PAPI_add_event( EventSet, PAPI_TOT_INS ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + if ( j ) { + if ( ( retval = PAPI_start( EventSet ) ) != 
PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + for ( i = 0; i < n; i++ ) { + c = a * b; + } + if (!TESTS_QUIET) fprintf(stdout,"c=%lf\n",c); + if ( ( retval = PAPI_stop( EventSet, g1 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/child_overflow.c b/src/ctests/child_overflow.c new file mode 100644 index 0000000..8cb35e4 --- /dev/null +++ b/src/ctests/child_overflow.c @@ -0,0 +1,174 @@ +/* + * Test PAPI with fork() and exec(). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "testcode.h" + +#define MAX_EVENTS 3 + +static int Event[MAX_EVENTS] = { + PAPI_TOT_CYC, + PAPI_FP_INS, + PAPI_FAD_INS, +}; + +static int Threshold[MAX_EVENTS] = { + 8000000, + 4000000, + 4000000, +}; + +static struct timeval start, last; +static long count, total; + +static void +my_handler( int EventSet, void *pc, long long ovec, void *context ) +{ + ( void ) EventSet; + ( void ) pc; + ( void ) ovec; + ( void ) context; + + count++; + total++; +} + + + + + +static void +print_rate( const char *str ) +{ + static int last_count = -1; + struct timeval now; + double st_secs, last_secs; + + gettimeofday( &now, NULL ); + st_secs = ( double ) ( now.tv_sec - start.tv_sec ) + + ( ( double ) ( now.tv_usec - start.tv_usec ) ) / 1000000.0; + last_secs = ( double ) ( now.tv_sec - last.tv_sec ) + + ( ( double ) ( now.tv_usec - last.tv_usec ) ) / 1000000.0; + if ( last_secs <= 0.001 ) + last_secs = 0.001; + + if (!TESTS_QUIET) { + printf( "[%d] %s, time = %.3f, total = %ld, last = %ld, rate = %.1f/sec\n", + getpid( ), str, st_secs, total, count, + ( ( double ) count ) / last_secs ); + } + + if ( last_count != -1 ) { + if ( count < .1 * last_count ) { + test_fail( __FILE__, __LINE__, "Interrupt rate changed!", 1 ); + exit( 1 ); + } + } + last_count = ( int ) count; + count = 0; + last = now; +} + +static 
void +run( const char *str, int len ) +{ + int n; + + for ( n = 1; n <= len; n++ ) { + do_cycles( 1 ); + print_rate( str ); + } +} + +int +main( int argc, char **argv ) +{ + int quiet,retval; + int ev, EventSet = PAPI_NULL; + int num_events; + const char *name = "unknown"; + + /* Used to be able to set this via command line */ + num_events=1; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + do_cycles( 1 ); + + /* zero out the count fields */ + gettimeofday( &start, NULL ); + last = start; + count = 0; + total = 0; + + /* Initialize PAPI */ + retval=PAPI_library_init( PAPI_VER_CURRENT ); + if (retval!=PAPI_VER_CURRENT) { + test_fail( name, __LINE__, "PAPI_library_init failed", 1 ); + } + + name = argv[0]; + if (!quiet) { + printf( "[%d] %s, num_events = %d\n", getpid(), + name, num_events ); + } + + /* Set up eventset */ + if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) { + test_fail( name, __LINE__, "PAPI_create_eventset failed", 1 ); + } + + /* Add events */ + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_add_event( EventSet, Event[ev] ) != PAPI_OK ) { + if (!quiet) printf("Trouble adding event.\n"); + test_skip( name, __LINE__, "PAPI_add_event failed", 1 ); + } + } + + /* Set up overflow handler */ + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_overflow( EventSet, Event[ev], + Threshold[ev], 0, my_handler ) + != PAPI_OK ) { + test_fail( name, __LINE__, "PAPI_overflow failed", 1 ); + } + } + + /* Start the eventset */ + if ( PAPI_start( EventSet ) != PAPI_OK ) { + test_fail( name, __LINE__, "PAPI_start failed", 1 ); + } + + /* Generate some workload */ + run( name, 3 ); + + if (!quiet) { + printf("[%d] %s, %s\n", getpid(), name, "stop"); + } + + /* Stop measuring */ + if ( PAPI_stop( EventSet, NULL ) != PAPI_OK ) { + test_fail( name, __LINE__, "PAPI_stop failed", 1 ); + } + + if (!quiet) { + printf("[%d] %s, %s\n", getpid(), name, "end"); + } + + test_pass(__FILE__); + + return 0; +} diff --git 
a/src/ctests/clockres_pthreads.c b/src/ctests/clockres_pthreads.c new file mode 100644 index 0000000..ae659ee --- /dev/null +++ b/src/ctests/clockres_pthreads.c @@ -0,0 +1,100 @@ +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "clockcore.h" + +void * +pthread_main( void *arg ) +{ + ( void ) arg; + int retval = PAPI_register_thread( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); + } + + retval=clockcore( TESTS_QUIET ); + if (retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__, "clockcore failure", retval ); + } + + retval = PAPI_unregister_thread( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); + } + return NULL; +} + +int +main( int argc, char **argv ) +{ + pthread_t t1, t2, t3, t4; + pthread_attr_t attr; + int retval; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + if (( retval = PAPI_library_init( PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + retval = PAPI_thread_init( ( unsigned long ( * )(void) ) (pthread_self) ); + if ( retval != PAPI_OK ) { + if ( retval == PAPI_ECMP ) { + test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } + else { + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } + } + + if ( !TESTS_QUIET ) { + printf( "Test case: Clock latency and resolution.\n" ); + printf( "Note: Virtual timers are proportional to # CPUs.\n" ); + printf( "------------------------------------------------\n" ); + } + + pthread_attr_init( &attr ); + +#ifdef PTHREAD_CREATE_UNDETACHED + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); +#endif + +#ifdef PTHREAD_SCOPE_SYSTEM + retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); + if ( retval != 0 ) { + test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); + } +#endif + + if (pthread_create( &t1, &attr, pthread_main, NULL )) { + 
test_fail(__FILE__, __LINE__, "cannot create thread", retval); + } + + if (pthread_create( &t2, &attr, pthread_main, NULL )) { + test_fail(__FILE__, __LINE__, "cannot create thread", retval); + } + + if (pthread_create( &t3, &attr, pthread_main, NULL )) { + test_fail(__FILE__, __LINE__, "cannot create thread", retval); + } + + if (pthread_create( &t4, &attr, pthread_main, NULL )) { + test_fail(__FILE__, __LINE__, "cannot create thread", retval); + } + + pthread_main( NULL ); + + pthread_join( t1, NULL ); + pthread_join( t2, NULL ); + pthread_join( t3, NULL ); + pthread_join( t4, NULL ); + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/cmpinfo.c b/src/ctests/cmpinfo.c new file mode 100644 index 0000000..0d68bfc --- /dev/null +++ b/src/ctests/cmpinfo.c @@ -0,0 +1,61 @@ +/* +* File: cmpinfo.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval; + + const PAPI_component_info_t *cmpinfo; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( cmpinfo = PAPI_get_component_info( 0 ) ) == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_component_info", retval ); + + if (!TESTS_QUIET) { + printf( "name: %s\n", cmpinfo->name ); + printf( "component_version: %s\n", cmpinfo->version ); + printf( "support_version: %s\n", cmpinfo->support_version ); + printf( "kernel_version: %s\n", cmpinfo->kernel_version ); + printf( "num_cntrs: %d\n", cmpinfo->num_cntrs ); + printf( "num_mpx_cntrs: %d\n", cmpinfo->num_mpx_cntrs ); + printf( "num_preset_events: %d\n", cmpinfo->num_preset_events ); /* Number of counters the component supports */ + printf( "num_native_events: %d\n", cmpinfo->num_native_events ); /* Number of counters the component supports */ + printf( "default_domain: 
%#x (%s)\n", cmpinfo->default_domain, + stringify_all_domains( cmpinfo->default_domain ) ); + printf( "available_domains: %#x (%s)\n", cmpinfo->available_domains, stringify_all_domains( cmpinfo->available_domains ) ); /* Available domains */ + printf( "default_granularity: %#x (%s)\n", cmpinfo->default_granularity, + stringify_granularity( cmpinfo->default_granularity ) ); + /* The default granularity when this component is used */ + printf( "available_granularities: %#x (%s)\n", cmpinfo->available_granularities, stringify_all_granularities( cmpinfo->available_granularities ) ); /* Available granularities */ + printf( "hardware_intr_sig: %d\n", cmpinfo->hardware_intr_sig ); printf( "hardware_intr: %d\n", cmpinfo->hardware_intr ); /* Needs hw overflow intr to be emulated in software */ + printf( "precise_intr: %d\n", cmpinfo->precise_intr ); /* Performance interrupts happen precisely */ + printf( "posix1b_timers: %d\n", cmpinfo->posix1b_timers ); /* Performance interrupts happen precisely */ + printf( "kernel_profile: %d\n", cmpinfo->kernel_profile ); /* Needs kernel profile support (buffered interrupts) to be emulated */ + printf( "kernel_multiplex: %d\n", cmpinfo->kernel_multiplex ); /* In kernel multiplexing */ + printf( "fast_counter_read: %d\n", cmpinfo->fast_counter_read ); /* Has a fast counter read */ + printf( "fast_real_timer: %d\n", cmpinfo->fast_real_timer ); /* Has a fast real timer */ + printf( "fast_virtual_timer: %d\n", cmpinfo->fast_virtual_timer ); /* Has a fast virtual timer */ + printf( "attach: %d\n", cmpinfo->attach ); /* Supports attach */ + printf( "attach_must_ptrace: %d\n", cmpinfo->attach_must_ptrace ); /* */ + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/code2name.c b/src/ctests/code2name.c new file mode 100644 index 0000000..3f07c3a --- /dev/null +++ b/src/ctests/code2name.c @@ -0,0 +1,157 @@ +/* This file performs the following test: event_code_to_name */ + +#include +#include + +#include "papi.h" +#include 
"papi_test.h" + +static void +test_continue( const char *call, int retval ) +{ + if (!TESTS_QUIET) { + printf( "Expected error in %s: %s\n", call, + PAPI_strerror(retval) ); + } +} + +int +main( int argc, char **argv ) +{ + int retval; + int code = PAPI_TOT_CYC, last; + char event_name[PAPI_MAX_STR_LEN]; + const PAPI_component_info_t *cmp_info; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if (!quiet) { + printf( "Test case code2name.c: " + "Check limits and indexing of event tables.\n"); + printf( "Looking for PAPI_TOT_CYC...\n" ); + } + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + if (!quiet) printf( "Found |%s|\n", event_name ); + + code = PAPI_FP_OPS; + + if (!quiet) { + printf( "Looking for highest defined preset event " + "(PAPI_FP_OPS): %#x...\n",code ); + } + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + if (!quiet) printf( "Found |%s|\n", event_name ); + + code = PAPI_PRESET_MASK | ( PAPI_MAX_PRESET_EVENTS - 1 ); + + if (!quiet) { + printf( "Looking for highest allocated preset event:" + " %#x...\n", code ); + } + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_continue( "PAPI_event_code_to_name", retval ); + } + else { + if (!quiet) printf( "Found |%s|\n", event_name ); + } + + code = PAPI_PRESET_MASK | ( unsigned int ) PAPI_NATIVE_AND_MASK; + + if (!quiet) { + printf( "Looking for highest possible preset event:" + " %#x...\n", code ); + } + + retval = PAPI_event_code_to_name( code, event_name ); + + if ( retval != PAPI_OK ) { + test_continue( "PAPI_event_code_to_name", retval ); + } + else { + 
if (!quiet) printf( "Found |%s|\n", event_name ); + } + + /* Find the first defined native event in component 0 */ + /* For platform independence, always ASK FOR the first event */ + /* Don't just assume it'll be the first numeric value */ + code = PAPI_NATIVE_MASK; + PAPI_enum_event( &code, PAPI_ENUM_FIRST ); + + if (!quiet) { + printf( "Looking for first native event: %#x...\n", code ); + } + + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Could not find first native event\n"); + test_skip( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + else { + if (!quiet) printf( "Found |%s|\n", event_name ); + } + + /* Find the last defined native event */ + + /* FIXME: hardcoded cmp 0 */ + cmp_info = PAPI_get_component_info( 0 ); + if ( cmp_info == NULL ) { + test_fail( __FILE__, __LINE__, + "PAPI_get_component_info", PAPI_ECMP ); + } + + code = PAPI_NATIVE_MASK; + last = code; + PAPI_enum_event( &code, PAPI_ENUM_FIRST ); + + while ( PAPI_enum_event( &code, PAPI_ENUM_EVENTS ) == PAPI_OK ) { + last=code; + } + + code = last; + if (!quiet) printf( "Looking for last native event: %#x...\n", code ); + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + else { + if (!quiet) printf( "Found |%s|\n", event_name ); + } + + /* Highly doubtful we have this many natives */ + /* Turn on all bits *except* PRESET bit and COMPONENT bits */ + code = PAPI_PRESET_AND_MASK; + if (!quiet) printf( "Looking for highest definable native event: %#x...\n", code ); + retval = PAPI_event_code_to_name( code, event_name ); + if ( retval != PAPI_OK ) { + test_continue( "PAPI_event_code_to_name", retval ); + } + else { + if (!quiet) printf( "Found |%s|\n", event_name ); + } + + if ( ( retval == PAPI_ENOCMP) || ( retval == PAPI_ENOEVNT ) || ( retval == PAPI_OK ) ) { + test_pass( __FILE__ ); + } + + test_fail( __FILE__, __LINE__, 
"PAPI_event_code_to_name", PAPI_EBUG ); + + return 1; +} diff --git a/src/ctests/data_range.c b/src/ctests/data_range.c new file mode 100644 index 0000000..c81a277 --- /dev/null +++ b/src/ctests/data_range.c @@ -0,0 +1,264 @@ +/* +* File: data_range.c +* Author: Dan Terpstra +* terpstra@cs.utk.edu +* Mods: +* +*/ + +/* This file performs the following test: */ +/* exercise the Itanium data address range interface */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NUM 16384 + +static void init_array( void ); +static int do_malloc_work( long loop ); +static int do_static_work( long loop ); +static void measure_load_store( caddr_t start, caddr_t end ); +static void measure_event( int index, PAPI_option_t * option ); + +int *parray1, *parray2, *parray3; +int array1[NUM], array2[NUM], array3[NUM]; +char event_name[2][PAPI_MAX_STR_LEN]; +int PAPI_event[2]; +int EventSet = PAPI_NULL; + +int +main( int argc, char **argv ) +{ + int retval; + const PAPI_exe_info_t *prginfo = NULL; + const PAPI_hw_info_t *hw_info; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + +#if !defined(ITANIUM2) && !defined(ITANIUM3) + test_skip( __FILE__, __LINE__, "Currently only works on itanium2", 0 ); + exit( 1 ); +#endif + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + init_array( ); + printf( "Malloc'd array pointers: %p %p %p\n", &parray1, &parray2, + &parray3 ); + printf( "Malloc'd array addresses: %p %p %p\n", parray1, parray2, + parray3 ); + printf( "Static array addresses: %p %p %p\n", &array1, &array2, + &array3 ); + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + + prginfo = PAPI_get_executable_info( ); + if ( prginfo == NULL ) + 
test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); + +#if defined(linux) && defined(__ia64__) + sprintf( event_name[0], "loads_retired" ); + sprintf( event_name[1], "stores_retired" ); + PAPI_event_name_to_code( event_name[0], &PAPI_event[0] ); + PAPI_event_name_to_code( event_name[1], &PAPI_event[1] ); +#else + test_skip( __FILE__, __LINE__, "only works for Itanium", PAPI_ENOSUPP ); +#endif + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + retval = PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_assign_eventset_component( EventSet, 0 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", + retval ); + +/***************************************************************************************/ + printf + ( "\n\nMeasure loads and stores on the pointers to the allocated arrays\n" ); + printf( "Expected loads: %d; Expected stores: 0\n", NUM * 2 ); + printf + ( "These loads result from accessing the pointers to compute array addresses.\n" ); + printf + ( "They will likely disappear with higher levels of optimization.\n" ); + + measure_load_store( ( caddr_t ) & parray1, ( caddr_t ) ( &parray1 + 1 ) ); + measure_load_store( ( caddr_t ) & parray2, ( caddr_t ) ( &parray2 + 1 ) ); + measure_load_store( ( caddr_t ) & parray3, ( caddr_t ) ( &parray3 + 1 ) ); +/***************************************************************************************/ + printf + ( "\n\nMeasure loads and stores on the allocated arrays themselves\n" ); + printf( "Expected loads: %d; Expected stores: %d\n", NUM, NUM ); + + measure_load_store( ( caddr_t ) parray1, ( caddr_t ) ( parray1 + NUM ) ); + measure_load_store( ( caddr_t ) parray2, ( caddr_t ) ( parray2 + NUM ) ); + measure_load_store( ( caddr_t ) parray3, ( caddr_t ) ( parray3 + NUM ) ); 
+/***************************************************************************************/ + printf( "\n\nMeasure loads and stores on the static arrays\n" ); + printf + ( "These values will differ from the expected values by the size of the offsets.\n" ); + printf( "Expected loads: %d; Expected stores: %d\n", NUM, NUM ); + + measure_load_store( ( caddr_t ) array1, ( caddr_t ) ( array1 + NUM ) ); + measure_load_store( ( caddr_t ) array2, ( caddr_t ) ( array2 + NUM ) ); + measure_load_store( ( caddr_t ) array3, ( caddr_t ) ( array3 + NUM ) ); +/***************************************************************************************/ + + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy", retval ); + + free( parray1 ); + free( parray2 ); + free( parray3 ); + + test_pass( __FILE__ ); + + return 0; + +} + +static void +measure_load_store( caddr_t start, caddr_t end ) +{ + PAPI_option_t option; + int retval; + + /* set up the optional address structure for starting and ending data addresses */ + option.addr.eventset = EventSet; + option.addr.start = start; + option.addr.end = end; + + if ( ( retval = PAPI_set_opt( PAPI_DATA_ADDRESS, &option ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_opt(PAPI_DATA_ADDRESS)", + retval ); + + measure_event( 0, &option ); + measure_event( 1, &option ); +} + +static void +measure_event( int index, PAPI_option_t * option ) +{ + int retval; + long long value; + + if ( ( retval = PAPI_add_event( EventSet, PAPI_event[index] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + if ( index == 0 ) { +/* if ((retval = PAPI_get_opt(PAPI_DATA_ADDRESS, option)) != PAPI_OK) + test_fail(__FILE__, __LINE__, "PAPI_get_opt(PAPI_DATA_ADDRESS)", retval); +*/ + printf + ( "Requested Start Address: %p; Start Offset: %#5x; Actual Start Address: %p\n", + option->addr.start, option->addr.start_off, + option->addr.start - option->addr.start_off ); + 
printf + ( "Requested End Address: %p; End Offset: %#5x; Actual End Address: %p\n", + option->addr.end, option->addr.end_off, + option->addr.end + option->addr.end_off ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + do_malloc_work( NUM ); + do_static_work( NUM ); + retval = PAPI_stop( EventSet, &value ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + printf( "%s: %lld\n", event_name[index], value ); + + if ( ( retval = + PAPI_remove_event( EventSet, PAPI_event[index] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event", retval ); +} + +static void +init_array( void ) +{ + parray1 = ( int * ) malloc( NUM * sizeof ( int ) ); + if ( parray1 == NULL ) + test_fail( __FILE__, __LINE__, "No memory available!\n", 0 ); + memset( parray1, 0x0, NUM * sizeof ( int ) ); + + parray2 = ( int * ) malloc( NUM * sizeof ( int ) ); + if ( parray2 == NULL ) + test_fail( __FILE__, __LINE__, "No memory available!\n", 0 ); + memset( parray2, 0x0, NUM * sizeof ( int ) ); + + parray3 = ( int * ) malloc( NUM * sizeof ( int ) ); + if ( parray3 == NULL ) + test_fail( __FILE__, __LINE__, "No memory available!\n", 0 ); + memset( parray3, 0x0, NUM * sizeof ( int ) ); + +} + +static int +do_static_work( long loop ) +{ + int i; + int sum = 0; + + for ( i = 0; i < loop; i++ ) { + array1[i] = i; + sum += array1[i]; + } + + for ( i = 0; i < loop; i++ ) { + array2[i] = i; + sum += array2[i]; + } + + for ( i = 0; i < loop; i++ ) { + array3[i] = i; + sum += array3[i]; + } + + return sum; +} + +static int +do_malloc_work( long loop ) +{ + int i; + int sum = 0; + + for ( i = 0; i < loop; i++ ) { + parray1[i] = i; + sum += parray1[i]; + } + + for ( i = 0; i < loop; i++ ) { + parray2[i] = i; + sum += parray2[i]; + } + + for ( i = 0; i < loop; i++ ) { + parray3[i] = i; + sum += parray3[i]; + } + + return sum; +} diff --git a/src/ctests/derived.c 
b/src/ctests/derived.c new file mode 100644 index 0000000..b46edce --- /dev/null +++ b/src/ctests/derived.c @@ -0,0 +1,124 @@ +/* This file performs the following test: start, stop with a derived event */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define EVENTSLEN 2 + +unsigned int PAPI_events[EVENTSLEN] = { 0, 0 }; +static const int PAPI_events_len = 1; + +int +main( int argc, char **argv ) +{ + int retval, tmp; + int EventSet = PAPI_NULL; + int i; + PAPI_event_info_t info; + long long values; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + int quiet=0; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if (!quiet) { + printf( "Test case %s: start, stop with a derived counter.\n", + __FILE__ ); + printf( "------------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n\n", tmp, + stringify_granularity( tmp ) ); + } + + i = PAPI_PRESET_MASK; + do { + if ( PAPI_get_event_info( i, &info ) == PAPI_OK ) { + if ( info.count > 1 ) { + PAPI_events[0] = ( unsigned int ) info.event_code; + break; + } + } + } while ( PAPI_enum_event( &i, 0 ) == PAPI_OK ); + + if ( PAPI_events[0] == 0 ) { + test_skip(__FILE__, __LINE__, "No events found", 0); + } + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + for ( i = 0; i < PAPI_events_len; i++ ) { + PAPI_event_code_to_name( ( int ) PAPI_events[i], event_name ); + if ( !quiet ) { + printf( "Adding %s\n", event_name ); + } + retval = PAPI_add_event( EventSet, ( int ) PAPI_events[i] ); + 
if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__, "PAPI_add_event", retval ); + } + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + if (!quiet) printf( "Running do_stuff().\n" ); + + do_stuff( ); + + retval = PAPI_stop( EventSet, &values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if (!quiet) { + + snprintf( add_event_str, sizeof ( add_event_str ), "%-12s : \t", event_name ); /* bounded: event_name may be as long as the destination */ + printf( TAB1, add_event_str, values ); + printf( "------------------------------------------------\n" ); + } + + retval = PAPI_cleanup_eventset( EventSet ); /* JT */ + if ( retval != PAPI_OK ) { + test_fail(__FILE__,__LINE__, "PAPI_cleanup_eventset", retval ); + } + + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__,__LINE__, "PAPI_destroy_eventset", retval ); /* name the call that actually failed */ + } + + if (!quiet) printf( "Verification: Does it produce a non-zero value?\n" ); + + if ( values != 0 ) { + if (!quiet) { + printf( "Yes: " ); + printf( LLDFMT, values ); + printf( "\n" ); + } + } + else { + test_fail(__FILE__,__LINE__, "Validation", 1 ); + } + + test_pass(__FILE__); + + return 0; +} diff --git a/src/ctests/describe.c b/src/ctests/describe.c new file mode 100644 index 0000000..e018799 --- /dev/null +++ b/src/ctests/describe.c @@ -0,0 +1,118 @@ +/* From Paul Drongowski at HP. Thanks. */ + +/* I have not been able to call PAPI_describe_event without + incurring a segv, including the sample code on the man page. + I noticed that PAPI_describe_event is not exercised by the + PAPI test programs, so I haven't been able to check the + function call using known good code. (Or steal your code + for that matter. :-) +*/ + +/* PAPI_describe_event has been deprecated in PAPI 3, since + its functionality exists in other API calls. Below shows + several ways that this call was used, with replacement + code compatible with PAPI 3. 
+*/ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int EventSet = PAPI_NULL; + int retval; + long long g1[2]; + int eventcode = PAPI_TOT_INS; + PAPI_event_info_t info, info1, info2; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + if ( ( retval = PAPI_query_event( eventcode ) ) != PAPI_OK ) { + if (!quiet) printf("Trouble checking event\n"); + test_skip( __FILE__, __LINE__, "PAPI_query_event(PAPI_TOT_INS)", + retval ); + } + + if ( ( retval = PAPI_add_event( EventSet, eventcode ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_INS)", retval ); + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + if ( ( retval = PAPI_stop( EventSet, g1 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + /* Case 0, no info, should fail */ + eventcode = 0; +/* + if ( ( retval = PAPI_describe_event(eventname,(int *)&eventcode,eventdesc) ) == PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_describe_event",retval); +*/ + if (!quiet) { + printf("This test expects a 'PAPI Error' to be returned from this PAPI call.\n"); + } + if ( ( retval = PAPI_get_event_info( eventcode, &info ) ) == PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval ); + + /* Case 1, fill in name field. 
*/ + eventcode = PAPI_TOT_INS; +/* + if ( ( retval = PAPI_describe_event(eventname,(int *)&eventcode,eventdesc) ) != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_describe_event",retval); +*/ + if ( ( retval = PAPI_get_event_info( eventcode, &info1 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval ); + + if ( strcmp( info1.symbol, "PAPI_TOT_INS" ) != 0 ) + test_fail( __FILE__, __LINE__, + "PAPI_get_event_info symbol value is bogus", retval ); + if ( strlen( info1.long_descr ) == 0 ) + test_fail( __FILE__, __LINE__, + "PAPI_get_event_info long_descr value is bogus", retval ); + + eventcode = 0; + + /* Case 2, fill in code field. */ +/* + if ( ( retval = PAPI_describe_event(eventname,(int *)&eventcode,eventdesc) ) != PAPI_OK) + test_fail(__FILE__,__LINE__,"PAPI_describe_event",retval); +*/ + if ( ( retval = PAPI_event_name_to_code( info1.symbol, ( int * ) &eventcode ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); + } + + if ( eventcode != PAPI_TOT_INS ) + test_fail( __FILE__, __LINE__, + "PAPI_event_name_to_code code value is bogus", retval ); + + if ( ( retval = PAPI_get_event_info( eventcode, &info2 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval ); + + if ( strcmp( info2.symbol, "PAPI_TOT_INS" ) != 0 ) + test_fail( __FILE__, __LINE__, + "PAPI_get_event_info symbol value is bogus", retval ); + if ( strlen( info2.long_descr ) == 0 ) + test_fail( __FILE__, __LINE__, + "PAPI_get_event_info long_descr value is bogus", retval ); + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/disable_component.c b/src/ctests/disable_component.c new file mode 100644 index 0000000..7de795b --- /dev/null +++ b/src/ctests/disable_component.c @@ -0,0 +1,93 @@ +/* + * File: disable_component.c + * Author: Vince Weaver + * vweaver1@eecs.utk.edu + */ + +/* + This tests the functionality of PAPI_disable_component() +*/ + +#include + +#include "papi.h" +#include "papi_test.h" 
+ + +int +main( int argc, char **argv ) +{ + int retval; + const PAPI_component_info_t* cmpinfo; + int numcmp, cid, active_components=0; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* Disable All Compiled-in Components */ + numcmp = PAPI_num_components( ); + + if (!TESTS_QUIET) printf("Compiled-in components:\n"); + for( cid = 0; cid < numcmp; cid++ ) { + cmpinfo = PAPI_get_component_info( cid ); + + if (!TESTS_QUIET) { + printf( "Name: %-23s %s\n", cmpinfo->name, cmpinfo->description); + } + + retval=PAPI_disable_component( cid ); + if (retval!=PAPI_OK) { + test_fail(__FILE__,__LINE__,"Error disabling component",retval); + } + } + + + /* Initialize the library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Try to disable after init, should fail */ + retval=PAPI_disable_component( 0 ); + if (retval==PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_disable_component should fail", + retval ); + } + + if (!TESTS_QUIET) printf("\nAfter init components:\n"); + for( cid = 0; cid < numcmp; cid++ ) { + + cmpinfo = PAPI_get_component_info( cid ); + + if (!TESTS_QUIET) { + printf( "%d %d Name: %-23s %s\n", + cid, + PAPI_get_component_index((char *)cmpinfo->name), + cmpinfo->name ,cmpinfo->description); + + } + + if (cid!=PAPI_get_component_index((char *)cmpinfo->name)) { + test_fail( __FILE__, __LINE__, "PAPI_get_component_index mismatch", + 2 ); + } + + + if (cmpinfo->disabled) { + if (!TESTS_QUIET) { + printf(" \\-> Disabled: %s\n",cmpinfo->disabled_reason); + } + } else { + active_components++; + } + } + + if (active_components>0) { + test_fail( __FILE__, __LINE__, "too many active components", retval ); + } + + test_pass( __FILE__ ); + + return PAPI_OK; +} diff --git a/src/ctests/dmem_info.c b/src/ctests/dmem_info.c new file mode 100644 index 0000000..3004dc2 --- /dev/null +++ b/src/ctests/dmem_info.c @@ -0,0 +1,81 @@ +/* + * 
This file perfoms the following test: dynamic memory info + * The pages used should increase steadily. + * + * Author: Kevin London + * london@cs.utk.edu + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define ALLOCMEM 200000 +static void +dump_memory_info( FILE * output, PAPI_dmem_info_t * d ) +{ + fprintf( output, "\n--------\n" ); + fprintf( output, "Mem Size:\t\t%lld\n", d->size ); + fprintf( output, "Mem Peak Size:\t\t%lld\n", d->peak ); + fprintf( output, "Mem Resident:\t\t%lld\n", d->resident ); + fprintf( output, "Mem High Water Mark:\t%lld\n", d->high_water_mark ); + fprintf( output, "Mem Shared:\t\t%lld\n", d->shared ); + fprintf( output, "Mem Text:\t\t%lld\n", d->text ); + fprintf( output, "Mem Library:\t\t%lld\n", d->library ); + fprintf( output, "Mem Heap:\t\t%lld\n", d->heap ); + fprintf( output, "Mem Locked:\t\t%lld\n", d->locked ); + fprintf( output, "Mem Stack:\t\t%lld\n", d->stack ); + fprintf( output, "Mem Pagesize:\t\t%lld\n", d->pagesize ); + fprintf( output, "Mem Page Table Entries:\t\t%lld\n", d->pte ); + fprintf( output, "--------\n\n" ); +} + +int +main( int argc, char **argv ) +{ + PAPI_dmem_info_t dmem; + long long value[7]; + int retval, i = 0, j = 0; + double *m[7]; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + for ( i = 0; i < 7; i++ ) { + retval = PAPI_get_dmem_info( &dmem ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_get_dmem_info", retval ); +/* dump_memory_info(stdout,&dmem); */ + value[i] = dmem.size; + m[i] = ( double * ) malloc( ALLOCMEM * sizeof ( double ) ); + touch_dummy( m[j], ALLOCMEM ); + } + + if ( !TESTS_QUIET ) { + printf( "Test case: Dynamic Memory Information.\n" ); + dump_memory_info( stdout, &dmem ); + printf + ( 
"------------------------------------------------------------------------\n" ); + for ( i = 0; i < 7; i++ ) + printf( "Malloc additional: %d KB Memory Size in KB: %d\n", + ( int ) ( ( sizeof ( double ) * ALLOCMEM ) / 1024 ), + ( int ) value[i] ); + printf + ( "------------------------------------------------------------------------\n" ); + } + if ( value[6] >= value[5] && value[5] >= value[4] && value[4] >= value[3] + && value[3] >= value[2] && value[2] >= value[1] && + value[1] >= value[0] ) + test_pass( __FILE__ ); + else + test_fail( __FILE__, __LINE__, "Calculating Resident Memory", + ( int ) value[6] ); + + return 1; +} diff --git a/src/ctests/earprofile.c b/src/ctests/earprofile.c new file mode 100644 index 0000000..5a5902a --- /dev/null +++ b/src/ctests/earprofile.c @@ -0,0 +1,202 @@ +/* +* File: profile.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: Dan Terpstra +* terpstra@cs.utk.edu +*/ + +/* This file performs the following test: profiling and program info option call + + - This tests the SVR4 profiling interface of PAPI. These are counted + in the default counting domain and default granularity, depending on + the platform. Usually this is the user domain (PAPI_DOM_USER) and + thread context (PAPI_GRN_THR). 
+ + The Eventset contains: + + PAPI_FP_INS (to profile) + + PAPI_TOT_CYC + + - Set up profile + - Start eventset 1 + - Do both (flops and reads) + - Stop eventset 1 +*/ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" +#include "prof_utils.h" + +#include "do_loops.h" + +#undef THRESHOLD +#define THRESHOLD 1000 + +static void +ear_no_profile( void ) +{ + int retval; + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_l1misses( 10000 ); + + if ( ( retval = PAPI_stop( EventSet, values[0] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + printf( "Test type : \tNo profiling\n" ); + printf( TAB1, event_name, ( values[0] )[0] ); + printf( TAB1, "PAPI_TOT_CYC:", ( values[0] )[1] ); +} + +static int +do_profile( caddr_t start, unsigned long plength, unsigned scale, int thresh, + int bucket ) +{ + int i, retval; + unsigned long blength; + int num_buckets; + const char *profstr[2] = { "PAPI_PROFIL_POSIX", "PAPI_PROFIL_INST_EAR" }; + int profflags[2] = + { PAPI_PROFIL_POSIX, PAPI_PROFIL_POSIX | PAPI_PROFIL_INST_EAR }; + int num_profs; + + do_stuff( ); + + num_profs = sizeof ( profflags ) / sizeof ( int ); + ear_no_profile( ); + blength = prof_size( plength, scale, bucket, &num_buckets ); + prof_alloc( num_profs, blength ); + + for ( i = 0; i < num_profs; i++ ) { + if ( !TESTS_QUIET ) + printf( "Test type : \t%s\n", profstr[i] ); + + if ( ( retval = PAPI_profil( profbuf[i], blength, start, scale, + EventSet, PAPI_event, thresh, + profflags[i] | bucket ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + } + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + if ( ( retval = PAPI_stop( EventSet, values[1] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if ( !TESTS_QUIET ) { + printf( TAB1, event_name, ( values[1] )[0] ); + 
printf( TAB1, "PAPI_TOT_CYC:", ( values[1] )[1] ); + } + if ( ( retval = PAPI_profil( profbuf[i], blength, start, scale, + EventSet, PAPI_event, 0, + profflags[i] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + } + + prof_head( blength, bucket, num_buckets, + "address\t\t\tPOSIX\tINST_DEAR\n" ); + prof_out( start, num_profs, bucket, num_buckets, scale ); + + retval = prof_check( num_profs, bucket, num_buckets ); + + for ( i = 0; i < num_profs; i++ ) { + free( profbuf[i] ); + } + + return retval; +} + + +int +main( int argc, char **argv ) +{ + int num_events, num_tests = 6; + long length; + int retval, retval2; + const PAPI_hw_info_t *hw_info; + const PAPI_exe_info_t *prginfo; + caddr_t start, end; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if ( ( prginfo = PAPI_get_executable_info( ) ) == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); + } + + if ( ( hw_info = PAPI_get_hardware_info( ) ) == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 0 ); + } + + if ( ( strncasecmp( hw_info->model_string, "Itanium", + strlen( "Itanium" ) ) != 0 ) && + ( strncasecmp( hw_info->model_string, "32", + strlen( "32" ) ) != 0 ) ) { + if (!quiet) printf("Itanium only for now.\n"); + test_skip( __FILE__, __LINE__, "Test unsupported", PAPI_ENOIMPL ); + } + +// if ( quiet ) { +// test_skip( __FILE__, __LINE__, +// "Test deprecated in quiet mode for PAPI 3.6", 0 ); +// } + + sprintf( event_name, "DATA_EAR_CACHE_LAT4" ); + if ( ( retval = + PAPI_event_name_to_code( event_name, &PAPI_event ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + if ( ( 
retval = PAPI_add_event( EventSet, PAPI_event ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + if ( ( retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + num_events = 2; + values = allocate_test_space( num_tests, num_events ); + + /* use these lines to profile entire code address space */ + start = prginfo->address_info.text_start; + end = prginfo->address_info.text_end; + length = end - start; + if ( length < 0 ) + test_fail( __FILE__, __LINE__, "Profile length < 0!", length ); + + prof_print_address + ( "Test earprofile: POSIX compatible event address register profiling.\n", + prginfo ); + prof_print_prof_info( start, end, THRESHOLD, event_name ); + retval = + do_profile( start, length, FULL_SCALE, THRESHOLD, + PAPI_PROFIL_BUCKET_16 ); + + retval2 = PAPI_remove_event( EventSet, PAPI_event ); + if ( retval2 == PAPI_OK ) + retval2 = PAPI_remove_event( EventSet, PAPI_TOT_CYC ); + if ( retval2 != PAPI_OK ) + test_fail( __FILE__, __LINE__, "Can't remove events", retval2 ); + + if ( retval ) + test_pass( __FILE__ ); + else + test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); + + return 1; + +} diff --git a/src/ctests/eventname.c b/src/ctests/eventname.c new file mode 100644 index 0000000..b70eaa2 --- /dev/null +++ b/src/ctests/eventname.c @@ -0,0 +1,36 @@ +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval; + int preset; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + retval = PAPI_event_name_to_code( "PAPI_FP_INS", &preset ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); + if ( preset != PAPI_FP_INS ) + test_fail( __FILE__, __LINE__, "Wrong preset returned", retval 
); + + retval = PAPI_event_name_to_code( "PAPI_TOT_CYC", &preset ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); + if ( preset != PAPI_TOT_CYC ) + test_fail( __FILE__, __LINE__, + "*preset returned did not equal PAPI_TOT_CYC", retval ); + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/exec.c b/src/ctests/exec.c new file mode 100644 index 0000000..b8afb27 --- /dev/null +++ b/src/ctests/exec.c @@ -0,0 +1,47 @@ +/* +* File: exec.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ + +/* This file performs the following test: start, stop and timer +functionality for a parent and a forked child. */ + +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( argc > 1 ) && ( strcmp( argv[1], "xxx" ) == 0 ) ) { + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "execed PAPI_library_init", retval ); + } else { + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "main PAPI_library_init", retval ); + + PAPI_shutdown( ); + + if ( execlp( argv[0], argv[0], "xxx", NULL ) == -1 ) + test_fail( __FILE__, __LINE__, "execlp", PAPI_ESYS ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/exec2.c b/src/ctests/exec2.c new file mode 100644 index 0000000..f779804 --- /dev/null +++ b/src/ctests/exec2.c @@ -0,0 +1,45 @@ +/* +* File: exec.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ + +/* This file performs the following test: start, stop and timer +functionality for a parent and a forked child. 
*/ + +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( argc > 1 ) && ( strcmp( argv[1], "xxx" ) == 0 ) ) { + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "execed PAPI_library_init", retval ); + } else { + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "main PAPI_library_init", retval ); + + if ( execlp( argv[0], argv[0], "xxx", NULL ) == -1 ) + test_fail( __FILE__, __LINE__, "execlp", PAPI_ESYS ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/exec_overflow.c b/src/ctests/exec_overflow.c new file mode 100644 index 0000000..b5904eb --- /dev/null +++ b/src/ctests/exec_overflow.c @@ -0,0 +1,185 @@ +/* + * Test PAPI with fork() and exec(). + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" +#include "testcode.h" + +#define MAX_EVENTS 3 + +static int Event[MAX_EVENTS] = { + PAPI_TOT_CYC, + PAPI_FP_INS, + PAPI_FAD_INS, +}; + +static int Threshold[MAX_EVENTS] = { + 8000000, + 4000000, + 4000000, +}; + +static struct timeval start, last; +static long count, total; + +static void +my_handler( int EventSet, void *pc, long long ovec, void *context ) +{ + ( void ) EventSet; + ( void ) pc; + ( void ) ovec; + ( void ) context; + + count++; + total++; +} + + + + + +static void +print_rate( const char *str ) +{ + static int last_count = -1; + struct timeval now; + double st_secs, last_secs; + + gettimeofday( &now, NULL ); + st_secs = ( double ) ( now.tv_sec - start.tv_sec ) + + ( ( double ) ( now.tv_usec - start.tv_usec ) ) / 1000000.0; + last_secs = ( double ) ( now.tv_sec - last.tv_sec ) + + ( ( double ) ( now.tv_usec - last.tv_usec ) ) / 1000000.0; + if ( last_secs <= 
0.001 ) + last_secs = 0.001; + + if (!TESTS_QUIET) { + printf( "[%d] %s, time = %.3f, total = %ld, last = %ld, rate = %.1f/sec\n", + getpid( ), str, st_secs, total, count, + ( ( double ) count ) / last_secs ); + } + + if ( last_count != -1 ) { + if ( count < .1 * last_count ) { + test_fail( __FILE__, __LINE__, "Interrupt rate changed!", 1 ); + exit( 1 ); + } + } + last_count = ( int ) count; + count = 0; + last = now; +} + +static void +run( const char *str, int len ) +{ + int n; + + for ( n = 1; n <= len; n++ ) { + do_cycles( 1 ); + print_rate( str ); + } +} + +int +main( int argc, char **argv ) +{ + int num_events = 1; + const char *name = "unknown"; + int ev,EventSet = PAPI_NULL; + int quiet,retval; + + /* Used to be able to set this via command line */ + num_events=1; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + do_cycles( 1 ); + + /* Zero out the Counters */ + gettimeofday( &start, NULL ); + last = start; + count = 0; + total = 0; + + /* Initialize PAPI */ + retval=PAPI_library_init( PAPI_VER_CURRENT ); + if (retval!=PAPI_VER_CURRENT) { + test_fail( __FILE__, __LINE__, "PAPI_library_init failed", 1 ); + } + + name = argv[0]; + if (!quiet) { + printf( "[%d] %s, num_events = %d\n", getpid(), + name, num_events ); + } + + /* Create eventset */ + if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset failed", 1 ); + } + + /* Add events */ + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_add_event( EventSet, Event[ev] ) != PAPI_OK ) { + if (!quiet) printf("Trouble adding event\n"); + test_skip( __FILE__, __LINE__, "PAPI_add_event failed", 1 ); + } + } + + /* Set overflow */ + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_overflow( EventSet, Event[ev], + Threshold[ev], 0, my_handler ) + != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_overflow failed", 1 ); + } + } + + /* Start measuring */ + if ( PAPI_start( EventSet ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, 
"PAPI_start failed", 1 ); + } + + + + /* Tun a bit */ + run( name, 3 ); + + /* Stop measuring */ + if (!quiet) { + printf("[%d] %s, %s\n", getpid(), name, "stop"); + } + + if ( PAPI_stop( EventSet, NULL ) != PAPI_OK ) { + test_fail( name, __LINE__, "PAPI_stop failed", 1 ); + } + + if (!quiet) { + printf("[%d] %s, %s\n", getpid(), + name, "exec(./child_overflow)"); + } + + /* exec the child_overflow helper program */ + /* we should never return from this */ + if ( access( "./child_overflow", X_OK ) == 0 ) + execl( "./child_overflow", "./child_overflow", + ( quiet ? "TESTS_QUIET" : NULL ), NULL ); + else if ( access( "./ctests/child_overflow", X_OK ) == 0 ) + execl( "./ctests/child_overflow", "./ctests/child_overflow", + ( quiet ? "TESTS_QUIET" : NULL ), NULL ); + + test_fail( name, __LINE__, "exec failed", 1 ); + + return 0; +} diff --git a/src/ctests/exeinfo.c b/src/ctests/exeinfo.c new file mode 100644 index 0000000..ae2e850 --- /dev/null +++ b/src/ctests/exeinfo.c @@ -0,0 +1,69 @@ +/* +* File: exeinfo.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval; + + const PAPI_exe_info_t *exeinfo; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( exeinfo = PAPI_get_executable_info( ) ) == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", retval ); + + if (!TESTS_QUIET) { + printf( "Path+Program: %s\n", exeinfo->fullname ); + printf( "Program: %s\n", exeinfo->address_info.name ); + printf( "Text start: %p, Text end: %p\n", exeinfo->address_info.text_start, + exeinfo->address_info.text_end ); + printf( "Data start: %p, Data end: %p\n", exeinfo->address_info.data_start, + exeinfo->address_info.data_end ); + printf( "Bss start: %p, Bss end: 
%p\n", exeinfo->address_info.bss_start, + exeinfo->address_info.bss_end ); + } + + if ( ( strlen( &(exeinfo->fullname[0]) ) == 0 ) ) + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); + if ( ( strlen( &(exeinfo->address_info.name[0]) ) == 0 ) ) + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); + if ( ( exeinfo->address_info.text_start == 0x0 ) || + ( exeinfo->address_info.text_end == 0x0 ) || + ( exeinfo->address_info.text_start >= + exeinfo->address_info.text_end ) ) + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); + if ( ( exeinfo->address_info.data_start == 0x0 ) || + ( exeinfo->address_info.data_end == 0x0 ) || + ( exeinfo->address_info.data_start >= + exeinfo->address_info.data_end ) ) + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); +/* + if ((exeinfo->address_info.bss_start == 0x0) || (exeinfo->address_info.bss_end == 0x0) || + (exeinfo->address_info.bss_start >= exeinfo->address_info.bss_end)) + test_fail(__FILE__, __LINE__, "PAPI_get_executable_info",1); +*/ + + sleep( 1 ); /* Needed for debugging, so you can ^Z and stop the process, inspect /proc to see if it's right */ + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/failed_events.c b/src/ctests/failed_events.c new file mode 100644 index 0000000..5ee5a97 --- /dev/null +++ b/src/ctests/failed_events.c @@ -0,0 +1,218 @@ +/* + * File: failed_events.c + * Author: Vince Weaver + */ + +/* This test tries adding events that don't exist */ +/* We've had issues where the name resolution code might do weird */ +/* things when passed invalid event names */ + + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + + +#define LARGE_NAME_SIZE 4096 + +char large_name[LARGE_NAME_SIZE]; + +int +main( int argc, char **argv ) +{ + + int i, k, err_count = 0; + int retval; + PAPI_event_info_t info, info1; + const PAPI_component_info_t* cmpinfo; + int numcmp, cid; + int quiet; + + int EventSet = PAPI_NULL; + + /* Set 
quiet variable */ + quiet=tests_quiet( argc, argv ); + + /* Init PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if (!quiet) { + printf("Test adding invalid events.\n"); + } + + /* Create an eventset */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + + /* Simple Event */ + if (!quiet) { + printf("+ Simple invalid event\t"); + } + + retval=PAPI_add_named_event(EventSet,"INVALID_EVENT"); + if (retval==PAPI_OK) { + if (!quiet) { + printf("Unexpectedly opened!\n"); + err_count++; + } + } + else { + if (!quiet) printf("OK\n"); + } + + /* Extra Colons */ + if (!quiet) { + printf("+ Extra colons\t"); + } + + retval=PAPI_add_named_event(EventSet,"INV::::AL:ID:::_E=3V::E=NT"); + if (retval==PAPI_OK) { + if (!quiet) { + printf("Unexpectedly opened!\n"); + err_count++; + } + } + else { + if (!quiet) printf("OK\n"); + } + + + /* Large Invalid Event */ + if (!quiet) { + printf("+ Large invalid event\t"); + } + + memset(large_name,'A',LARGE_NAME_SIZE); + large_name[LARGE_NAME_SIZE-1]=0; + + retval=PAPI_add_named_event(EventSet,large_name); + if (retval==PAPI_OK) { + if (!quiet) { + printf("Unexpectedly opened!\n"); + err_count++; + } + } + else { + if (!quiet) printf("OK\n"); + } + + /* Large Unterminated Invalid Event */ + if (!quiet) { + printf("+ Large unterminated invalid event\t"); + } + + memset(large_name,'A',LARGE_NAME_SIZE); + + retval=PAPI_add_named_event(EventSet,large_name); + if (retval==PAPI_OK) { + if (!quiet) { + printf("Unexpectedly opened!\n"); + err_count++; + } + } + else { + if (!quiet) printf("OK\n"); + } + + + /* Randomly modifying valid events */ + if (!quiet) { + printf("+ Randomly modifying valid events\t"); + } + + numcmp = PAPI_num_components( ); + + /* Loop through all components */ + for( cid = 0; cid < numcmp; cid++ ) { + 
+ + cmpinfo = PAPI_get_component_info( cid ); + if (cmpinfo == NULL) { + test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 2 ); + } + + /* Include disabled components */ + if (cmpinfo->disabled) { + // continue; + } + + + /* For platform independence, always ASK FOR the first event */ + /* Don't just assume it'll be the first numeric value */ + i = 0 | PAPI_NATIVE_MASK; + retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_FIRST, cid ); + + do { + retval = PAPI_get_event_info( i, &info ); + + k = i; + if ( PAPI_enum_cmp_event(&k, PAPI_NTV_ENUM_UMASKS, cid )==PAPI_OK ) { + do { + retval = PAPI_get_event_info( k, &info1 ); + + + + /* Skip perf_raw event as it is hard to error out */ + if (strstr(info1.symbol,"perf_raw")) { + break; + } + +// printf("%s\n",info1.symbol); + + if (strlen(info1.symbol)>5) { + info1.symbol[strlen(info1.symbol)-4]^=0xa5; + + retval=PAPI_add_named_event(EventSet,info1.symbol); + if (retval==PAPI_OK) { + if (!quiet) { + printf("Unexpectedly opened %s!\n", + info1.symbol); + err_count++; + } + } + } + } while ( PAPI_enum_cmp_event( &k, PAPI_NTV_ENUM_UMASKS, cid ) == PAPI_OK ); + } else { + /* Event didn't have any umasks */ + +// printf("%s\n",info1.symbol); + if (strlen(info1.symbol)>5) { + info1.symbol[strlen(info1.symbol)-4]^=0xa5; + + retval=PAPI_add_named_event(EventSet,info1.symbol); + if (retval==PAPI_OK) { + if (!quiet) { + printf("Unexpectedly opened %s!\n", + info1.symbol); + err_count++; + } + } + } + } + + } while ( PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cid ) == PAPI_OK ); + + } + + + + if ( err_count ) { + if (!quiet) { + printf( "%d Invalid events added.\n", err_count ); + } + test_fail( __FILE__, __LINE__, "Invalid events added", 1 ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/first.c b/src/ctests/first.c new file mode 100644 index 0000000..a8984e3 --- /dev/null +++ b/src/ctests/first.c @@ -0,0 +1,234 @@ +/* This file performs the following test: + start, read, stop and again functionality + + 
- It attempts to use the following two counters. + The test is skipped if either event cannot be added. + These are counted in the default counting domain and default granularity, + depending on the platform. + Usually this is the user domain (PAPI_DOM_USER) and + thread context (PAPI_GRN_THR). + + PAPI_TOT_INS + + PAPI_TOT_CYC + - Start counters + - Do flops + - Read counters + - Reset counters + - Do flops + - Read counters + - Do flops + - Read counters + - Do flops + - Stop and read counters + - Read counters +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int retval, num_tests = 5, num_events, tmp; + long long **values; + int EventSet = PAPI_NULL; + char event_name1[]="PAPI_TOT_CYC"; + char event_name2[]="PAPI_TOT_INS"; + char add_event_str[PAPI_MAX_STR_LEN]; + long long min, max; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + /* Init PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* create the eventset */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + retval = PAPI_add_named_event( EventSet, event_name1); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Couldn't add %s\n",event_name1); + test_skip(__FILE__,__LINE__,"Couldn't add PAPI_TOT_CYC",0); + } + + retval = PAPI_add_named_event( EventSet, event_name2); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Couldn't add %s\n",event_name2); + test_skip(__FILE__,__LINE__,"Couldn't add PAPI_TOT_INS",0); + } + + num_events=2; + + sprintf( add_event_str, "PAPI_add_event[%s]", event_name2 ); + + /* Allocate space for results */ + values = allocate_test_space( num_tests, num_events ); + + /* 
Start PAPI */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + /* Benchmark code */ + do_flops( NUM_FLOPS ); + + /* read results 0 */ + retval = PAPI_read( EventSet, values[0] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /* Reset */ + retval = PAPI_reset( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); + } + + /* Benchmark some more */ + do_flops( NUM_FLOPS ); + + /* Read Results 1 */ + retval = PAPI_read( EventSet, values[1] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /* Benchmark some more */ + do_flops( NUM_FLOPS ); + + /* Read results 2 */ + retval = PAPI_read( EventSet, values[2] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /* Benchmark some more */ + do_flops( NUM_FLOPS ); + + /* Read results 3 */ + retval = PAPI_stop( EventSet, values[3] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /* Read results 4 */ + retval = PAPI_read( EventSet, values[4] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /* remove results. We never stop??? 
*/ + PAPI_remove_named_event(EventSet,event_name1); + PAPI_remove_named_event(EventSet,event_name2); + + if ( !quiet ) { + printf( "Test case 1: Non-overlapping start, stop, read.\n" ); + printf( "-----------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-------------------------------------------------------------------------\n" ); + + printf( "Test type : 1 2 3 4 5\n" ); + sprintf( add_event_str, "%s:", event_name2 ); + printf( TAB5, add_event_str, + values[0][1], values[1][1], values[2][1], + values[3][1], values[4][1] ); + printf( TAB5, "PAPI_TOT_CYC:", + values[0][0], values[1][0], values[2][0], + values[3][0], values[4][0] ); + printf( "-------------------------------------------------------------------------\n" ); + + printf( "Verification:\n" ); + printf( "Row 1 Column 1 at least %d\n", NUM_FLOPS ); + printf( "%% difference between %s 1 & 2: %.2f\n", + add_event_str, + 100.0 * ( float ) values[0][1] / + ( float ) values[1][1] ); + printf( "%% difference between %s 1 & 2: %.2f\n", + "PAPI_TOT_CYC", + 100.0 * ( float ) values[0][0] / + ( float ) values[1][0] ); + printf( "Column 1 approximately equals column 2\n" ); + printf( "Column 3 approximately equals 2 * column 2\n" ); + printf( "Column 4 approximately equals 3 * column 2\n" ); + printf( "Column 4 exactly equals column 5\n" ); + } + + /* Validation */ + + /* Check cycles constraints */ + + min = ( long long ) ( ( double ) values[1][0] * .8 ); + max = ( long long ) ( ( double ) values[1][0] * 1.2 ); + + /* Check constraint Col1=Col2 */ + if ( values[0][0] > max || values[0][0] < min ) { + test_fail( __FILE__, __LINE__, "Cycle Col1!=Col2", 1 ); + } + /* Check constraint col3 == 2*col2 */ + 
if ( (values[2][0] > ( 2 * max )) || + (values[2][0] < ( 2 * min )) ) { + test_fail( __FILE__, __LINE__, "Cycle Col3!=2*Col2", 1 ); + } + /* Check constraint col4 == 3*col2 */ + if ( (values[3][0] > ( 3 * max )) || + (values[3][0] < ( 3 * min )) ) { + test_fail( __FILE__, __LINE__, "Cycle Col3!=3*Col2", 1 ); + } + /* Check constraint col4 == col5 */ + if ( values[3][0] != values[4][0] ) { + test_fail( __FILE__, __LINE__, "Cycle Col4!=Col5", 1 ); + } + + + /* Check FLOP constraints */ + + min = ( long long ) ( ( double ) values[1][1] * .9 ); + max = ( long long ) ( ( double ) values[1][1] * 1.1 ); + + /* Check constraint Col1=Col2 */ + if ( values[0][1] > max || values[0][1] < min ) { + test_fail( __FILE__, __LINE__, "FLOP Col1!=Col2", 1 ); + } + /* Check constraint col3 == 2*col2 */ + if ( (values[2][1] > ( 2 * max )) || + (values[2][1] < ( 2 * min )) ) { + test_fail( __FILE__, __LINE__, "FLOP Col3!=2*Col2", 1 ); + } + /* Check constraint col4 == 3*col2 */ + if ( (values[3][1] > ( 3 * max )) || + (values[3][1] < ( 3 * min )) ) { + test_fail( __FILE__, __LINE__, "FLOP Col4!=3*Col2", 1 ); + } + /* Check constraint col4 == col5 */ + if (values[3][1] != values[4][1]) { + test_fail( __FILE__, __LINE__, "FLOP Col4!=Col5", 1 ); + } + /* Check flops are sane */ + if (values[0][1] < ( long long ) NUM_FLOPS ) { + test_fail( __FILE__, __LINE__, "FLOP sanity", 1 ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/flops.c b/src/ctests/flops.c new file mode 100644 index 0000000..b92194c --- /dev/null +++ b/src/ctests/flops.c @@ -0,0 +1,110 @@ +/* + * A simple example for the use of PAPI, the number of flops you should + * get is about INDEX^3 on machines that consider add and multiply one flop + * such as SGI, and 2*(INDEX^3) that don't consider it 1 flop such as INTEL + * -Kevin London + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "testcode.h" +#include "display_error.h" + +int +main( int argc, char **argv ) +{ + 
float real_time, proc_time, mflops; + long long flpins; + int retval; + int fip = 0; + int quiet=0; + long long expected; + double double_result,error; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* Initialize PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Try to use one of the FP events */ + if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { + fip = 1; + } + else if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) { + fip = 2; + } + else { + if ( !quiet ) printf( "PAPI_FP_INS and PAPI_FP_OPS are not defined for this platform.\n" ); + test_skip(__FILE__,__LINE__,"No FP events available",1); + } + + /* Shutdown? */ + /* I guess because it would interfere with the high-level interface? */ + PAPI_shutdown( ); + + /* Initialize the Matrix arrays */ + expected=flops_float_init_matrix(); + + /* Setup PAPI library and begin collecting data from the counters */ + if ( fip == 1 ) { + retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); + if (retval!=PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + } + else { + retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); + if (retval!=PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + } + + /* Matrix-Matrix multiply */ + double_result=flops_float_matrix_matrix_multiply(); + + /* Collect the data into the variables passed in */ + if ( fip == 1 ) { + retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); + if (retval!=PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + } else { + retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); + if (retval!=PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + } + + if (!quiet) printf("result=%lf\n",double_result); + + if ( !quiet ) { + printf( "Real_time: %f Proc_time: %f MFLOPS: %f\n", + real_time, 
proc_time, mflops ); + if ( fip == 1 ) { + printf( "Total flpins: "); + } else { + printf( "Total flpops: "); + } + printf( "%lld\n\n", flpins ); + } + + error=display_error(flpins,flpins,flpins,expected,quiet); + + if ((error > 1.0) || (error<-1.0)) { + if (!quiet) printf("Instruction count off by more than 1%%\n"); + test_fail( __FILE__, __LINE__, "Validation failed", 1 ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/fork.c b/src/ctests/fork.c new file mode 100644 index 0000000..6c48990 --- /dev/null +++ b/src/ctests/fork.c @@ -0,0 +1,55 @@ +/* +* File: fork.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ + +/* This file performs the following test: + + PAPI_library_init() + fork(); + / \ + parent child + wait() PAPI_library_init() + + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + + +int +main( int argc, char **argv ) +{ + int retval; + int status; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "main PAPI_library_init", retval ); + + if ( fork( ) == 0 ) { + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "forked PAPI_library_init", retval ); + exit( 0 ); + } else { + wait( &status ); + if ( WEXITSTATUS( status ) != 0 ) + test_fail( __FILE__, __LINE__, "fork", WEXITSTATUS( status ) ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/fork2.c b/src/ctests/fork2.c new file mode 100644 index 0000000..81a181f --- /dev/null +++ b/src/ctests/fork2.c @@ -0,0 +1,58 @@ +/* +* File: fork2.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ + +/* This file performs the following test: + + PAPI_library_init() + fork(); + / \ + parent child + wait() PAPI_shutdown() + PAPI_library_init() + + */ + +#include +#include +#include +#include + +#include "papi.h" 
+#include "papi_test.h" + + +int +main( int argc, char **argv ) +{ + int retval; + int status; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "main PAPI_library_init", retval ); + + if ( fork( ) == 0 ) { + PAPI_shutdown(); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "forked PAPI_library_init", retval ); + exit( 0 ); + } else { + wait( &status ); + if ( WEXITSTATUS( status ) != 0 ) + test_fail( __FILE__, __LINE__, "fork", WEXITSTATUS( status ) ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/fork_overflow.c b/src/ctests/fork_overflow.c new file mode 100644 index 0000000..68ae2a6 --- /dev/null +++ b/src/ctests/fork_overflow.c @@ -0,0 +1,229 @@ +/* + * Test PAPI with fork() and exec(). + */ + +#include +#include +#include +#include +#include +#include +#include +#include "papi.h" +#include "papi_test.h" + +#define MAX_EVENTS 3 + +static int Event[MAX_EVENTS] = { + PAPI_TOT_CYC, + PAPI_FP_INS, + PAPI_FAD_INS, +}; + +static int Threshold[MAX_EVENTS] = { + 8000000, + 4000000, + 4000000, +}; + +static int num_events = 1; +static int EventSet = PAPI_NULL; +static const char *name = "unknown"; +static struct timeval start, last; +static long count, total; + +static void +my_handler( int EventSet, void *pc, long long ovec, void *context ) +{ + ( void ) EventSet; + ( void ) pc; + ( void ) ovec; + ( void ) context; + + count++; + total++; +} + +static void +zero_count( void ) +{ + gettimeofday( &start, NULL ); + last = start; + count = 0; + total = 0; +} + +static void +print_here( const char *str) { + + if (!TESTS_QUIET) printf("[%d] %s, %s\n", getpid(), name, str); +} + +static void +print_rate( const char *str ) +{ + static int last_count = -1; + struct timeval now; + double st_secs, last_secs; + + gettimeofday( &now, NULL ); + st_secs = 
( double ) ( now.tv_sec - start.tv_sec ) + + ( ( double ) ( now.tv_usec - start.tv_usec ) ) / 1000000.0; + last_secs = ( double ) ( now.tv_sec - last.tv_sec ) + + ( ( double ) ( now.tv_usec - last.tv_usec ) ) / 1000000.0; + if ( last_secs <= 0.001 ) + last_secs = 0.001; + + if (!TESTS_QUIET) { + printf( "[%d] %s, time = %.3f, total = %ld, last = %ld, rate = %.1f/sec\n", + getpid( ), str, st_secs, total, count, + ( ( double ) count ) / last_secs ); + } + + if ( last_count != -1 ) { + if ( count < .1 * last_count ) { + test_fail( name, __LINE__, "Interrupt rate changed!", 1 ); + exit( 1 ); + } + } + last_count = ( int ) count; + count = 0; + last = now; +} + +static void +do_cycles( int program_time ) +{ + struct timeval start, now; + double x, sum; + + gettimeofday( &start, NULL ); + + for ( ;; ) { + sum = 1.0; + for ( x = 1.0; x < 250000.0; x += 1.0 ) + sum += x; + if ( sum < 0.0 ) + printf( "==>> SUM IS NEGATIVE !! <<==\n" ); + + gettimeofday( &now, NULL ); + if ( now.tv_sec >= start.tv_sec + program_time ) + break; + } +} + +static void +my_papi_init( void ) +{ + if ( PAPI_library_init( PAPI_VER_CURRENT ) != PAPI_VER_CURRENT ) + test_fail( name, __LINE__, "PAPI_library_init failed", 1 ); +} + +static void +my_papi_start( void ) +{ + int ev; + + EventSet = PAPI_NULL; + + if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) + test_fail( name, __LINE__, "PAPI_create_eventset failed", 1 ); + + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_add_event( EventSet, Event[ev] ) != PAPI_OK ) { + if (!TESTS_QUIET) printf("Trouble adding event\n"); + test_skip( name, __LINE__, "PAPI_add_event failed", 1 ); + } + } + + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_overflow( EventSet, Event[ev], Threshold[ev], 0, my_handler ) + != PAPI_OK ) { + test_fail( name, __LINE__, "PAPI_overflow failed", 1 ); + } + } + + if ( PAPI_start( EventSet ) != PAPI_OK ) + test_fail( name, __LINE__, "PAPI_start failed", 1 ); +} + +static void +my_papi_stop( void ) +{ + if ( PAPI_stop( 
EventSet, NULL ) != PAPI_OK ) + test_fail( name, __LINE__, "PAPI_stop failed", 1 ); +} + +static void +run( const char *str, int len ) +{ + int n; + + for ( n = 1; n <= len; n++ ) { + do_cycles( 1 ); + print_rate( str ); + } +} + +int +main( int argc, char **argv ) +{ + char buf[100]; + + int quiet,retval; + + /* Used to be able to set this via command line */ + num_events=1; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + do_cycles( 1 ); + + zero_count( ); + + retval=PAPI_library_init( PAPI_VER_CURRENT ); + if (retval!=PAPI_VER_CURRENT) { + test_fail( name, __LINE__, "PAPI_library_init failed", 1 ); + } + + name = argv[0]; + if (!quiet) printf( "[%d] %s, num_events = %d\n", getpid( ), name, num_events ); + sprintf( buf, "%d", num_events ); + my_papi_start( ); + run( name, 3 ); + + print_here( "fork" ); + { + int ret = fork( ); + if ( ret < 0 ) + test_fail( name, __LINE__, "fork failed", 1 ); + if ( ret == 0 ) { + /* + * Child process. + */ + zero_count( ); + my_papi_init( ); + my_papi_start( ); + run( "child", 5 ); + print_here( "stop" ); + my_papi_stop( ); + sleep( 3 ); + print_here( "end" ); + exit( 0 ); + } + run( "main", 14 ); + my_papi_stop( ); + { + int status; + wait( &status ); + print_here( "end" ); + if ( WEXITSTATUS( status ) != 0 ) + test_fail( name, __LINE__, "child failed", 1 ); + else + test_pass( name); + } + } + + return 0; +} diff --git a/src/ctests/forkexec.c b/src/ctests/forkexec.c new file mode 100644 index 0000000..f23d8a2 --- /dev/null +++ b/src/ctests/forkexec.c @@ -0,0 +1,76 @@ +/* +* File: forkexec.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ + +/* This file performs the following test: + + PAPI_library_init(); + PAPI_shutdown() + fork() + / \ + parent child + wait() execlp() + PAPI_library_init() + + */ + +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + + +int +main( int argc, char **argv ) +{ + int retval; + int quiet; + int status; + + /* Set 
TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if ( ( argc > 1 ) && ( strcmp( argv[1], "xxx" ) == 0 ) ) { + /* In child */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "execed PAPI_library_init", retval ); + } + return 0; + } else { + if (!quiet) printf("Test fork/exec/PAPI_init\n"); + /* Init PAPI */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "main PAPI_library_init", retval ); + } + /* Then shut down ? */ + PAPI_shutdown( ); + + if ( fork( ) == 0 ) { + /* In child, exec ourself with "xxx" command line */ + if ( execlp( argv[0], argv[0], "xxx", NULL ) == -1 ) { + test_fail( __FILE__, __LINE__, "execlp", PAPI_ESYS ); + } + } else { + /* In parent, wait for child to finish */ + wait( &status ); + if ( WEXITSTATUS( status ) != 0 ) { + test_fail( __FILE__, __LINE__, "fork", WEXITSTATUS( status ) ); + } + } + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/forkexec2.c b/src/ctests/forkexec2.c new file mode 100644 index 0000000..96b88e1 --- /dev/null +++ b/src/ctests/forkexec2.c @@ -0,0 +1,85 @@ +/* +* File: forkexec2.c +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file performs the following test: + + PAPI_library_init() + PAPI_shutdown() + fork() + / \ + parent child + wait() PAPI_library_init() + PAPI_shutdown() + execlp() + PAPI_library_init() + + */ + +#include +#include +#include +#include + +#include + +#include "papi.h" +#include "papi_test.h" + + +int +main( int argc, char **argv ) +{ + int retval; + int status; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if ( ( argc > 1 ) && ( strcmp( argv[1], "xxx" ) == 0 ) ) { + /* In child */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "execed PAPI_library_init", retval ); + } + return 0; + } else { 
+ if (!quiet) printf("Testing fork/PAPI_init/PAPI_shudtdown/exec/PAPI_init\n"); + + /* Init PAPI */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "main PAPI_library_init", retval ); + } + PAPI_shutdown( ); + + if ( fork( ) == 0 ) { + /* Init PAPI in child before exec */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "forked PAPI_library_init", + retval ); + } + + PAPI_shutdown( ); + + if ( execlp( argv[0], argv[0], "xxx", NULL ) == -1 ) { + test_fail( __FILE__, __LINE__, "execlp", PAPI_ESYS ); + } + } else { + /* In parent, wait for child to finish */ + wait( &status ); + if ( WEXITSTATUS( status ) != 0 ) { + test_fail( __FILE__, __LINE__, "fork", WEXITSTATUS( status ) ); + } + } + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/forkexec3.c b/src/ctests/forkexec3.c new file mode 100644 index 0000000..676b82a --- /dev/null +++ b/src/ctests/forkexec3.c @@ -0,0 +1,80 @@ +/* +* File: forkexec3.c +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file performs the following test: + + PAPI_library_init() + PAPI_shutdown() + fork() + / \ + parent child + wait() PAPI_library_init() + **unlike forkexec2, no shutdown here** + execlp() + PAPI_library_init() + + */ + +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + + +int +main( int argc, char **argv ) +{ + int retval; + int status; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if ( ( argc > 1 ) && ( strcmp( argv[1], "xxx" ) == 0 ) ) { + /* In child */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "execed PAPI_library_init", retval ); + } + return 0; + } else { + if (!quiet) printf("Testing Init/Shutdown/fork/init/exec/init\n"); + /* Init PAPI */ + retval = PAPI_library_init( 
PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "main PAPI_library_init", retval ); + + PAPI_shutdown( ); + + if ( fork( ) == 0 ) { + /* In child */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "forked PAPI_library_init", + retval ); + } + + if ( execlp( argv[0], argv[0], "xxx", NULL ) == -1 ) { + test_fail( __FILE__, __LINE__, "execlp", PAPI_ESYS ); + } + } else { + wait( &status ); + if ( WEXITSTATUS( status ) != 0 ) { + test_fail( __FILE__, __LINE__, "fork", WEXITSTATUS( status ) ); + } + } + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/forkexec4.c b/src/ctests/forkexec4.c new file mode 100644 index 0000000..95b4431 --- /dev/null +++ b/src/ctests/forkexec4.c @@ -0,0 +1,78 @@ +/* +* File: forkexec4.c +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file performs the following test: + + PAPI_library_init() + ** unlike forkexec2/forkexec3, no shutdown here ** + fork() + / \ + parent child + wait() PAPI_library_init() + execlp() + PAPI_library_init() + + */ + +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + + +int +main( int argc, char **argv ) +{ + int retval; + int status; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if ( ( argc > 1 ) && ( strcmp( argv[1], "xxx" ) == 0 ) ) { + /* In child */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "execed PAPI_library_init", retval ); + } + return 0; + } else { + if (!quiet) printf("Testing Init/fork/exec/Init\n"); + + /* Init PAPI */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "main PAPI_library_init", retval ); + } + + if ( fork( ) == 0 ) { + /* In Child */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != 
PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "forked PAPI_library_init", + retval ); + } + + if ( execlp( argv[0], argv[0], "xxx", NULL ) == -1 ) { + test_fail( __FILE__, __LINE__, "execlp", PAPI_ESYS ); + } + } else { + /* Waiting in parent */ + wait( &status ); + if ( WEXITSTATUS( status ) != 0 ) { + test_fail( __FILE__, __LINE__, "fork", WEXITSTATUS( status ) ); + } } + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/get_event_component.c b/src/ctests/get_event_component.c new file mode 100644 index 0000000..dd0ac09 --- /dev/null +++ b/src/ctests/get_event_component.c @@ -0,0 +1,87 @@ +/* + * File: get_event_component.c + * Author: Vince Weaver + * vweaver1@eecs.utk.edu + */ + +/* + This test makes sure PAPI_get_event_component() works +*/ + +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + + int i; + int retval; + PAPI_event_info_t info; + int numcmp, cid, our_cid; + const PAPI_component_info_t* cmpinfo; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* Init PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + numcmp = PAPI_num_components( ); + + + /* Loop through all components */ + for( cid = 0; cid < numcmp; cid++ ) + { + cmpinfo = PAPI_get_component_info( cid ); + + if (cmpinfo == NULL) + { + test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 2 ); + } + + if (cmpinfo->disabled && !TESTS_QUIET) { + printf( "Name: %-23s %s\n", cmpinfo->name ,cmpinfo->description); + printf(" \\-> Disabled: %s\n",cmpinfo->disabled_reason); + continue; + } + + + i = 0 | PAPI_NATIVE_MASK; + retval = PAPI_enum_cmp_event( &i, PAPI_ENUM_FIRST, cid ); + if (retval!=PAPI_OK) continue; + + do { + if (PAPI_get_event_info( i, &info ) != PAPI_OK) { + if (!TESTS_QUIET) { + printf("Getting information about event: %#x failed\n", i); + } + continue; + } + 
our_cid=PAPI_get_event_component(i); + + if (our_cid!=cid) { + if (!TESTS_QUIET) { + printf("%d %d %s\n",cid,our_cid,info.symbol); + } + test_fail( __FILE__, __LINE__, "component mismatch", 1 ); + } + + if (!TESTS_QUIET) { + printf("%d %d %s\n",cid,our_cid,info.symbol); + } + + + } while ( PAPI_enum_cmp_event( &i, PAPI_ENUM_EVENTS, cid ) == PAPI_OK ); + + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/high-level.c b/src/ctests/high-level.c new file mode 100644 index 0000000..a950776 --- /dev/null +++ b/src/ctests/high-level.c @@ -0,0 +1,127 @@ +/* These examples show the essentials in using the PAPI high-level + interface. The program consists of 4 work-loops. The programmer + intends to count the total events for loop 1, 2 and 4, but not + include the number of events in loop 3. + + To accomplish this PAPI_read_counters is used as a counter + reset function, while PAPI_accum_counters is used to sum + the contributions of loops 2 and 4 into the total count. +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define NUM_EVENTS 2 + +int +main( int argc, char **argv ) +{ + int retval; + long long values[NUM_EVENTS], dummyvalues[NUM_EVENTS]; + long long myvalues[NUM_EVENTS]; + int Events[NUM_EVENTS]; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* query and set up the right events to monitor */ + if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { + Events[0] = PAPI_FP_INS; + } else { + Events[0] = PAPI_TOT_INS; + } + Events[1] = PAPI_TOT_CYC; + + retval = PAPI_start_counters( ( int * ) Events, NUM_EVENTS ); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Cannot start events\n"); + test_skip( __FILE__, __LINE__, "PAPI_start_counters", retval ); + } + + /* Loop 1 */ + do_flops( NUM_FLOPS ); + + retval 
= PAPI_read_counters( values, NUM_EVENTS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); + + if ( !quiet ) + printf( TWO12, values[0], values[1], "(Counters continuing...)\n" ); + + myvalues[0] = values[0]; + myvalues[1] = values[1]; + /* Loop 2 */ + do_flops( NUM_FLOPS ); + + retval = PAPI_accum_counters( values, NUM_EVENTS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); + + if ( !quiet ) + printf( TWO12, values[0], values[1], "(Counters being ''held'')\n" ); + + /* Loop 3 */ + /* Simulated code that should not be counted */ + do_flops( NUM_FLOPS ); + + retval = PAPI_read_counters( dummyvalues, NUM_EVENTS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); + if ( !quiet ) + printf( TWO12, dummyvalues[0], dummyvalues[1], "(Skipped counts)\n" ); + + if ( !quiet ) + printf( "%12s %12s (''Continuing'' counting)\n", "xxx", "xxx" ); + /* Loop 4 */ + do_flops( NUM_FLOPS ); + + retval = PAPI_accum_counters( values, NUM_EVENTS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_accum_counters", retval ); + + if ( !quiet ) + printf( TWO12, values[0], values[1], "" ); + + if ( !quiet ) { + printf( "----------------------------------\n" ); + printf( "Verification: The last line in each experiment should be\n" ); + printf( "approximately three times the value of the first line.\n" ); + } + + { + long long min, max; + min = ( long long ) ( ( double ) myvalues[0] * .9 ); + max = ( long long ) ( ( double ) myvalues[0] * 1.1 ); + if ( values[0] < ( 3 * min ) || values[0] > ( 3 * max ) ) { + retval = 1; + if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_FP_INS", 1 ); + } else { + test_fail( __FILE__, __LINE__, "PAPI_TOT_INS", 1 ); + } + } + min = ( long long ) ( ( double ) myvalues[1] * .9 ); + max = ( long long ) ( ( double ) myvalues[1] * 1.1 ); + if ( values[1] < ( 3 * min ) || 
values[1] > ( 3 * max ) ) { + retval = 1; + test_fail( __FILE__, __LINE__, "PAPI_TOT_CYC", 1 ); + } + } + /* The values array is not allocated through allocate_test_space + * so we need to pass NULL here */ + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/high-level2.c b/src/ctests/high-level2.c new file mode 100644 index 0000000..5647cd3 --- /dev/null +++ b/src/ctests/high-level2.c @@ -0,0 +1,141 @@ +/* This test checks that mixing PAPI_flips and the other high + * level calls does the right thing. + * by Kevin London + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval; + int Events, fip = 0; + long long values, flpins; + float real_time, proc_time, mflops; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* Initialize PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* First see if we have PAPI_FP_INS event */ + if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { + fip = 1; + Events = PAPI_FP_INS; + /* If not, look for PAPI_FP_OPS */ + } else if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) { + fip = 2; + Events = PAPI_FP_OPS; + } else { + if ( !quiet ) { + printf( "PAPI_FP_INS and PAPI_FP_OPS are not defined for this platform.\n" ); + } + test_skip( __FILE__, __LINE__, "FLOPS event not supported", 1); + } + + /* Start counting flips or flops event */ + if ( fip == 1 ) { + retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + } else { + retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + } + + /* If we are flipsing/flopsing, then start_counters should fail */ + retval = PAPI_start_counters( &Events, 1 ); + 
if (retval == PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); + } + + /* Try flipsing/flopsing again, should work */ + if ( fip == 1 ) { + retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + } else { + retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + } + + /* If we are flipsing/flopsing, then read should fail */ + if ( ( retval = PAPI_read_counters( &values, 1 ) ) == PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); + } + + /* Stop should still work then */ + if ( ( retval = PAPI_stop_counters( &values, 1 ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); + } + + /* Restart flips/flops */ + if ( fip == 1 ) { + retval = PAPI_flips( &real_time, &proc_time, &flpins, &mflops ); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + } else { + retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + } + + /* Try reading again, should fail */ + if ( ( retval = PAPI_read_counters( &values, 1 ) ) == PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); + } + + /* Stop */ + if ( ( retval = PAPI_stop_counters( &values, 1 ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); + } + + /* Now try starting, should work */ + if ( ( retval = PAPI_start_counters( &Events, 1 ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); + } + + /* Read should work too */ + if ( ( retval = PAPI_read_counters( &values, 1 ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); + } + + /* flipsing/flopsing should fail */ + if ( fip == 1 ) { + retval = PAPI_flips( 
&real_time, &proc_time, &flpins, &mflops ); + if (retval == PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_flips", retval ); + } + } else { + retval = PAPI_flops( &real_time, &proc_time, &flpins, &mflops ); + if (retval == PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_flops", retval ); + } + } + + /* Stop everything */ + if ( ( retval = PAPI_stop_counters( &values, 1 ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/hl_rates.c b/src/ctests/hl_rates.c new file mode 100644 index 0000000..f3ead9b --- /dev/null +++ b/src/ctests/hl_rates.c @@ -0,0 +1,339 @@ +/* file hl_rates.c + * This test exercises the four PAPI High Level rate calls: + * PAPI_flops, PAPI_flips, PAPI_ipc, and PAPI_epc + * flops and flips report cumulative real and process time since the first call, + * and either floating point operations or instructions since the first call. + * Also reported is incremental flop or flip rate since the last call. + * + * PAPI_ipc reports the same cumulative information, substituting + * total instructions for flops or flips, and also reports + * instructions per (process) cycle as a measure of execution efficiency. + * + * PAPI_epc is new in PAPI 5.2. It reports the same information as PAPI_IPC, + * but for an arbitrary event instead of total cycles. It also reports + * incremental core and (where available) reference cycles to allow the + * computation of effective clock rates in the presence of clock scaling + * like speed step or turbo-boost. + * + * This test computes a 1000 x 1000 matrix multiply for orders of indexing for + * each of the four rate calls. It also accepts a command line parameter + * for the event to be measured for PAPI_epc. If not provided, PAPI_TOT_INS + * is measured. 
+ */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "testcode.h" + +int +main( int argc, char **argv ) +{ + int retval, event = 0; + float rtime, ptime, mflips, mflops, ipc, epc; + long long flpins, flpops, ins, ref, core, evt; + + double mflips_classic,mflips_swapped; + double mflops_classic,mflops_swapped; + double ipc_classic,ipc_swapped; + double epc_classic,epc_swapped; + + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* Initialize the test matrix */ + flops_float_init_matrix(); + + /************************/ + /* FLIPS */ + /************************/ + + if (!quiet) { + printf( "\n----------------------------------\n" ); + printf( "PAPI_flips\n"); + } + + /* Run flips at start */ + retval=PAPI_flips(&rtime, &ptime, &flpins, &mflips); + if (retval!=PAPI_OK) { + if (!quiet) PAPI_perror( "PAPI_flips" ); + if (retval==PAPI_ENOEVNT) { + test_skip(__FILE__,__LINE__,"Could not add event",0); + } + } + + if (!quiet) { + printf( "\nStart\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "FP Instructions: %lld\n", flpins); + printf( "MFLIPS %f\n", mflips); + } + + /* Be sure we are all zero at beginning */ + if ((rtime!=0) || (ptime!=0) || (flpins!=0) || (mflips!=0)) { + test_fail(__FILE__,__LINE__,"Not initialized to zero",0); + } + + // Flips classic + flops_float_matrix_matrix_multiply(); + if ( PAPI_flips(&rtime, &ptime, &flpins, &mflips) != PAPI_OK ) + PAPI_perror( "PAPI_flips" ); + + if (!quiet) { + printf( "\nClassic\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "FP Instructions: %lld\n", flpins); + printf( "MFLIPS %f\n", mflips); + } + mflips_classic=mflips; + + // Flips swapped + flops_float_swapped_matrix_matrix_multiply(); + if ( PAPI_flips(&rtime, &ptime, &flpins, &mflips) != PAPI_OK ) + PAPI_perror( "PAPI_flips" ); + + if (!quiet) { + printf( "\nSwapped\n"); + printf( "real time: %f\n", rtime); + 
printf( "process time: %f\n", ptime); + printf( "FP Instructions: %lld\n", flpins); + printf( "MFLIPS %f\n", mflips); + } + mflips_swapped=mflips; + + // turn off flips + if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { + PAPI_perror( "PAPI_stop_counters" ); + } + + + /************************/ + /* FLOPS */ + /************************/ + + if (!quiet) { + printf( "\n----------------------------------\n" ); + printf( "PAPI_flops\n"); + } + + // Start flops + if ( PAPI_flops(&rtime, &ptime, &flpops, &mflops) != PAPI_OK ) { + PAPI_perror( "PAPI_flops" ); + } + + if (!quiet) { + printf( "\nStart\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "FP Operations: %lld\n", flpops); + printf( "MFLOPS %f\n", mflops); + } + + /* Be sure we are all zero at beginning */ + if ((rtime!=0) || (ptime!=0) || (flpops!=0) || (mflops!=0)) { + test_fail(__FILE__,__LINE__,"Not initialized to zero",0); + } + + // Classic flops + flops_float_matrix_matrix_multiply(); + if ( PAPI_flops(&rtime, &ptime, &flpops, &mflops) != PAPI_OK ) + PAPI_perror( "PAPI_flops" ); + + if (!quiet) { + printf( "\nClassic\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "FP Operations: %lld\n", flpops); + printf( "MFLOPS %f\n", mflops); + } + mflops_classic=mflops; + + // Swapped flops + flops_float_swapped_matrix_matrix_multiply(); + if ( PAPI_flops(&rtime, &ptime, &flpops, &mflops) != PAPI_OK ) + PAPI_perror( "PAPI_flops" ); + + if (!quiet) { + printf( "\nSwapped\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "FP Operations: %lld\n", flpops); + printf( "MFLOPS %f\n", mflops); + } + mflops_swapped=mflops; + + // turn off flops + if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { + PAPI_perror( "PAPI_stop_counters" ); + } + + + /************************/ + /* IPC */ + /************************/ + + if (!quiet) { + printf( "\n----------------------------------\n" ); + printf( 
"PAPI_ipc\n"); + } + + // Start ipc + if ( PAPI_ipc(&rtime, &ptime, &ins, &ipc) != PAPI_OK ) + PAPI_perror( "PAPI_ipc" ); + + if (!quiet) { + printf( "\nStart\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "Instructions: %lld\n", ins); + printf( "IPC %f\n", ipc); + } + + /* Be sure we are all zero at beginning */ + if ((rtime!=0) || (ptime!=0) || (ins!=0) || (ipc!=0)) { + test_fail(__FILE__,__LINE__,"Not initialized to zero",0); + } + + // Classic ipc + flops_float_matrix_matrix_multiply(); + if ( PAPI_ipc(&rtime, &ptime, &ins, &ipc) != PAPI_OK ) + PAPI_perror( "PAPI_ipc" ); + + if (!quiet) { + printf( "\nClassic\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "Instructions: %lld\n", ins); + printf( "IPC %f\n", ipc); + } + ipc_classic=ipc; + + // Swapped ipc + flops_float_swapped_matrix_matrix_multiply(); + if ( PAPI_ipc(&rtime, &ptime, &ins, &ipc) != PAPI_OK ) + PAPI_perror( "PAPI_ipc" ); + + if (!quiet) { + printf( "\nSwapped\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "Instructions: %lld\n", ins); + printf( "IPC %f\n", ipc); + } + ipc_swapped=ipc; + + // turn off ipc + if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { + PAPI_perror( "PAPI_stop_counters" ); + } + + + /************************/ + /* EPC */ + /************************/ + + if (!quiet) { + printf( "\n----------------------------------\n" ); + printf( "PAPI_epc\n"); + } + + /* This unfortunately conflicts a bit with the TESTS_QUIET */ + /* command line paramater nonsense. 
*/ + + if ( argc >= 2) { + retval = PAPI_event_name_to_code( argv[1], &event ); + if (retval != PAPI_OK) { + if (!quiet) printf("Can't find %s; Using PAPI_TOT_INS\n", argv[1]); + event = PAPI_TOT_INS; + } else { + if (!quiet) printf("Using event %s\n", argv[1]); + } + } + + // Start epc + if ( PAPI_epc(event, &rtime, &ptime, &ref, &core, &evt, &epc) != PAPI_OK ) + PAPI_perror( "PAPI_epc" ); + + if (!quiet) { + printf( "\nStart\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "Ref Cycles: %lld\n", ref); + printf( "Core Cycles: %lld\n", core); + printf( "Events: %lld\n", evt); + printf( "EPC: %f\n", epc); + } + + /* Be sure we are all zero at beginning */ + if ((rtime!=0) || (ptime!=0) || (ref!=0) || (core!=0) + || (evt!=0) || (epc!=0)) { + test_fail(__FILE__,__LINE__,"Not initialized to zero",0); + } + + // Classic epc + flops_float_matrix_matrix_multiply(); + if ( PAPI_epc(event, &rtime, &ptime, &ref, &core, &evt, &epc) != PAPI_OK ) + PAPI_perror( "PAPI_epc" ); + + if (!quiet) { + printf( "\nClassic\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "Ref Cycles: %lld\n", ref); + printf( "Core Cycles: %lld\n", core); + printf( "Events: %lld\n", evt); + printf( "EPC: %f\n", epc); + } + epc_classic=epc; + + // Swapped epc + flops_float_swapped_matrix_matrix_multiply(); + if ( PAPI_epc(event, &rtime, &ptime, &ref, &core, &evt, &epc) != PAPI_OK ) { + PAPI_perror( "PAPI_epc" ); + } + + if (!quiet) { + printf( "\nSwapped\n"); + printf( "real time: %f\n", rtime); + printf( "process time: %f\n", ptime); + printf( "Ref Cycles: %lld\n", ref); + printf( "Core Cycles: %lld\n", core); + printf( "Events: %lld\n", evt); + printf( "EPC: %f\n", epc); + } + epc_swapped=epc; + + // turn off epc + if ( PAPI_stop_counters(NULL, 0) != PAPI_OK ) { + PAPI_perror( "PAPI_stop_counters" ); + } + + if (!quiet) { + printf( "\n----------------------------------\n" ); + } + + /* Validate */ + if (mflips_swapped 
+#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval, i, j; + const PAPI_hw_info_t *hwinfo = NULL; + const PAPI_mh_info_t *mh; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if (!TESTS_QUIET) { + printf( "Test case hwinfo.c: " + "Check output of PAPI_get_hardware_info.\n"); + } + hwinfo=PAPI_get_hardware_info(); + if ( hwinfo == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + } + + mh = &hwinfo->mem_hierarchy; + + validate_string( hwinfo->vendor_string, "vendor_string" ); + validate_string( hwinfo->model_string, "model_string" ); + + if ( hwinfo->vendor == PAPI_VENDOR_UNKNOWN ) + test_fail( __FILE__, __LINE__, "Vendor unknown", 0 ); + + if ( hwinfo->cpu_max_mhz == 0.0 ) + test_fail( __FILE__, __LINE__, "Mhz unknown", 0 ); + + if ( hwinfo->ncpu < 1 ) + test_fail( __FILE__, __LINE__, "ncpu < 1", 0 ); + + if ( hwinfo->totalcpus < 1 ) + test_fail( __FILE__, __LINE__, "totalcpus < 1", 0 ); + + /* if ( PAPI_get_opt( PAPI_MAX_HWCTRS, NULL ) < 1 ) + test_fail( __FILE__, __LINE__, "get_opt(MAX_HWCTRS) < 1", 0 ); + + if ( PAPI_get_opt( PAPI_MAX_MPX_CTRS, NULL ) < 1 ) + test_fail( __FILE__, __LINE__, "get_opt(MAX_MPX_CTRS) < 1", 0 );*/ + + if ( mh->levels < 0 ) + test_fail( __FILE__, __LINE__, "max mh level < 0", 0 ); + + if (!TESTS_QUIET) { + printf( "Max level of TLB or Cache: %d\n", mh->levels ); + for ( i = 0; i < mh->levels; i++ ) { + for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) { + const PAPI_mh_cache_info_t *c = &mh->level[i].cache[j]; + const PAPI_mh_tlb_info_t *t = &mh->level[i].tlb[j]; + printf( "Level %d, TLB %d: %d, %d, %d\n", i, j, t->type, + t->num_entries, t->associativity ); + printf( "Level %d, Cache %d: %d, %d, %d, %d, %d\n", i, j, c->type, + c->size, c->line_size, c->num_lines, c->associativity ); + } + } + } 
+ + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/inherit.c b/src/ctests/inherit.c new file mode 100644 index 0000000..df28dac --- /dev/null +++ b/src/ctests/inherit.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include + +#if defined(_AIX) || defined (__FreeBSD__) || defined (__APPLE__) +#include /* ARGH! */ +#else +#include +#endif + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int retval, pid, status, EventSet = PAPI_NULL; + long long int values[] = {0,0}; + PAPI_option_t opt; + char event_name[BUFSIZ]; + int quiet; + + quiet=tests_quiet( argc, argv ); + + if ( ( retval = PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + if ( ( retval = PAPI_assign_eventset_component( EventSet, 0 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", retval ); + + memset( &opt, 0x0, sizeof ( PAPI_option_t ) ); + opt.inherit.inherit = PAPI_INHERIT_ALL; + opt.inherit.eventset = EventSet; + if ( ( retval = PAPI_set_opt( PAPI_INHERIT, &opt ) ) != PAPI_OK ) { + if ( retval == PAPI_ECMP) { + test_skip( __FILE__, __LINE__, "Inherit not supported by current component.\n", retval ); + } else { + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + } + } + + if ( ( retval = PAPI_query_event( PAPI_TOT_CYC ) ) != PAPI_OK ) { + if (!quiet) printf("Trouble finding PAPI_TOT_CYC\n"); + test_skip( __FILE__, __LINE__, "PAPI_query_event", retval ); + } + + if ( ( retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + strcpy(event_name,"PAPI_FP_INS"); + retval = PAPI_add_named_event( EventSet, event_name ); + if (retval == PAPI_ENOEVNT) { + strcpy(event_name,"PAPI_TOT_INS"); + retval = 
PAPI_add_named_event( EventSet, event_name ); + } + + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + pid = fork( ); + if ( pid == 0 ) { + do_flops( NUM_FLOPS ); + exit( 0 ); + } + if ( waitpid( pid, &status, 0 ) == -1 ) { + perror( "waitpid()" ); + exit( 1 ); + } + + if ( ( retval = PAPI_stop( EventSet, values ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if (!quiet) { + printf( "Test case inherit: parent starts, child works, parent stops.\n" ); + printf( "------------------------------------------------------------\n" ); + + printf( "Test run : \t1\n" ); + printf( "%s : \t%lld\n", event_name, values[1] ); + printf( "PAPI_TOT_CYC: \t%lld\n", values[0] ); + printf( "------------------------------------------------------------\n" ); + + printf( "Verification:\n" ); + printf( "Row 1 at least %d\n", NUM_FLOPS ); + printf( "Row 2 greater than row 1\n"); + } + + if ( values[1] < 100 ) { + test_fail( __FILE__, __LINE__, event_name, 1 ); + } + + if ( (!strcmp(event_name,"PAPI_FP_INS")) && (values[1] < NUM_FLOPS)) { + test_fail( __FILE__, __LINE__, "PAPI_FP_INS", 1 ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/ipc.c b/src/ctests/ipc.c new file mode 100644 index 0000000..07ecb46 --- /dev/null +++ b/src/ctests/ipc.c @@ -0,0 +1,78 @@ +/* + * A simple example for the use of PAPI, using PAPI_ipc + * -Kevin London + */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + + +#define INDEX 500 + +int +main( int argc, char **argv ) +{ + extern void dummy( void * ); + float matrixa[INDEX][INDEX], matrixb[INDEX][INDEX], mresult[INDEX][INDEX]; + float real_time, proc_time, ipc; + long long ins; + int retval; + int i, j, k; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* Initialize the Matrix arrays */ + 
for( i = 0; i < INDEX; i++ ) { + for( j= 0; j < INDEX; j++ ) { + mresult[i][j] = 0.0; + matrixa[i][j] = matrixb[i][j] = ( float ) rand( ) * ( float ) 1.1; + } + } + + /* Setup PAPI library and begin collecting data from the counters */ + retval = PAPI_ipc( &real_time, &proc_time, &ins, &ipc ); + if (retval < PAPI_OK ) { + if (!quiet) printf("Trouble starting IPC\n"); + test_skip( __FILE__, __LINE__, "PAPI_ipc", retval ); + } + + /* Matrix-Matrix multiply */ + for ( i = 0; i < INDEX; i++ ) + for ( j = 0; j < INDEX; j++ ) + for ( k = 0; k < INDEX; k++ ) + mresult[i][j] = mresult[i][j] + matrixa[i][k] * matrixb[k][j]; + + /* Collect the data into the variables passed in */ + if ( ( retval = PAPI_ipc( &real_time, &proc_time, &ins, &ipc ) ) < PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_ipc", retval ); + dummy( ( void * ) mresult ); + + if ( !quiet ) { + printf( "Real_time: %f Proc_time: %f Total ins: ", real_time, + proc_time ); + printf( LLDFMT, ins ); + printf( " IPC: %f\n", ipc ); + } + + /* This should not happen unless the optimizer */ + /* gets too good */ + if (ins < INDEX*INDEX) { + test_fail( __FILE__, __LINE__, "Instruction count too low.", + 5 ); + } + /* Something is broken, or else you have a really */ + /* slow processor */ + if (ipc<0.01 ) { + test_fail( __FILE__, __LINE__, "IPC equals zero.", + 5 ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/johnmay2.c b/src/ctests/johnmay2.c new file mode 100644 index 0000000..bb827b5 --- /dev/null +++ b/src/ctests/johnmay2.c @@ -0,0 +1,109 @@ +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int FPEventSet = PAPI_NULL; + long long values; + int PAPI_event, retval; + char event_name[PAPI_MAX_STR_LEN]; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* init PAPI */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( 
__FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Use PAPI_FP_INS if available, otherwise use PAPI_TOT_INS */ + if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) + PAPI_event = PAPI_FP_INS; + else + PAPI_event = PAPI_TOT_INS; + + retval = PAPI_query_event( PAPI_event ); + if (retval != PAPI_OK ) { + if (!quiet) printf("Trouble querying event\n"); + test_skip( __FILE__, __LINE__, "PAPI_query_event", retval ); + } + + /* Create the eventset */ + if ( ( retval = PAPI_create_eventset( &FPEventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + /* Add event to the eventset */ + if ( ( retval = PAPI_add_event( FPEventSet, PAPI_event ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + /* Start counting */ + if ( ( retval = PAPI_start( FPEventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + /* Try to cleanup while running */ + /* Fail test if this isn't refused */ + if ( ( retval = PAPI_cleanup_eventset( FPEventSet ) ) != PAPI_EISRUN ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + /* Try to destroy eventset while running */ + /* Fail test if this isn't refused */ + if ( ( retval = PAPI_destroy_eventset( &FPEventSet ) ) != PAPI_EISRUN ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + /* do some work */ + do_flops( 1000000 ); + + /* stop counting */ + if ( ( retval = PAPI_stop( FPEventSet, &values ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + /* Try to destroy eventset without cleaning first */ + /* Fail test if this isn't refused */ + if ( ( retval = PAPI_destroy_eventset( &FPEventSet ) ) != PAPI_EINVAL ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + /* Try to cleanup eventset. */ + /* This should pass. 
*/ + if ( ( retval = PAPI_cleanup_eventset( FPEventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + /* Try to destroy eventset. */ + /* This should pass. */ + if ( ( retval = PAPI_destroy_eventset( &FPEventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + /* Make sure eventset was set to PAPI_NULL */ + if ( FPEventSet != PAPI_NULL ) + test_fail( __FILE__, __LINE__, "FPEventSet != PAPI_NULL", retval ); + + if ( !quiet ) { + if ( ( retval = + PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + printf( "Test case John May 2: cleanup / destroy eventset.\n" ); + printf( "-------------------------------------------------\n" ); + printf( "Test run : \t1\n" ); + printf( "%s : \t", event_name ); + printf( LLDFMT, values ); + printf( "\n" ); + printf( "-------------------------------------------------\n" ); + printf( "The following messages will appear if PAPI is compiled with debug enabled:\n" ); + printf + ( "\tPAPI Error Code -10: PAPI_EISRUN: EventSet is currently counting\n" ); + printf + ( "\tPAPI Error Code -10: PAPI_EISRUN: EventSet is currently counting\n" ); + printf( "\tPAPI Error Code -1: PAPI_EINVAL: Invalid argument\n" ); + } + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/krentel_pthreads.c b/src/ctests/krentel_pthreads.c new file mode 100644 index 0000000..6fbfc88 --- /dev/null +++ b/src/ctests/krentel_pthreads.c @@ -0,0 +1,221 @@ +/* + * Test PAPI with multiple threads. 
+ */ + +#define MAX_THREADS 256 + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define EVENT PAPI_TOT_CYC + +static int program_time = 5; +static int threshold = 20000000; +static int num_threads = 3; + +static long count[MAX_THREADS]; +static long iter[MAX_THREADS]; +static struct timeval last[MAX_THREADS]; + +static pthread_key_t key; + +static struct timeval start; + +static void +my_handler( int EventSet, void *pc, long long ovec, void *context ) +{ + ( void ) EventSet; + ( void ) pc; + ( void ) ovec; + ( void ) context; + + long num = ( long ) pthread_getspecific( key ); + + if ( num < 0 || num > num_threads ) + test_fail( __FILE__, __LINE__, "getspecific failed", 1 ); + count[num]++; +} + +static void +print_rate( long num ) +{ + struct timeval now; + long st_secs; + double last_secs; + + gettimeofday( &now, NULL ); + st_secs = now.tv_sec - start.tv_sec; + last_secs = ( double ) ( now.tv_sec - last[num].tv_sec ) + + ( ( double ) ( now.tv_usec - last[num].tv_usec ) ) / 1000000.0; + if ( last_secs <= 0.001 ) + last_secs = 0.001; + + if (!TESTS_QUIET) { + printf( "[%ld] time = %ld, count = %ld, iter = %ld, " + "rate = %.1f/Kiter\n", + num, st_secs, count[num], iter[num], + ( 1000.0 * ( double ) count[num] ) / ( double ) iter[num] ); + } + + count[num] = 0; + iter[num] = 0; + last[num] = now; +} + +static void +do_cycles( long num, int len ) +{ + struct timeval start, now; + double x, sum; + + gettimeofday( &start, NULL ); + + for ( ;; ) { + sum = 1.0; + for ( x = 1.0; x < 250000.0; x += 1.0 ) + sum += x; + if ( sum < 0.0 ) + printf( "==>> SUM IS NEGATIVE !! 
<<==\n" ); + + iter[num]++; + + gettimeofday( &now, NULL ); + if ( now.tv_sec >= start.tv_sec + len ) + break; + } +} + +static void * +my_thread( void *v ) +{ + long num = ( long ) v; + int n; + int EventSet = PAPI_NULL; + long long value; + + int retval; + + retval = PAPI_register_thread( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); + } + pthread_setspecific( key, v ); + + count[num] = 0; + iter[num] = 0; + last[num] = start; + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset failed", retval ); + } + + retval = PAPI_add_event( EventSet, EVENT ); + if (retval != PAPI_OK ) { + if (!TESTS_QUIET) printf("Trouble adding event\n"); + test_fail( __FILE__, __LINE__, "PAPI_add_event failed", retval ); + } + + if ( PAPI_overflow( EventSet, EVENT, threshold, 0, my_handler ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow failed", 1 ); + + if ( PAPI_start( EventSet ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start failed", 1 ); + + if (!TESTS_QUIET) printf( "launched timer in thread %ld\n", num ); + + for ( n = 1; n <= program_time; n++ ) { + do_cycles( num, 1 ); + print_rate( num ); + } + + PAPI_stop( EventSet, &value ); + + retval = PAPI_overflow( EventSet, EVENT, 0, 0, my_handler); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow failed to reset the overflow handler", retval ); + + if ( PAPI_remove_event( EventSet, EVENT ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event", 1 ); + + if ( PAPI_destroy_eventset( &EventSet ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", 1 ); + + if ( PAPI_unregister_thread( ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", 1 ); + + return ( NULL ); +} + +int +main( int argc, char **argv ) +{ + pthread_t *td = NULL; + long n; + int quiet,retval; + + /* Set TESTS_QUIET variable */ + 
quiet=tests_quiet( argc, argv ); + + if ( argc < 2 || sscanf( argv[1], "%d", &program_time ) < 1 ) + program_time = 6; + if ( argc < 3 || sscanf( argv[2], "%d", &threshold ) < 1 ) + threshold = 20000000; + if ( argc < 4 || sscanf( argv[3], "%d", &num_threads ) < 1 ) + num_threads = 3; + + td = malloc((num_threads+1) * sizeof(pthread_t)); + if (!td) { + test_fail( __FILE__, __LINE__, "td malloc failed", 1 ); + } + + if (!quiet) { + printf( "program_time = %d, threshold = %d, num_threads = %d\n\n", + program_time, threshold, num_threads ); + } + + if ( PAPI_library_init( PAPI_VER_CURRENT ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init failed", 1 ); + + /* Test to be sure we can add events */ + retval = PAPI_query_event( EVENT ); + if (retval!=PAPI_OK) { + if (!quiet) printf("Trouble finding event\n"); + test_skip(__FILE__,__LINE__,"Event not available",1); + } + + if ( PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self ) ) != + PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_thread_init failed", 1 ); + + if ( pthread_key_create( &key, NULL ) != 0 ) + test_fail( __FILE__, __LINE__, "pthread key create failed", 1 ); + + gettimeofday( &start, NULL ); + + for ( n = 1; n <= num_threads; n++ ) { + if ( pthread_create( &(td[n]), NULL, my_thread, ( void * ) n ) != 0 ) + test_fail( __FILE__, __LINE__, "pthread create failed", 1 ); + } + + my_thread( ( void * ) 0 ); + + /* wait for all the threads */ + for ( n = 1; n <= num_threads; n++ ) { + if ( pthread_join( td[n], NULL)) + test_fail( __FILE__, __LINE__, "pthread join failed", 1 ); + } + + free(td); + + if (!quiet) printf( "done\n" ); + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/kufrin.c b/src/ctests/kufrin.c new file mode 100644 index 0000000..354fe05 --- /dev/null +++ b/src/ctests/kufrin.c @@ -0,0 +1,200 @@ +/* +* File: multiplex1_pthreads.c +* Author: Rick Kufrin +* rkufrin@ncsa.uiuc.edu +* Mods: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file really 
bangs on the multiplex pthread functionality */
+
+/* NOTE(review): the system header names were stripped from this copy of the
+ * patch.  The three below are the ones the code in this file needs (printf;
+ * calloc/malloc/free/atoi; pthread_*) -- confirm the list against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#include "do_loops.h"
+
+/* Event codes gathered once by main() and added to every thread's event set. */
+static int *events;
+static int numevents = 0;
+static int max_events=0;
+
+/* Busy loop: adds 0.01 to an accumulator n times and returns the result. */
+double
+loop( long n )
+{
+    long i;
+    double a = 0.0012;
+
+    for ( i = 0; i < n; i++ ) {
+        a += 0.01;
+    }
+    return a;
+}
+
+/*
+ * Worker thread body.  Registers the thread with PAPI, builds a multiplexed
+ * event set from the shared events[] array, counts across do_stuff(), then
+ * tears the event set down and unregisters.  Every PAPI error goes through
+ * test_fail(); a PAPI_ENOSUPP from PAPI_set_multiplex goes through
+ * test_skip().  The argument is unused and the return value is always NULL.
+ */
+void *
+thread( void *arg )
+{
+    ( void ) arg;    /*unused */
+    int eventset = PAPI_NULL;
+    long long *values;
+
+    int ret = PAPI_register_thread( );
+    if ( ret != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_register_thread", ret );
+    ret = PAPI_create_eventset( &eventset );
+    if ( ret != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_create_eventset", ret );
+
+    /* One slot per possible event; PAPI_stop() below writes into it. */
+    values=calloc(max_events,sizeof(long long));
+    if ( values == NULL )
+        test_fail( __FILE__, __LINE__, "calloc", PAPI_ENOMEM );
+
+    if (!TESTS_QUIET) printf( "Event set %d created\n", eventset );
+
+    /* In Component PAPI, EventSets must be assigned a component index
+       before you can fiddle with their internals.
+       0 is always the cpu component */
+    ret = PAPI_assign_eventset_component( eventset, 0 );
+    if ( ret != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", ret );
+    }
+
+    ret = PAPI_set_multiplex( eventset );
+    if ( ret == PAPI_ENOSUPP) {
+        test_skip( __FILE__, __LINE__, "Multiplexing not supported", 1 );
+    }
+    else if ( ret != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", ret );
+    }
+
+    ret = PAPI_add_events( eventset, events, numevents );
+    if ( ret < PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_add_events", ret );
+    }
+
+    ret = PAPI_start( eventset );
+    if ( ret != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_start", ret );
+    }
+
+    do_stuff( );
+
+    ret = PAPI_stop( eventset, values );
+    if ( ret != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_stop", ret );
+    }
+
+    /* The counts are not inspected; release the buffer once PAPI is done
+       writing to it so each thread does not leak it. */
+    free( values );
+
+    ret = PAPI_cleanup_eventset( eventset );
+    if ( ret != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", ret );
+    }
+
+    ret = PAPI_destroy_eventset( &eventset );
+    if ( ret != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", ret );
+    }
+
+    ret = PAPI_unregister_thread( );
+    if ( ret != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", ret );
+    return ( NULL );
+}
+
+/*
+ * Test driver.  Initialises PAPI with thread and multiplex support, collects
+ * up to PAPI_MAX_MPX_CTRS non-derived preset events into events[], then runs
+ * nthreads copies of thread() (default 8; argv[1] overrides it, but only
+ * when the harness is not in quiet mode) and joins them.
+ */
+int
+main( int argc, char **argv )
+{
+    int nthreads = 8, retval, i;
+    PAPI_event_info_t info;
+    pthread_t *threads;
+    int quiet;
+
+    /* Set TESTS_QUIET variable */
+    quiet = tests_quiet( argc, argv );
+
+    if ( !quiet ) {
+        if ( argc > 1 ) {
+            int tmp = atoi( argv[1] );
+            if ( tmp >= 1 )
+                nthreads = tmp;
+        }
+    }
+
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if ( retval != PAPI_VER_CURRENT ) {
+        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
+    }
+
+    retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) pthread_self );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval );
+    }
+
+    retval = PAPI_multiplex_init( );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_multiplex_init", retval );
+    }
+
+    if ((max_events = PAPI_get_cmp_opt(PAPI_MAX_MPX_CTRS,NULL,0)) <= 0) {
+        test_fail( __FILE__, __LINE__, "PAPI_get_cmp_opt", max_events );
+    }
+
+    if ((events = calloc(max_events,sizeof(int))) == NULL) {
+        test_fail( __FILE__, __LINE__, "calloc", PAPI_ESYS );
+    }
+
+    /* Fill up the event set with as many non-derived events as we can */
+
+    i = PAPI_PRESET_MASK;
+    do {
+        if ( PAPI_get_event_info( i, &info ) == PAPI_OK ) {
+            if ( info.count == 1 ) {
+                events[numevents++] = ( int ) info.event_code;
+                if (!quiet) printf( "Added %s\n", info.symbol );
+            } else {
+                if (!quiet) printf( "Skipping derived event %s\n", info.symbol );
+            }
+        }
+    } while ( ( PAPI_enum_event( &i, PAPI_PRESET_ENUM_AVAIL ) == PAPI_OK )
+              && ( numevents < max_events ) );
+
+    if (!quiet) printf( "Found %d events\n", numevents );
+
+    if (numevents==0) {
+        test_skip(__FILE__,__LINE__,"No events found",0);
+    }
+
+    do_stuff( );
+
+    if (!quiet) printf( "Creating %d threads:\n", nthreads );
+
+    threads =
+        ( pthread_t * ) malloc( ( size_t ) nthreads * sizeof ( pthread_t ) );
+    if ( threads == NULL ) {
+        test_fail( __FILE__, __LINE__, "malloc", PAPI_ENOMEM );
+    }
+
+    /* Create the threads */
+    for ( i = 0; i < nthreads; i++ ) {
+        retval = pthread_create( &threads[i], NULL, thread, NULL );
+        if ( retval != 0 ) {
+            test_fail( __FILE__, __LINE__, "pthread_create", PAPI_ESYS );
+        }
+    }
+
+    /* Wait for thread completion */
+    for ( i = 0; i < nthreads; i++ ) {
+        retval = pthread_join( threads[i], NULL );
+        if ( retval != 0 ) {
+            test_fail( __FILE__, __LINE__, "pthread_join", PAPI_ESYS );
+        }
+    }
+
+    /* All workers have been joined, so nothing reads these any more. */
+    free( threads );
+    free( events );
+
+    if (!quiet) printf( "Done." );
+
+    test_pass( __FILE__ );
+
+    pthread_exit( NULL );
+
+    return 0;
+}
diff --git a/src/ctests/locks_pthreads.c b/src/ctests/locks_pthreads.c
new file mode 100644
index 0000000..e88e4f4
--- /dev/null
+++ b/src/ctests/locks_pthreads.c
@@ -0,0 +1,127 @@
+/* This file checks to make sure the locking mechanisms work correctly */
+/* on the platform. */
+/* Platforms where the locking mechanisms are not implemented or are */
+/* incorrectly implemented will fail.
-KSL */
+
+#define MAX_THREADS 256
+#define APPR_TOTAL_ITER 1000000
+
+/* NOTE(review): the system header names were stripped from this copy of the
+ * patch (five bare #include lines).  The three below are the ones the code
+ * in this file needs (printf; sqrt; pthread_*) -- confirm against upstream. */
+#include <stdio.h>
+#include <math.h>
+#include <pthread.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+/* Shared counter bumped by every thread under PAPI_USR1_LOCK. */
+volatile long long count = 0;
+volatile long long tmpcount = 0;
+/* Number of lock/unlock rounds each thread performs; set once by main(). */
+volatile long long thread_iter = 0;
+
+static int quiet=0;
+
+/* Increment *mycount iters times, taking PAPI_USR1_LOCK around each update. */
+void
+lockloop( int iters, volatile long long *mycount )
+{
+    int i;
+    for ( i = 0; i < iters; i++ ) {
+        PAPI_lock( PAPI_USR1_LOCK );
+        *mycount = *mycount + 1;
+        PAPI_unlock( PAPI_USR1_LOCK );
+    }
+}
+
+/*
+ * Thread body: runs lockloop() on the shared counter, times it with
+ * PAPI_get_real_usec() and, unless quiet, prints the lock/unlock rate.
+ * Exits the thread with arg as its result.
+ */
+void *
+Slave( void *arg )
+{
+    long long duration;
+
+    duration = PAPI_get_real_usec( );
+    lockloop( thread_iter, &count );
+    duration = PAPI_get_real_usec( ) - duration;
+
+    if (!quiet) {
+        printf("%f lock/unlocks per us\n",
+               (float)thread_iter/(float)duration);
+    }
+    pthread_exit( arg );
+}
+
+
+/*
+ * Starts one Slave per reported CPU (at least 1, at most MAX_THREADS), joins
+ * them, and fails the test unless the shared counter equals
+ * nthr * thread_iter, i.e. unless no increment was lost.
+ */
+int
+main( int argc, char **argv )
+{
+    pthread_t slaves[MAX_THREADS];
+    int rc, i, nthr;
+    int retval;
+    const PAPI_hw_info_t *hwinfo = NULL;
+
+    /* Set TESTS_QUIET variable */
+    quiet = tests_quiet( argc, argv );
+
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if (retval != PAPI_VER_CURRENT ) {
+        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
+    }
+
+    hwinfo = PAPI_get_hardware_info( );
+    if (hwinfo == NULL ) {
+        test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 );
+    }
+
+    retval = PAPI_thread_init((unsigned long (*)(void)) ( pthread_self ) );
+    if ( retval != PAPI_OK ) {
+        if ( retval == PAPI_ECMP ) {
+            test_skip( __FILE__, __LINE__,
+                       "PAPI_thread_init", retval );
+        }
+        else {
+            test_fail( __FILE__, __LINE__,
+                       "PAPI_thread_init", retval );
+        }
+    }
+
+    if ( hwinfo->ncpu > MAX_THREADS ) {
+        nthr = MAX_THREADS;
+    }
+    else {
+        nthr = hwinfo->ncpu;
+    }
+
+    /* A reported CPU count of 0 (or less) would divide by sqrt(0) below and
+       then "pass" with no thread ever touching the lock; run at least one. */
+    if ( nthr < 1 ) {
+        nthr = 1;
+    }
+
+    /* Scale the per thread work to keep the serial runtime about the same. */
+    thread_iter = APPR_TOTAL_ITER/sqrt(nthr);
+
+    if (!quiet) {
+        printf( "Creating %d threads, %lld lock/unlock\n",
+                nthr , thread_iter);
+    }
+
+    for ( i = 0; i < nthr; i++ ) {
+        rc = pthread_create( &slaves[i], NULL, Slave, NULL );
+        if ( rc ) {
+            retval = PAPI_ESYS;
+            test_fail( __FILE__, __LINE__,
+                       "pthread_create", retval );
+        }
+    }
+
+    for ( i = 0; i < nthr; i++ ) {
+        /* A failed join means count may still be changing; do not compare. */
+        rc = pthread_join( slaves[i], NULL );
+        if ( rc ) {
+            test_fail( __FILE__, __LINE__,
+                       "pthread_join", PAPI_ESYS );
+        }
+    }
+
+    if (!quiet) {
+        printf( "Expected: %lld Received: %lld\n",
+                ( long long ) nthr * thread_iter,
+                count );
+    }
+
+    if ( nthr * thread_iter != count ) {
+        test_fail( __FILE__, __LINE__, "Thread Locks", 1 );
+    }
+
+    test_pass( __FILE__ );
+
+    return 0;
+
+}
diff --git a/src/ctests/low-level.c b/src/ctests/low-level.c
new file mode 100644
index 0000000..83aee55
--- /dev/null
+++ b/src/ctests/low-level.c
@@ -0,0 +1,193 @@
+/* This example shows the essentials of using the PAPI low-level
+   interface. The program consists of 3 examples where the work
+   is done over some work-loops. The example tries to illustrate
+   some simple mistakes that are easily made and how correct
+   code would accomplish the same thing.
+
+   Example 1: The total count over two work loops (Loops 1 and 2)
+   is supposed to be measured. Due to a misunderstanding of the
+   semantics of the API the total count comes out wrong.
+   The example also illustrates that it is legal to read both
+   running and stopped counters.
+
+   Example 2: The total count over two work loops (Loops 1 and 3)
+   is supposed to be measured while discarding the counts made in
+   loop 2. Instead the counts in loop1 are counted twice and the
+   counts in loop2 are added to the total number of counts.
+
+   Example 3: One correct way of accomplishing the result aimed for
+   in example 2.
+*/
+
+/* NOTE(review): the system header names were stripped from this copy of the
+ * patch (two bare #include lines).  <stdio.h> is the one the code below
+ * needs (printf) -- confirm the second against upstream. */
+#include <stdio.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#include "do_loops.h"
+
+#define NUM_EVENTS 2
+
+/*
+ * Runs the three read/accum experiments described in the file header on one
+ * two-event event set (PAPI_FP_INS or PAPI_TOT_INS, plus PAPI_TOT_CYC) and
+ * prints the counts unless quiet.  The test is skipped when the two events
+ * cannot both be added, and fails on any other PAPI error.
+ */
+int
+main( int argc, char **argv )
+{
+    int retval;
+    long long values[NUM_EVENTS], dummyvalues[NUM_EVENTS];
+    int Events[NUM_EVENTS];
+    int EventSet = PAPI_NULL;
+    int quiet;
+
+    /* Set TESTS_QUIET variable */
+    quiet=tests_quiet( argc, argv );
+
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if (retval != PAPI_VER_CURRENT ) {
+        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
+    }
+
+    /* query and set up the right events to monitor */
+    if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) {
+        Events[0] = PAPI_FP_INS;
+        Events[1] = PAPI_TOT_CYC;
+    } else {
+        Events[0] = PAPI_TOT_INS;
+        Events[1] = PAPI_TOT_CYC;
+    }
+
+    retval = PAPI_create_eventset( &EventSet );
+    if (retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );
+    }
+
+    /* PAPI_add_events reports a partial add as a positive count, so anything
+       other than PAPI_OK means the set does not hold both events and the
+       values[1] printed below would be meaningless. */
+    retval = PAPI_add_events( EventSet, ( int * ) Events, NUM_EVENTS );
+    if (retval != PAPI_OK ) {
+        if (!quiet) printf("Trouble adding events\n");
+        test_skip( __FILE__, __LINE__, "PAPI_add_events", retval );
+    }
+
+    if ( !quiet ) {
+        printf( "\n Incorrect usage of read and accum.\n" );
+        printf( " Some cycles are counted twice\n" );
+    }
+    if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+
+    /* Loop 1 */
+    do_flops( NUM_FLOPS );
+
+    if ( ( retval = PAPI_read( EventSet, values ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+
+    if ( !quiet )
+        printf( TWO12, values[0], values[1], "(Counters continuing...)\n" );
+
+    /* Loop 2 */
+    do_flops( NUM_FLOPS );
+
+    /* Using PAPI_accum here is incorrect. The result is that Loop 1 *
+     * is being counted twice */
+    if ( ( retval = PAPI_accum( EventSet, values ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_accum", retval );
+
+    if ( !quiet )
+        printf( TWO12, values[0], values[1], "(Counters being accumulated)\n" );
+
+    /* Loop 3 */
+    do_flops( NUM_FLOPS );
+
+    if ( ( retval = PAPI_stop( EventSet, dummyvalues ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_stop", retval );
+
+    if ( ( retval = PAPI_read( EventSet, dummyvalues ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+
+    if ( !quiet ) {
+        printf( TWO12, dummyvalues[0], dummyvalues[1],
+                "(Reading stopped counters)\n" );
+
+        printf( TWO12, values[0], values[1], "" );
+
+        printf( "\n Incorrect usage of read and accum.\n" );
+        printf( " Another incorrect use\n" );
+    }
+    if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+
+    /* Loop 1 */
+    do_flops( NUM_FLOPS );
+
+    if ( ( retval = PAPI_read( EventSet, values ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+
+    if ( !quiet )
+        printf( TWO12, values[0], values[1], "(Counters continuing...)\n" );
+
+    /* Loop 2 */
+    /* Code that should not be counted */
+    do_flops( NUM_FLOPS );
+
+    if ( ( retval = PAPI_read( EventSet, dummyvalues ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+
+    if ( !quiet )
+        printf( TWO12, dummyvalues[0], dummyvalues[1],
+                "(Intermediate counts...)\n" );
+
+    /* Loop 3 */
+    do_flops( NUM_FLOPS );
+
+    /* Since PAPI_read does not reset the counters its use above after *
+     * loop 2 is incorrect. Instead Loop1 will in effect be counted twice *
+     * and the counts in loop 2 are included in the total counts */
+    if ( ( retval = PAPI_accum( EventSet, values ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_accum", retval );
+    if ( !quiet )
+        printf( TWO12, values[0], values[1], "" );
+
+    if ( ( retval = PAPI_stop( EventSet, dummyvalues ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_stop", retval );
+
+    if ( !quiet ) {
+        printf( "\n Correct usage of read and accum.\n" );
+        printf( " PAPI_reset and PAPI_accum used to skip counting\n" );
+        printf( " a section of the code.\n" );
+    }
+    if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+
+    do_flops( NUM_FLOPS );
+
+    if ( ( retval = PAPI_read( EventSet, values ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+    if ( !quiet )
+        printf( TWO12, values[0], values[1], "(Counters continuing)\n" );
+
+    /* Code that should not be counted */
+    do_flops( NUM_FLOPS );
+
+    if ( ( retval = PAPI_reset( EventSet ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_reset", retval );
+
+    if ( !quiet )
+        printf( "%12s %12s (Counters reset)\n", "", "" );
+
+    do_flops( NUM_FLOPS );
+
+    if ( ( retval = PAPI_accum( EventSet, values ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_accum", retval );
+
+    if ( !quiet )
+        printf( TWO12, values[0], values[1], "" );
+
+    if ( !quiet ) {
+        printf( "----------------------------------\n" );
+        printf( "Verification: The last line in each experiment should be\n" );
+        printf( "approximately twice the value of the first line.\n" );
+        printf
+            ( "The third case illustrates one possible way to accomplish this.\n" );
+    }
+    test_pass( __FILE__ );
+
+    return 0;
+}
diff --git a/src/ctests/matrix-hl.c b/src/ctests/matrix-hl.c
new file mode 100644
index 0000000..c069bee
--- /dev/null
+++ b/src/ctests/matrix-hl.c
@@ -0,0 +1,140 @@
+/****************************************************************************
+ *C
+ *C matrix-hl.f
+ *C An example of
matrix-matrix multiplication and using PAPI high level
+ *C to look at the performance. written by Kevin London
+ *C March 2000
+ *C Added to c tests to check stop
+ *C****************************************************************************
+ */
+
+
+/* NOTE(review): the system header names were stripped from this copy of the
+ * patch.  The two below are the ones the code in this file needs (printf;
+ * llabs) -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#include "do_loops.h"
+
+/*
+ * Multiplies a 175x225 matrix by a 225x150 matrix between
+ * PAPI_start_counters() and PAPI_stop_counters().  When the first event is
+ * PAPI_FP_INS the measured count must be within 5% of 2*N1*N2*N3, or of half
+ * that (FMA), otherwise the test fails.
+ */
+int
+main( int argc, char **argv )
+{
+
+#define NROWS1 175
+#define NCOLS1 225
+#define NROWS2 NCOLS1
+#define NCOLS2 150
+    double p[NROWS1][NCOLS1], q[NROWS2][NCOLS2], r[NROWS1][NCOLS2];
+    int i, j, k, num_events, retval;
+    /* PAPI standardized event to be monitored */
+    int event[2];
+    /* PAPI values of the counters */
+    long long values[2], tmp;
+    int quiet;
+
+    quiet = tests_quiet( argc, argv );
+
+    /* Setup default values */
+    num_events = 0;
+
+    /* See how many hardware events at one time are supported
+     * This also initializes the PAPI library */
+    num_events = PAPI_num_counters( );
+    if ( num_events < 2 ) {
+        if (!quiet) printf( "This example program requries the architecture to "
+                            "support 2 simultaneous hardware events...shutting down.\n" );
+        test_skip( __FILE__, __LINE__, "PAPI_num_counters", 1 );
+    }
+
+    if ( !quiet )
+        printf( "Number of hardware counters supported: %d\n", num_events );
+
+    if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK )
+        event[0] = PAPI_FP_OPS;
+    else if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK )
+        event[0] = PAPI_FP_INS;
+    else
+        event[0] = PAPI_TOT_INS;
+
+    /* Time used */
+    event[1] = PAPI_TOT_CYC;
+
+    /* matrix 1: read in the matrix values */
+    for ( i = 0; i < NROWS1; i++ )
+        for ( j = 0; j < NCOLS1; j++ )
+            p[i][j] = i * j * 1.0;
+
+    for ( i = 0; i < NROWS2; i++ )
+        for ( j = 0; j < NCOLS2; j++ )
+            q[i][j] = i * j * 1.0;
+
+    for ( i = 0; i < NROWS1; i++ )
+        for ( j = 0; j < NCOLS2; j++ )
+            r[i][j] = i * j * 1.0;
+
+    /* Set up the counters */
+    num_events = 2;
+    retval = PAPI_start_counters( event, num_events );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval );
+
+    /* Clear the counter values */
+    retval = PAPI_read_counters( values, num_events );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval );
+
+    /* Compute the matrix-matrix multiplication */
+    for ( i = 0; i < NROWS1; i++ )
+        for ( j = 0; j < NCOLS2; j++ )
+            for ( k = 0; k < NCOLS1; k++ )
+                r[i][j] = r[i][j] + p[i][k] * q[k][j];
+
+    /* Stop the counters and put the results in the array values */
+    retval = PAPI_stop_counters( values, num_events );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval );
+
+    /* Make sure the compiler does not optimize away the multiplication
+     * with dummy(r);
+     */
+    dummy( r );
+
+    if ( !quiet ) {
+        if ( event[0] == PAPI_TOT_INS ) {
+            printf( TAB1, "TOT Instructions:", values[0] );
+        } else {
+            printf( TAB1, "FP Instructions:", values[0] );
+        }
+        printf( TAB1, "Cycles:", values[1] );
+    }
+
+    /*
+     * Intel Core overreports flops by 50% when using -O
+     * Use -O2 or -O3 to produce the expected # of flops
+     */
+
+    if ( event[0] == PAPI_FP_INS ) {
+        /* Compare measured FLOPS to expected value */
+        tmp =
+            2 * ( long long ) ( NROWS1 ) * ( long long ) ( NCOLS2 ) *
+            ( long long ) ( NCOLS1 );
+        /* Compare as long long: narrowing a counter to int first would wrap
+           for counts above INT_MAX and could hide a wildly wrong result. */
+        if ( llabs( values[0] - tmp ) > ( double ) tmp * 0.05 ) {
+            /* Maybe we are counting FMAs? */
+            tmp = tmp / 2;
+            if ( llabs( values[0] - tmp ) >
+                 ( double ) tmp * 0.05 ) {
+                printf( "\n" TAB1, "Expected operation count: ", 2 * tmp );
+                printf( TAB1, "Or possibly (using FMA): ", tmp );
+                printf( TAB1, "Instead I got: ", values[0] );
+                test_fail( __FILE__, __LINE__,
+                           "Unexpected FLOP count (check vector operations)",
+                           1 );
+            }
+        }
+    }
+    test_pass( __FILE__ );
+
+    return 0;
+
+}
diff --git a/src/ctests/max_multiplex.c b/src/ctests/max_multiplex.c
new file mode 100644
index 0000000..3b70921
--- /dev/null
+++ b/src/ctests/max_multiplex.c
@@ -0,0 +1,100 @@
+/* this tests attempts to add the maximum number of pre-defined events */
+/* to a multiplexed event set. This tests that we properly set the */
+/* maximum events value. */
+
+/* NOTE(review): the system header names were stripped from this copy of the
+ * patch (two bare #include lines).  <stdio.h> is the one the visible code
+ * needs (printf) -- confirm the second against upstream. */
+#include <stdio.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+/*
+ * Tries to add every available preset event to one multiplexed event set,
+ * counting how many were tried and how many were accepted, and reports the
+ * totals against PAPI_MAX_MPX_CTRS.
+ */
+int main(int argc, char **argv) {
+
+    int retval,max_multiplex,i,EventSet=PAPI_NULL;
+    PAPI_event_info_t info;
+    int added=0;
+    int events_tried=0;
+
+    /* Set TESTS_QUIET variable */
+    tests_quiet( argc, argv );
+
+    /* Initialize the library */
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if ( retval != PAPI_VER_CURRENT ) {
+        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
+    }
+
+    retval = PAPI_multiplex_init( );
+    if ( retval != PAPI_OK) {
+        test_fail(__FILE__, __LINE__, "Multiplex not supported", 1);
+    }
+
+    max_multiplex=PAPI_get_opt( PAPI_MAX_MPX_CTRS, NULL );
+
+    if (!TESTS_QUIET) {
+        printf("Maximum multiplexed counters=%d\n",max_multiplex);
+    }
+
+    if (!TESTS_QUIET) {
+        printf("Trying to multiplex as many as possible:\n");
+    }
+
+    retval = PAPI_create_eventset( &EventSet );
+    if ( retval != PAPI_OK ) {
+        test_fail(__FILE__, __LINE__, "PAPI_create_eventset", retval );
+    }
+
+    retval = PAPI_assign_eventset_component( EventSet, 0 );
+    if ( retval != PAPI_OK ) {
+        test_fail(__FILE__, __LINE__, "PAPI_assign_eventset_component",
+                  retval );
+    }
+
+    retval = PAPI_set_multiplex( EventSet );
+    if ( retval != PAPI_OK ) {
+        /* Name the call that actually failed. */
+        test_fail(__FILE__, __LINE__, "PAPI_set_multiplex", retval );
+    }
+
+
+    i = 0 | PAPI_PRESET_MASK;
+    PAPI_enum_event( &i, PAPI_ENUM_FIRST );
+    do {
+        retval = PAPI_get_event_info( i, &info );
+        if (retval!=PAPI_OK) {
+            /* info was not filled in: its event_code is either
+               uninitialised or left over from the previous event, so it
+               must not be handed to PAPI_add_event.  Still counted as
+               tried, as before. */
+            events_tried++;
+            continue;
+        }
+        if (!TESTS_QUIET) printf("Adding %s: ",info.symbol);
+
+        retval = PAPI_add_event( EventSet, info.event_code );
+        if (retval!=PAPI_OK) {
+            if (!TESTS_QUIET) printf("Fail!\n");
+        }
+        else {
+            if (!TESTS_QUIET) printf("Success!\n");
+            added++;
+        }
+        events_tried++;
+
+    } while (PAPI_enum_event( &i, PAPI_PRESET_ENUM_AVAIL ) == PAPI_OK );
+
+    PAPI_shutdown( );
+
+    if (!TESTS_QUIET) {
+        printf("Added %d of theoretical max %d\n",added,max_multiplex);
+    }
+
+    /* NOTE(review): this copy of the patch is damaged here.  Everything from
+     * the comparison on the next line through the first #include of the
+     * following file (the rest of this main(), that file's diff header and
+     * anything before its includes) is missing.  The surviving text is kept
+     * exactly as found; restore it from upstream. */
+    if (events_tried
+#include
+#include
+
+#include "papi.h"
+#include "papi_test.h"
+
+#include "do_loops.h"
+
+#define OUT_FMT "%12d\t%12lld\t%12lld\t%.2f\n"
+
+
+/*
+ * Prints the cache hierarchy reported by PAPI_get_hardware_info(), then for
+ * each cache event in eventlist[] that can be added measures do_misses() on
+ * buffers from 512 bytes to 16 MB, once after do_flush() ("cold") and once
+ * straight afterwards ("warm"), printing both counts and their ratio.
+ */
+int
+main( int argc, char **argv )
+{
+    int retval, i, j;
+    int EventSet = PAPI_NULL;
+    long long values[2];
+    const PAPI_hw_info_t *hwinfo = NULL;
+    char descr[PAPI_MAX_STR_LEN];
+    PAPI_event_info_t evinfo;
+    PAPI_mh_level_t *L;
+
+
+    const int eventlist[] = {
+        PAPI_L1_DCA,
+        PAPI_L1_DCM,
+        PAPI_L1_DCH,
+        PAPI_L2_DCA,
+        PAPI_L2_DCM,
+        PAPI_L2_DCH,
+#if 0
+        PAPI_L1_LDM,
+        PAPI_L1_STM,
+        PAPI_L1_DCR,
+        PAPI_L1_DCW,
+        PAPI_L1_ICM,
+        PAPI_L1_TCM,
+        PAPI_LD_INS,
+        PAPI_SR_INS,
+        PAPI_LST_INS,
+        PAPI_L2_DCR,
+        PAPI_L2_DCW,
+        PAPI_CSR_TOT,
+        PAPI_MEM_SCY,
+        PAPI_MEM_RCY,
+        PAPI_MEM_WCY,
+        PAPI_L1_ICH,
+        PAPI_L1_ICA,
+        PAPI_L1_ICR,
+        PAPI_L1_ICW,
+        PAPI_L1_TCH,
+        PAPI_L1_TCA,
+        PAPI_L1_TCR,
+        PAPI_L1_TCW,
+        PAPI_L2_DCM,
+        PAPI_L2_ICM,
+        PAPI_L2_TCM,
+        PAPI_L2_LDM,
+        PAPI_L2_STM,
+        PAPI_L2_DCH,
+        PAPI_L2_DCA,
+        PAPI_L2_DCR,
+        PAPI_L2_DCW,
+        PAPI_L2_ICH,
+        PAPI_L2_ICA,
+        PAPI_L2_ICR,
+        PAPI_L2_ICW,
+        PAPI_L2_TCH,
+        PAPI_L2_TCA,
+        PAPI_L2_TCR,
+        PAPI_L2_TCW,
+#endif
+        0
+    };
+
+    tests_quiet( argc, argv );    /* Set TESTS_QUIET variable */
+
+    if ( ( retval =
+           PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT )
+        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
+
+    if ( ( hwinfo = PAPI_get_hardware_info( ) ) == NULL ) {
+        test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 );
+    }
+
+    if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );
+
+    /* Extract and report the cache information */
+    L = ( PAPI_mh_level_t * ) ( hwinfo->mem_hierarchy.level );
+    for ( i = 0; i < hwinfo->mem_hierarchy.levels; i++ ) {
+        for ( j = 0; j < 2; j++ ) {
+            int tmp;
+
+            tmp = PAPI_MH_CACHE_TYPE( L[i].cache[j].type );
+            if ( tmp == PAPI_MH_TYPE_UNIFIED ) {
+                if (!TESTS_QUIET) printf( "L%d Unified ", i + 1 );
+            } else if ( tmp == PAPI_MH_TYPE_DATA ) {
+                if (!TESTS_QUIET) printf( "L%d Data ", i + 1 );
+            } else if ( tmp == PAPI_MH_TYPE_INST ) {
+                if (!TESTS_QUIET) printf( "L%d Instruction ", i + 1 );
+            } else if ( tmp == PAPI_MH_TYPE_VECTOR ) {
+                if (!TESTS_QUIET) printf( "L%d Vector ", i + 1 );
+            } else if ( tmp == PAPI_MH_TYPE_TRACE ) {
+                if (!TESTS_QUIET) printf( "L%d Trace ", i + 1 );
+            } else if ( tmp == PAPI_MH_TYPE_EMPTY ) {
+                break;
+            } else {
+                test_fail( __FILE__, __LINE__,
+                           "PAPI_get_hardware_info",
+                           PAPI_EBUG );
+            }
+
+            tmp = PAPI_MH_CACHE_WRITE_POLICY( L[i].cache[j].type );
+            if ( tmp == PAPI_MH_TYPE_WB ) {
+                if (!TESTS_QUIET) printf( "Write back " );
+            } else if ( tmp == PAPI_MH_TYPE_WT ) {
+                if (!TESTS_QUIET) printf( "Write through " );
+            } else {
+                test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info",
+                           PAPI_EBUG );
+            }
+
+            tmp = PAPI_MH_CACHE_REPLACEMENT_POLICY( L[i].cache[j].type );
+            if ( tmp == PAPI_MH_TYPE_PSEUDO_LRU ) {
+                if (!TESTS_QUIET) printf( "Pseudo LRU policy " );
+            } else if ( tmp == PAPI_MH_TYPE_LRU ) {
+                if (!TESTS_QUIET) printf( "LRU policy " );
+            } else if ( tmp == PAPI_MH_TYPE_UNKNOWN ) {
+                if (!TESTS_QUIET) printf( "Unknown policy " );
+            } else {
+                test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info",
+                           PAPI_EBUG );
+            }
+
+            if (!TESTS_QUIET) {
+                printf( "Cache:\n" );
+                if ( L[i].cache[j].type ) {
+                    printf( " Total size: %dKB\n"
+                            " Line size: %dB\n"
+                            " Number of Lines: %d\n"
+                            " Associativity: %d\n\n",
+                            ( L[i].cache[j].size ) >> 10,
+                            L[i].cache[j].line_size,
+                            L[i].cache[j].num_lines,
+                            L[i].cache[j].associativity );
+                }
+            }
+        }
+    }
+
+    for ( i = 0; eventlist[i] != 0; i++ ) {
+        if (PAPI_event_code_to_name( eventlist[i], descr ) != PAPI_OK)
+            continue;
+        if ( PAPI_add_event( EventSet, eventlist[i] ) != PAPI_OK )
+            continue;
+
+        /* Capture the status so the failure report carries this call's
+           error code rather than whatever retval held from earlier. */
+        retval = PAPI_get_event_info( eventlist[i], &evinfo );
+        if ( retval != PAPI_OK )
+            test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval );
+
+        if (!TESTS_QUIET) {
+            printf( "\nEvent: %s\nShort: %s\nLong: %s\n\n",
+                    evinfo.symbol, evinfo.short_descr,
+                    evinfo.long_descr );
+            printf( " Bytes\t\tCold\t\tWarm\tPercent\n" );
+        }
+
+        if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK )
+            test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+
+        for ( j = 512; j <= 16 * ( 1024 * 1024 ); j = j * 2 ) {
+            do_misses( 1, j );
+            do_flush( );
+
+            if ( ( retval = PAPI_reset( EventSet ) ) != PAPI_OK )
+                test_fail( __FILE__, __LINE__, "PAPI_reset", retval );
+
+            do_misses( 1, j );
+
+            if ( ( retval = PAPI_read( EventSet, &values[0] ) ) != PAPI_OK )
+                test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+            if ( ( retval = PAPI_reset( EventSet ) ) != PAPI_OK )
+                test_fail( __FILE__, __LINE__, "PAPI_reset", retval );
+
+            do_misses( 1, j );
+
+            if ( ( retval = PAPI_read( EventSet, &values[1] ) ) != PAPI_OK )
+                test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+
+            if (!TESTS_QUIET) {
+                printf( OUT_FMT, j,
+                        values[0], values[1],
+                        ( ( float ) values[1] /
+                          ( float ) ( ( values[0] !=0 ) ?
+                                      values[0] : 1 ) * 100.0 ) );
+            }
+        }
+
+        if ( ( retval = PAPI_stop( EventSet, NULL ) ) != PAPI_OK )
+            test_fail( __FILE__, __LINE__, "PAPI_stop", retval );
+
+        if ( ( retval =
+               PAPI_remove_event( EventSet, eventlist[i] ) ) != PAPI_OK )
+            test_fail( __FILE__, __LINE__, "PAPI_remove_event", retval );
+    }
+
+    if ( ( retval = PAPI_destroy_eventset( &EventSet ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval );
+
+    test_pass( __FILE__ );
+
+    return 0;
+}
diff --git a/src/ctests/mendes-alt.c b/src/ctests/mendes-alt.c
new file mode 100644
index 0000000..2792928
--- /dev/null
+++ b/src/ctests/mendes-alt.c
@@ -0,0 +1,188 @@
+/* NOTE(review): the system header names were stripped from this copy of the
+ * patch (three bare #include lines).  <stdio.h> is the one the code below
+ * needs (printf) -- confirm the other two against upstream. */
+#include <stdio.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#ifdef SETMAX
+#define MAX SETMAX
+#else
+#define MAX 10000
+#endif
+#define TIMES 1000
+
+#define PAPI_MAX_EVENTS 2
+long long PAPI_values1[PAPI_MAX_EVENTS];
+long long PAPI_values2[PAPI_MAX_EVENTS];
+long long PAPI_values3[PAPI_MAX_EVENTS];
+static int EventSet = PAPI_NULL;
+
+/* Floating-point workload: TIMES passes of a[i] = a[i] * b[i] + 1 over the
+   first n elements. */
+void funcX( double a[MAX], double b[MAX], int n)
+{
+    int i, k;
+    for ( k = 0; k < TIMES; k++ )
+        for ( i = 0; i < n; i++ )
+            a[i] = a[i] * b[i] + 1.;
+}
+
+/* Copy-only workload (no floating-point arithmetic): TIMES passes that move
+   mirrored elements between a and b through a scratch array.  n must not
+   exceed MAX. */
+void funcA( double a[MAX], double b[MAX], int n)
+{
+    int i, k;
+    double t[MAX];
+    for ( k = 0; k < TIMES; k++ )
+        for ( i = 0; i < n; i++ ) {
+            /* The mirror of index i in an n-element array is n - 1 - i;
+               the former n - i read one element past the end at i == 0. */
+            t[i] = b[n - 1 - i];
+            b[i] = a[n - 1 - i];
+            a[i] = t[i];
+        }
+}
+
+/*
+ * Counts PAPI_FP_INS (or PAPI_TOT_INS) and PAPI_TOT_CYC across two funcX()
+ * passes and one funcA() pass, reading the counters after each.  Built with
+ * MULTIPLEX the event set is multiplexed; built with RESET the counters are
+ * reset before funcA().  Without RESET the test fails if any counter went
+ * backwards between the second and third reading.
+ */
+int
+main( int argc, char **argv )
+{
+    int i, retval;
+    double a[MAX], b[MAX];
+    int quiet;
+
+    /* Set TESTS_QUIET variable */
+    quiet=tests_quiet( argc, argv );
+
+    for ( i = 0; i < MAX; i++ ) {
+        a[i] = 0.0;
+        b[i] = 0.;
+    }
+
+    for ( i = 0; i < PAPI_MAX_EVENTS; i++ )
+        PAPI_values1[i] = PAPI_values2[i] = PAPI_values3[i] = 0;
+
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if ( retval != PAPI_VER_CURRENT )
+        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
+
+#ifdef MULTIPLEX
+    if ( !quiet ) {
+        printf( "Activating PAPI Multiplex\n" );
+    }
+
+    retval = PAPI_multiplex_init( );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI multiplex init fail\n",
+                   retval );
+    }
+
+#endif
+
+    retval = PAPI_create_eventset( &EventSet );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI set event fail\n", retval );
+
+#ifdef MULTIPLEX
+    /* In Component PAPI, EventSets must be assigned a component index
+       before you can fiddle with their internals.
+       0 is always the cpu component */
+    retval = PAPI_assign_eventset_component( EventSet, 0 );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component",
+                   retval );
+
+    retval = PAPI_set_multiplex( EventSet );
+    if (retval == PAPI_ENOSUPP) {
+        test_skip( __FILE__, __LINE__, "Multiplex not supported", 1 );
+    }
+    else if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_set_multiplex fails \n", retval );
+#endif
+
+    retval = PAPI_add_event( EventSet, PAPI_FP_INS );
+    if ( retval < PAPI_OK ) {
+        retval = PAPI_add_event( EventSet, PAPI_TOT_INS );
+        if ( retval < PAPI_OK ) {
+            if (!quiet) printf("Trouble adding events\n");
+            test_skip( __FILE__, __LINE__,
+                       "PAPI add PAPI_FP_INS or PAPI_TOT_INS fail\n", retval );
+        } else if ( !quiet ) {
+            printf( "PAPI_TOT_INS\n" );
+        }
+    } else if ( !quiet ) {
+        printf( "PAPI_FP_INS\n" );
+    }
+
+    retval = PAPI_add_event( EventSet, PAPI_TOT_CYC );
+    if ( retval < PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI add PAPI_TOT_CYC fail\n",
+                   retval );
+    if ( !quiet ) {
+        printf( "PAPI_TOT_CYC\n" );
+    }
+
+    retval = PAPI_start( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI start fail\n", retval );
+
+    funcX( a, b, MAX );
+
+    retval = PAPI_read( EventSet, PAPI_values1 );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI read fail \n", retval );
+
+    funcX( a, b, MAX );
+
+    retval = PAPI_read( EventSet, PAPI_values2 );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI read fail \n", retval );
+
+#ifdef RESET
+    retval = PAPI_reset( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI reset fail \n", retval );
+#endif
+
+    funcA( a, b, MAX );
+
+    retval = PAPI_stop( EventSet, PAPI_values3 );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI stop fail \n", retval );
+
+    if ( !quiet ) {
+        printf( "values1 is:\n" );
+        for ( i = 0; i < PAPI_MAX_EVENTS; i++ )
+            printf( LLDFMT15, PAPI_values1[i] );
+
+        printf( "\nvalues2 is:\n" );
+        for ( i = 0; i < PAPI_MAX_EVENTS; i++ )
+            printf( LLDFMT15, PAPI_values2[i] );
+        printf( "\nvalues3 is:\n" );
+        for ( i = 0; i < PAPI_MAX_EVENTS; i++ )
+            printf( LLDFMT15, PAPI_values3[i] );
+
+#ifndef RESET
+        printf( "\nPAPI value (2-1) is : \n" );
+        for ( i = 0; i < PAPI_MAX_EVENTS; i++ )
+            printf( LLDFMT15, PAPI_values2[i] - PAPI_values1[i] );
+        printf( "\nPAPI value (3-2) is : \n" );
+        for ( i = 0; i < PAPI_MAX_EVENTS; i++ )
+            printf( LLDFMT15, PAPI_values3[i] - PAPI_values2[i] );
+#endif
+
+        printf( "\n\nVerification:\n" );
+        printf( "From start to first PAPI_read %d fp operations are made.\n",
+                2 * MAX * TIMES );
+        printf( "Between 1st and 2nd PAPI_read %d fp operations are made.\n",
+                2 * MAX * TIMES );
+        printf( "Between 2nd and 3rd PAPI_read %d fp operations are made.\n",
+                0 );
+        printf( "\n" );
+    }
+
+#ifndef RESET
+    /* The verdict must not depend on verbosity: this check used to sit
+       inside the !quiet block and so never ran in quiet mode. */
+    for ( i = 0; i < PAPI_MAX_EVENTS; i++ ) {
+        if ( PAPI_values3[i] - PAPI_values2[i] < 0 ) {
+            test_fail( __FILE__, __LINE__, "Multiplexed counter decreased", 1 );
+        }
+    }
+#endif
+
+    test_pass( __FILE__ );
+
+    return 0;
+
+}
diff --git a/src/ctests/mpifirst.c b/src/ctests/mpifirst.c
new file mode 100644
index 0000000..f77d38f
--- /dev/null
+++ b/src/ctests/mpifirst.c
@@ -0,0 +1,177 @@
+/* This file performs the following test: start, read, stop and again functionality
+
+   - It attempts to use the following three counters. It may use less depending on
+     hardware counter resource limitations. These are counted in the default counting
+     domain and default granularity, depending on the platform. Usually this is
+     the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR).
+     + PAPI_FP_INS or PAPI_TOT_INS if PAPI_FP_INS doesn't exist
+     + PAPI_TOT_CYC
+   - Start counters
+   - Do flops
+   - Read counters
+   - Reset counters
+   - Do flops
+   - Read counters
+   - Do flops
+   - Read counters
+   - Do flops
+   - Stop and read counters
+   - Read counters
+*/
+
+/* NOTE(review): the system header names were stripped from this copy of the
+ * patch (three bare #include lines).  The two below are the ones the code
+ * needs (printf/snprintf; MPI_Init/MPI_Finalize) -- confirm against upstream. */
+#include <stdio.h>
+#include <mpi.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+/* do_flops() and NUM_FLOPS are used below; the sibling tests in this patch
+   that use them include this header. */
+#include "do_loops.h"
+
+/*
+ * Under MPI, runs the start / read / reset / read / read / stop / read
+ * sequence from the file header and checks, for both the instruction event
+ * and PAPI_TOT_CYC, that columns 1, 3 and 4 are within 10% of 1x, 2x and 3x
+ * column 2 and that column 5 equals column 4.
+ */
+int
+main( int argc, char **argv )
+{
+    int retval, num_tests = 5, num_events, tmp;
+    long long **values;
+    int EventSet = PAPI_NULL;
+    int PAPI_event, mask;
+    char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN];
+    int quiet;
+
+    /* Set TESTS_QUIET variable */
+    quiet=tests_quiet( argc, argv );
+
+
+    /* MPI_Init takes the addresses of argc and argv, not their values. */
+    MPI_Init( &argc, &argv );
+
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if ( retval != PAPI_VER_CURRENT )
+        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
+
+    /* query and set up the right instruction to monitor */
+    if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) {
+        PAPI_event = PAPI_FP_INS;
+        mask = MASK_FP_INS | MASK_TOT_CYC;
+    } else {
+        PAPI_event = PAPI_TOT_INS;
+        mask = MASK_TOT_INS | MASK_TOT_CYC;
+    }
+
+    retval = PAPI_event_code_to_name( PAPI_event, event_name );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval );
+    /* Both buffers are PAPI_MAX_STR_LEN long, so prefix + name can exceed
+       the destination; bound the write. */
+    snprintf( add_event_str, sizeof add_event_str,
+              "PAPI_add_event[%s]", event_name );
+
+    EventSet = add_test_events( &num_events, &mask );
+
+    values = allocate_test_space( num_tests, num_events );
+
+    retval = PAPI_start( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+
+    do_flops( NUM_FLOPS );
+
+    retval = PAPI_read( EventSet, values[0] );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+
+    retval = PAPI_reset( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_reset", retval );
+
+    do_flops( NUM_FLOPS );
+
+    retval = PAPI_read( EventSet, values[1] );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+
+    do_flops( NUM_FLOPS );
+
+    retval = PAPI_read( EventSet, values[2] );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+
+    do_flops( NUM_FLOPS );
+
+    retval = PAPI_stop( EventSet, values[3] );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_stop", retval );
+
+    retval = PAPI_read( EventSet, values[4] );
+    if ( retval != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_read", retval );
+
+    remove_test_events( &EventSet, mask );
+
+    if ( !quiet ) {
+        printf( "Test case 1: Non-overlapping start, stop, read.\n" );
+        printf( "-----------------------------------------------\n" );
+        tmp = PAPI_get_opt( PAPI_DEFDOM, NULL );
+        printf( "Default domain is: %d (%s)\n", tmp,
+                stringify_all_domains( tmp ) );
+        tmp = PAPI_get_opt( PAPI_DEFGRN, NULL );
+        printf( "Default granularity is: %d (%s)\n", tmp,
+                stringify_granularity( tmp ) );
+        printf( "Using %d iterations of c += a*b\n", NUM_FLOPS );
+        printf
+            ( "-------------------------------------------------------------------------\n" );
+
+        printf( "Test type : \t1\t\t2\t\t3\t\t4\t\t5\n" );
+        snprintf( add_event_str, sizeof add_event_str, "%s : ", event_name );
+        printf( TAB5, add_event_str,
+                ( values[0] )[0], ( values[1] )[0], ( values[2] )[0],
+                ( values[3] )[0], ( values[4] )[0] );
+        printf( TAB5, "PAPI_TOT_CYC: ", ( values[0] )[1], ( values[1] )[1],
+                ( values[2] )[1], ( values[3] )[1], ( values[4] )[1] );
+        printf
+            ( "-------------------------------------------------------------------------\n" );
+
+        printf( "Verification:\n" );
+        printf( "Column 1 approximately equals column 2\n" );
+        printf( "Column 3 approximately equals 2 * column 2\n" );
+        printf( "Column 4 approximately equals 3 * column 2\n" );
+        printf( "Column 4 exactly equals column 5\n" );
+    }
+
+    {
+        long long min, max;
+        min = ( long long ) ( values[1][0] * .9 );
+        max = ( long long ) ( values[1][0] * 1.1 );
+
+        if ( values[0][0] > max || values[0][0] < min ||
+             values[2][0] > ( 2 * max )
+             || values[2][0] < ( 2 * min ) || values[3][0] > ( 3 * max )
+             || values[3][0] < ( 3 * min )
+             || values[3][0] != values[4][0] ) {
+            printf( "min: " );
+            printf( LLDFMT, min );
+            printf( "max: " );
+            printf( LLDFMT, max );
+            printf( "1st: " );
+            printf( LLDFMT, values[0][0] );
+            printf( "2nd: " );
+            printf( LLDFMT, values[1][0] );
+            printf( "3rd: " );
+            printf( LLDFMT, values[2][0] );
+            printf( "4th: " );
+            printf( LLDFMT, values[3][0] );
+            printf( "5th: " );
+            printf( LLDFMT, values[4][0] );
+            printf( "\n" );
+            test_fail( __FILE__, __LINE__, event_name, 1 );
+        }
+
+        min = ( long long ) ( values[1][1] * .9 );
+        max = ( long long ) ( values[1][1] * 1.1 );
+        if ( values[0][1] > max || values[0][1] < min ||
+             values[2][1] > ( 2 * max )
+             || values[2][1] < ( 2 * min ) || values[3][1] > ( 3 * max )
+             || values[3][1] < ( 3 * min )
+             || values[3][1] != values[4][1] ) {
+            test_fail( __FILE__, __LINE__, "PAPI_TOT_CYC", 1 );
+        }
+    }
+
+    /* Shut MPI down before reporting, so it happens even if test_pass()
+       does not return.
+       NOTE(review): every other test in this patch calls test_pass() with
+       the file name only; this call is aligned with them -- confirm against
+       papi_test.h. */
+    MPI_Finalize( );
+
+    test_pass( __FILE__ );
+
+    /* Success: the former exit( 1 ) reported failure to the launcher. */
+    return 0;
+}
diff --git a/src/ctests/multiattach.c b/src/ctests/multiattach.c
new file mode 100644
index 0000000..2dccc65
--- /dev/null
+++ b/src/ctests/multiattach.c
@@ -0,0 +1,379 @@
+/* This file performs the following test: start, stop and timer functionality for
+   multiple attached processes.
+
+   - It attempts to use the following two counters. It may use less depending on
+     hardware counter resource limitations. These are counted in the default counting
+     domain and default granularity, depending on the platform. Usually this is
+     the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR).
+     + PAPI_FP_INS
+     + PAPI_TOT_CYC
+   - Get us.
+   - Start counters
+   - Do flops
+   - Stop and read counters
+   - Get us.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#include "do_loops.h"
+
+#ifdef _AIX
+#define _LINUX_SOURCE_COMPAT
+#endif
+
+#if defined(__FreeBSD__)
+# define PTRACE_ATTACH PT_ATTACH
+# define PTRACE_CONT PT_CONTINUE
+#endif
+
+/* The second child runs MULTIPLIER times as many flops as the first */
+#define MULTIPLIER 5
+
+/* Child body: stop ourselves, run num batches of NUM_FLOPS flops once
+   resumed, then stop ourselves again before returning 0. */
+static int
+wait_for_attach_and_loop( int num )
+{
+    kill( getpid( ), SIGSTOP );
+    do_flops( NUM_FLOPS * num );
+    kill( getpid( ), SIGSTOP );
+    return 0;
+}
+
+/* waitpid() on one child and return its status; exits with status 1 if
+   waitpid() itself fails. */
+static int
+wait_child( pid_t pid )
+{
+    int status;
+
+    if ( waitpid( pid, &status, 0 ) == -1 ) {
+        perror( "waitpid()" );
+        exit( 1 );
+    }
+    return status;
+}
+
+/* PTRACE_ATTACH to a child and wait until it is reported as stopped. */
+static void
+attach_child( pid_t pid )
+{
+    int status;
+
+    if ( ptrace( PTRACE_ATTACH, pid, NULL, NULL ) == -1 ) {
+        perror( "ptrace(PTRACE_ATTACH)" );
+        exit( 1 );
+    }
+    status = wait_child( pid );
+    if ( WIFSTOPPED( status ) == 0 ) {
+        test_fail( __FILE__, __LINE__,
+                   "Child process didnt return true to WIFSTOPPED", 0 );
+    }
+}
+
+/* PTRACE_CONT a child without waiting for it. */
+static void
+resume_child( pid_t pid )
+{
+    if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) {
+        perror( "ptrace(PTRACE_CONT)" );
+        exit( 1 );
+    }
+}
+
+/* Resume a child and wait until it stops again on SIGSTOP. */
+static void
+resume_child_until_sigstop( pid_t pid )
+{
+    int status;
+
+    resume_child( pid );
+    status = wait_child( pid );
+    if ( WIFSTOPPED( status ) == 0 ) {
+        test_fail( __FILE__, __LINE__,
+                   "Child process didn't return true to WIFSTOPPED", 0 );
+    }
+    if ( WSTOPSIG( status ) != SIGSTOP ) {
+        test_fail( __FILE__, __LINE__,
+                   "Child process didn't stop on SIGSTOP", 0 );
+    }
+}
+
+/* Wait for a child and fail the test unless it exited normally. */
+static void
+reap_child( pid_t pid )
+{
+    int status = wait_child( pid );
+
+    if ( WIFEXITED( status ) == 0 ) {
+        test_fail( __FILE__, __LINE__,
+                   "Child process didn't return true to WIFEXITED", 0 );
+    }
+}
+
+/* Forks two children (1x and MULTIPLIER x the flops), attaches one event
+   set to each, counts while they run, and checks that both counter
+   columns of the second child are about MULTIPLIER times the first's. */
+int
+main( int argc, char **argv )
+{
+    int retval, num_tests = 2, tmp;
+    int EventSet1 = PAPI_NULL, EventSet2 = PAPI_NULL;
+    int PAPI_event, PAPI_event2, mask1, mask2;
+    int num_events1, num_events2;
+    long long **values;
+    long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc;
+    char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN];
+    const PAPI_component_info_t *cmpinfo;
+    pid_t pid, pid2;
+    double ratio1,ratio2;
+    double ratio1_high,ratio1_low,ratio2_high,ratio2_low;
+
+    /* Set TESTS_QUIET variable */
+    tests_quiet( argc, argv );
+
+    /* Initialize the library */
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if ( retval != PAPI_VER_CURRENT ) {
+        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
+    }
+
+    /* get the component info and check if we support attach */
+    if ( ( cmpinfo = PAPI_get_component_info( 0 ) ) == NULL ) {
+        test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 0 );
+    }
+
+    if ( cmpinfo->attach == 0 ) {
+        test_skip( __FILE__, __LINE__,
+                   "Platform does not support attaching", 0 );
+    }
+
+    /* fork off first child */
+    pid = fork( );
+    if ( pid < 0 ) {
+        test_fail( __FILE__, __LINE__, "fork()", PAPI_ESYS );
+    }
+    if ( pid == 0 ) {
+        exit( wait_for_attach_and_loop( 1 ) );
+    }
+
+    /* fork off second child, does MULTIPLIER times as much */
+    pid2 = fork( );
+    if ( pid2 < 0 ) {
+        test_fail( __FILE__, __LINE__, "fork()", PAPI_ESYS );
+    }
+    if ( pid2 == 0 ) {
+        exit( wait_for_attach_and_loop( MULTIPLIER ) );
+    }
+
+    /* add PAPI_TOT_CYC and one of the events in
+       PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS,
+       depending on the availability of the event
+       on the platform */
+    EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 );
+    EventSet2 = add_two_events( &num_events2, &PAPI_event2, &mask2 );
+
+    if ( cmpinfo->attach_must_ptrace ) {
+        attach_child( pid );
+        attach_child( pid2 );
+    }
+
+    retval = PAPI_attach( EventSet1, ( unsigned long ) pid );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_attach", retval );
+    }
+
+    retval = PAPI_attach( EventSet2, ( unsigned long ) pid2 );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_attach", retval );
+    }
+
+    strcpy(event_name, "PAPI_TOT_INS");
+    sprintf( add_event_str, "PAPI_add_event[%s]", event_name );
+
+    /* num_events1 is greater than num_events2 so don't worry. */
+
+    values = allocate_test_space( num_tests, num_events1 );
+
+    /* Gather before values */
+    elapsed_us = PAPI_get_real_usec( );
+    elapsed_cyc = PAPI_get_real_cyc( );
+    elapsed_virt_us = PAPI_get_virt_usec( );
+    elapsed_virt_cyc = PAPI_get_virt_cyc( );
+
+    /* Let each child run up to its first self-inflicted SIGSTOP. */
+    if ( cmpinfo->attach_must_ptrace ) {
+        resume_child_until_sigstop( pid );
+        resume_child_until_sigstop( pid2 );
+    }
+
+    /* start measuring in first child */
+    retval = PAPI_start( EventSet1 );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+    }
+
+    /* start measuring in second child */
+    retval = PAPI_start( EventSet2 );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+    }
+
+    /* Run each child through its flops, up to its second SIGSTOP. */
+    if ( cmpinfo->attach_must_ptrace ) {
+        resume_child_until_sigstop( pid );
+        resume_child_until_sigstop( pid2 );
+    }
+
+    elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us;
+    elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc;
+    elapsed_us = PAPI_get_real_usec( ) - elapsed_us;
+    elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc;
+
+    /* stop measuring and read first child */
+    retval = PAPI_stop( EventSet1, values[0] );
+    if ( retval != PAPI_OK ) {
+        printf( "Warning: PAPI_stop returned error %d, probably ok.\n",
+                retval );
+    }
+
+    /* stop measuring and read second child */
+    retval = PAPI_stop( EventSet2, values[1] );
+    if ( retval != PAPI_OK ) {
+        printf( "Warning: PAPI_stop returned error %d, probably ok.\n",
+                retval );
+    }
+
+    /* close down the measurements */
+    remove_test_events( &EventSet1, mask1 );
+    remove_test_events( &EventSet2, mask2 );
+
+    /* restart the children so they can end */
+    if ( cmpinfo->attach_must_ptrace ) {
+        resume_child( pid );
+        resume_child( pid2 );
+    }
+
+    reap_child( pid );
+    reap_child( pid2 );
+
+    /* This code isn't necessary as we know the child has exited, */
+    /* it *may* return an error if the component so chooses. You */
+    /* should use read() instead. */
+
+    if (!TESTS_QUIET) {
+        printf( "Test case: multiple 3rd party attach start, stop.\n" );
+        printf( "-----------------------------------------------\n" );
+        tmp = PAPI_get_opt( PAPI_DEFDOM, NULL );
+        printf( "Default domain is: %d (%s)\n", tmp,
+                stringify_all_domains( tmp ) );
+        tmp = PAPI_get_opt( PAPI_DEFGRN, NULL );
+        printf( "Default granularity is: %d (%s)\n", tmp,
+                stringify_granularity( tmp ) );
+        printf( "Using %d iterations of c += a*b\n", NUM_FLOPS );
+        printf( "-------------------------------------------------------------------------\n" );
+
+        sprintf( add_event_str, "(PID %jd) %-12s : \t", ( intmax_t ) pid,
+                 event_name );
+        printf( TAB1, add_event_str, values[0][1] );
+        sprintf( add_event_str, "(PID %jd) PAPI_TOT_CYC : \t",
+                 ( intmax_t ) pid );
+        printf( TAB1, add_event_str, values[0][0] );
+        sprintf( add_event_str, "(PID %jd) %-12s : \t", ( intmax_t ) pid2,
+                 event_name );
+        printf( TAB1, add_event_str,values[1][1] );
+        sprintf( add_event_str, "(PID %jd) PAPI_TOT_CYC : \t",
+                 ( intmax_t ) pid2 );
+        printf( TAB1, add_event_str, values[1][0] );
+        printf( TAB1, "Real usec : \t", elapsed_us );
+        printf( TAB1, "Real cycles : \t", elapsed_cyc );
+        printf( TAB1, "Virt usec : \t", elapsed_virt_us );
+        printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc );
+
+        printf( "-------------------------------------------------------------------------\n" );
+
+        /* pid_t is not necessarily int: print it the same way as above */
+        printf("Verification: pid %jd results should be %dx pid %jd\n",
+               ( intmax_t ) pid2,MULTIPLIER,( intmax_t ) pid );
+    }
+
+    /* NOTE(review): the summary above prints column 0 as PAPI_TOT_CYC and
+       column 1 as the instruction event, which contradicts the
+       "FLOPS"/"Cycles" labels used below -- confirm the order in which
+       add_two_events() adds the events and fix whichever side is wrong. */
+
+    /* FLOPS ratio */
+    ratio1=(double)values[1][0]/(double)values[0][0];
+
+    /* CYCLES ratio */
+    ratio2=(double)values[1][1]/(double)values[0][1];
+
+    if (!TESTS_QUIET) {
+        printf("\tFLOPS ratio %lld/%lld = %lf\n",
+               values[1][0],values[0][0],ratio1);
+    }
+
+    /* column 0 must be within 10% of MULTIPLIER */
+    ratio1_high=(double)MULTIPLIER *1.10;
+    ratio1_low=(double)MULTIPLIER * 0.90;
+
+    if ((ratio1 > ratio1_high ) || (ratio1 < ratio1_low)) {
+        printf("Ratio out of range, should be ~%lf not %lf\n",
+               (double)MULTIPLIER, ratio1);
+        test_fail( __FILE__, __LINE__,
+                   "Error: Counter ratio out of range", 0 );
+    }
+
+    if (!TESTS_QUIET) {
+        printf("\tCycles ratio %lld/%lld = %lf\n",
+               values[1][1],values[0][1],ratio2);
+    }
+
+    /* column 1 is allowed a wider 20% window */
+    ratio2_high=(double)MULTIPLIER *1.20;
+    ratio2_low=(double)MULTIPLIER * 0.80;
+
+    if ((ratio2 > ratio2_high ) || (ratio2 < ratio2_low )) {
+        printf("Ratio out of range, should be ~%lf, not %lf\n",
+               (double)MULTIPLIER, ratio2);
+        test_fail( __FILE__, __LINE__,
+                   "Known issue: Counter ratio out of range", 0 );
+    }
+
+    test_pass( __FILE__ );
+
+    return 0;
+}
diff --git a/src/ctests/multiattach2.c b/src/ctests/multiattach2.c
new file mode 100644
index 0000000..2f4cd87
--- /dev/null
+++ b/src/ctests/multiattach2.c
@@ -0,0 +1,259 @@
+/* This file performs the following test: start, stop and timer functionality for
+   an attached process as well as itself.
+
+   - It attempts to use the following two counters. It may use less depending on
+     hardware counter resource limitations. These are counted in the default counting
+     domain and default granularity, depending on the platform. Usually this is
+     the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR).
+     + PAPI_FP_INS
+     + PAPI_TOT_CYC
+   - Get us.
+   - Start counters
+   - Do flops
+   - Stop and read counters
+   - Get us.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#include "do_loops.h"
+
+
+#ifdef _AIX
+#define _LINUX_SOURCE_COMPAT
+#endif
+
+#if defined(__FreeBSD__)
+# define PTRACE_ATTACH PT_ATTACH
+# define PTRACE_CONT PT_CONTINUE
+#endif
+
+/* Child body: stop ourselves, run num batches of NUM_FLOPS flops once
+   resumed, then stop ourselves again before returning 0.
+   File-local, like the identical helper in multiattach.c. */
+static int
+wait_for_attach_and_loop( int num )
+{
+    kill( getpid( ), SIGSTOP );
+    do_flops( NUM_FLOPS * num );
+    kill( getpid( ), SIGSTOP );
+    return 0;
+}
+
+/* waitpid() on the child and return its status; exits with status 1 if
+   waitpid() itself fails. */
+static int
+wait_child( pid_t pid )
+{
+    int status;
+
+    if ( waitpid( pid, &status, 0 ) == -1 ) {
+        perror( "waitpid()" );
+        exit( 1 );
+    }
+    return status;
+}
+
+/* PTRACE_CONT the child without waiting for it. */
+static void
+resume_child( pid_t pid )
+{
+    if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) {
+        perror( "ptrace(PTRACE_CONT)" );
+        exit( 1 );
+    }
+}
+
+/* Resume the child and wait until it stops again on SIGSTOP. */
+static void
+resume_child_until_sigstop( pid_t pid )
+{
+    int status;
+
+    resume_child( pid );
+    status = wait_child( pid );
+    if ( WIFSTOPPED( status ) == 0 ) {
+        test_fail( __FILE__, __LINE__,
+                   "Child process didn't return true to WIFSTOPPED", 0 );
+    }
+    if ( WSTOPSIG( status ) != SIGSTOP ) {
+        test_fail( __FILE__, __LINE__,
+                   "Child process didn't stop on SIGSTOP", 0 );
+    }
+}
+
+/* Forks one child, leaves EventSet1 unattached (the summary labels it
+   "PID self"), attaches EventSet2 to the child, and prints both sets of
+   counts.  No numeric verification is performed. */
+int
+main( int argc, char **argv )
+{
+    int status, retval, num_tests = 2, tmp;
+    int EventSet1 = PAPI_NULL, EventSet2 = PAPI_NULL;
+    int PAPI_event, PAPI_event2, mask1, mask2;
+    int num_events1, num_events2;
+    long long **values;
+    long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc;
+    char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN];
+    const PAPI_component_info_t *cmpinfo;
+    pid_t pid;
+
+    /* Set TESTS_QUIET variable */
+    tests_quiet( argc, argv );
+
+    /* init the library */
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if ( retval != PAPI_VER_CURRENT ) {
+        test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );
+    }
+
+    /* get component info */
+    if ( ( cmpinfo = PAPI_get_component_info( 0 ) ) == NULL ) {
+        test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 0 );
+    }
+
+    /* see if we support attach */
+    if ( cmpinfo->attach == 0 ) {
+        test_skip( __FILE__, __LINE__,
+                   "Platform does not support attaching",0 );
+    }
+
+    /* fork! */
+    pid = fork( );
+    if ( pid < 0 ) {
+        test_fail( __FILE__, __LINE__, "fork()", PAPI_ESYS );
+    }
+
+    /* if child, wait_for_attach_and_loop */
+    if ( pid == 0 ) {
+        exit( wait_for_attach_and_loop( 2 ) );
+    }
+
+    /* add PAPI_TOT_CYC and one of the events in
+       PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS,
+       depending on the availability of the event
+       on the platform */
+    EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 );
+    EventSet2 = add_two_events( &num_events2, &PAPI_event2, &mask2 );
+
+    if ( cmpinfo->attach_must_ptrace ) {
+        if ( ptrace( PTRACE_ATTACH, pid, NULL, NULL ) == -1 ) {
+            perror( "ptrace(PTRACE_ATTACH)" );
+            return 1;
+        }
+        status = wait_child( pid );
+        if ( WIFSTOPPED( status ) == 0 ) {
+            test_fail( __FILE__, __LINE__,
+                       "Child process didnt return true to WIFSTOPPED", 0 );
+        }
+    }
+
+    retval = PAPI_attach( EventSet2, ( unsigned long ) pid );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_attach", retval );
+    }
+
+    strcpy(event_name,"PAPI_TOT_INS");
+    sprintf( add_event_str, "PAPI_add_event[%s]", event_name );
+
+    /* num_events1 is greater than num_events2 so don't worry. */
+
+    values = allocate_test_space( num_tests, num_events1 );
+
+    /* get before values */
+    elapsed_us = PAPI_get_real_usec( );
+    elapsed_cyc = PAPI_get_real_cyc( );
+    elapsed_virt_us = PAPI_get_virt_usec( );
+    elapsed_virt_cyc = PAPI_get_virt_cyc( );
+
+    /* Let the child run up to its first self-inflicted SIGSTOP. */
+    if ( cmpinfo->attach_must_ptrace ) {
+        resume_child_until_sigstop( pid );
+    }
+
+    retval = PAPI_start( EventSet1 );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+    }
+
+    retval = PAPI_start( EventSet2 );
+    if ( retval != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+    }
+
+    /* Run the child through its flops, up to its second SIGSTOP. */
+    if ( cmpinfo->attach_must_ptrace ) {
+        resume_child_until_sigstop( pid );
+    }
+
+    elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us;
+    elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc;
+    elapsed_us = PAPI_get_real_usec( ) - elapsed_us;
+    elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc;
+
+    retval = PAPI_stop( EventSet1, values[0] );
+    if ( retval != PAPI_OK ) {
+        printf( "Warning: PAPI_stop returned error %d, probably ok.\n",
+                retval );
+    }
+
+    retval = PAPI_stop( EventSet2, values[1] );
+    if ( retval != PAPI_OK ) {
+        printf( "Warning: PAPI_stop returned error %d, probably ok.\n",
+                retval );
+    }
+
+    remove_test_events( &EventSet1, mask1 );
+    remove_test_events( &EventSet2, mask2 );
+
+    /* restart the child so it can end */
+    if ( cmpinfo->attach_must_ptrace ) {
+        resume_child( pid );
+    }
+
+    status = wait_child( pid );
+    if ( WIFEXITED( status ) == 0 ) {
+        test_fail( __FILE__, __LINE__,
+                   "Child process didn't return true to WIFEXITED", 0 );
+    }
+
+    /* This code isn't necessary as we know the child has exited,
+       it *may* return an error if the component so chooses.
+       You should use read() instead. */
+
+    if (!TESTS_QUIET) {
+        printf( "Test case: multiple 3rd party attach start, stop.\n" );
+        printf( "-----------------------------------------------\n" );
+        tmp = PAPI_get_opt( PAPI_DEFDOM, NULL );
+        printf( "Default domain is: %d (%s)\n", tmp, stringify_all_domains( tmp ) );
+        tmp = PAPI_get_opt( PAPI_DEFGRN, NULL );
+        printf( "Default granularity is: %d (%s)\n", tmp,
+                stringify_granularity( tmp ) );
+        printf( "Using %d iterations of c += a*b\n", NUM_FLOPS );
+        printf( "-------------------------------------------------------------------------\n" );
+
+        sprintf( add_event_str, "(PID self) %-12s : \t",
+                 event_name );
+        printf( TAB1, add_event_str, values[0][1] );
+        sprintf( add_event_str, "(PID self) PAPI_TOT_CYC : \t" );
+        printf( TAB1, add_event_str, values[0][0] );
+        sprintf( add_event_str, "(PID %jd) %-12s : \t", ( intmax_t ) pid,
+                 event_name );
+        printf( TAB1, add_event_str, values[1][1] );
+        sprintf( add_event_str, "(PID %jd) PAPI_TOT_CYC : \t",
+                 ( intmax_t ) pid );
+        printf( TAB1, add_event_str, values[1][0] );
+        printf( TAB1, "Real usec : \t", elapsed_us );
+        printf( TAB1, "Real cycles : \t", elapsed_cyc );
+        printf( TAB1, "Virt usec : \t", elapsed_virt_us );
+        printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc );
+
+        printf( "-------------------------------------------------------------------------\n" );
+
+        printf( "Verification: none\n" );
+
+    }
+
+    test_pass( __FILE__ );
+
+    return 0;
+}
diff --git a/src/ctests/multiplex1.c b/src/ctests/multiplex1.c
new file mode 100644
index 0000000..1840c2f
--- /dev/null
+++ 
b/src/ctests/multiplex1.c
@@ -0,0 +1,453 @@
+/*
+* File:    multiplex1.c
+* Author:  Philip Mucci
+*          mucci@cs.utk.edu
+* Mods:
+*
+*/
+
+/* This file tests the multiplex functionality, originally developed by
+   John May of LLNL. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#include "do_loops.h"
+
+/* Event to use in all cases; initialized in init_papi() */
+
+#define TOTAL_EVENTS 6
+
+/* Candidate event lists; each one is terminated by a 0 entry */
+int solaris_preset_PAPI_events[TOTAL_EVENTS] = {
+    PAPI_TOT_CYC, PAPI_BR_MSP, PAPI_L2_TCM, PAPI_L1_ICM, 0
+};
+int power6_preset_PAPI_events[TOTAL_EVENTS] = {
+    PAPI_TOT_CYC, PAPI_FP_INS, PAPI_L1_DCM, PAPI_L1_ICM, 0
+};
+int preset_PAPI_events[TOTAL_EVENTS] = {
+    PAPI_TOT_CYC, PAPI_FP_INS, PAPI_TOT_INS, PAPI_L1_DCM, PAPI_L1_ICM, 0
+};
+
+static int PAPI_events[TOTAL_EVENTS] = { 0, };
+static int PAPI_events_len = 0;
+
+/* Initializes PAPI and its multiplex support, then copies into out_events
+   the candidate events that PAPI_query_event() accepts.
+   On entry *len is the maximum number of events wanted; on return it is
+   the number actually stored.  The test is skipped when multiplexing is
+   unsupported or no candidate event exists. */
+static void
+init_papi( int *out_events, int *len )
+{
+    int retval;
+    int i, real_len = 0;
+    int *in_events = preset_PAPI_events;
+    const PAPI_hw_info_t *hw_info;
+
+    /* Initialize the library */
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if ( retval != PAPI_VER_CURRENT ) {
+        test_fail(__FILE__,__LINE__, "PAPI_library_init", retval );
+    }
+
+    hw_info = PAPI_get_hardware_info( );
+    if ( hw_info == NULL ) {
+        test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 );
+    }
+
+    if ( strstr( hw_info->model_string, "UltraSPARC" ) ) {
+        in_events = solaris_preset_PAPI_events;
+    }
+
+    if ( strcmp( hw_info->model_string, "POWER6" ) == 0 ) {
+        in_events = power6_preset_PAPI_events;
+        retval = PAPI_set_domain( PAPI_DOM_ALL );
+        if ( retval != PAPI_OK )
+            test_fail(__FILE__,__LINE__, "PAPI_set_domain", retval );
+    }
+
+    retval = PAPI_multiplex_init( );
+    if ( retval == PAPI_ENOSUPP) {
+        test_skip(__FILE__, __LINE__, "Multiplex not supported", 1);
+    }
+    else if ( retval != PAPI_OK ) {
+        test_fail(__FILE__,__LINE__, "PAPI_multiplex_init", retval );
+    }
+
+    for ( i = 0; in_events[i] != 0; i++ ) {
+        /* query and set up the right instruction to monitor */
+        retval = PAPI_query_event( in_events[i] );
+        if ( retval == PAPI_OK ) {
+            out_events[real_len++] = in_events[i];
+            if ( real_len == *len )
+                break;
+        } else {
+            /* the name is only needed for the diagnostic */
+            char out[PAPI_MAX_STR_LEN];
+
+            PAPI_event_code_to_name( in_events[i], out );
+            if ( !TESTS_QUIET )
+                printf( "%s does not exist\n", out );
+        }
+    }
+
+    if ( real_len < 1 ) {
+        if (!TESTS_QUIET) printf("Trouble adding events\n");
+        test_skip(__FILE__,__LINE__, "No counters available", 0 );
+    }
+    *len = real_len;
+}
+
+/* Tests that PAPI_multiplex_init does not mess with normal operation. */
+
+int
+case1( void )
+{
+    int retval, i, EventSet = PAPI_NULL;
+    /* zeroed: both slots are printed even if only one event was found */
+    long long values[2] = { 0, 0 };
+
+    PAPI_events_len = 2;
+    init_papi( PAPI_events, &PAPI_events_len );
+
+    retval = PAPI_create_eventset( &EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_create_eventset", retval );
+
+    for ( i = 0; i < PAPI_events_len; i++ ) {
+        char out[PAPI_MAX_STR_LEN];
+
+        retval = PAPI_add_event( EventSet, PAPI_events[i] );
+        if ( retval != PAPI_OK )
+            test_fail(__FILE__,__LINE__, "PAPI_add_event", retval );
+        PAPI_event_code_to_name( PAPI_events[i], out );
+        if ( !TESTS_QUIET )
+            printf( "Added %s\n", out );
+    }
+
+    do_stuff( );
+
+    /* keep the return code so a failure reports the real error */
+    retval = PAPI_start( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_start", retval );
+
+    do_stuff( );
+
+    retval = PAPI_stop( EventSet, values );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_stop", retval );
+
+    if ( !TESTS_QUIET ) {
+        test_print_event_header( "case1:", EventSet );
+        printf( TAB2, "case1:", values[0], values[1] );
+    }
+    retval = PAPI_cleanup_eventset( EventSet );    /* JT */
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_cleanup_eventset", retval );
+
+    PAPI_shutdown( );
+    return ( SUCCESS );
+}
+
+/* Tests that PAPI_set_multiplex() works before adding events */
+
+int
+case2( void )
+{
+    int retval, i, EventSet = PAPI_NULL;
+    /* zeroed: both slots are printed even if only one event was found */
+    long long values[2] = { 0, 0 };
+
+    PAPI_events_len = 2;
+    init_papi( PAPI_events, &PAPI_events_len );
+
+    retval = PAPI_create_eventset( &EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_create_eventset", retval );
+
+    /* In Component PAPI, EventSets must be assigned a component index
+       before you can fiddle with their internals.
+       0 is always the cpu component */
+    retval = PAPI_assign_eventset_component( EventSet, 0 );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_assign_eventset_component", retval );
+
+    retval = PAPI_set_multiplex( EventSet );
+    if ( retval == PAPI_ENOSUPP) {
+        test_skip(__FILE__, __LINE__, "Multiplex not supported", 1);
+    }
+    else if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_set_multiplex", retval );
+
+    for ( i = 0; i < PAPI_events_len; i++ ) {
+        char out[PAPI_MAX_STR_LEN];
+
+        retval = PAPI_add_event( EventSet, PAPI_events[i] );
+        if ( retval != PAPI_OK )
+            test_fail(__FILE__,__LINE__, "PAPI_add_event", retval );
+        PAPI_event_code_to_name( PAPI_events[i], out );
+        if ( !TESTS_QUIET )
+            printf( "Added %s\n", out );
+    }
+
+    do_stuff( );
+
+    /* keep the return code so a failure reports the real error */
+    retval = PAPI_start( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_start", retval );
+
+    do_stuff( );
+
+    retval = PAPI_stop( EventSet, values );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_stop", retval );
+
+    if ( !TESTS_QUIET ) {
+        test_print_event_header( "case2:", EventSet );
+        printf( TAB2, "case2:", values[0], values[1] );
+    }
+
+    retval = PAPI_cleanup_eventset( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_cleanup_eventset", retval );
+
+    PAPI_shutdown( );
+    return ( SUCCESS );
+}
+
+/* Tests that PAPI_set_multiplex() works after adding events */
+
+int
+case3( void )
+{
+    int retval, i, EventSet = PAPI_NULL;
+    /* zeroed: both slots are printed even if only one event was found */
+    long long values[2] = { 0, 0 };
+
+    PAPI_events_len = 2;
+    init_papi( PAPI_events, &PAPI_events_len );
+
+    retval = PAPI_create_eventset( &EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_create_eventset", retval );
+
+    for ( i = 0; i < PAPI_events_len; i++ ) {
+        char out[PAPI_MAX_STR_LEN];
+
+        retval = PAPI_add_event( EventSet, PAPI_events[i] );
+        if ( retval != PAPI_OK )
+            test_fail(__FILE__,__LINE__, "PAPI_add_event", retval );
+        PAPI_event_code_to_name( PAPI_events[i], out );
+        if ( !TESTS_QUIET )
+            printf( "Added %s\n", out );
+    }
+
+    retval = PAPI_set_multiplex( EventSet );
+    if ( retval == PAPI_ENOSUPP) {
+        test_skip(__FILE__, __LINE__, "Multiplex not supported", 1);
+    } else if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_set_multiplex", retval );
+
+    do_stuff( );
+
+    /* keep the return code so a failure reports the real error */
+    retval = PAPI_start( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_start", retval );
+
+    do_stuff( );
+
+    retval = PAPI_stop( EventSet, values );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_stop", retval );
+
+    if ( !TESTS_QUIET ) {
+        test_print_event_header( "case3:", EventSet );
+        printf( TAB2, "case3:", values[0], values[1] );
+    }
+
+    retval = PAPI_cleanup_eventset( EventSet );    /* JT */
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_cleanup_eventset", retval );
+
+    PAPI_shutdown( );
+    return ( SUCCESS );
+}
+
+/* Tests that PAPI_add_event() works after
+   PAPI_add_event()/PAPI_set_multiplex() */
+
+int
+case4( void )
+{
+    int retval, i, EventSet = PAPI_NULL;
+    /* zeroed: values[1] is printed even if PAPI_stop() fills only one slot */
+    long long values[4] = { 0, 0, 0, 0 };
+    char out[PAPI_MAX_STR_LEN];
+
+    PAPI_events_len = 2;
+    init_papi( PAPI_events, &PAPI_events_len );
+
+    retval = PAPI_create_eventset( &EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_create_eventset", retval );
+
+    i = 0;
+    retval = PAPI_add_event( EventSet, PAPI_events[i] );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_add_event", retval );
+    PAPI_event_code_to_name( PAPI_events[i], out );
+    if (!TESTS_QUIET) printf( "Added %s\n", out );
+
+    retval = PAPI_set_multiplex( EventSet );
+    if ( retval == PAPI_ENOSUPP) {
+        test_skip(__FILE__, __LINE__, "Multiplex not supported", 1);
+    }
+    else if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_set_multiplex", retval );
+
+    i = 1;
+    retval = PAPI_add_event( EventSet, PAPI_events[i] );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_add_event", retval );
+    PAPI_event_code_to_name( PAPI_events[i], out );
+    if (!TESTS_QUIET) printf( "Added %s\n", out );
+
+    do_stuff( );
+
+    /* keep the return code so a failure reports the real error */
+    retval = PAPI_start( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_start", retval );
+
+    do_stuff( );
+
+    retval = PAPI_stop( EventSet, values );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_stop", retval );
+
+    if ( !TESTS_QUIET ) {
+        test_print_event_header( "case4:", EventSet );
+        printf( TAB2, "case4:", values[0], values[1] );
+    }
+
+    retval = PAPI_cleanup_eventset( EventSet );    /* JT */
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_cleanup_eventset", retval );
+
+    PAPI_shutdown( );
+    return ( SUCCESS );
+}
+
+/* Tests that PAPI_read() works immediately after
+   PAPI_start() */
+
+int
+case5( void )
+{
+    int retval, i, j, EventSet = PAPI_NULL;
+    long long start_values[4] = { 0,0,0,0 }, values[4] = {0,0,0,0};
+    char out[PAPI_MAX_STR_LEN];
+
+    PAPI_events_len = 2;
+    init_papi( PAPI_events, &PAPI_events_len );
+
+    retval = PAPI_create_eventset( &EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_create_eventset", retval );
+
+    /* In Component PAPI, EventSets must be assigned a component index
+       before you can fiddle with their internals.
+       0 is always the cpu component */
+
+    retval = PAPI_assign_eventset_component( EventSet, 0 );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_assign_eventset_component", retval );
+
+    retval = PAPI_set_multiplex( EventSet );
+    if ( retval == PAPI_ENOSUPP) {
+        test_skip(__FILE__, __LINE__, "Multiplex not supported", 1);
+    }
+    else if ( retval != PAPI_OK ) {
+        test_fail(__FILE__,__LINE__, "PAPI_set_multiplex", retval );
+    }
+
+    /* Add 2 events... */
+
+    i = 0;
+    retval = PAPI_add_event( EventSet, PAPI_events[i] );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_add_event", retval );
+    PAPI_event_code_to_name( PAPI_events[i], out );
+    if (!TESTS_QUIET) printf( "Added %s\n", out );
+    i++;
+    retval = PAPI_add_event( EventSet, PAPI_events[i] );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_add_event", retval );
+    PAPI_event_code_to_name( PAPI_events[i], out );
+    if (!TESTS_QUIET) printf( "Added %s\n", out );
+    i++;
+
+    do_stuff( );
+
+    retval = PAPI_start( EventSet );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_start", retval );
+
+    retval = PAPI_read( EventSet, start_values );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_read", retval );
+
+    do_stuff( );
+
+    retval = PAPI_stop( EventSet, values );
+    if ( retval != PAPI_OK )
+        test_fail(__FILE__,__LINE__, "PAPI_stop", retval );
+
+    for (j=0;j
+/* NOTE(review): this copy is truncated here -- the remainder of case5(),
+   the rest of multiplex1.c and the diff header and opening lines of the
+   following file (which uses pthreads) are missing. */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#include "do_loops.h"
+
+#define TOTAL_EVENTS 10
+
+/* Candidate event lists; each one is terminated by a 0 entry */
+static int solaris_preset_PAPI_events[TOTAL_EVENTS] = {
+    PAPI_BR_MSP, PAPI_TOT_CYC, PAPI_L2_TCM, PAPI_L1_ICM, 0
+};
+
+static int power6_preset_PAPI_events[TOTAL_EVENTS] = {
+    PAPI_FP_INS, PAPI_TOT_CYC, PAPI_L1_DCM, PAPI_L1_ICM, 0
+};
+
+static int preset_PAPI_events[TOTAL_EVENTS] = {
+    PAPI_FP_INS, PAPI_TOT_INS, PAPI_L1_DCM, PAPI_L1_ICM, 0
+};
+
+static int PAPI_events[TOTAL_EVENTS] = { 0, };
+static int PAPI_events_len = 0;
+
+/* Initializes PAPI, its multiplex support and its thread support (using
+   pthread_self as the thread-id function), then copies into out_events
+   the candidate events that PAPI_query_event() accepts.
+   On entry *len is the maximum number of events wanted; on return it is
+   the number actually stored. */
+static void
+init_papi_pthreads( int *out_events, int *len )
+{
+    int retval;
+    int i, real_len = 0;
+    int *in_events = preset_PAPI_events;
+    const PAPI_hw_info_t *hw_info;
+
+    /* Initialize the library */
+    retval = PAPI_library_init( PAPI_VER_CURRENT );
+    if ( retval != PAPI_VER_CURRENT ) {
+        test_fail(__FILE__, __LINE__, "PAPI_library_init", retval );
+    }
+
+    hw_info = PAPI_get_hardware_info( );
+    if ( hw_info == NULL ) {
+        test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 );
+    }
+
+    if ( strstr( hw_info->model_string, "UltraSPARC" ) ) {
+        in_events = solaris_preset_PAPI_events;
+    }
+
+    if ( strcmp( hw_info->model_string, "POWER6" ) == 0 ) {
+        in_events = power6_preset_PAPI_events;
+        retval = PAPI_set_domain( PAPI_DOM_ALL );
+        if ( retval != PAPI_OK ) {
+            test_fail(__FILE__, __LINE__,
+                      "PAPI_set_domain", retval );
+        }
+    }
+
+    retval = PAPI_multiplex_init( );
+    if ( retval == PAPI_ENOSUPP) {
+        test_skip(__FILE__, __LINE__, "Multiplex not supported", 1);
+    }
+    else if ( retval != PAPI_OK ) {
+        test_fail(__FILE__, __LINE__, "PAPI_multiplex_init", retval );
+    }
+
+    retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self ) );
+    if (retval != PAPI_OK ) {
+        if ( retval == PAPI_ECMP )
+            test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval );
+        else
+            test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval );
+    }
+
+    for ( i = 0; in_events[i] != 0; i++ ) {
+        /* query and set up the right instruction to monitor */
+        retval = PAPI_query_event( in_events[i] );
+        if ( retval == PAPI_OK ) {
+            out_events[real_len++] = in_events[i];
+            if ( real_len == *len )
+                break;
+        } else {
+            /* the name is only needed for the diagnostic */
+            char out[PAPI_MAX_STR_LEN];
+
+            PAPI_event_code_to_name( in_events[i], out );
+            if ( !TESTS_QUIET )
+                printf( "%s does not exist\n", out );
+        }
+    }
+    if ( real_len < 1 ) {
+        if (!TESTS_QUIET) printf("No counters available\n");
+        test_skip(__FILE__, __LINE__, "No counters available", 0 );
+    }
+    *len = real_len;
+}
+
+/* Runs fn in NUM_THREADS threads and joins them.
+   Returns SUCCESS when every thread could be created, FAILURE otherwise.
+   On a creation failure the threads already started are still joined and
+   the attribute object is still destroyed. */
+static int
+do_pthreads( void *( *fn ) ( void * ) )
+{
+    int i, rc = 0, started = 0;
+    pthread_attr_t attr;
+    pthread_t id[NUM_THREADS];
+
+    pthread_attr_init( &attr );
+#ifdef PTHREAD_CREATE_UNDETACHED
+    pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED );
+#endif
+#ifdef PTHREAD_SCOPE_SYSTEM
+    rc = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM );
+    if ( rc != 0 )
+        test_skip( __FILE__, __LINE__, "pthread_attr_setscope", rc );
+#endif
+
+    for ( i = 0; i < NUM_THREADS; i++ ) {
+        rc = pthread_create( &id[i], &attr, fn, NULL );
+        if ( rc )
+            break;
+        started++;
+    }
+    /* only join what was actually created */
+    for ( i = 0; i < started; i++ )
+        pthread_join( id[i], NULL );
+
+    pthread_attr_destroy( &attr );
+
+    return ( rc ? FAILURE : SUCCESS );
+}
+
+/* Tests that PAPI_multiplex_init does not mess with normal operation. */
+
+static void *
+case1_pthreads( void *arg )
+{
+    ( void ) arg;    /*unused */
+    int retval, i, EventSet = PAPI_NULL;
+    /* zeroed: both slots are printed even if fewer events were added */
+    long long values[2] = { 0, 0 };
+
+    if ( ( retval = PAPI_register_thread( ) ) != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval );
+    }
+
+    if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );
+    }
+
+    for ( i = 0; i < PAPI_events_len; i++ ) {
+        char out[PAPI_MAX_STR_LEN];
+
+        retval = PAPI_add_event( EventSet, PAPI_events[i] );
+        if ( retval != PAPI_OK )
+            test_fail(__FILE__, __LINE__, "PAPI_add_event", retval );
+        PAPI_event_code_to_name( PAPI_events[i], out );
+        if ( !TESTS_QUIET )
+            printf( "Added %s\n", out );
+    }
+
+    do_stuff( );
+
+    if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_start", retval );
+
+    do_stuff( );
+
+    if ( ( retval = PAPI_stop( EventSet, values ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_stop", retval );
+
+    if ( !TESTS_QUIET ) {
+        printf( "case1 thread %4x:", ( unsigned ) pthread_self( ) );
+        test_print_event_header( "", EventSet );
+        printf( "case1 thread %4x:", ( unsigned ) pthread_self( ) );
+        printf( TAB2, "", values[0], values[1] );
+    }
+
+    if ( ( retval = PAPI_cleanup_eventset( EventSet ) ) != PAPI_OK )    /* JT */
+        test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval );
+
+    if ( ( retval = PAPI_destroy_eventset( &EventSet) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval );
+
+    if ( ( retval = PAPI_unregister_thread( ) ) != PAPI_OK )
+        test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval );
+
+    return ( ( void * ) SUCCESS );
+}
+
+/* Tests that PAPI_set_multiplex() works before adding events */
+
+static void *
+case2_pthreads( void *arg )
+{
+    ( void ) arg;    /*unused */
+    int retval, i, EventSet = PAPI_NULL;
+    long long values[2];
+
+    if ( ( retval = PAPI_register_thread( ) ) != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval );
+    }
+
+    if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) {
+        test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );
+    }
+
+    /* In Component PAPI, EventSets must be assigned a component index
+       before you can fiddle with their internals.
+ 0 is always the cpu component */ + retval = PAPI_assign_eventset_component( EventSet, 0 ); + if ( retval != PAPI_OK ) { + test_fail(__FILE__, __LINE__, "PAPI_assign_eventset_component", retval ); + } + + if ( ( retval = PAPI_set_multiplex( EventSet ) ) != PAPI_OK ) { + if ( retval == PAPI_ENOSUPP) { + test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); + } + test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); + } + + if (!TESTS_QUIET) { + printf( "++case2 thread %4x:", ( unsigned ) pthread_self( ) ); + } + + for ( i = 0; i < PAPI_events_len; i++ ) { + char out[PAPI_MAX_STR_LEN]; + + retval = PAPI_add_event( EventSet, PAPI_events[i] ); + if ( retval != PAPI_OK ) + test_fail(__FILE__, __LINE__, "PAPI_add_event", retval ); + PAPI_event_code_to_name( PAPI_events[i], out ); + if ( !TESTS_QUIET ) + printf( "Added %s\n", out ); + } + + do_stuff( ); + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + if ( ( retval = PAPI_stop( EventSet, values ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if ( !TESTS_QUIET ) { + printf( "case2 thread %4x:", ( unsigned ) pthread_self( ) ); + test_print_event_header( "", EventSet ); + printf( "case2 thread %4x:", ( unsigned ) pthread_self( ) ); + printf( TAB2, "", values[0], values[1] ); + } + + /* JT */ + if ( ( retval = PAPI_cleanup_eventset( EventSet ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + } + + if ( ( retval = PAPI_destroy_eventset( &EventSet) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + } + + if ( ( retval = PAPI_unregister_thread( ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); + } + + return ( ( void * ) SUCCESS ); +} + +/* Tests that PAPI_set_multiplex() works after adding events */ + +static void * +case3_pthreads( void *arg ) +{ + ( void ) arg; /*unused */ + int 
retval, i, EventSet = PAPI_NULL; + long long values[2]; + + if ( ( retval = PAPI_register_thread( ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); + } + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + for ( i = 0; i < PAPI_events_len; i++ ) { + char out[PAPI_MAX_STR_LEN]; + + retval = PAPI_add_event( EventSet, PAPI_events[i] ); + if ( retval != PAPI_OK ) + test_fail(__FILE__, __LINE__, "PAPI_add_event", retval ); + PAPI_event_code_to_name( PAPI_events[i], out ); + if ( !TESTS_QUIET ) + printf( "Added %s\n", out ); + } + + if ( ( retval = PAPI_set_multiplex( EventSet ) ) != PAPI_OK ) { + if ( retval == PAPI_ENOSUPP) { + test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); + } + test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); + } + do_stuff( ); + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_stuff( ); + + if ( ( retval = PAPI_stop( EventSet, values ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !TESTS_QUIET ) { + printf( "case3 thread %4x:", ( unsigned ) pthread_self( ) ); + test_print_event_header( "", EventSet ); + printf( "case3 thread %4x:", ( unsigned ) pthread_self( ) ); + printf( TAB2, "", values[0], values[1] ); + } + + if ( ( retval = PAPI_cleanup_eventset( EventSet ) ) != PAPI_OK ) /* JT */ + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + if ( ( retval = PAPI_destroy_eventset( &EventSet) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + if ( ( retval = PAPI_unregister_thread( ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); + + return ( ( void * ) SUCCESS ); +} + +/* Tests that PAPI_set_multiplex() works before/after adding events */ + +static void * +case4_pthreads( void *arg ) +{ + ( void 
) arg; /*unused */ + int retval, i, EventSet = PAPI_NULL; + long long values[4]; + char out[PAPI_MAX_STR_LEN]; + + if ( ( retval = PAPI_register_thread( ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); + } + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + i = 0; + retval = PAPI_add_event( EventSet, PAPI_events[i] ); + if ( retval != PAPI_OK ) + test_fail(__FILE__, __LINE__, "PAPI_add_event", retval ); + PAPI_event_code_to_name( PAPI_events[i], out ); + if (!TESTS_QUIET) printf( "Added %s\n", out ); + + if ( ( retval = PAPI_set_multiplex( EventSet ) ) != PAPI_OK ) { + if ( retval == PAPI_ENOSUPP) { + test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); + } + + test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); + } + i = 1; + retval = PAPI_add_event( EventSet, PAPI_events[i] ); + if ( retval != PAPI_OK ) + test_fail(__FILE__, __LINE__, "PAPI_add_event", retval ); + PAPI_event_code_to_name( PAPI_events[i], out ); + if (!TESTS_QUIET) printf( "Added %s\n", out ); + + do_stuff( ); + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + if ( ( retval = PAPI_stop( EventSet, values ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if ( !TESTS_QUIET ) { + printf( "case4 thread %4x:", ( unsigned ) pthread_self( ) ); + test_print_event_header( "", EventSet ); + printf( "case4 thread %4x:", ( unsigned ) pthread_self( ) ); + printf( TAB2, "", values[0], values[1] ); + } + + if ( ( retval = PAPI_cleanup_eventset( EventSet ) ) != PAPI_OK ) /* JT */ + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + if ( ( retval = PAPI_destroy_eventset( &EventSet) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + if ( ( retval = PAPI_unregister_thread( ) ) != PAPI_OK ) + test_fail( 
__FILE__, __LINE__, "PAPI_unregister_thread", retval ); + + return ( ( void * ) SUCCESS ); +} + +static int +case1( void ) +{ + int retval; + + PAPI_events_len = 2; + init_papi_pthreads( PAPI_events, &PAPI_events_len ); + + retval = do_pthreads( case1_pthreads ); + + PAPI_shutdown( ); + + return retval; +} + +static int +case2( void ) +{ + int retval; + + PAPI_events_len = 2; + init_papi_pthreads( PAPI_events, &PAPI_events_len ); + + retval = do_pthreads( case2_pthreads ); + + PAPI_shutdown( ); + + return retval; +} + +static int +case3( void ) +{ + int retval; + + PAPI_events_len = 2; + init_papi_pthreads( PAPI_events, &PAPI_events_len ); + + retval = do_pthreads( case3_pthreads ); + + PAPI_shutdown( ); + + return retval; +} + +static int +case4( void ) +{ + int retval; + + PAPI_events_len = 2; + init_papi_pthreads( PAPI_events, &PAPI_events_len ); + + retval = do_pthreads( case4_pthreads ); + + PAPI_shutdown( ); + + return retval; +} + +int +main( int argc, char **argv ) +{ + int retval; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if (!quiet) { + printf( "%s: Using %d threads\n\n", argv[0], NUM_THREADS ); + } + + /* Case1 */ + if (!quiet) { + printf ( "case1: Does PAPI_multiplex_init() " + "not break regular operation?\n" ); + } + + if ( case1() != SUCCESS ) { + test_fail( __FILE__, __LINE__, "case1", PAPI_ESYS ); + } + + /* Case2 */ + if (!quiet) { + printf( "case2: Does setmpx/add work?\n" ); + } + if ( case2( ) != SUCCESS ) { + test_fail( __FILE__, __LINE__, "case2", PAPI_ESYS ); + } + + /* Case3 */ + if (!quiet) { + printf( "case3: Does add/setmpx work?\n" ); + } + if ( case3( ) != SUCCESS ) { + test_fail( __FILE__, __LINE__, "case3", PAPI_ESYS ); + } + + /* Case4 */ + if (!quiet) { + printf( "case4: Does add/setmpx/add work?\n" ); + } + if ( case4( ) != SUCCESS ) { + test_fail( __FILE__, __LINE__, "case4", PAPI_ESYS ); + } + + /* Finally init PAPI? 
*/ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail(__FILE__, __LINE__, "PAPI_library_init", retval ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/multiplex2.c b/src/ctests/multiplex2.c new file mode 100644 index 0000000..d342ec5 --- /dev/null +++ b/src/ctests/multiplex2.c @@ -0,0 +1,208 @@ +/* +* File: multiplex.c +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file tests the multiplex functionality, originally developed by + John May of LLNL. */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + + +/* Tests that we can really multiplex a lot. */ + +static int +case1( void ) +{ + int retval, i, EventSet = PAPI_NULL, j = 0, k = 0, allvalid = 1; + int max_mux, nev, *events; + long long *values; + PAPI_event_info_t pset; + char evname[PAPI_MAX_STR_LEN]; + + /* Initialize PAPI */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + retval = PAPI_multiplex_init( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI multiplex init fail\n", retval ); + } + +#if 0 + if ( PAPI_set_domain( PAPI_DOM_KERNEL ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); +#endif + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + +#if 0 + if ( PAPI_set_domain( PAPI_DOM_KERNEL ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); +#endif + /* In Component PAPI, EventSets must be assigned a component index + before you can fiddle with their internals. 
+ 0 is always the cpu component */ + retval = PAPI_assign_eventset_component( EventSet, 0 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", + retval ); + } +#if 0 + if ( PAPI_set_domain( PAPI_DOM_KERNEL ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); +#endif + + retval = PAPI_set_multiplex( EventSet ); + if ( retval == PAPI_ENOSUPP) { + test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); + } + else if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); + } + + max_mux = PAPI_get_opt( PAPI_MAX_MPX_CTRS, NULL ); + if ( max_mux > 32 ) max_mux = 32; + +#if 0 + if ( PAPI_set_domain( PAPI_DOM_KERNEL ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); +#endif + + /* Fill up the event set with as many non-derived events as we can */ + if (!TESTS_QUIET) { + printf( "\nFilling the event set with as many non-derived events as we can...\n" ); + } + + i = PAPI_PRESET_MASK; + do { + if ( PAPI_get_event_info( i, &pset ) == PAPI_OK ) { + if ( pset.count && ( strcmp( pset.derived, "NOT_DERIVED" ) == 0 ) ) { + retval = PAPI_add_event( EventSet, ( int ) pset.event_code ); + if ( retval != PAPI_OK ) { + printf("Failed trying to add %s\n",pset.symbol); + break; + } + else { + if (!TESTS_QUIET) printf( "Added %s\n", pset.symbol ); + j++; + } + } + } + } while ( ( PAPI_enum_event( &i, PAPI_PRESET_ENUM_AVAIL ) == PAPI_OK ) && + ( j < max_mux ) ); + + if (j==0) { + if (!TESTS_QUIET) printf("No events found\n"); + test_skip(__FILE__,__LINE__,"No events",0); + } + + events = ( int * ) malloc( ( size_t ) j * sizeof ( int ) ); + if ( events == NULL ) + test_fail( __FILE__, __LINE__, "malloc events", 0 ); + + values = ( long long * ) malloc( ( size_t ) j * sizeof ( long long ) ); + if ( values == NULL ) + test_fail( __FILE__, __LINE__, "malloc values", 0 ); + + do_stuff( ); + +#if 0 + if ( PAPI_set_domain( PAPI_DOM_KERNEL ) != PAPI_OK ) + 
test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); +#endif + + if ( PAPI_start( EventSet ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + nev = j; + retval = PAPI_list_events( EventSet, events, &nev ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_list_events", retval ); + + if (!TESTS_QUIET) printf( "\nEvent Counts:\n" ); + for ( i = 0, allvalid = 0; i < j; i++ ) { + PAPI_event_code_to_name( events[i], evname ); + if (!TESTS_QUIET) printf( TAB1, evname, values[i] ); + if ( values[i] == 0 ) + allvalid++; + } + if (!TESTS_QUIET) { + printf( "\n" ); + if ( allvalid ) { + printf( "Caution: %d counters had zero values\n", allvalid ); + } + } + + if (allvalid==j) { + test_fail( __FILE__, __LINE__, "All counters returned zero", 5 ); + } + + for ( i = 0, allvalid = 0; i < j; i++ ) { + for ( k = i + 1; k < j; k++ ) { + if ( ( i != k ) && ( values[i] == values[k] ) ) { + allvalid++; + break; + } + } + } + + if (!TESTS_QUIET) { + if ( allvalid ) { + printf( "Caution: %d counter pair(s) had identical values\n", + allvalid ); + } + } + + free( events ); + free( values ); + + retval = PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + return ( SUCCESS ); +} + +int +main( int argc, char **argv ) +{ + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if (!quiet) { + printf( "%s: Does PAPI_multiplex_init() handle lots of events?\n", + argv[0] ); + printf( "Using %d iterations\n", NUM_ITERS ); + } + + case1( ); + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/multiplex3_pthreads.c 
b/src/ctests/multiplex3_pthreads.c new file mode 100644 index 0000000..da44d3f --- /dev/null +++ b/src/ctests/multiplex3_pthreads.c @@ -0,0 +1,256 @@ +/* +* File: multiplex3_pthreads.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: John May +* johnmay@llnl.gov +*/ + +/* This file tests the multiplex functionality when there are + * threads in which the application isn't calling PAPI (and only + * one thread that is calling PAPI.) + */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define MAX_TO_ADD 5 + +/* A thread function that does nothing forever, while the other + * tests are running. + */ +void * +thread_fn( void *dummy ) +{ + ( void ) dummy; + while ( 1 ) { + do_stuff( ); + } + return NULL; +} + +/* Runs a bunch of multiplexed events */ + +static void +mainloop( int arg ) +{ + int allvalid; + long long *values; + int EventSet = PAPI_NULL; + int retval, i, j = 2, skipped_counters=0; + PAPI_event_info_t pset; + + ( void ) arg; + + /* Initialize the library */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + retval = PAPI_multiplex_init( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI multiplex init fail\n", retval ); + } + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + /* In Component PAPI, EventSets must be assigned a component index + before you can fiddle with their internals. 
+ 0 is always the cpu component */ + retval = PAPI_assign_eventset_component( EventSet, 0 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", + retval ); + } + + retval = PAPI_set_multiplex( EventSet ); + if ( retval == PAPI_ENOSUPP) { + test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); + } else if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); + } + + retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self ) ); + if (retval != PAPI_OK ) { + if ( retval == PAPI_ECMP ) + test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); + else + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } + + retval = PAPI_add_event( EventSet, PAPI_TOT_INS ); + if ( ( retval != PAPI_OK ) && ( retval != PAPI_ECNFLCT ) ) { + if (!TESTS_QUIET) printf("Trouble adding PAPI_TOT_INS\n"); + test_skip( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + if ( !TESTS_QUIET ) { + printf( "Added %s\n", "PAPI_TOT_INS" ); + } + + retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ); + if ( ( retval != PAPI_OK ) && ( retval != PAPI_ECNFLCT ) ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + if ( !TESTS_QUIET ) { + printf( "Added %s\n", "PAPI_TOT_CYC" ); + } + + values = ( long long * ) malloc( MAX_TO_ADD * sizeof ( long long ) ); + if ( values == NULL ) + test_fail( __FILE__, __LINE__, "malloc", 0 ); + + for ( i = 0; i < PAPI_MAX_PRESET_EVENTS; i++ ) { + retval = PAPI_get_event_info( i | PAPI_PRESET_MASK, &pset ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_get_event_info", retval ); + + if ( pset.count ) { + if (!TESTS_QUIET) printf( "Adding %s\n", pset.symbol ); + + retval = PAPI_add_event( EventSet, ( int ) pset.event_code ); + if ( ( retval != PAPI_OK ) && ( retval != PAPI_ECNFLCT ) ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + if ( retval == PAPI_OK ) { + if (!TESTS_QUIET) printf( "Added %s\n", 
pset.symbol ); + } else { + if (!TESTS_QUIET) printf( "Could not add %s\n", pset.symbol ); + } + + do_stuff( ); + + if ( retval == PAPI_OK ) { + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if ( values[j] ) { + if ( ++j >= MAX_TO_ADD ) + break; + } else { + retval = + PAPI_remove_event( EventSet, ( int ) pset.event_code ); + if ( retval == PAPI_OK ) + if (!TESTS_QUIET) printf( "Removed %s\n", pset.symbol ); + /* This added because the test */ + /* can take a long time if mplexing */ + /* is broken and all values are 0 */ + skipped_counters++; + if (skipped_counters>MAX_TO_ADD) break; + + } + } + } + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if (!TESTS_QUIET) { + test_print_event_header( "multiplex3_pthreads:\n", EventSet ); + } + allvalid = 0; + for ( i = 0; i < MAX_TO_ADD; i++ ) { + if (!TESTS_QUIET) printf( ONENUM, values[i] ); + if ( values[i] != 0 ) + allvalid++; + } + if (!TESTS_QUIET) printf( "\n" ); + if ( !allvalid ) + test_fail( __FILE__, __LINE__, "all counter registered no counts", 1 ); + + retval = PAPI_cleanup_eventset( EventSet ); /* JT */ + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + free( values ); + PAPI_shutdown( ); +} + +int +main( int argc, char **argv ) +{ + int i, rc, retval; + pthread_t id[NUM_THREADS]; + pthread_attr_t attr; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); 
+ + if (!quiet) { + printf( "%s: Using %d threads\n\n", argv[0], NUM_THREADS ); + printf( "Does non-threaded multiplexing work " + "with extraneous threads present?\n" ); + } + + /* Create a bunch of unused pthreads, to simulate threads created + * by the system that the user doesn't know about. + */ + pthread_attr_init( &attr ); +#ifdef PTHREAD_CREATE_UNDETACHED + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); +#endif +#ifdef PTHREAD_SCOPE_SYSTEM + retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); + if ( retval != 0 ) + test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); +#endif + +#ifdef PPC64 + sigset_t sigprof; + sigemptyset( &sigprof ); + sigaddset( &sigprof, SIGPROF ); + retval = sigprocmask( SIG_BLOCK, &sigprof, NULL ); + if ( retval != 0 ) + test_fail( __FILE__, __LINE__, "sigprocmask SIG_BLOCK", retval ); +#endif + + for ( i = 0; i < NUM_THREADS; i++ ) { + rc = pthread_create( &id[i], &attr, thread_fn, NULL ); + if ( rc ) + test_fail( __FILE__, __LINE__, "pthread_create", rc ); + } + pthread_attr_destroy( &attr ); + +#ifdef PPC64 + retval = sigprocmask( SIG_UNBLOCK, &sigprof, NULL ); + if ( retval != 0 ) + test_fail( __FILE__, __LINE__, "sigprocmask SIG_UNBLOCK", retval ); +#endif + + mainloop( NUM_ITERS ); + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/native.c b/src/ctests/native.c new file mode 100644 index 0000000..404c37d --- /dev/null +++ b/src/ctests/native.c @@ -0,0 +1,177 @@ +/* +* File: native.c +* Mods: Maynard Johnson +* maynardj@us.ibm.com +*/ + +/* + This test defines an array of native event names, either at compile time + or at run time (some x86 platforms). It then: + - add the table of events to an event set; + - starts counting + - does a little work + - stops counting; + - reports the results. 
+*/ + +#include "papi_test.h" + +static int EventSet = PAPI_NULL; +extern int TESTS_QUIET; /* Declared in test_utils.c */ + + +#if (defined(PPC32)) + /* Select 4 events common to both ppc750 and ppc7450 */ +static char *native_name[] = { "CPU_CLK", "FLOPS", "TOT_INS", "BR_MSP", NULL +}; + +#elif defined(_POWER4) || defined(_PPC970) + /* arbitrarily code events from group 28: pm_fpu3 - Floating point events by unit */ +static char *native_name[] = + { "PM_FPU0_FDIV", "PM_FPU1_FDIV", "PM_FPU0_FRSP_FCONV", +"PM_FPU1_FRSP_FCONV", + "PM_FPU0_FMA", "PM_FPU1_FMA", "PM_INST_CMPL", "PM_CYC", NULL +}; + +#elif defined(_POWER5p) +/* arbitrarily code events from group 33: pm_fpustall - Floating Point Unit stalls */ +static char *native_name[] = + { "PM_FPU_FULL_CYC", "PM_CMPLU_STALL_FDIV", "PM_CMPLU_STALL_FPU", + "PM_RUN_INST_CMPL", "PM_RUN_CYC", NULL +}; + + +#elif defined(_POWER5) + /* arbitrarily code events from group 78: pm_fpu1 - Floating Point events */ +static char *native_name[] = + { "PM_FPU_FDIV", "PM_FPU_FMA", "PM_FPU_FMOV_FEST", "PM_FPU_FEST", + "PM_INST_CMPL", "PM_RUN_CYC", NULL +}; + +#elif defined(POWER3) +static char *native_name[] = + { "PM_IC_MISS", "PM_FPU1_CMPL", "PM_LD_MISS_L1", "PM_LD_CMPL", + "PM_FPU0_CMPL", "PM_CYC", "PM_TLB_MISS", NULL +}; + +#elif defined(__ia64__) +#ifdef ITANIUM2 +static char *native_name[] = + { "CPU_CYCLES", "L1I_READS", "L1D_READS_SET0", "IA64_INST_RETIRED", NULL +}; +#else +static char *native_name[] = + { "DEPENDENCY_SCOREBOARD_CYCLE", "DEPENDENCY_ALL_CYCLE", + "UNSTALLED_BACKEND_CYCLE", "MEMORY_CYCLE", NULL +}; +#endif + +#elif ((defined(linux) && (defined(__i386__) || (defined __x86_64__))) ) +static char *p3_native_name[] = { "DATA_MEM_REFS", "DCU_LINES_IN", NULL }; +static char *core_native_name[] = { "UnhltCore_Cycles", "Instr_Retired", NULL }; +static char *k7_native_name[] = + { "TOT_CYC", "IC_MISSES", "DC_ACCESSES", "DC_MISSES", NULL }; +// static char *k8_native_name[] = { "FP_ADD_PIPE", "FP_MULT_PIPE", "FP_ST_PIPE", 
"FP_NONE_RET", NULL }; +static char *k8_native_name[] = + { "DISPATCHED_FPU:OPS_ADD", "DISPATCHED_FPU:OPS_MULTIPLY", +"DISPATCHED_FPU:OPS_STORE", "CYCLES_NO_FPU_OPS_RETIRED", NULL }; +static char *p4_native_name[] = + { "retired_mispred_branch_type:CONDITIONAL", "resource_stall:SBFULL", + "tc_ms_xfer:CISC", "instr_retired:BOGUSNTAG:BOGUSTAG", + "BSQ_cache_reference:RD_2ndL_HITS", NULL +}; +static char **native_name = p3_native_name; + +#elif defined(mips) && defined(sgi) +static char *native_name[] = { "Primary_instruction_cache_misses", + "Primary_data_cache_misses", NULL +}; +#elif defined(mips) && defined(linux) +static char *native_name[] = { "CYCLES", NULL }; +#elif defined(sun) && defined(sparc) +static char *native_name[] = { "Cycle_cnt", "Instr_cnt", NULL }; + +#elif defined(_BGL) +static char *native_name[] = + { "BGL_UPC_PU0_PREF_STREAM_HIT", "BGL_PAPI_TIMEBASE", +"BGL_UPC_PU1_PREF_STREAM_HIT", NULL }; + +#elif defined(__bgp__) +static char *native_name[] = + { "PNE_BGP_PU0_JPIPE_LOGICAL_OPS", "PNE_BGP_PU0_JPIPE_LOGICAL_OPS", +"PNE_BGP_PU2_IPIPE_INSTRUCTIONS", NULL }; + +#else +#error "Architecture not supported in test file." 
+#endif + + +int +main( int argc, char **argv ) +{ + int i, retval, native; + const PAPI_hw_info_t *hwinfo; + long long values[8]; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + if ( ( hwinfo = PAPI_get_hardware_info( ) ) == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", PAPI_EMISC ); + + printf( "Architecture %s, %d\n", hwinfo->model_string, hwinfo->model ); + +#if ((defined(linux) && (defined(__i386__) || (defined __x86_64__))) ) + if ( !strncmp( hwinfo->model_string, "Intel Pentium 4", 15 ) ) { + native_name = p4_native_name; + } else if ( !strncmp( hwinfo->model_string, "AMD K7", 6 ) ) { + native_name = k7_native_name; + } else if ( !strncmp( hwinfo->model_string, "AMD K8", 6 ) ) { + native_name = k8_native_name; + } else if ( !strncmp( hwinfo->model_string, "Intel Core", 17 ) || + !strncmp( hwinfo->model_string, "Intel Core 2", 17 ) ) { + native_name = core_native_name; + } +#endif + + for ( i = 0; native_name[i] != NULL; i++ ) { + retval = PAPI_event_name_to_code( native_name[i], &native ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_name_to_code", retval ); + printf( "Adding %s\n", native_name[i] ); + if ( ( retval = PAPI_add_event( EventSet, native ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_both( 1000 ); + + if ( ( retval = PAPI_stop( EventSet, values ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if ( !TESTS_QUIET ) { + for ( i = 0; native_name[i] != NULL; i++ ) { + fprintf( stderr, "%-40s: ", native_name[i] ); + fprintf( 
stderr, LLDFMT, values[i] ); + fprintf( stderr, "\n" ); + } + } + + retval = PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup", retval ); + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + test_pass( __FILE__, NULL, 0 ); + exit( 0 ); +} diff --git a/src/ctests/net-mpi-test/Makefile b/src/ctests/net-mpi-test/Makefile new file mode 100644 index 0000000..12cc24a --- /dev/null +++ b/src/ctests/net-mpi-test/Makefile @@ -0,0 +1,53 @@ +CC = gcc +CC_R = gcc -pthread +CC_SHR = gcc -shared +#MXMPIPATH = /usr/local/mpich/mpich-gcc +#MXMPIPATH = /usr/local/mpich-mx +#MPICC = $(MXMPIPATH)/bin/mpicc +#MPICC = /usr/bin/mpicc +MPICC = mpicc +MPICC_SHR = $(MPICC) -shared +MPICCLD_SHR = $(MPICC_SHR) +F77 = g77 +FLAGS = -g -Wall +CFLAGS = $(FLAGS) -O3 # -DPROFILE_TIMER -DDEBUG -DVERBOSE + +BLASLIBS = -lblas +#BLASLIBS = -L/usr/local/lib -lf77blas -latlas +LAPACKLIBS = -llapack + +UTILOBJS= ../do_loops.o ../test_utils.o ../dummy.o +INCLUDE = -I.. -I../.. -I/usr/include +PAPILIB = -L../.. 
-lpapi +MPILIBS = +MPIINC = + +XTRALIBS = +PTHRLIBS = +MPILIBS = +LIBS =$(PAPILIB) -lm + +TESTS = cpi + +tests: $(TESTS) + +# Applications + +# Test programs +../test_utils.o: ../test_utils.c ../papi_test.h ../test_utils.h + $(CC) $(CFLAGS) $(INCLUDE) -c ../test_utils.c -o ../test_utils.o + +../do_loops.o: ../do_loops.c ../papi_test.h ../test_utils.h + $(CC) $(CFLAGS) $(INCLUDE) -c ../do_loops.c -o ../do_loops.o + +../dummy.o: ../dummy.c + $(CC) $(CFLAGS) $(INCLUDE) -c ../dummy.c -o ../dummy.o + +cpi: cpi.c $(UTILOBJS) + $(MPICC) $(MPFLAGS) $(CFLAGS) $(INCLUDE) $(MPIINC) $(TOPTFLAGS) cpi.c $(UTILOBJS) $(PAPILIB) $(MPILIBS) -o cpi + +#cpi: cpi.c +# $(MPICC) $(FLAGS) cpi.c -o $@ $(MPIPERFLIBS) $(XTRALIBS) $(MPILIBS) -lm + +clean: + rm -f core $(TESTS) *~ *.o diff --git a/src/ctests/net-mpi-test/cpi.c b/src/ctests/net-mpi-test/cpi.c new file mode 100644 index 0000000..0dfc6c0 --- /dev/null +++ b/src/ctests/net-mpi-test/cpi.c @@ -0,0 +1,167 @@ +/* From Dave McNamara at PSRV. Thanks! */ + +/* If an event is countable but you've exhausted the counter resources +and you try to add an event, it seems subsequent PAPI_start and/or +PAPI_stop will causes a Seg. Violation. + + I got around this by calling PAPI to get the # of countable events, +then making sure that I didn't try to add more than these number of +events. I still have a problem if someone adds Level 2 cache misses +and then adds FLOPS 'cause I didn't count FLOPS as actually requiring +2 counters. 
*/ + +#include "papi_test.h" +#include +#include +#include + +extern int TESTS_QUIET; /* Declared in test_utils.c */ +char *netevents[] = + { "LO_RX_PACKETS", "LO_TX_PACKETS", "ETH0_RX_PACKETS", "ETH0_TX_PACKETS" }; + +double +f( double a ) +{ + return ( 4.0 / ( 1.0 + a * a ) ); +} + +int +main( int argc, char **argv ) +{ + int EventSet = PAPI_NULL, EventSet1 = PAPI_NULL; + int evtcode; + int retval, i, ins = 0; + long long g1[2], g2[2]; + + int done = 0, n, myid, numprocs; + double PI25DT = 3.141592653589793238462643; + double mypi, pi, h, sum, x; + double startwtime = 0.0, endwtime; + int namelen; + char processor_name[MPI_MAX_PROCESSOR_NAME]; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( retval = PAPI_create_eventset( &EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + if ( ( retval = PAPI_create_eventset( &EventSet1 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + PAPI_event_name_to_code( netevents[2], &evtcode ); + if ( ( retval = PAPI_query_event( evtcode ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_aquery_event", retval ); + } + if ( ( retval = PAPI_add_event( EventSet, evtcode ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + PAPI_event_name_to_code( netevents[3], &evtcode ); + if ( ( retval = PAPI_query_event( evtcode ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_aquery_event", retval ); + } + if ( ( retval = PAPI_add_event( EventSet, evtcode ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + if ( ( retval = PAPI_query_event( PAPI_FP_INS ) ) != PAPI_OK ) { + if ( ( retval = 
PAPI_query_event( PAPI_FP_OPS ) ) == PAPI_OK ) { + ins = 2; + if ( ( retval = + PAPI_add_event( EventSet1, PAPI_FP_OPS ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + } + } else { + ins = 1; + if ( ( retval = PAPI_add_event( EventSet1, PAPI_FP_INS ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + } + + if ( ( retval = PAPI_add_event( EventSet1, PAPI_TOT_CYC ) ) != PAPI_OK ) { + if ( retval != PAPI_ECNFLCT ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + MPI_Init( &argc, &argv ); + + MPI_Comm_size( MPI_COMM_WORLD, &numprocs ); + MPI_Comm_rank( MPI_COMM_WORLD, &myid ); + MPI_Get_processor_name( processor_name, &namelen ); + + fprintf( stdout, "Process %d of %d on %s\n", + myid, numprocs, processor_name ); + fflush( stdout ); + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + if ( ( retval = PAPI_start( EventSet1 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + n = 0; + while ( !done ) { + if ( myid == 0 ) { + if ( n == 0 ) + n = 1000000; + else + n = 0; + + startwtime = MPI_Wtime( ); + } + MPI_Bcast( &n, 1, MPI_INT, 0, MPI_COMM_WORLD ); + if ( n == 0 ) + done = 1; + else { + h = 1.0 / ( double ) n; + sum = 0.0; + /* A slightly better approach starts from large i and works back */ + for ( i = myid + 1; i <= n; i += numprocs ) { + x = h * ( ( double ) i - 0.5 ); + sum += f( x ); + } + mypi = h * sum; + + MPI_Reduce( &mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD ); + + if ( myid == 0 ) { + printf( "pi is approximately %.16f, Error is %.16f\n", + pi, fabs( pi - PI25DT ) ); + endwtime = MPI_Wtime( ); + printf( "wall clock time = %f\n", endwtime - startwtime ); + fflush( stdout ); + } + } + } + + if ( ( retval = PAPI_stop( EventSet1, g1 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + 
if ( ( retval = PAPI_stop( EventSet, g2 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + MPI_Finalize( ); + + + printf( "ETH0_RX_BYTES: %lld ETH0_TX_BYTES: %lld\n", g2[0], g2[1] ); + if ( ins == 0 ) { + printf( "PAPI_TOT_CYC : %lld\n", g1[0] ); + } else if ( ins == 1 ) { + printf( "PAPI_FP_INS : %lld PAPI_TOT_CYC : %lld\n", g1[0], g1[1] ); + } else if ( ins == 2 ) { + printf( "PAPI_FP_OPS : %lld PAPI_TOT_CYC : %lld\n", g1[0], g1[1] ); + } + test_pass( __FILE__, NULL, 0 ); + return 0; +} diff --git a/src/ctests/net-mpi-test/cpi.pbs b/src/ctests/net-mpi-test/cpi.pbs new file mode 100644 index 0000000..96f691b --- /dev/null +++ b/src/ctests/net-mpi-test/cpi.pbs @@ -0,0 +1,44 @@ +#!/bin/bash +############################################################ +## Template PBS Job Script for Parallel Job on Myrinet Nodes +## +## Lines beginning with '#PBS' are PBS directives, see +## 'man qsub' for additional information. +############################################################ + +### Set the job name +#PBS -N cpi + +### Set the queue to submit this job: ALWAYS use the default queue +##PBS -q workq + +### Set the number of nodes that will be used, 4 in this case, +### use a single processor per node (ppn=1), and use Myrinet +#PBS -l nodes=4 + +### The following command computes the number of processors requested +### from the file containing the list of nodes assigned to the job +export NPROCS=`wc -l $PBS_NODEFILE |gawk '//{print $1}'` + +### The following statements dump some diagnostic information to +### the batch job's standard output. 
+echo The master node of this job is `hostname` +echo The working directory is `echo $PBS_O_WORKDIR` +echo The node file is $PBS_NODEFILE +echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-" +echo This job runs on the following nodes: +echo `cat $PBS_NODEFILE` +echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-" +echo This job has allocated $NPROCS nodes + +echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-" + +### Change to the working directory of the qsub command. +cd $PBS_O_WORKDIR + +### Execute the MPI job --- NOTE: It is *crucial* that the proper +### 'mpirun' command (there are several versions of the command +### on the cluster) be used to launch the job---it is safest to use +### the full pathname as is done here. +#/usr/local/mpich-mx/bin/mpirun -np $NPROCS -machinefile $PBS_NODEFILE cpi +/usr/local/mpich/bin/mpirun -np $NPROCS -machinefile $PBS_NODEFILE cpi diff --git a/src/ctests/nineth.c b/src/ctests/nineth.c new file mode 100644 index 0000000..cd85319 --- /dev/null +++ b/src/ctests/nineth.c @@ -0,0 +1,123 @@ +/* This file performs the following test: start, stop and timer functionality for derived events + + NOTE: This test becomes useless when rate events like PAPI_FLOPS are removed. + + - It tests the derived metric FLOPS using the following two counters. + They are counted in the default counting domain and default + granularity, depending on the platform. Usually this is + the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). + + PAPI_FP_INS + + PAPI_TOT_CYC + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. 
+*/ + +#include "papi_test.h" + +extern int TESTS_QUIET; /* Declared in test_utils.c */ + +int +main( int argc, char **argv ) +{ + int retval, num_tests = 2, tmp; + int EventSet1 = PAPI_NULL; + int EventSet2 = PAPI_NULL; + int mask1 = 0x80001; /* FP_OPS and TOT_CYC */ + int mask2 = 0x8; /* FLOPS */ + int num_events1; + int num_events2; + long long **values; + int clockrate; + double test_flops; + + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + /* gotta count flops to run this test */ + if ( ( retval = PAPI_query_event( PAPI_FP_OPS ) ) != PAPI_OK ) + test_skip( __FILE__, __LINE__, "PAPI_query_event", retval ); + + EventSet1 = add_test_events( &num_events1, &mask1 ); +/* EventSet2 = add_test_events(&num_events2, &mask2); */ + + if ( num_events1 == 0 || num_events2 == 0 ) + test_skip( __FILE__, __LINE__, "add_test_events", PAPI_ENOEVNT ); + + /* num_events1 is greater than num_events2 so don't worry. 
*/ + + values = allocate_test_space( num_tests, num_events1 ); + + clockrate = PAPI_get_opt( PAPI_CLOCKRATE, NULL ); + if ( clockrate < 1 ) + test_fail( __FILE__, __LINE__, "PAPI_get_opt", retval ); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); +/* + retval = PAPI_start(EventSet2); + if (retval != PAPI_OK) + test_fail(__FILE__, __LINE__, "PAPI_start", retval); + + do_flops(NUM_FLOPS); + + retval = PAPI_stop(EventSet2, values[1]); + if (retval != PAPI_OK) + test_fail(__FILE__, __LINE__, "PAPI_stop", retval); +*/ + remove_test_events( &EventSet1, mask1 ); +/* remove_test_events(&EventSet2, mask2); */ + + test_flops = + ( double ) ( values[0] )[0] * + ( double ) clockrate *( double ) 1000000.0; + test_flops = test_flops / ( double ) ( values[0] )[1]; + + if ( !TESTS_QUIET ) { + printf( "Test case 9: start, stop for derived event PAPI_FLOPS.\n" ); + printf( "------------------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf + ( "-------------------------------------------------------------------------\n" ); + + printf( "Test type : %12s%12s\n", "1", "2" ); + printf( TAB2, "PAPI_FP_OPS : ", ( values[0] )[0], ( long long ) 0 ); + printf( TAB2, "PAPI_TOT_CYC: ", ( values[0] )[1], ( long long ) 0 ); + printf( TAB2, "PAPI_FLOPS : ", ( long long ) 0, ( values[1] )[0] ); + printf + ( "-------------------------------------------------------------------------\n" ); + + printf( "Verification:\n" ); + printf( "Last number in row 3 
approximately equals %f\n", test_flops ); + printf( "This test is no longer valid: PAPI_FLOPS is deprecated.\n" ); + } +/* { + double min, max; + min = values[1][0] * .9; + max = values[1][0] * 1.1; + if (test_flops > max || test_flops < min) + test_fail(__FILE__, __LINE__, "PAPI_FLOPS", 1); + } +*/ + test_pass( __FILE__, values, num_tests ); + exit( 1 ); +} diff --git a/src/ctests/omptough.c b/src/ctests/omptough.c new file mode 100644 index 0000000..4618aa4 --- /dev/null +++ b/src/ctests/omptough.c @@ -0,0 +1,113 @@ +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NITER (100000) + +int +main( int argc, char *argv[] ) +{ + int i; + int ret; + int nthreads; + int *evtset; + int *ctrcode; + + nthreads = omp_get_max_threads( ); + evtset = ( int * ) malloc( sizeof ( int ) * nthreads ); + ctrcode = ( int * ) malloc( sizeof ( int ) * nthreads ); + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + ret = PAPI_library_init( PAPI_VER_CURRENT ); + if ( ret != PAPI_VER_CURRENT && ret > 0 ) { + fprintf( stderr, "PAPI library version mismatch '%s'\n", + PAPI_strerror( ret ) ); + exit( 1 ); + } + + if ( ret < 0 ) { + fprintf( stderr, "PAPI initialization error '%s'\n", + PAPI_strerror( ret ) ); + exit( 1 ); + } + + if ( ( ret = + PAPI_thread_init( ( unsigned long ( * )( void ) ) pthread_self ) ) != + PAPI_OK ) { + fprintf( stderr, "PAPI thread initialization error '%s'\n", + PAPI_strerror( ret ) ); + exit( 1 ); + } + + for ( i = 0; i < nthreads; i++ ) { + evtset[i] = PAPI_NULL; + + if ( ( ret = PAPI_event_name_to_code( "PAPI_TOT_INS", &ctrcode[i] ) ) + != PAPI_OK ) { + fprintf( stderr, "PAPI evt-name-to-code error '%s'\n", + PAPI_strerror( ret ) ); + } + + } + + for ( i = 0; i < NITER; i++ ) { +#pragma omp parallel + { + int tid; + int pid; + tid = omp_get_thread_num( ); + + pid = pthread_self( ); + + if ( ( ret = PAPI_register_thread( ) ) != PAPI_OK ) { + if ( !TESTS_QUIET ) { + fprintf( stderr, + 
"[%5d] Error in register thread (tid=%d pid=%d) '%s'\n", + i, tid, pid, PAPI_strerror( ret ) ); + test_fail( __FILE__, __LINE__, "omptough", 1 ); + } + } + + evtset[tid] = PAPI_NULL; + if ( ( ret = PAPI_create_eventset( &( evtset[tid] ) ) ) != PAPI_OK ) { + if ( !TESTS_QUIET ) { + fprintf( stderr, + "[%5d] Error creating eventset (tid=%d pid=%d) '%s'\n", + i, tid, pid, PAPI_strerror( ret ) ); + test_fail( __FILE__, __LINE__, "omptough", 1 ); + } + } + + + if ( ( ret = + PAPI_destroy_eventset( &( evtset[tid] ) ) ) != PAPI_OK ) { + if ( !TESTS_QUIET ) { + fprintf( stderr, + "[%5d] Error destroying eventset (tid=%d pid=%d) '%s'\n", + i, tid, pid, PAPI_strerror( ret ) ); + evtset[tid] = PAPI_NULL; + test_fail( __FILE__, __LINE__, "omptough", 1 ); + } + } + + if ( ( ret = PAPI_unregister_thread( ) ) != PAPI_OK ) { + if ( !TESTS_QUIET ) { + fprintf( stderr, + "[%5d] Error in unregister thread (tid=%d pid=%d) ret='%s'\n", + i, tid, pid, PAPI_strerror( ret ) ); + test_fail( __FILE__, __LINE__, "omptough", 1 ); + } + } + } + } + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/overflow.c b/src/ctests/overflow.c new file mode 100644 index 0000000..659a565 --- /dev/null +++ b/src/ctests/overflow.c @@ -0,0 +1,201 @@ +/* +* File: overflow.c +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file performs the following test: overflow dispatch + + The Eventset contains: + + PAPI_TOT_CYC + + PAPI_FP_INS (overflow monitor) + + - Start eventset 1 + - Do flops + - Stop and measure eventset 1 + - Set up overflow on eventset 1 + - Start eventset 1 + - Do flops + - Stop eventset 1 +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define OVER_FMT "handler(%d ) Overflow at %p! 
bit=%#llx \n" +#define OUT_FMT "%-12s : %16lld%16lld\n" + +static int total = 0; /* total overflows */ + + +void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ + ( void ) context; + + if ( !TESTS_QUIET ) { + fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector ); + } + total++; +} + +int +main( int argc, char **argv ) +{ + int EventSet = PAPI_NULL; + long long ( values[2] )[2]; + long long min, max; + int num_flops = NUM_FLOPS, retval; + int PAPI_event, mythreshold = THRESHOLD; + char event_name1[PAPI_MAX_STR_LEN]; + const PAPI_hw_info_t *hw_info = NULL; + int num_events, mask; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + /* Init PAPI */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Get hardware info */ + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + } + + /* add PAPI_TOT_CYC and one of the events in */ + /* PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, */ + /* depending on the availability of the event on */ + /* the platform */ + EventSet = add_two_nonderived_events( &num_events, &PAPI_event, &mask ); + + if (num_events==0) { + if (!quiet) printf("Trouble adding event!\n"); + test_skip(__FILE__,__LINE__,"Event add",1); + } + + if (!quiet) { + printf("Using %#x for the overflow event\n",PAPI_event); + } + + if ( PAPI_event == PAPI_FP_INS ) { + mythreshold = THRESHOLD; + } + else { +#if defined(linux) + mythreshold = ( int ) hw_info->cpu_max_mhz * 20000; +#else + mythreshold = THRESHOLD * 2; +#endif + } + + /* Start the run calibration run */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + /* stop the calibration run */ + retval = PAPI_stop( EventSet, values[0] ); + if ( retval != 
PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + + /* set up overflow handler */ + retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + } + + /* Start overflow run */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( num_flops ); + + /* stop overflow run */ + retval = PAPI_stop( EventSet, values[1] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + if ( !TESTS_QUIET ) { + retval = PAPI_event_code_to_name( PAPI_event, event_name1 ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + printf( "Test case: Overflow dispatch of 2nd event in set with 2 events.\n" ); + printf( "---------------------------------------------------------------\n" ); + printf( "Threshold for overflow is: %d\n", mythreshold ); + printf( "Using %d iterations of c += a*b\n", num_flops ); + printf( "-----------------------------------------------\n" ); + + printf( "Test type : %16d%16d\n", 1, 2 ); + printf( OUT_FMT, event_name1, ( values[0] )[1], ( values[1] )[1] ); + printf( OUT_FMT, "PAPI_TOT_CYC", ( values[0] )[0], ( values[1] )[0] ); + printf( "Overflows : %16s%16d\n", "", total ); + printf( "-----------------------------------------------\n" ); + } + + retval = PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + if ( !TESTS_QUIET ) { + printf( "Verification:\n" ); +#if defined(linux) || defined(__ia64__) || 
defined(_POWER4) + num_flops *= 2; +#endif + if ( PAPI_event == PAPI_FP_INS || PAPI_event == PAPI_FP_OPS ) { + printf( "Row 1 approximately equals %d %d\n", num_flops, + num_flops ); + } + printf( "Column 1 approximately equals column 2\n" ); + printf( "Row 3 approximately equals %u +- %u %%\n", + ( unsigned ) ( ( values[0] )[1] / ( long long ) mythreshold ), + ( unsigned ) ( OVR_TOLERANCE * 100.0 ) ); + } +/* + min = (long long)((values[0])[1]*(1.0-TOLERANCE)); + max = (long long)((values[0])[1]*(1.0+TOLERANCE)); + if ( (values[0])[1] > max || (values[0])[1] < min ) + test_fail(__FILE__, __LINE__, event_name, 1); +*/ + + min = + ( long long ) ( ( ( double ) values[0][1] * ( 1.0 - OVR_TOLERANCE ) ) / + ( double ) mythreshold ); + max = + ( long long ) ( ( ( double ) values[0][1] * ( 1.0 + OVR_TOLERANCE ) ) / + ( double ) mythreshold ); + if (!quiet) { + printf( "Overflows: total(%d) > max(%lld) || " + "total(%d) < min(%lld) \n", total, max, total, min ); + } + if ( total > max || total < min ) { + test_fail( __FILE__, __LINE__, "Overflows", 1 ); + } + + test_pass( __FILE__ ); + return 0; + +} diff --git a/src/ctests/overflow2.c b/src/ctests/overflow2.c new file mode 100644 index 0000000..b99c687 --- /dev/null +++ b/src/ctests/overflow2.c @@ -0,0 +1,189 @@ +/* +* File: overflow.c +* Author: Nils Smeds [Based on tests/overflow.c by Philip Mucci] +* smeds@pdc.kth.se +*/ + +/* This file performs the following test: overflow dispatch + + The Eventset contains: + + PAPI_TOT_CYC (overflow monitor) + + PAPI_FP_INS + + - Start eventset 1 + - Do flops + - Stop and measure eventset 1 + - Set up overflow on eventset 1 + - Start eventset 1 + - Do flops + - Stop eventset 1 +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define OVER_FMT "handler(%d ) Overflow at %p! 
bit=%#llx \n" +#define OUT_FMT "%-12s : %16lld%16lld\n" + +int total = 0; /* total overflows */ + +void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ + ( void ) context; + + if ( !TESTS_QUIET ) { + fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector ); + } + total++; +} + +int +main( int argc, char **argv ) +{ + int EventSet = PAPI_NULL; + long long ( values[2] )[2]; + long long min, max; + int num_flops, retval; + int PAPI_event, mythreshold; + char event_name[PAPI_MAX_STR_LEN]; + const PAPI_hw_info_t *hw_info = NULL; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + } + +#if defined(POWER3) || defined(__sparc__) + PAPI_event = PAPI_TOT_INS; +#else + /* query and set up the right instruction to monitor */ + PAPI_event = find_nonderived_event( ); +#endif + + if (PAPI_event==0) { + if (!quiet) printf("Trouble creating events\n"); + test_skip(__FILE__,__LINE__,"Creating event",1); + } + + if (( PAPI_event == PAPI_FP_OPS ) || ( PAPI_event == PAPI_FP_INS )) + mythreshold = THRESHOLD; + else +#if defined(linux) + mythreshold = ( int ) hw_info->cpu_max_mhz * 10000 * 2; +#else + mythreshold = THRESHOLD * 2; +#endif + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + retval = PAPI_add_event( EventSet, PAPI_event ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + 
test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[1] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + num_flops = NUM_FLOPS; +#if defined(linux) || defined(__ia64__) || defined(_POWER4) + num_flops *= 2; +#endif + + if ( !quiet ) { + if ( ( retval = + PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + printf + ( "Test case: Overflow dispatch of 1st event in set with 2 events.\n" ); + printf + ( "---------------------------------------------------------------\n" ); + printf( "Threshold for overflow is: %d\n", mythreshold ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-----------------------------------------------\n" ); + + printf( "Test type : %16d%16d\n", 1, 2 ); + printf( OUT_FMT, event_name, ( values[0] )[0], ( values[1] )[0] ); + printf( OUT_FMT, "PAPI_TOT_CYC", ( values[0] )[1], ( values[1] )[1] ); + printf( "Overflows : %16s%16d\n", "", total ); + printf( "-----------------------------------------------\n" ); + + printf( "Verification:\n" ); +/* + if (PAPI_event == PAPI_FP_INS) + printf("Row 1 approximately equals %d %d\n", num_flops, num_flops); +*/ + /* Note that the second run prints output on stdout. On some systems + * this is costly. 
PAPI_TOT_INS or PAPI_TOT_CYC are likely to be _very_ + * different between the two runs. + * printf("Column 1 approximately equals column 2\n"); + */ + printf( "Row 3 approximately equals %u +- %u %%\n", + ( unsigned ) ( ( values[0] )[0] / ( long long ) mythreshold ), + ( unsigned ) ( OVR_TOLERANCE * 100.0 ) ); + } +/* + min = (long long)((values[0])[0]*(1.0-TOLERANCE)); + max = (long long)((values[0])[0]*(1.0+TOLERANCE)); + if ( (values[1])[0] > max || (values[1])[0] < min ) + test_fail(__FILE__, __LINE__, event_name, 1); +*/ + + min = + ( long long ) ( ( ( double ) values[0][0] * ( 1.0 - OVR_TOLERANCE ) ) / + ( double ) mythreshold ); + max = + ( long long ) ( ( ( double ) values[0][0] * ( 1.0 + OVR_TOLERANCE ) ) / + ( double ) mythreshold ); + if ( total > max || total < min ) + test_fail( __FILE__, __LINE__, "Overflows", 1 ); + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/overflow3_pthreads.c b/src/ctests/overflow3_pthreads.c new file mode 100644 index 0000000..f63669e --- /dev/null +++ b/src/ctests/overflow3_pthreads.c @@ -0,0 +1,158 @@ +/* +* File: overflow3_pthreads.c +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file tests the overflow functionality when there are + * threads in which the application isn't calling PAPI (and only + * one thread that is calling PAPI.) 
+ */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int total = 0; + +void * +thread_fn( void *dummy ) +{ + ( void ) dummy; + while ( 1 ) { + do_stuff( ); + } + return ( NULL ); +} + +void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ + ( void ) overflow_vector; + ( void ) context; + if ( !TESTS_QUIET ) { + fprintf( stderr, "handler(%d ) Overflow at %p, thread %#lx!\n", + EventSet, address, PAPI_thread_id( ) ); + } + total++; +} + +void +mainloop( int arg ) +{ + int retval, num_tests = 1; + int EventSet1 = PAPI_NULL; + int mask1 = 0x0; + int num_events1; + long long **values; + int PAPI_event; + char event_name[PAPI_MAX_STR_LEN]; + + ( void ) arg; + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depending on the availability of the event on the + platform */ + EventSet1 = add_two_nonderived_events( &num_events1, + &PAPI_event, &mask1 ); + + if (num_events1==0) { + if (!TESTS_QUIET) printf("Trouble creating events\n"); + test_skip(__FILE__,__LINE__,"Creating events",0); + } + + values = allocate_test_space( num_tests, num_events1 ); + + if ( ( retval = + PAPI_overflow( EventSet1, PAPI_event, THRESHOLD, 0, + handler ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + do_stuff( ); + + if ( ( retval = PAPI_start( EventSet1 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + if ( ( retval = PAPI_stop( EventSet1, values[0] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + /* clear the papi_overflow event */ + if ( ( retval = + PAPI_overflow( EventSet1, PAPI_event, 0, 0, NULL ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + if ( ( retval = + 
PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + if ( !TESTS_QUIET ) { + printf( "Thread %#x %s : \t%lld\n", ( int ) pthread_self( ), + event_name, ( values[0] )[0] ); + printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", ( int ) pthread_self( ), + ( values[0] )[1] ); + } + + retval = PAPI_cleanup_eventset( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset( &EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + free_test_space( values, num_tests ); + PAPI_shutdown( ); +} + +int +main( int argc, char **argv ) +{ + int i, rc, retval; + pthread_t id[NUM_THREADS]; + pthread_attr_t attr; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + if (!quiet) { + printf( "%s: Using %d threads\n\n", argv[0], NUM_THREADS ); + printf( "Does non-threaded overflow work " + "with extraneous threads present?\n" ); + } + + pthread_attr_init( &attr ); +#ifdef PTHREAD_CREATE_UNDETACHED + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); +#endif +#ifdef PTHREAD_SCOPE_SYSTEM + retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); + if ( retval != 0 ) + test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); +#endif + + for ( i = 0; i < NUM_THREADS; i++ ) { + rc = pthread_create( &id[i], &attr, thread_fn, NULL ); + if ( rc ) + test_fail( __FILE__, __LINE__, "pthread_create", rc ); + } + pthread_attr_destroy( &attr ); + + mainloop( NUM_ITERS ); + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/overflow_allcounters.c b/src/ctests/overflow_allcounters.c new file mode 100644 index 0000000..128f9ec --- /dev/null +++ b/src/ctests/overflow_allcounters.c @@ -0,0 +1,301 @@ +/* +* File: overflow_allcounters.c +* Author: Haihang You +* you@cs.utk.edu +* Mods: Vince Weaver +* 
vweaver1@eecs.utk.edu +*/ + +/* This file performs the following test: overflow all counters + to test availability of overflow of all counters + + - Start eventset 1 + - Do flops + - Stop and measure eventset 1 + - Set up overflow on eventset 1 + - Start eventset 1 + - Do flops + - Stop eventset 1 +*/ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define OVER_FMT "handler(%d ) Overflow at %p! bit=%#llx \n" +#define OUT_FMT "%-12s : %16lld%16lld\n" + +static int total = 0; /* total overflows */ + + +void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ + + ( void ) context; + + if ( !TESTS_QUIET ) { + printf( OVER_FMT, EventSet, address, overflow_vector ); + } + total++; +} + +int +main( int argc, char **argv ) +{ + int EventSet = PAPI_NULL; + long long *values; + int num_flops, retval, i, j; + int *events, mythreshold; + char **names; + const PAPI_hw_info_t *hw_info = NULL; + int num_events, *ovt; + char name[PAPI_MAX_STR_LEN]; + int using_perfmon = 0; + int using_aix = 0; + int cid; + int quiet; + long long value; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", retval ); + } + + cid = PAPI_get_component_index("perfmon"); + if (cid>=0) using_perfmon = 1; + + cid = PAPI_get_component_index("aix"); + if (cid>=0) using_aix = 1; + + /* add PAPI_TOT_CYC and one of the events in */ + /* PAPI_FP_INS, PAPI_FP_OPS PAPI_TOT_INS, */ + /* depending on the availability of the event*/ + /* on the platform */ + EventSet = enum_add_native_events( &num_events, &events, 1 , 1, 0); + + if (num_events==0) { + if (!quiet) printf("No events found\n"); + test_skip(__FILE__,__LINE__,"No 
events found",0); + } + + if (!quiet) printf("Trying %d events\n",num_events); + + names = ( char ** ) calloc( ( unsigned int ) num_events, + sizeof ( char * ) ); + + for ( i = 0; i < num_events; i++ ) { + if ( PAPI_event_code_to_name( events[i], name ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__,"PAPI_event_code_to_name", retval); + } + else { + names[i] = strdup( name ); + if (!quiet) printf("%i: %s\n",i,names[i]); + } + } + + values = ( long long * ) + calloc( ( unsigned int ) ( num_events * ( num_events + 1 ) ), + sizeof ( long long ) ); + ovt = ( int * ) calloc( ( unsigned int ) num_events, sizeof ( int ) ); + +#if defined(linux) + { + char *tmp = getenv( "THRESHOLD" ); + if ( tmp ) { + mythreshold = atoi( tmp ); + } + else if (hw_info->cpu_max_mhz!=0) { + mythreshold = ( int ) hw_info->cpu_max_mhz * 20000; + if (!quiet) printf("Using a threshold of %d (20,000 * MHz)\n",mythreshold); + + } + else { + if (!quiet) printf("Using default threshold of %d\n",THRESHOLD); + mythreshold = THRESHOLD; + } + } +#else + mythreshold = THRESHOLD; +#endif + + num_flops = NUM_FLOPS * 2; + + /* initial test to make sure they all work */ + if (!quiet) printf("Testing that the events all work with no overflow\n"); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( num_flops ); + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /* done with initial test */ + + /* keep adding events? 
*/ + for ( i = 0; i < num_events; i++ ) { + + /* Enable overflow */ + if (!quiet) printf("Testing with overflow set on %s\n", + names[i]); + + retval = PAPI_overflow( EventSet, events[i], + mythreshold, 0, handler ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( num_flops ); + + retval = PAPI_stop( EventSet, values + ( i + 1 ) * num_events ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /* Disable overflow */ + retval = PAPI_overflow( EventSet, events[i], 0, 0, handler ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + } + ovt[i] = total; + total = 0; + } + + if ( !quiet ) { + + printf("\nResults in Matrix-view:\n"); + printf( "Test Overflow on %d counters with %d events.\n", + num_events,num_events ); + printf( "-----------------------------------------------\n" ); + printf( "Threshold for overflow is: %d\n", mythreshold ); + printf( "Using %d iterations of c += a*b\n", num_flops ); + printf( "-----------------------------------------------\n" ); + + printf( "Test type : " ); + for ( i = 0; i < num_events + 1; i++ ) { + printf( "%16d", i ); + } + printf( "\n" ); + for ( j = 0; j < num_events; j++ ) { + printf( "%-27s : ", names[j] ); + for ( i = 0; i < num_events + 1; i++ ) { + printf( "%16lld", *( values + j + num_events * i ) ); + } + printf( "\n" ); + } + printf( "Overflows : %16s", "" ); + for ( i = 0; i < num_events; i++ ) { + printf( "%16d", ovt[i] ); + } + printf( "\n" ); + printf( "-----------------------------------------------\n" ); + } + + /* validation */ + + if ( !quiet ) { + printf("\nResults broken out for validation\n"); + } + + if (!quiet) { + + for ( j = 0; j < num_events+1; j++ ) { + if (j==0) { + printf("Test results, no overflow:\n\t"); + } + else { + printf("Overflow 
of event %d, %s\n\t",j-1,names[j-1]); + } + for(i=0; i < num_events; i++) { + if (i==j-1) { + printf("*%lld* ",values[(num_events*j)+i]); + } + else { + printf("%lld ",values[(num_events*j)+i]); + } + } + printf("\n"); + if (j!=0) { + printf("\tOverflow should be %lld / %d = %lld\n", + values[(num_events*j)+(j-1)], + mythreshold, + values[(num_events*j)+(j-1)]/mythreshold); + printf("\tOverflow was %d\n",ovt[j-1]); + } + } + } + + for ( j = 0; j < num_events; j++ ) { + //printf("Validation: %lld / %d != %d (%lld)\n", + // *( values + j + num_events * (j+1) ) , + // mythreshold, + // ovt[j], + // *(values+j+num_events*(j+1))/mythreshold); + + value = values[j+num_events*(j+1)]; + + if ( value / mythreshold != ovt[j] ) { + char error_string[BUFSIZ]; + + if ( using_perfmon ) + test_warn( __FILE__, __LINE__, + "perfmon component handles overflow differently than perf_events", + 1 ); + else if ( using_aix ) + test_warn( __FILE__, __LINE__, + "AIX (pmapi) component handles overflow differently than various other components", + 1 ); + else { + sprintf( error_string, + "Overflow value differs from expected %lld / %d should be %lld, we got %d", + value , mythreshold, + value / mythreshold, + ovt[j] ); + test_fail( __FILE__, __LINE__, error_string, 1 ); + } + } + } + + retval = PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + free( ovt ); + for ( i = 0; i < num_events; i++ ) + free( names[i] ); + free( names ); + free( events ); + free( values ); + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/overflow_force_software.c b/src/ctests/overflow_force_software.c new file mode 100644 index 0000000..6b7f80d --- /dev/null +++ b/src/ctests/overflow_force_software.c @@ -0,0 +1,324 @@ +/* +* File: overflow_force_software.c +* Author: Kevin 
London +* london@cs.utk.edu +* Mods: Maynard Johnson +* maynardj@us.ibm.com +* Philip Mucci +* mucci@cs.utk.edu +* Haihang You +* you@cs.utk.edu +* +* +*/ + +/* This file performs the following test: overflow dispatch of an eventset +with just a single event. Using both Hardware and software overflows + +The Eventset contains: ++ PAPI_FP_INS (overflow monitor) + +- Start eventset 1 +- Do flops +- Stop and measure eventset 1 +- Set up overflow on eventset 1 +- Start eventset 1 +- Do flops +- Stop eventset 1 +- Set up forced software overflow on eventset 1 +- Start eventset 1 +- Do flops +- Stop eventset 1 +*/ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define OVER_FMT "handler(%d) Overflow at %p overflow_vector=%#llx!\n" +#define OUT_FMT "%-12s : %16lld%16d%16lld\n" + +#define SOFT_TOLERANCE 0.90 +#define MY_NUM_TESTS 5 + +static int total[MY_NUM_TESTS] = { 0, }; /* total overflows */ +static int use_total = 0; /* which total field to bump */ +static long long values[MY_NUM_TESTS] = { 0, }; + +void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ + ( void ) context; + + if ( !TESTS_QUIET ) { + fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector ); + } + + total[use_total]++; +} + +int +main( int argc, char **argv ) +{ + int EventSet = PAPI_NULL; + long long hard_min, hard_max, soft_min, soft_max; + int retval; + int PAPI_event = 0, mythreshold; + char event_name[PAPI_MAX_STR_LEN]; + PAPI_option_t opt; + PAPI_event_info_t info; + PAPI_option_t itimer; + const PAPI_hw_info_t *hw_info = NULL; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + /* query and set up the right instruction to monitor */ + if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) { + if ( PAPI_query_event( PAPI_FP_INS ) 
== PAPI_OK ) { + PAPI_get_event_info( PAPI_FP_INS, &info ); + if ( info.count == 1 || + !strcmp( info.derived, "DERIVED_CMPD" ) ) + PAPI_event = PAPI_FP_INS; + } + } + if ( PAPI_event == 0 ) { + if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) { + PAPI_get_event_info( PAPI_FP_OPS, &info ); + if ( info.count == 1 || + !strcmp( info.derived, "DERIVED_CMPD" ) ) + PAPI_event = PAPI_FP_OPS; + } + } + if ( PAPI_event == 0 ) { + if ( PAPI_query_event( PAPI_TOT_INS ) == PAPI_OK ) { + PAPI_get_event_info( PAPI_TOT_INS, &info ); + if ( info.count == 1 || + !strcmp( info.derived, "DERIVED_CMPD" ) ) + PAPI_event = PAPI_TOT_INS; + } + } + + if ( PAPI_event == 0 ) + test_skip( __FILE__, __LINE__, "No suitable event for this test found!", + 0 ); + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + + if ( PAPI_event == PAPI_FP_INS ) + mythreshold = THRESHOLD; + else +#if defined(linux) + mythreshold = ( hw_info->cpu_max_mhz != 0 ) ? ( int ) hw_info->cpu_max_mhz * 20000 : THRESHOLD * 2; /* cpu_max_mhz may be 0; a zero threshold breaks the later divisions by mythreshold */ +#else + mythreshold = THRESHOLD * 2; +#endif + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + retval = PAPI_add_event( EventSet, PAPI_event ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + retval = PAPI_get_opt( PAPI_COMPONENTINFO, &opt ); + if ( retval != PAPI_OK ) + test_skip( __FILE__, __LINE__, + "Platform does not support Hardware overflow", 0 ); + + do_stuff( ); + + /* Do reference count */ + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + retval = PAPI_stop( EventSet, &values[use_total] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + use_total++; + + /* Now do hardware overflow reference count */ + + retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler ); + if ( retval != 
PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + retval = PAPI_stop( EventSet, &values[use_total] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + use_total++; + + retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + /* Now do software overflow reference count, uses SIGPROF */ + + retval = + PAPI_overflow( EventSet, PAPI_event, mythreshold, + PAPI_OVERFLOW_FORCE_SW, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + retval = PAPI_stop( EventSet, &values[use_total] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + use_total++; + + retval = + PAPI_overflow( EventSet, PAPI_event, 0, PAPI_OVERFLOW_FORCE_SW, + handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + /* Now do software overflow with SIGVTALRM */ + + memset( &itimer, 0, sizeof ( itimer ) ); + itimer.itimer.itimer_num = ITIMER_VIRTUAL; + itimer.itimer.itimer_sig = SIGVTALRM; + + if ( ( retval = PAPI_set_opt( PAPI_DEF_ITIMER, &itimer ) ) != PAPI_OK ) /* keep the status so the report is not a stale PAPI_OK */ + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + + retval = + PAPI_overflow( EventSet, PAPI_event, mythreshold, + PAPI_OVERFLOW_FORCE_SW, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + retval = PAPI_stop( EventSet, &values[use_total] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", 
retval ); + use_total++; + + retval = + PAPI_overflow( EventSet, PAPI_event, 0, PAPI_OVERFLOW_FORCE_SW, + handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + /* Now do software overflow with SIGALRM */ + + memset( &itimer, 0, sizeof ( itimer ) ); + itimer.itimer.itimer_num = ITIMER_REAL; + itimer.itimer.itimer_sig = SIGALRM; + if ( ( retval = PAPI_set_opt( PAPI_DEF_ITIMER, &itimer ) ) != PAPI_OK ) /* keep the status so the report is not a stale PAPI_OK */ + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + + retval = + PAPI_overflow( EventSet, PAPI_event, mythreshold, + PAPI_OVERFLOW_FORCE_SW, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + retval = PAPI_stop( EventSet, &values[use_total] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + use_total++; + + retval = + PAPI_overflow( EventSet, PAPI_event, 0, PAPI_OVERFLOW_FORCE_SW, + handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + if ( !TESTS_QUIET ) { + if ( ( retval = + PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + printf + ( "Test case: Software overflow of various types with 1 event in set.\n" ); + printf + ( "------------------------------------------------------------------------------\n" ); + printf( "Threshold for overflow is: %d\n", mythreshold ); + printf + ( "------------------------------------------------------------------------------\n" ); + + printf( "Test type : %11s%13s%13s%13s%13s\n", "Reference", "Hardware", + "ITIMER_PROF", "ITIMER_VIRT", "ITIMER_REAL" ); + printf( "%-12s: %11lld%13lld%13lld%13lld%13lld\n", info.symbol, + values[0], values[1], values[2], values[3], values[4] ); + printf( "Overflows : %11d%13d%13d%13d%13d\n", total[0], 
total[1], + total[2], total[3], total[4] ); + printf + ( "------------------------------------------------------------------------------\n" ); + + printf( "Verification:\n" ); + + printf + ( "Overflow in Column 2 greater than or equal to overflows in Columns 3, 4, 5\n" ); + printf( "Overflow in Columns 3, 4, 5 greater than 0\n" ); + } + + hard_min = + ( long long ) ( ( ( double ) values[0] * ( 1.0 - OVR_TOLERANCE ) ) / + ( double ) mythreshold ); + hard_max = + ( long long ) ( ( ( double ) values[0] * ( 1.0 + OVR_TOLERANCE ) ) / + ( double ) mythreshold ); + soft_min = + ( long long ) ( ( ( double ) values[0] * ( 1.0 - SOFT_TOLERANCE ) ) / + ( double ) mythreshold ); + soft_max = + ( long long ) ( ( ( double ) values[0] * ( 1.0 + SOFT_TOLERANCE ) ) / + ( double ) mythreshold ); + + if ( total[1] > hard_max || total[1] < hard_min ) + test_fail( __FILE__, __LINE__, "Hardware Overflows outside limits", 1 ); + + if ( total[2] > soft_max || total[3] > soft_max || total[4] > soft_max ) + test_fail( __FILE__, __LINE__, + "Software Overflows exceed theoretical maximum", 1 ); + + if ( total[2] < soft_min || total[3] < soft_min || total[4] < soft_min ) + printf( "WARNING: Software Overflow occuring but suspiciously low\n" ); + + if ( ( total[2] == 0 ) || ( total[3] == 0 ) || ( total[4] == 0 ) ) + test_fail( __FILE__, __LINE__, "Software Overflows", 1 ); + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/overflow_index.c b/src/ctests/overflow_index.c new file mode 100644 index 0000000..08c6a07 --- /dev/null +++ b/src/ctests/overflow_index.c @@ -0,0 +1,183 @@ +/* +* File: overflow_index.c +* Author: min@cs.utk.edu +* Min Zhou +*/ + +/* This file performs the following test: overflow dispatch on 2 counters. */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define OVER_FMT "handler(%d) Overflow at %p! 
vector=%#llx\n" +#define OUT_FMT "%-12s : %16lld%16lld\n" +#define INDEX_FMT "Overflows vector %#llx: \n" + +typedef struct +{ + long long mask; + int count; +} ocount_t; + +/* there are three possible vectors, one counter overflows, the other + counter overflows, both overflow */ +static ocount_t overflow_counts[3] = { {0, 0}, {0, 0}, {0, 0} }; +static int total_unknown = 0; + +static void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ + int i; + + ( void ) context; + + if ( !TESTS_QUIET ) { + fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector ); + } + + /* Look for the overflow_vector entry */ + + for ( i = 0; i < 3; i++ ) { + if ( overflow_counts[i].mask == overflow_vector ) { + overflow_counts[i].count++; + return; + } + } + + /* Didn't find it so add it. */ + + for ( i = 0; i < 3; i++ ) { + if ( overflow_counts[i].mask == ( long long ) 0 ) { + overflow_counts[i].mask = overflow_vector; + overflow_counts[i].count = 1; + return; + } + } + + /* Unknown entry!?! 
*/ + + total_unknown++; +} + +int +main( int argc, char **argv ) +{ + int EventSet = PAPI_NULL; + long long ( values[3] )[2]; + int retval; + int PAPI_event, k, i; + char event_name[PAPI_MAX_STR_LEN]; + int index_array[2], number; + int num_events1, mask1; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depends on the availability of the event on the + platform */ + EventSet = add_two_nonderived_events( &num_events1, &PAPI_event, &mask1 ); + + if (num_events1==0) { + if (!quiet) printf("Trouble adding events\n"); + test_skip(__FILE__,__LINE__,"Adding events",0); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_overflow( EventSet, PAPI_event, THRESHOLD, 0, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + retval = PAPI_overflow( EventSet, PAPI_TOT_CYC, THRESHOLD, 0, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[1] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + if (!quiet) { + printf( "Test case: Overflow dispatch of 2nd event in set with 2 
events.\n" ); + printf( "---------------------------------------------------------------\n" ); + printf( "Threshold for overflow is: %d\n", THRESHOLD ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-----------------------------------------------\n" ); + + printf( "Test type : %16d%16d\n", 1, 2 ); + printf( OUT_FMT, "PAPI_TOT_CYC", ( values[0] )[0], ( values[1] )[0] ); + printf( OUT_FMT, event_name, ( values[0] )[1], ( values[1] )[1] ); + } + + if ( overflow_counts[0].count == 0 && overflow_counts[1].count == 0 ) { + test_fail( __FILE__, __LINE__, + "one counter had no overflows", 1 ); + } + + for ( k = 0; k < 3; k++ ) { + if ( overflow_counts[k].mask ) { + number = 2; + retval = PAPI_get_overflow_event_index( EventSet, + overflow_counts[k].mask, + index_array, &number ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_get_overflow_event_index", retval ); + } + if (!quiet) { + printf( INDEX_FMT, ( long long ) overflow_counts[k].mask ); + printf( " counts: %d ", overflow_counts[k].count ); + for ( i = 0; i < number; i++ ) + printf( " Event Index %d ", index_array[i] ); + printf( "\n" ); + } + } + } + + if (!quiet) { + printf( "Case 2 %s Overflows: %d\n", "Unknown", total_unknown ); + printf( "-----------------------------------------------\n" ); + } + + if ( total_unknown > 0 ) { + test_fail( __FILE__, __LINE__, "Unknown counter had overflows", 1 ); + } + + retval = PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); /* name the call that actually failed */ + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/overflow_one_and_read.c b/src/ctests/overflow_one_and_read.c new file mode 100644 index 0000000..6defb5b --- /dev/null +++ b/src/ctests/overflow_one_and_read.c @@ -0,0 +1,139 @@ +/* +* File: overflow_one_and_read.c : based on overflow_twoevents.c +* Mods: Philip Mucci +* mucci@cs.utk.edu +* Kevin London +* london@cs.utk.edu +*/ + +/* This file performs the following 
test: overflow dispatch on 1 counter. + * In the handler read events. +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define OVER_FMT "handler(%d) Overflow at %p! vector=%#llx\n" +#define OUT_FMT "%-12s : %16lld%16lld\n" + +typedef struct +{ + long long mask; + int count; +} ocount_t; + +/* there are three possible vectors, one counter overflows, the other + counter overflows, both overflow */ +/*not used*/ ocount_t overflow_counts[3] = { {0, 0}, {0, 0}, {0, 0} }; +/*not used*/ int total_unknown = 0; + +/*added*/ long long dummyvalues[2]; + +void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ + int retval; + + ( void ) context; + + if ( !TESTS_QUIET ) { + fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector ); + } + + if ( ( retval = PAPI_read( EventSet, dummyvalues ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + + if ( !TESTS_QUIET ) { + fprintf( stderr, TWO12, dummyvalues[0], dummyvalues[1], + "(Reading counters)\n" ); + } + if ( dummyvalues[1] == 0 ) + test_fail( __FILE__, __LINE__, "Total Cycles == 0", 1 ); +} + +int +main( int argc, char **argv ) +{ + int EventSet; + long long **values = NULL; + int retval; + int PAPI_event; + char event_name[PAPI_MAX_STR_LEN]; + int num_events1, mask1; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depends on the availability of the event on the + platform */ +/* NOTE: Only adding one overflow on PAPI_event -- no overflow for PAPI_TOT_CYC*/ + EventSet = add_two_nonderived_events( &num_events1, + &PAPI_event, &mask1 ); + if (num_events1==0) { + if (!quiet) printf("Trouble adding events\n"); + 
test_skip(__FILE__,__LINE__,"Adding event",1); + } + + values = allocate_test_space( 2, num_events1 ); + + if ( ( retval = + PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_overflow( EventSet, PAPI_event, THRESHOLD, 0, handler ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[1] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + remove_test_events( &EventSet, mask1 ); + + + if ( !TESTS_QUIET ) { + printf + ( "Test case: Overflow dispatch of 1st event in set with 2 events.\n" ); + printf + ( "---------------------------------------------------------------\n" ); + printf( "Threshold for overflow is: %d\n", THRESHOLD ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-----------------------------------------------\n" ); + + printf( "Test type : %16d%16d\n", 1, 2 ); + printf( OUT_FMT, event_name, ( values[0] )[0], ( values[1] )[0] ); + printf( OUT_FMT, "PAPI_TOT_CYC", ( values[0] )[1], ( values[1] )[1] ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/overflow_pthreads.c b/src/ctests/overflow_pthreads.c new file mode 100644 index 0000000..e9539ac --- /dev/null +++ b/src/ctests/overflow_pthreads.c @@ -0,0 +1,225 @@ +/* This file performs the following test: overflow dispatch with pthreads + + - This tests the dispatch of overflow calls from PAPI. 
These are counted + in the default counting domain and default granularity, depending on + the platform. Usually this is the user domain (PAPI_DOM_USER) and + thread context (PAPI_GRN_THR). + + The Eventset contains: + + PAPI_FP_INS (overflow monitor) + + PAPI_TOT_CYC + + - Set up overflow + - Start eventset 1 + - Do flops + - Stop eventset 1 +*/ + +#include +#include +#include +#include + +#include "papi.h" +#include "do_loops.h" +#include "papi_test.h" + +static const PAPI_hw_info_t *hw_info = NULL; +static int total[NUM_THREADS]; +static int expected[NUM_THREADS]; +static pthread_t myid[NUM_THREADS]; + +static void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ +#if 0 + printf( "handler(%d,%#lx,%llx) Overflow %d in thread %lx\n", + EventSet, ( unsigned long ) address, overflow_vector, + total[EventSet], PAPI_thread_id( ) ); + printf( "%lx vs %lx\n", myid[EventSet], PAPI_thread_id( ) ); +#else /* eliminate unused parameter warning message */ + ( void ) address; + ( void ) overflow_vector; + ( void ) context; +#endif + total[EventSet]++; +} + +static long long mythreshold=0; + +static void * +Thread( void *arg ) +{ + int retval, num_tests = 1; + int EventSet1 = PAPI_NULL; + int mask1, papi_event; + int num_events1; + long long **values; + long long elapsed_us, elapsed_cyc; + char event_name[PAPI_MAX_STR_LEN]; + + retval = PAPI_register_thread( ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depends on the availability of the event on the + platform */ + EventSet1 = + add_two_nonderived_events( &num_events1, &papi_event, &mask1 ); + + if (EventSet1 < 0) return NULL; + + /* Wait, we're indexing a per-thread array with the EventSet number? */ + /* does that make any sense at all???? 
-- vmw */ + expected[EventSet1] = *( int * ) arg / mythreshold; + myid[EventSet1] = PAPI_thread_id( ); + + values = allocate_test_space( num_tests, num_events1 ); + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + if ((retval = PAPI_overflow( EventSet1, papi_event, + mythreshold, 0, handler ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + } + + /* start_timer(1); */ + if ( ( retval = PAPI_start( EventSet1 ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + if ( ( retval = PAPI_stop( EventSet1, values[0] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + retval = PAPI_overflow( EventSet1, papi_event, 0, 0, NULL ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + } + + remove_test_events( &EventSet1, mask1 ); + + retval = PAPI_event_code_to_name( papi_event, event_name ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + if ( !TESTS_QUIET ) { + printf( "Thread %#x %s : \t%lld\n", ( int ) pthread_self( ), + event_name, ( values[0] )[0] ); + printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", ( int ) pthread_self( ), + ( values[0] )[1] ); + printf( "Thread %#x Real usec : \t%lld\n", ( int ) pthread_self( ), + elapsed_us ); + printf( "Thread %#x Real cycles : \t%lld\n", ( int ) pthread_self( ), + elapsed_cyc ); + } + free_test_space( values, num_tests ); + retval = PAPI_unregister_thread( ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); + return ( NULL ); +} + +int +main( int argc, char **argv ) +{ + pthread_t id[NUM_THREADS]; + int flops[NUM_THREADS]; + int i, rc, retval; + pthread_attr_t attr; + float ratio; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + 
+ memset( total, 0x0, NUM_THREADS * sizeof ( *total ) ); + memset( expected, 0x0, NUM_THREADS * sizeof ( *expected ) ); + memset( myid, 0x0, NUM_THREADS * sizeof ( *myid ) ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + } + + retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) + ( pthread_self ) ); + if (retval != PAPI_OK ) { + if ( retval == PAPI_ECMP ) + test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); + else + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } +#if defined(linux) + mythreshold = ( hw_info->cpu_max_mhz != 0 ) ? ((long long)hw_info->cpu_max_mhz) * 10000 * 2 : THRESHOLD * 2; /* mythreshold is used as a divisor in Thread() and below, so it must not be 0 */ +#else + mythreshold = THRESHOLD * 2; +#endif + + pthread_attr_init( &attr ); +#ifdef PTHREAD_CREATE_UNDETACHED + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); +#endif +#ifdef PTHREAD_SCOPE_SYSTEM + retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); + if ( retval != 0 ) + test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); +#endif + + for ( i = 0; i < NUM_THREADS; i++ ) { + flops[i] = NUM_FLOPS * ( i + 1 ); + rc = pthread_create( &id[i], &attr, Thread, ( void * ) &flops[i] ); + if ( rc ) + test_fail( __FILE__, __LINE__, "pthread_create", PAPI_ESYS ); + } + for ( i = 0; i < NUM_THREADS; i++ ) + pthread_join( id[i], NULL ); + + pthread_attr_destroy( &attr ); + + { + long long t = 0, r = 0; + for ( i = 0; i < NUM_THREADS; i++ ) { + t += ( NUM_FLOPS * ( i + 1 ) ) / mythreshold; + r += total[i]; + } + if (!quiet) { + printf( "Expected total overflows: %lld\n", t ); + printf( "Received total overflows: %lld\n", r ); + } + } +// FIXME: are we actually testing this properly? 
+ +/* ratio = (float)total[0] / (float)expected[0]; */ +/* printf("Ratio of total to expected: %f\n",ratio); */ + ratio = 1.0; + for ( i = 0; i < NUM_THREADS; i++ ) { + if (!quiet) printf( "Overflows thread %d: %d, expected %d\n", + i, total[i], ( int ) ( ratio * ( float ) expected[i] ) ); + } + + for ( i = 0; i < NUM_THREADS; i++ ) { + if ( total[i] < ( int ) ( ( ratio * ( float ) expected[i] ) / 2.0 ) ) + test_fail( __FILE__, __LINE__, "not enough overflows", PAPI_EMISC ); + } + + test_pass( __FILE__ ); + + pthread_exit( NULL ); + + return 0; + +} diff --git a/src/ctests/overflow_single_event.c b/src/ctests/overflow_single_event.c new file mode 100644 index 0000000..976f73b --- /dev/null +++ b/src/ctests/overflow_single_event.c @@ -0,0 +1,199 @@ +/* +* File: overflow_single_event.c +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file performs the following test: overflow dispatch of an eventset + with just a single event. + + The Eventset contains: + + PAPI_FP_INS (overflow monitor) + + - Start eventset 1 + - Do flops + - Stop and measure eventset 1 + - Set up overflow on eventset 1 + - Start eventset 1 + - Do flops + - Stop eventset 1 +*/ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define OVER_FMT "handler(%d ) Overflow at %p overflow_vector=%#llx!\n" +#define OUT_FMT "%-12s : %16lld%16lld\n" + +static int total = 0; /* total overflows */ + +void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ + ( void ) context; + + if ( !TESTS_QUIET ) { + fprintf( stderr, OVER_FMT, EventSet, address, overflow_vector ); + } + + total++; +} + +int +main( int argc, char **argv ) +{ + int EventSet = PAPI_NULL; + long long values[2] = { 0, 0 }; + long long min, max; + int num_flops = NUM_FLOPS, retval; + int PAPI_event = 0, mythreshold; + char event_name[PAPI_MAX_STR_LEN]; + const PAPI_hw_info_t *hw_info = NULL; + int quiet; + + /* Set TESTS_QUIET variable */ + 
quiet=tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + + /* Ugh */ + if ( ( !strncmp( hw_info->model_string, "UltraSPARC", 10 ) && + !( strncmp( hw_info->vendor_string, "SUN", 3 ) ) ) || + ( !strncmp( hw_info->model_string, "AMD K7", 6 ) ) || + ( !strncmp( hw_info->vendor_string, "Cray", 4 ) ) || + ( strstr( hw_info->model_string, "POWER3" ) ) ) { + /* query and set up the right instruction to monitor */ + if ( PAPI_query_event( PAPI_TOT_INS ) == PAPI_OK ) { + PAPI_event = PAPI_TOT_INS; + } else { + test_fail( __FILE__, __LINE__, + "PAPI_TOT_INS not available on this platform!", 0 ); /* branch also covers AMD K7, Cray and POWER3 */ + } + } else { + /* query and set up the right instruction to monitor */ + PAPI_event = find_nonderived_event( ); + } + + if (PAPI_event==0) { + if (!quiet) printf("Trouble adding event\n"); + test_skip(__FILE__,__LINE__,"Event trouble",1); + } + + if (( PAPI_event == PAPI_FP_OPS ) || ( PAPI_event == PAPI_FP_INS )) { + mythreshold = THRESHOLD; + } + else { +#if defined(linux) + mythreshold = ( hw_info->cpu_max_mhz != 0 ) ? ( int ) hw_info->cpu_max_mhz * 20000 : THRESHOLD * 2; /* cpu_max_mhz may be 0; mythreshold is a divisor in the verification below */ +#else + mythreshold = THRESHOLD * 2; +#endif + } + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + retval = PAPI_add_event( EventSet, PAPI_event ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, &values[0] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, 
handler ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, &values[1] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /* double ugh */ +#if defined(linux) || defined(__ia64__) || defined(_POWER4) + num_flops *= 2; +#endif + + if ( !quiet ) { + if ( ( retval = + PAPI_event_code_to_name( PAPI_event, event_name ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + printf + ( "Test case: Overflow dispatch of 1st event in set with 1 event.\n" ); + printf + ( "--------------------------------------------------------------\n" ); + printf( "Threshold for overflow is: %d\n", mythreshold ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-----------------------------------------------\n" ); + + printf( "Test type : %16d%16d\n", 1, 2 ); + printf( OUT_FMT, event_name, values[0], values[1] ); + printf( "Overflows : %16s%16d\n", "", total ); + printf( "-----------------------------------------------\n" ); + + printf( "Verification:\n" ); +/* + if (PAPI_event == PAPI_FP_INS) + printf("Row 1 approximately equals %d %d\n", num_flops, num_flops); + printf("Column 1 approximately equals column 2\n"); +*/ + printf( "Row 3 approximately equals %u +- %u %%\n", + ( unsigned ) ( ( values[0] ) / ( long long ) mythreshold ), + ( unsigned ) ( OVR_TOLERANCE * 100.0 ) ); + + } + +/* + min = (long long)(values[0]*(1.0-TOLERANCE)); + max = (long long)(values[0]*(1.0+TOLERANCE)); + if ( values[1] > max || values[1] < min ) + test_fail(__FILE__, __LINE__, event_name, 1); +*/ + + min = + ( long long ) ( ( ( double ) values[0] * ( 1.0 - OVR_TOLERANCE ) ) / + ( double ) mythreshold ); + max = + ( long long ) ( ( ( double ) values[0] * ( 1.0 + OVR_TOLERANCE ) ) / + ( 
double ) mythreshold ); + if ( total > max || total < min ) + test_fail( __FILE__, __LINE__, "Overflows", 1 ); + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/overflow_twoevents.c b/src/ctests/overflow_twoevents.c new file mode 100644 index 0000000..4ec27e4 --- /dev/null +++ b/src/ctests/overflow_twoevents.c @@ -0,0 +1,284 @@ +/* +* File: overflow_twoevents.c +* Author: min@cs.utk.edu +* Min Zhou +* Mods: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file performs the following test: overflow dispatch on 2 counters. */ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define OVER_FMT "handler(%d) Overflow at %p! vector=%#llx\n" +#define OUT_FMT "%-12s : %18lld%18lld%18lld\n" +#define VEC_FMT " at vector %#llx, event %-12s : %6d\n" + +typedef struct +{ + long long mask; + int count; +} ocount_t; + +/* there are two experiments: batch and interleaf; for each experiment there + are three possible vectors, one counter overflows, the other + counter overflows, both overflow */ +static ocount_t overflow_counts[2][3] = + { {{0, 0}, {0, 0}, {0, 0}}, {{0, 0}, {0, 0}, {0, 0}} }; +static int total_unknown = 0; + +static void +handler( int mode, void *address, long long overflow_vector, void *context ) +{ + ( void ) context; /*unused */ + int i; + + if ( !TESTS_QUIET ) { + fprintf( stderr, OVER_FMT, mode, address, overflow_vector ); + } + + /* Look for the overflow_vector entry */ + + for ( i = 0; i < 3; i++ ) { + if ( overflow_counts[mode][i].mask == overflow_vector ) { + overflow_counts[mode][i].count++; + return; + } + } + + /* Didn't find it so add it. */ + + for ( i = 0; i < 3; i++ ) { + if ( overflow_counts[mode][i].mask == ( long long ) 0 ) { + overflow_counts[mode][i].mask = overflow_vector; + overflow_counts[mode][i].count = 1; + return; + } + } + + /* Unknown entry!?! 
*/ + + total_unknown++; +} + +static void +handler_batch( int EventSet, void *address, long long overflow_vector, + void *context ) +{ + ( void ) EventSet; /*unused */ + handler( 0, address, overflow_vector, context ); +} + +static void +handler_interleaf( int EventSet, void *address, long long overflow_vector, + void *context ) +{ + ( void ) EventSet; /*unused */ + handler( 1, address, overflow_vector, context ); +} + + +int +main( int argc, char **argv ) +{ + int EventSet = PAPI_NULL; + long long ( values[3] )[2]; + int retval; + int PAPI_event, k, idx[4]; + char event_name[3][PAPI_MAX_STR_LEN]; + int num_events1; + int threshold = THRESHOLD; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + retval = PAPI_create_eventset( &EventSet ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + /* decide which of PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS to add, + depending on the availability and derived status of the event on + this platform */ + if ( ( PAPI_event = find_nonderived_event( ) ) == 0 ) { + if (!quiet) printf("No events found!\n"); + test_skip( __FILE__, __LINE__, "no PAPI_event", 0 ); + } + + if ( ( retval = PAPI_add_event( EventSet, PAPI_event ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + if ( ( retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + if ( ( retval = PAPI_stop( EventSet, values[0] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + /* Set both overflows after adding both events (batch) */ + retval = PAPI_overflow( EventSet, 
PAPI_event, threshold, 0, + handler_batch ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + } + + retval = PAPI_overflow( EventSet, PAPI_TOT_CYC, threshold, 0, + handler_batch ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + } + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[1] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + num_events1 = 1; + retval = + PAPI_get_overflow_event_index( EventSet, 1, &idx[0], &num_events1 ); + if ( retval != PAPI_OK ) { + printf( "PAPI_get_overflow_event_index error: %s\n", + PAPI_strerror( retval ) ); + } + + num_events1 = 1; + retval = + PAPI_get_overflow_event_index( EventSet, 2, &idx[1], &num_events1 ); + if ( retval != PAPI_OK ) { + printf( "PAPI_get_overflow_event_index error: %s\n", + PAPI_strerror( retval ) ); + } + + if ( ( retval = PAPI_cleanup_eventset( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + /* Add each event and set its overflow (interleaved) */ + if ( ( retval = PAPI_add_event( EventSet, PAPI_event ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + if ( ( retval = + PAPI_overflow( EventSet, PAPI_event, threshold, 0, + handler_interleaf ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + if ( ( retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + if ( ( retval = + PAPI_overflow( EventSet, PAPI_TOT_CYC, threshold, 0, + handler_interleaf ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + if ( ( retval = 
PAPI_stop( EventSet, values[2] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + num_events1 = 1; + retval = + PAPI_get_overflow_event_index( EventSet, 1, &idx[2], &num_events1 ); + if ( retval != PAPI_OK ) { + printf( "PAPI_get_overflow_event_index error: %s\n", + PAPI_strerror( retval ) ); + } + + num_events1 = 1; + retval = PAPI_get_overflow_event_index( EventSet, 2, &idx[3], + &num_events1 ); + if ( retval != PAPI_OK ) { + printf( "PAPI_get_overflow_event_index error: %s\n", + PAPI_strerror( retval ) ); + } + + if ( ( retval = PAPI_cleanup_eventset( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_event_code_to_name( PAPI_event, event_name[0] ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + retval = PAPI_event_code_to_name( PAPI_TOT_CYC, event_name[1] ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + strcpy( event_name[2], "Unknown" ); + + if (!TESTS_QUIET) { + printf( "Test case: Overflow dispatch of both events in set with 2 events.\n" ); + printf( "---------------------------------------------------------------\n" ); + printf( "Threshold for overflow is: %d\n", threshold ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-----------------------------------------------\n" ); + + printf( "Test type : %18s%18s%18s\n", "1 (no overflow)", "2 (batch)", + "3 (interleaf)" ); + printf( OUT_FMT, event_name[0], ( values[0] )[0], ( values[1] )[0], + ( values[2] )[0] ); + printf( OUT_FMT, event_name[1], ( values[0] )[1], ( values[1] )[1], + ( values[2] )[1] ); + printf( "\n" ); + + printf( "Predicted overflows at event %-12s : %6d\n", event_name[0], + ( int ) ( ( values[0] )[0] / threshold ) ); + printf( "Predicted overflows at event %-12s : %6d\n", event_name[1], + ( int ) ( ( values[0] )[1] / threshold ) ); + + printf( "\nBatch overflows 
(add, add, over, over):\n" ); + for ( k = 0; k < 2; k++ ) { + if ( overflow_counts[0][k].mask ) { + printf( VEC_FMT, ( long long ) overflow_counts[0][k].mask, + event_name[idx[k]], overflow_counts[0][k].count ); + } + } + + printf( "\nInterleaved overflows (add, over, add, over):\n" ); + for ( k = 0; k < 2; k++ ) { + if ( overflow_counts[1][k].mask ) + printf( VEC_FMT, + ( long long ) overflow_counts[1][k].mask, + event_name[idx[k + 2]], + overflow_counts[1][k].count ); + } + + printf( "\nCases 2+3 Unknown overflows: %d\n", total_unknown ); + printf( "-----------------------------------------------\n" ); + } + + if ( overflow_counts[0][0].count == 0 || overflow_counts[0][1].count == 0 ) + test_fail( __FILE__, __LINE__, "a batch counter had no overflows", 1 ); + + if ( overflow_counts[1][0].count == 0 || overflow_counts[1][1].count == 0 ) + test_fail( __FILE__, __LINE__, + "an interleaved counter had no overflows", 1 ); + + if ( total_unknown > 0 ) + test_fail( __FILE__, __LINE__, "Unknown counter had overflows", 1 ); + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/ctests/overflow_values.c b/src/ctests/overflow_values.c new file mode 100644 index 0000000..203bb54 --- /dev/null +++ b/src/ctests/overflow_values.c @@ -0,0 +1,169 @@ +/* +* File: overflow_values.c +* CVS: $Id$ +* Author: Harald Servat +* harald@cepba.upc.edu +* Mods: +* +*/ + +/* This file performs the following test: overflow values check + + The Eventset contains: + + PAPI_TOT_INS (overflow monitor) + + PAPI_TOT_CYC + + PAPI_L1_DCM + + - Start eventset + - Read and report event counts mod 1000 + - report overflow event counts + - visually inspect for consistency + - Stop eventset +*/ + +#include "papi_test.h" + +#define OVRFLOW 5000000 +#define LOWERFLOW (OVRFLOW - (OVRFLOW/100)) +#define UPPERFLOW (OVRFLOW/100) +#define ERRORFLOW (UPPERFLOW/5) +static long long ovrflow = 0; + +void +handler( int EventSet, void *address, long long overflow_vector, void *context ) +{ + int ret; + int i; + 
long long vals[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + printf( "\nOverflow at %p! bit=%#llx \n", address, overflow_vector ); + ret = PAPI_read( EventSet, vals ); + printf( "Overflow read vals :" ); + for ( i = 0; i < 3 /* 8 */ ; i++ ) + printf( "%lld ", vals[i] ); + printf( "\n\n" ); + ovrflow = vals[0]; +} + +int +main( int argc, char *argv[] ) +{ + int EventSet = PAPI_NULL; + int retval, i, dash = 0, evt3 = PAPI_L1_DCM; + PAPI_option_t options; + PAPI_option_t options2; + const PAPI_hw_info_t *hwinfo; + long long lwrflow = 0, error, max_error = 0; + long long vals[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT && retval > 0 ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + retval = PAPI_get_opt( PAPI_HWINFO, &options ); + if ( retval < 0 ) + test_fail( __FILE__, __LINE__, "PAPI_get_opt", retval ); + printf( "ovf_info = %d (%#x)\n", options.ovf_info.type, + options.ovf_info.type ); + + retval = PAPI_get_opt( PAPI_SUBSTRATEINFO, &options2 ); + if ( retval < 0 ) + test_fail( __FILE__, __LINE__, "PAPI_get_opt", retval ); + printf( "sub_info->hardware_intr = %d\n\n", + options2.sub_info->hardware_intr ); + + if ( ( hwinfo = PAPI_get_hardware_info( ) ) == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", PAPI_EMISC ); + + printf( "Architecture %s, %d\n", hwinfo->model_string, hwinfo->model ); + +/* processing exceptions is a pain */ +#if ((defined(linux) && (defined(__i386__) || (defined __x86_64__))) ) + if ( !strncmp( hwinfo->model_string, "Intel Pentium 4", 15 ) ) { + evt3 = PAPI_L2_TCM; + } else if ( !strncmp( hwinfo->model_string, "AMD K7", 6 ) ) { + /* do nothing */ + } else if ( !strncmp( hwinfo->model_string, "AMD K8", 6 ) ) { + /* do nothing */ + } else if ( !strncmp( hwinfo->model_string, "Intel Core", 10 ) ) { + evt3 = 0; + } else + evt3 = 0; /* for default PIII */ +#endif + + retval = 
PAPI_create_eventset( &EventSet ); + if ( retval < 0 ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + retval = PAPI_add_event( EventSet, PAPI_TOT_INS ); + if ( retval < 0 ) + test_fail( __FILE__, __LINE__, "PAPI_add_event:PAPI_TOT_INS", retval ); + retval = PAPI_add_event( EventSet, PAPI_TOT_CYC ); + if ( retval < 0 ) + test_fail( __FILE__, __LINE__, "PAPI_add_event:PAPI_TOT_CYC", retval ); + if ( evt3 ) { + retval = PAPI_add_event( EventSet, evt3 ); + if ( retval < 0 ) + test_fail( __FILE__, __LINE__, "PAPI_add_event:evt3", retval ); + } + retval = PAPI_overflow( EventSet, PAPI_TOT_INS, OVRFLOW, 0, handler ); + if ( retval < 0 ) + test_fail( __FILE__, __LINE__, "PAPI_overflow", retval ); + + retval = PAPI_start( EventSet ); + if ( retval < 0 ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + for ( i = 0; i < 1000000; i++ ) { + if ( i % 1000 == 0 ) { + int i; + + PAPI_read( EventSet, vals ); + if ( vals[0] % OVRFLOW > LOWERFLOW || + vals[0] % OVRFLOW < UPPERFLOW ) { + dash = 0; + printf( "Main loop read vals :" ); + for ( i = 0; i < 3 /* 8 */ ; i++ ) + printf( "%lld ", vals[i] ); + printf( "\n" ); + if ( ovrflow ) { + error = ovrflow - ( lwrflow + vals[0] ) / 2; + printf( "Difference: %lld\n", error ); + ovrflow = 0; + if ( abs( error ) > max_error ) + max_error = abs( error ); + } + lwrflow = vals[0]; + } else if ( vals[0] % OVRFLOW > UPPERFLOW && !dash ) { + dash = 1; + printf( "---------------------\n" ); + } + } + } + + retval = PAPI_stop( EventSet, vals ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_cleanup_eventset( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + printf( "Verification:\n" ); + printf + ( "Maximum absolute difference between overflow value\nand 
adjacent measured values is: %lld\n", + max_error ); + if ( max_error >= ERRORFLOW ) { + printf( "This exceeds the error limit: %d\n", ERRORFLOW ); + test_fail( __FILE__, __LINE__, "Overflows", 1 ); + } + printf( "This is within the error limit: %d\n", ERRORFLOW ); + test_pass( __FILE__, NULL, 0 ); + exit( 1 ); +} diff --git a/src/ctests/p4_lst_ins.c b/src/ctests/p4_lst_ins.c new file mode 100644 index 0000000..c6a9911 --- /dev/null +++ b/src/ctests/p4_lst_ins.c @@ -0,0 +1,232 @@ +/* This code demonstrates the behavior of PAPI_LD_INS, PAPI_SR_INS and PAPI_LST_INS + on a Pentium 4 processor. Because of the way these events are implemented in + hardware, LD and SR cannot be counted in the presence of either of the other + two events. +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int retval, num_tests = 6, tmp; + long long **values; + int EventSet = PAPI_NULL; + const PAPI_hw_info_t *hw_info; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + + if ( hw_info->vendor == PAPI_VENDOR_INTEL ) { + /* Check for Pentium4 */ + if ( hw_info->cpuid_family != 15 ) { + test_skip( __FILE__, __LINE__, + "This test is intended only for Pentium 4.", 1 ); + } + } else { + test_skip( __FILE__, __LINE__, + "This test is intended only for Pentium 4.", 1 ); + } + + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + values = allocate_test_space( num_tests, 2 ); + +/* First test: just PAPI_LD_INS */ + retval = PAPI_add_event( EventSet, PAPI_LD_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, 
"PAPI_add_event: PAPI_LD_INS", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS / 10 ); + + retval = PAPI_stop( EventSet, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_remove_event( EventSet, PAPI_LD_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event: PAPI_LD_INS", + retval ); + +/* Second test: just PAPI_SR_INS */ + retval = PAPI_add_event( EventSet, PAPI_SR_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event: PAPI_SR_INS", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS / 10 ); + + retval = PAPI_stop( EventSet, values[1] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_remove_event( EventSet, PAPI_SR_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event: PAPI_SR_INS", + retval ); + +/* Third test: just PAPI_LST_INS */ + retval = PAPI_add_event( EventSet, PAPI_LST_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event: PAPI_LST_INS", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS / 10 ); + + retval = PAPI_stop( EventSet, values[2] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + +/* Fourth test: PAPI_LST_INS and PAPI_LD_INS */ + retval = PAPI_add_event( EventSet, PAPI_LD_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event: PAPI_LD_INS", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS / 10 ); + + retval = PAPI_stop( EventSet, 
values[3] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_remove_event( EventSet, PAPI_LD_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event: PAPI_LD_INS", + retval ); + +/* Fifth test: PAPI_LST_INS and PAPI_SR_INS */ + retval = PAPI_add_event( EventSet, PAPI_SR_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event: PAPI_SR_INS", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS / 10 ); + + retval = PAPI_stop( EventSet, values[4] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_remove_event( EventSet, PAPI_SR_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event: PAPI_SR_INS", + retval ); + + retval = PAPI_remove_event( EventSet, PAPI_LST_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event: PAPI_LST_INS", + retval ); + +/* Sixth test: PAPI_LD_INS and PAPI_SR_INS */ + retval = PAPI_add_event( EventSet, PAPI_LD_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event: PAPI_LD_INS", retval ); + + retval = PAPI_add_event( EventSet, PAPI_SR_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event: PAPI_SR_INS", retval ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS / 10 ); + + retval = PAPI_stop( EventSet, values[5] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_remove_event( EventSet, PAPI_LD_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_remove_event: PAPI_LD_INS", + retval ); + + retval = PAPI_remove_event( EventSet, PAPI_SR_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, 
"PAPI_remove_event: PAPI_SR_INS", + retval ); + + + + if ( !TESTS_QUIET ) { + printf( "Pentium 4 Load / Store tests.\n" ); + printf + ( "These PAPI events are counted by setting a tag at the front of the pipeline,\n" ); + printf + ( "and counting tags at the back of the pipeline. All the tags are the same 'color'\n" ); + printf + ( "and can't be distinguished from each other. Therefore, PAPI_LD_INS and PAPI_SR_INS\n" ); + printf + ( "cannot be counted with the other two events, or the answer will always == PAPI_LST_INS.\n" ); + printf + ( "-------------------------------------------------------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS / 10 ); + printf + ( "-------------------------------------------------------------------------------------------\n" ); + + printf + ( "Test: 1 2 3 4 5 6\n" ); + printf( "%s %12lld %12s %12s %12lld %12s %12lld\n", "PAPI_LD_INS: ", + ( values[0] )[0], "------", "------", ( values[3] )[1], + "------", ( values[5] )[0] ); + printf( "%s %12s %12lld %12s %12s %12lld %12lld\n", "PAPI_SR_INS: ", + "------", ( values[1] )[0], "------", "------", + ( values[4] )[1], ( values[5] )[1] ); + printf( "%s %12s %12s %12lld %12lld %12lld %12s\n", "PAPI_LST_INS:", + "------", "------", ( values[2] )[0], ( values[3] )[0], + ( values[4] )[0], "------" ); + printf + ( "-------------------------------------------------------------------------------------------\n" ); + + printf( "Test 1: PAPI_LD_INS only.\n" ); + printf( "Test 2: PAPI_SR_INS only.\n" ); + printf( "Test 3: PAPI_LST_INS only.\n" ); + printf( "Test 4: PAPI_LD_INS and PAPI_LST_INS.\n" ); + printf( "Test 5: PAPI_SR_INS and PAPI_LST_INS.\n" ); + printf( "Test 6: PAPI_LD_INS and 
PAPI_SR_INS.\n" ); + printf + ( "Verification: Values within each column should be the same.\n" ); + printf( " R3C3 ~= (R1C1 + R2C2) ~= all other entries.\n" ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/pernode.c b/src/ctests/pernode.c new file mode 100644 index 0000000..8269eca --- /dev/null +++ b/src/ctests/pernode.c @@ -0,0 +1,123 @@ +/* This file performs the following test: + + - make an event set with PAPI_TOT_INS and PAPI_TOT_CYC. + - enable per node counting + - enable full domain counting + - sleeps for 5 seconds + - print the results +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "papi_test.h" + +int +main( ) +{ + int ncpu, nctr, i, actual_domain; + int retval; + int EventSet = PAPI_NULL; + long long *values; + long long elapsed_us, elapsed_cyc; + PAPI_option_t options; + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + fprintf( stderr, "Library mismatch: code %d, library %d\n", retval, + PAPI_VER_CURRENT ); + exit( 1 ); + } + + if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) + exit( 1 ); + + /* Set the domain as high as it will go. 
*/ + + options.domain.eventset = EventSet; + options.domain.domain = PAPI_DOM_ALL; + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK ) + exit( 1 ); + actual_domain = options.domain.domain; + + /* This should only happen to an empty eventset */ + + options.granularity.eventset = EventSet; + options.granularity.granularity = PAPI_GRN_SYS_CPU; + retval = PAPI_set_opt( PAPI_GRANUL, &options ); + if ( retval != PAPI_OK ) + exit( 1 ); + + /* Malloc the output array */ + + ncpu = PAPI_get_opt( PAPI_MAX_CPUS, NULL ); + nctr = PAPI_get_opt( PAPI_MAX_HWCTRS, NULL ); + values = ( long long * ) malloc( ncpu * nctr * sizeof ( long long ) ); + memset( values, 0x0, ( ncpu * nctr * sizeof ( long long ) ) ); + + /* Add the counters */ + + if ( PAPI_add_event( EventSet, PAPI_TOT_CYC ) != PAPI_OK ) + exit( 1 ); + + if ( PAPI_add_event( EventSet, PAPI_TOT_INS ) != PAPI_OK ) + exit( 1 ); + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) + exit( 1 ); + + sleep( 5 ); + + retval = PAPI_stop( EventSet, values ); + if ( retval != PAPI_OK ) + exit( 1 ); + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + printf( "Test case: per node\n" ); + printf( "-------------------\n\n" ); + + printf( "This machine has %d cpus, each with %d counters.\n", ncpu, nctr ); + printf( "Test case asked for: PAPI_DOM_ALL\n" ); + printf( "Test case got: " ); + if ( actual_domain & PAPI_DOM_USER ) + printf( "PAPI_DOM_USER " ); + if ( actual_domain & PAPI_DOM_KERNEL ) + printf( "PAPI_DOM_KERNEL " ); + if ( actual_domain & PAPI_DOM_OTHER ) + printf( "PAPI_DOM_OTHER " ); + printf( "\n" ); + + for ( i = 0; i < ncpu; i++ ) { + printf( "CPU %d\n", i ); + printf( "PAPI_TOT_CYC: \t%lld\n", values[0 + i * nctr] ); + printf( "PAPI_TOT_INS: \t%lld\n", values[1 + i * nctr] ); + } + + printf + ( 
"\n-------------------------------------------------------------------------\n" ); + + printf( "Real usec : \t%lld\n", elapsed_us ); + printf( "Real cycles : \t%lld\n", elapsed_cyc ); + + printf + ( "-------------------------------------------------------------------------\n" ); + + free( values ); + + PAPI_shutdown( ); + + exit( 0 ); +} diff --git a/src/ctests/prof_utils.c b/src/ctests/prof_utils.c new file mode 100644 index 0000000..1a77084 --- /dev/null +++ b/src/ctests/prof_utils.c @@ -0,0 +1,318 @@ +/* +* File: prof_utils.c +* Author: Dan Terpstra +* terpstra@cs.utk.edu +*/ + +/* This file contains utility functions useful for all profiling tests + It can be used by: + - profile.c, + - sprofile.c, + - profile_pthreads.c, + - profile_twoevents.c, + - earprofile.c, + - future profiling tests. +*/ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#include "prof_utils.h" + +/* variables global to profiling tests */ +long long **values; +char event_name[PAPI_MAX_STR_LEN]; +int PAPI_event; +int EventSet = PAPI_NULL; +void *profbuf[5]; + + +/* Many profiling tests count one of {FP_INS, FP_OPS, TOT_INS} and TOT_CYC. + This function creates an event set containing the appropriate pair of events. + It also initializes the global event_name string to the event selected. + Assumed globals: EventSet, PAPI_event, event_name. 
+*/ +int +prof_events( int num_tests) +{ + int retval; + int num_events, mask; + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depends on the availability of the event on the + platform */ + EventSet = add_two_nonderived_events( &num_events, &PAPI_event, &mask ); + + if (num_events==0) { + return 0; + } + + values = allocate_test_space( num_tests, num_events ); + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + return mask; +} + +/* This function displays info from the prginfo structure in a standardized format. +*/ +void +prof_print_address( const char *title, const PAPI_exe_info_t * prginfo ) +{ + printf( "%s\n", title ); + printf + ( "----------------------------------------------------------------\n" ); + printf( "Text start: %p, Text end: %p, Text length: %#x\n", + prginfo->address_info.text_start, prginfo->address_info.text_end, + ( unsigned int ) ( prginfo->address_info.text_end - + prginfo->address_info.text_start ) ); + printf( "Data start: %p, Data end: %p\n", prginfo->address_info.data_start, + prginfo->address_info.data_end ); + printf( "BSS start : %p, BSS end : %p\n", prginfo->address_info.bss_start, + prginfo->address_info.bss_end ); + + printf + ( "----------------------------------------------------------------\n" ); +} + +/* This function displays profining information useful for several profile tests. + It (probably inappropriately) assumes use of a common THRESHOLD. This should + probably be a passed parameter. + Assumed globals: event_name, start, stop. +*/ +void +prof_print_prof_info( caddr_t start, caddr_t end, int threshold, + char *event_name ) +{ + printf( "Profiling event : %s\n", event_name ); + printf( "Profile Threshold: %d\n", threshold ); + printf( "Profile Iters : %d\n", + ( getenv( "NUM_ITERS" ) ? 
atoi( getenv( "NUM_ITERS" ) ) : + NUM_ITERS ) ); + printf( "Profile Range : %p to %p\n", start, end ); + printf + ( "----------------------------------------------------------------\n" ); + printf( "\n" ); +} + +/* Most profile tests begin by counting the eventset with no profiling enabled. + This function does that work. It assumes that the 'work' routine is do_both(). + A better implementation would pass a pointer to the work function. + Assumed globals: EventSet, values, event_name. +*/ +void +do_no_profile( int quiet ) +{ + int retval; + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( getenv( "NUM_ITERS" ) ? atoi( getenv( "NUM_ITERS" ) ) : + NUM_ITERS ); + + if ( ( retval = PAPI_stop( EventSet, values[0] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if (!quiet) { + printf( "Test type : \t%s\n", "No profiling" ); + printf( TAB1, event_name, ( values[0] )[0] ); + printf( TAB1, "PAPI_TOT_CYC", ( values[0] )[1] ); + } +} + +/* This routine allocates and initializes up to 5 equal sized profiling buffers. + They need to be freed when profiling is completed. + The number and size are passed parameters. + The profbuf[] array of void * pointers is an assumed global. + It should be cast to the required type by the parent routine. +*/ +void +prof_alloc( int num, unsigned long blength ) +{ + int i; + + for ( i = 0; i < num; i++ ) { + profbuf[i] = malloc( blength ); + if ( profbuf[i] == NULL ) { + test_fail( __FILE__, __LINE__, "malloc", PAPI_ESYS ); + } + memset( profbuf[i], 0x00, blength ); + } +} + +/* Given the profiling type (16, 32, or 64) this function returns the + bucket size in bytes. NOTE: the bucket size does not ALWAYS correspond + to the expected value, esp on architectures like Cray with weird data types. + This is necessary because the posix_profile routine in extras.c relies on + the data types and sizes produced by the compiler. 
+*/ +int +prof_buckets( int bucket ) +{ + int bucket_size; + switch ( bucket ) { + case PAPI_PROFIL_BUCKET_16: + bucket_size = sizeof ( short ); + break; + case PAPI_PROFIL_BUCKET_32: + bucket_size = sizeof ( int ); + break; + case PAPI_PROFIL_BUCKET_64: + bucket_size = sizeof ( unsigned long long ); + break; + default: + bucket_size = 0; + break; + } + return ( bucket_size ); +} + +/* A standardized header printing routine. No assumed globals. +*/ +void +prof_head( unsigned long blength, int bucket, int num_buckets, const char *header ) +{ + int bucket_size = prof_buckets( bucket ); + printf + ( "\n------------------------------------------------------------\n" ); + printf( "PAPI_profil() hash table, Bucket size: %d bits.\n", + bucket_size * 8 ); + printf( "Number of buckets: %d.\nLength of buffer: %ld bytes.\n", + num_buckets, blength ); + printf( "------------------------------------------------------------\n" ); + printf( "%s\n", header ); +} + +/* This function prints a standardized profile output based on the bucket size. + A row consisting of an address and 'n' data elements is displayed for each + address with at least one non-zero bucket. + Assumes global profbuf[] array pointers. 
+*/ +void +prof_out( caddr_t start, int n, int bucket, int num_buckets, + unsigned int scale ) +{ + int i, j; + unsigned short buf_16; + unsigned int buf_32; + unsigned long long buf_64; + unsigned short **buf16 = ( unsigned short ** ) profbuf; + unsigned int **buf32 = ( unsigned int ** ) profbuf; + unsigned long long **buf64 = ( unsigned long long ** ) profbuf; + + if ( !TESTS_QUIET ) { + /* printf("%#lx\n",(unsigned long) start + (unsigned long) (2 * i)); */ + /* printf("start: %p; i: %#x; scale: %#x; i*scale: %#x; i*scale >>15: %#x\n", start, i, scale, i*scale, (i*scale)>>15); */ + switch ( bucket ) { + case PAPI_PROFIL_BUCKET_16: + for ( i = 0; i < num_buckets; i++ ) { + for ( j = 0, buf_16 = 0; j < n; j++ ) + buf_16 |= ( buf16[j] )[i]; + if ( buf_16 ) { +/* On 32bit builds with gcc 4.3 gcc complained about casting caddr_t => long long + * Thus the unsigned long to long long cast */ + printf( "%#-16llx", + (long long) (unsigned long)start + + ( ( ( long long ) i * scale ) >> 15 ) ); + for ( j = 0, buf_16 = 0; j < n; j++ ) + printf( "\t%d", ( buf16[j] )[i] ); + printf( "\n" ); + } + } + break; + case PAPI_PROFIL_BUCKET_32: + for ( i = 0; i < num_buckets; i++ ) { + for ( j = 0, buf_32 = 0; j < n; j++ ) + buf_32 |= ( buf32[j] )[i]; + if ( buf_32 ) { + printf( "%#-16llx", + (long long) (unsigned long)start + + ( ( ( long long ) i * scale ) >> 15 ) ); + for ( j = 0, buf_32 = 0; j < n; j++ ) + printf( "\t%d", ( buf32[j] )[i] ); + printf( "\n" ); + } + } + break; + case PAPI_PROFIL_BUCKET_64: + for ( i = 0; i < num_buckets; i++ ) { + for ( j = 0, buf_64 = 0; j < n; j++ ) + buf_64 |= ( buf64[j] )[i]; + if ( buf_64 ) { + printf( "%#-16llx", + (long long) (unsigned long)start + + ( ( ( long long ) i * scale ) >> 15 ) ); + for ( j = 0, buf_64 = 0; j < n; j++ ) + printf( "\t%lld", ( buf64[j] )[i] ); + printf( "\n" ); + } + } + break; + } + printf + ( "------------------------------------------------------------\n\n" ); + } +} + +/* This function checks to make sure that 
some buffer value somewhere is nonzero. + If all buffers are empty, zero is returned. This usually indicates a profiling + failure. Assumes global profbuf[]. +*/ +int +prof_check( int n, int bucket, int num_buckets ) +{ + int i, j; + int retval = 0; + unsigned short **buf16 = ( unsigned short ** ) profbuf; + unsigned int **buf32 = ( unsigned int ** ) profbuf; + unsigned long long **buf64 = ( unsigned long long ** ) profbuf; + + switch ( bucket ) { + case PAPI_PROFIL_BUCKET_16: + for ( i = 0; i < num_buckets; i++ ) + for ( j = 0; j < n; j++ ) + retval = retval || buf16[j][i]; + break; + case PAPI_PROFIL_BUCKET_32: + for ( i = 0; i < num_buckets; i++ ) + for ( j = 0; j < n; j++ ) + retval = retval || buf32[j][i]; + break; + case PAPI_PROFIL_BUCKET_64: + for ( i = 0; i < num_buckets; i++ ) + for ( j = 0; j < n; j++ ) + retval = retval || buf64[j][i]; + break; + } + return ( retval ); +} + +/* Computes the length (in bytes) of the buffer required for profiling. + 'plength' is the profile length, or address range to be profiled. + By convention, it is assumed that there are half as many buckets as addresses. + The scale factor is a fixed point fraction in which 0xffff = ~1 + 0x8000 = 1/2 + 0x4000 = 1/4, etc. + Thus, the number of profile buckets is (plength/2) * (scale/65536), + and the length (in bytes) of the profile buffer is buckets * bucket size. 
+ */ +unsigned long +prof_size( unsigned long plength, unsigned scale, int bucket, int *num_buckets ) +{ + unsigned long blength; + long long llength = ( ( long long ) plength * scale ); + int bucket_size = prof_buckets( bucket ); + *num_buckets = ( int ) ( llength / 65536 / 2 ); + blength = ( unsigned long ) ( *num_buckets * bucket_size ); + return ( blength ); +} diff --git a/src/ctests/prof_utils.h b/src/ctests/prof_utils.h new file mode 100644 index 0000000..04b3c29 --- /dev/null +++ b/src/ctests/prof_utils.h @@ -0,0 +1,56 @@ +/* +* File: prof_utils.h +* Author: Dan Terpstra +* terpstra@cs.utk.edu +* Mods: Maynard Johnson +* maynardj@us.ibm.com +*/ + +/* This file contains utility definitions useful for all profiling tests + It should be #included in: + - profile.c, + - sprofile.c, + - profile_pthreads.c, + - profile_twoevents.c, + - earprofile.c, + - future profiling tests. +*/ + +/* value for scale parameter that sets scale to 1 */ +#define FULL_SCALE 65536 + +/* Internal prototype */ +int prof_events(int num_tests); +void prof_print_address(const char *title, const PAPI_exe_info_t *prginfo); +void prof_print_prof_info(caddr_t start, caddr_t end, int threshold, char *event_name); +void prof_alloc(int num, unsigned long plength); +void prof_head(unsigned long blength, int bucket_size, int num_buckets, const char *header); +void prof_out(caddr_t start, int n, int bucket, int num_buckets, unsigned int scale); +unsigned long prof_size(unsigned long plength, unsigned scale, int bucket, int *num_buckets); +int prof_check(int n, int bucket, int num_buckets); +int prof_buckets(int bucket); +void do_no_profile(int quiet); + +/* variables global to profiling tests */ +extern long long **values; +extern char event_name[PAPI_MAX_STR_LEN]; +extern int PAPI_event; +extern int EventSet; +extern void *profbuf[5]; + +/* Itanium returns function descriptors instead of function addresses. + I couldn't find the following structure in a header file, + so I duplicated it below. 
+*/ +#if (defined(ITANIUM1) || defined(ITANIUM2)) + struct fdesc { + void *ip; /* entry point (code address) */ + void *gp; /* global-pointer */ + }; +#elif defined(__powerpc64__) + struct fdesc { + void * ip; // function entry point + void * toc; + void * env; + }; +#endif diff --git a/src/ctests/profile.c b/src/ctests/profile.c new file mode 100644 index 0000000..c5df359 --- /dev/null +++ b/src/ctests/profile.c @@ -0,0 +1,225 @@ +/* +* File: profile.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: Dan Terpstra +* terpstra@cs.utk.edu +* Mods: Maynard Johnson +* maynardj@us.ibm.com +*/ + +/* This file performs the following test: + profiling and program info option call + + - This tests the SVR4 profiling interface of PAPI. These are counted + in the default counting domain and default granularity, depending on + the platform. Usually this is the user domain (PAPI_DOM_USER) and + thread context (PAPI_GRN_THR). + + The Eventset contains: + + PAPI_FP_INS (to profile) + + PAPI_TOT_CYC + + - Set up profile + - Start eventset 1 + - Do both (flops and reads) + - Stop eventset 1 +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" +#include "prof_utils.h" + +#include "do_loops.h" + +#define PROFILE_ALL + +static int +do_profile( caddr_t start, unsigned long plength, unsigned scale, int thresh, + int bucket ) +{ + int i, retval; + unsigned long blength; + int num_buckets; + + const char *profstr[5] = { "PAPI_PROFIL_POSIX", + "PAPI_PROFIL_RANDOM", + "PAPI_PROFIL_WEIGHTED", + "PAPI_PROFIL_COMPRESS", + "PAPI_PROFIL_" + }; + + int profflags[5] = { PAPI_PROFIL_POSIX, + PAPI_PROFIL_POSIX | PAPI_PROFIL_RANDOM, + PAPI_PROFIL_POSIX | PAPI_PROFIL_WEIGHTED, + PAPI_PROFIL_POSIX | PAPI_PROFIL_COMPRESS, + PAPI_PROFIL_POSIX | PAPI_PROFIL_WEIGHTED | + PAPI_PROFIL_RANDOM | PAPI_PROFIL_COMPRESS + }; + + do_no_profile( TESTS_QUIET ); + blength = prof_size( plength, scale, bucket, &num_buckets ); + prof_alloc( 5, blength ); + + for ( i = 0; i < 5; i++ ) { + if ( 
!TESTS_QUIET ) { + printf( "Test type : \t%s\n", profstr[i] ); + } + +#ifndef SWPROFILE + if ( ( retval = + PAPI_profil( profbuf[i], ( unsigned int ) blength, start, scale, + EventSet, PAPI_event, thresh, + profflags[i] | bucket ) ) != PAPI_OK ) { + if (retval==PAPI_ENOSUPP) { + char warning[BUFSIZ]; + + sprintf(warning,"PAPI_profil %s not supported", + profstr[i]); + test_warn( __FILE__, __LINE__, warning, 1 ); + } + else { + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + } + } +#else + if ( ( retval = + PAPI_profil( profbuf[i], ( unsigned int ) blength, start, scale, + EventSet, PAPI_event, thresh, + profflags[i] | bucket | PAPI_PROFIL_FORCE_SW ) ) != + PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + } +#endif + + if ( retval != PAPI_OK ) + break; + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( getenv( "NUM_FLOPS" ) ? atoi( getenv( "NUM_FLOPS" ) ) : + NUM_FLOPS ); + + if ( ( retval = PAPI_stop( EventSet, values[1] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if ( !TESTS_QUIET ) { + printf( TAB1, event_name, ( values[1] )[0] ); + printf( TAB1, "PAPI_TOT_CYC", ( values[1] )[1] ); + } + retval = PAPI_profil( profbuf[i], ( unsigned int ) blength, start, scale, + EventSet, PAPI_event, 0, + profflags[i] ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + } + } + + if ( retval == PAPI_OK ) { + if (!TESTS_QUIET) prof_head( blength, bucket, num_buckets, + "address\t\t\tflat\trandom\tweight\tcomprs\tall\n" ); + if (!TESTS_QUIET) prof_out( start, 5, bucket, num_buckets, scale ); + retval = prof_check( 5, bucket, num_buckets ); + } + + for ( i = 0; i < 5; i++ ) { + free( profbuf[i] ); + } + + return retval; +} + + +int +main( int argc, char **argv ) +{ + int num_tests = 6; + long length; + int mask; + int retval; + int mythreshold = THRESHOLD; + const PAPI_exe_info_t *prginfo; + caddr_t 
start, end; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if ( ( prginfo = PAPI_get_executable_info( ) ) == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); + } + + retval = PAPI_query_event(PAPI_TOT_CYC); + if (retval!=PAPI_OK) { + if (!quiet) printf("No events found\n"); + test_skip(__FILE__, __LINE__,"No events found",1); + } + + mask = prof_events( num_tests ); + +#ifdef PROFILE_ALL +/* use these lines to profile entire code address space */ + start = prginfo->address_info.text_start; + end = prginfo->address_info.text_end; +#else +/* use these lines to profile only do_flops address space */ + start = ( caddr_t ) do_flops; + end = ( caddr_t ) fdo_flops; +/* Itanium and ppc64 processors return function descriptors instead of function addresses. + You must dereference the descriptor to get the address. 
+*/ +#if defined(ITANIUM1) || defined(ITANIUM2) || defined(__powerpc64__) + start = ( caddr_t ) ( ( ( struct fdesc * ) start )->ip ); + end = ( caddr_t ) ( ( ( struct fdesc * ) end )->ip ); +#endif +#endif + +#if defined(linux) + { + char *tmp = getenv( "THRESHOLD" ); + if ( tmp ) + mythreshold = atoi( tmp ); + } +#endif + + length = end - start; + if ( length < 0 ) { + test_fail( __FILE__, __LINE__, "Profile length < 0!", ( int ) length ); + } + + if (!quiet) { + prof_print_address( "Test case profile: " + "POSIX compatible profiling with hardware counters.\n", + prginfo ); + prof_print_prof_info( start, end, mythreshold, event_name ); + } + + retval = do_profile( start, ( unsigned long ) length, FULL_SCALE, + mythreshold, PAPI_PROFIL_BUCKET_16 ); + if ( retval == PAPI_OK ) { + retval = do_profile( start, ( unsigned long ) length, + FULL_SCALE, mythreshold, + PAPI_PROFIL_BUCKET_32 ); + } + if ( retval == PAPI_OK ) { + retval = do_profile( start, ( unsigned long ) length, + FULL_SCALE, mythreshold, + PAPI_PROFIL_BUCKET_64 ); + } + + remove_test_events( &EventSet, mask ); + + test_pass( __FILE__ ); + + return 0; +} + diff --git a/src/ctests/profile_pthreads.c b/src/ctests/profile_pthreads.c new file mode 100644 index 0000000..ab31918 --- /dev/null +++ b/src/ctests/profile_pthreads.c @@ -0,0 +1,213 @@ +/* This file performs the following test: profile for pthreads */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define THR 1000000 +#define FLOPS 100000000 + +unsigned int length; +caddr_t my_start, my_end; + +void * +Thread( void *arg ) +{ + int retval, num_tests = 1, i; + int EventSet1 = PAPI_NULL, mask1, PAPI_event; + int num_events1; + long long **values; + long long elapsed_us, elapsed_cyc; + unsigned short *profbuf; + char event_name[PAPI_MAX_STR_LEN]; + + retval = PAPI_register_thread( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); + } + + 
profbuf = ( unsigned short * ) malloc( length * sizeof ( unsigned short ) ); + if ( profbuf == NULL ) { + test_fail(__FILE__, __LINE__, "Allocate memory",0); + } + + memset( profbuf, 0x00, length * sizeof ( unsigned short ) ); + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depends on the availability of the event on the + platform */ + EventSet1 = add_two_nonderived_events( &num_events1, &PAPI_event, &mask1 ); + + values = allocate_test_space( num_tests, num_events1 ); + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + retval = PAPI_profil( profbuf, length, my_start, 65536, + EventSet1, PAPI_event, THR, PAPI_PROFIL_POSIX ); + if ( retval ) { + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + } + + retval = PAPI_start( EventSet1 ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( *( int * ) arg ); + + retval = PAPI_stop( EventSet1, values[0] ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + /* to remove the profile flag */ + retval = PAPI_profil( profbuf, length, my_start, 65536, + EventSet1, PAPI_event, 0, PAPI_PROFIL_POSIX ); + if ( retval ) { + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + } + + remove_test_events( &EventSet1, mask1 ); + + if ( !TESTS_QUIET ) { + if ( mask1 == 0x3 ) { + printf( "Thread %#x PAPI_TOT_INS : \t%lld\n", + ( int ) pthread_self( ), ( values[0] )[0] ); + } else { + printf( "Thread %#x PAPI_FP_INS : \t%lld\n", + ( int ) pthread_self( ), ( values[0] )[0] ); + } + printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", ( int ) pthread_self( ), + ( values[0] )[1] ); + printf( "Thread %#x 
Real usec : \t%lld\n", ( int ) pthread_self( ), + elapsed_us ); + printf( "Thread %#x Real cycles : \t%lld\n", ( int ) pthread_self( ), + elapsed_cyc ); + + printf( "Test case: PAPI_profil() for pthreads\n" ); + printf( "----Profile buffer for Thread %#x---\n", + ( int ) pthread_self( ) ); + for ( i = 0; i < ( int ) length; i++ ) { + if ( profbuf[i] ) + printf( "%#lx\t%d\n", ( unsigned long ) ( my_start + 2 * i ), + profbuf[i] ); + } + } + for ( i = 0; i < ( int ) length; i++ ) + if ( profbuf[i] ) + break; + + if ( i >= ( int ) length ) { + test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); + } + free_test_space( values, num_tests ); + + retval = PAPI_unregister_thread( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); + } + + return NULL; +} + +int +main( int argc, char **argv ) +{ + pthread_t id[NUM_THREADS]; + int flops[NUM_THREADS]; + int i, rc, retval; + pthread_attr_t attr; + long long elapsed_us, elapsed_cyc; + const PAPI_exe_info_t *prginfo = NULL; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + retval = PAPI_query_event(PAPI_TOT_CYC); + if (retval != PAPI_OK) { + + if (!quiet) printf("Trouble adding event\n"); + test_skip(__FILE__,__LINE__,"No events",0); + } + + retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self )); + if (retval != PAPI_OK ) { + if ( retval == PAPI_ECMP ) + test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); + else + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } + + if ( ( prginfo = PAPI_get_executable_info( ) ) == NULL ) { + retval = 1; + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", retval ); + } + + my_start = prginfo->address_info.text_start; + my_end = prginfo->address_info.text_end; + length = ( unsigned int ) ( 
my_end - my_start ); + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + pthread_attr_init( &attr ); +#ifdef PTHREAD_CREATE_UNDETACHED + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); +#endif +#ifdef PTHREAD_SCOPE_SYSTEM + retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); + if ( retval != 0 ) + test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); +#endif + + for ( i = 0; i < NUM_THREADS; i++ ) { + flops[i] = FLOPS * ( i + 1 ); + rc = pthread_create( &id[i], &attr, Thread, ( void * ) &flops[i] ); + if ( rc ) + return ( FAILURE ); + } + for ( i = 0; i < NUM_THREADS; i++ ) + pthread_join( id[i], NULL ); + + pthread_attr_destroy( &attr ); + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + if ( !quiet ) { + printf( "Master real usec : \t%lld\n", elapsed_us ); + printf( "Master real cycles : \t%lld\n", elapsed_cyc ); + } + + test_pass( __FILE__ ); + + pthread_exit( NULL ); + + return 0; + +} diff --git a/src/ctests/profile_twoevents.c b/src/ctests/profile_twoevents.c new file mode 100644 index 0000000..8de2ac2 --- /dev/null +++ b/src/ctests/profile_twoevents.c @@ -0,0 +1,133 @@ +/* +* File: profile_twoevents.c +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +/* This file performs the following test: profiling two events */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" +#include "prof_utils.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int i, num_tests = 6; + unsigned long length, blength; + int num_buckets, mask; + char title[80]; + int retval; + const PAPI_exe_info_t *prginfo; + caddr_t start, end; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if ( ( prginfo = PAPI_get_executable_info( ) ) == 
NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); + } + + mask = prof_events( num_tests ); + start = prginfo->address_info.text_start; + end = prginfo->address_info.text_end; + + /* Must have at least FP instr or Tot ins */ + + if ( ( ( mask & MASK_FP_INS ) == 0 ) && ( ( mask & MASK_TOT_INS ) == 0 ) ) { + if (!quiet) printf("No events could be added\n"); + test_skip( __FILE__, __LINE__, "No FP or Total Ins. event", 1 ); + } + + if ( start > end ) + test_fail( __FILE__, __LINE__, "Profile length < 0!", 0 ); + length = ( unsigned long ) ( end - start ); + + if (!quiet) { + prof_print_address( "Test case profile: POSIX compatible profiling with two events.\n", + prginfo ); + prof_print_prof_info( start, end, THRESHOLD, event_name ); + } + + prof_alloc( 2, length ); + + blength = + prof_size( length, FULL_SCALE, PAPI_PROFIL_BUCKET_16, &num_buckets ); + do_no_profile( quiet ); + + if ( !quiet ) { + printf( "Test type : \tPAPI_PROFIL_POSIX\n" ); + } + if ( ( retval = + PAPI_profil( profbuf[0], ( unsigned int ) blength, start, FULL_SCALE, + EventSet, PAPI_event, THRESHOLD, + PAPI_PROFIL_POSIX ) ) != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + } + if ( ( retval = + PAPI_profil( profbuf[1], ( unsigned int ) blength, start, FULL_SCALE, + EventSet, PAPI_TOT_CYC, THRESHOLD, + PAPI_PROFIL_POSIX ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + + do_stuff( ); + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + if ( ( retval = PAPI_stop( EventSet, values[1] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if ( !quiet ) { + printf( TAB1, event_name, ( values[1] )[0] ); + printf( TAB1, "PAPI_TOT_CYC:", ( values[1] )[1] ); + } + if ( ( retval = + PAPI_profil( profbuf[0], ( unsigned int ) blength, start, FULL_SCALE, + EventSet, PAPI_event, 0, + PAPI_PROFIL_POSIX ) ) != PAPI_OK ) + test_fail( 
__FILE__, __LINE__, "PAPI_profil", retval ); + + if ( ( retval = + PAPI_profil( profbuf[1], ( unsigned int ) blength, start, FULL_SCALE, + EventSet, PAPI_TOT_CYC, 0, + PAPI_PROFIL_POSIX ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); + + sprintf( title, " \t\t %s\tPAPI_TOT_CYC\naddress\t\t\tcounts\tcounts\n", + event_name ); + + if (!quiet) { + prof_head( blength, PAPI_PROFIL_BUCKET_16, num_buckets, title ); + prof_out( start, 2, PAPI_PROFIL_BUCKET_16, num_buckets, FULL_SCALE ); + } + + remove_test_events( &EventSet, mask ); + + retval = prof_check( 2, PAPI_PROFIL_BUCKET_16, num_buckets ); + + for ( i = 0; i < 2; i++ ) { + free( profbuf[i] ); + } + + if ( retval == 0 ) { + test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/pthrtough.c b/src/ctests/pthrtough.c new file mode 100644 index 0000000..8a80b80 --- /dev/null +++ b/src/ctests/pthrtough.c @@ -0,0 +1,100 @@ +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NITER 1000 + +void * +Thread( void *data ) +{ + int i, ret, evtset; + + ( void ) data; + + for ( i = 0; i < NITER; i++ ) { + if ( ( ret = PAPI_register_thread( ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_thread_init", ret ); + + evtset = PAPI_NULL; + if ( ( ret = PAPI_create_eventset( &evtset ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", ret ); + + if ( ( ret = PAPI_destroy_eventset( &evtset ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", ret ); + + if ( ( ret = PAPI_unregister_thread( ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", ret ); + } + return ( NULL ); +} + +int +main( int argc, char *argv[] ) +{ + int j; + pthread_t *th = NULL; + pthread_attr_t attr; + int ret; + long nthr; + const PAPI_hw_info_t *hwinfo; + + tests_quiet( argc, argv ); /*Set TESTS_QUIET variable */ + + ret = PAPI_library_init( 
PAPI_VER_CURRENT ); + if ( ret != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", ret ); + + if ( ( ret = + PAPI_thread_init( ( unsigned + long ( * )( void ) ) ( pthread_self ) ) ) != + PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_thread_init", ret ); + + pthread_attr_init( &attr ); +#ifdef PTHREAD_CREATE_UNDETACHED + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); +#endif +#ifdef PTHREAD_SCOPE_SYSTEM + ret=pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); + if ( ret != 0 ) + test_skip( __FILE__, __LINE__, "pthread_attr_setscope", ret ); + +#endif + + if ( ( hwinfo = PAPI_get_hardware_info( ) ) == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 0 ); + + nthr = hwinfo->ncpu; + + if ( !TESTS_QUIET ) { + printf( "Creating %ld threads for %d iterations each of:\n", nthr, + NITER ); + printf( "\tregister\n" ); + printf( "\tcreate_eventset\n" ); + printf( "\tdestroy_eventset\n" ); + printf( "\tunregister\n" ); + } + + th = ( pthread_t * ) malloc( ( size_t ) nthr * sizeof ( pthread_t ) ); + if ( th == NULL ) + test_fail( __FILE__, __LINE__, "malloc", PAPI_ESYS ); + + for ( j = 0; j < nthr; j++ ) { + ret = pthread_create( &th[j], &attr, &Thread, NULL ); + if ( ret ) + test_fail( __FILE__, __LINE__, "pthread_create", PAPI_ESYS ); + } + + for ( j = 0; j < nthr; j++ ) { + pthread_join( th[j], NULL ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/pthrtough2.c b/src/ctests/pthrtough2.c new file mode 100644 index 0000000..28c173c --- /dev/null +++ b/src/ctests/pthrtough2.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define NITER 2000 + +void * +Thread( void *data ) +{ + int ret, evtset; + + ( void ) data; + + if ( ( ret = PAPI_register_thread( ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_thread_init", ret ); + + evtset = PAPI_NULL; + if ( ( ret = PAPI_create_eventset( &evtset ) ) != PAPI_OK ) + test_fail( 
__FILE__, __LINE__, "PAPI_create_eventset", ret ); + + if ( ( ret = PAPI_destroy_eventset( &evtset ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", ret ); + + if ( ( ret = PAPI_unregister_thread( ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", ret ); + + return ( NULL ); +} + +int +main( int argc, char *argv[] ) +{ + int j; + pthread_t *th = NULL; + pthread_attr_t attr; + int ret; + long nthr; + + tests_quiet( argc, argv ); /*Set TESTS_QUIET variable */ + + ret = PAPI_library_init( PAPI_VER_CURRENT ); + if ( ret != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", ret ); + + if ( ( ret = + PAPI_thread_init( ( unsigned + long ( * )( void ) ) ( pthread_self ) ) ) != + PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_thread_init", ret ); + + pthread_attr_init( &attr ); +#ifdef PTHREAD_CREATE_UNDETACHED + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); +#endif +#ifdef PTHREAD_SCOPE_SYSTEM + ret = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); + if ( ret != 0 ) + test_skip( __FILE__, __LINE__, "pthread_attr_setscope", ret ); + +#endif + + nthr = NITER; + + if ( !TESTS_QUIET ) { + printf( "Creating %d threads for %d iterations each of:\n", + ( int ) nthr, 1 ); + printf( "\tregister\n" ); + printf( "\tcreate_eventset\n" ); + printf( "\tdestroy_eventset\n" ); + printf( "\tunregister\n" ); + } + th = ( pthread_t * ) malloc( ( size_t ) nthr * sizeof ( pthread_t ) ); + if ( th == NULL ) + test_fail( __FILE__, __LINE__, "malloc", PAPI_ESYS ); + + for ( j = 0; j < nthr; j++ ) { + ret = pthread_create( &th[j], &attr, &Thread, NULL ); + if ( ret ) { + printf( "Failed to create thread: %d\n", j ); + if ( j < 10 ) + test_fail( __FILE__, __LINE__, "pthread_create", PAPI_ESYS ); + printf( "Continuing test with %d threads.\n", j - 1 ); + nthr = j - 1; + th = ( pthread_t * ) realloc( th, + ( size_t ) nthr * + sizeof ( pthread_t ) ); + break; + } + } + + for ( j = 0; j < nthr; j++ ) { + 
pthread_join( th[j], NULL ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/realtime.c b/src/ctests/realtime.c new file mode 100644 index 0000000..6de859d --- /dev/null +++ b/src/ctests/realtime.c @@ -0,0 +1,87 @@ +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval; + long long elapsed_us, elapsed_cyc; + const PAPI_hw_info_t *hw_info; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + } + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + if (!quiet) { + printf( "Testing real time clock. (CPU Max %d MHz, CPU Min %d MHz)\n", + hw_info->cpu_max_mhz, hw_info->cpu_min_mhz ); + printf( "Sleeping for 10 seconds.\n" ); + } + + sleep( 10 ); + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + if (!quiet) { + printf( "%lld us. %lld cyc.\n", elapsed_us, elapsed_cyc ); + printf( "%f Computed MHz.\n", + ( float ) elapsed_cyc / ( float ) elapsed_us ); + } + +/* Elapsed microseconds and elapsed cycles are not as unambiguous as they appear. + On Pentium III and 4, for example, cycles is a measured value, while useconds + is computed from cycles and mhz. MHz is read from /proc/cpuinfo (on linux). + Thus, any error in MHz is propagated to useconds. + Conversely, on ultrasparc useconds are extracted from a system call (gethrtime()) + and cycles are computed from useconds. Also, MHz comes from a scan of system info, + Thus any error in gethrtime() propagates to both cycles and useconds, and cycles + can be further impacted by errors in reported MHz. 
+ Without knowing the error bars on these system values, we can't really specify + error ranges for our reported values, but we *DO* know that errors for at least + one instance of Pentium 4 (torc17@utk) are on the order of one part per thousand. + Newer multicore Intel processors seem to have broken the relationship between the + clock rate reported in /proc/cpuinfo and the actual computed clock. To accomodate + this artifact, the test no longer fails, but merely reports results out of range. +*/ + + + + if ( elapsed_us < 9000000 ) { + if (!quiet) printf( "NOTE: Elapsed real time less than 9 seconds (%lld us)!\n",elapsed_us ); + test_fail(__FILE__,__LINE__,"Real time too short",1); + } + + if ( elapsed_us > 11000000 ) { + if (!quiet) printf( "NOTE: Elapsed real time greater than 11 seconds! (%lld us)\n", elapsed_us ); + test_fail(__FILE__,__LINE__,"Real time too long",1); + } + + if ( ( float ) elapsed_cyc < 9.0 * hw_info->cpu_max_mhz * 1000000.0 ) + if (!quiet) printf( "NOTE: Elapsed real cycles less than 9*MHz*1000000.0!\n" ); + if ( ( float ) elapsed_cyc > 11.0 * hw_info->cpu_max_mhz * 1000000.0 ) + if (!quiet) printf( "NOTE: Elapsed real cycles greater than 11*MHz*1000000.0!\n" ); + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/remove_events.c b/src/ctests/remove_events.c new file mode 100644 index 0000000..6cc453c --- /dev/null +++ b/src/ctests/remove_events.c @@ -0,0 +1,132 @@ +/* This test checks if removing events works properly at the low level + + by Vince Weaver (vweaver1@eecs.utk.edu) + + */ + +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int retval; + int EventSet = PAPI_NULL; + long long values1[2],values2[2]; + const char *event_names[] = {"PAPI_TOT_CYC","PAPI_TOT_INS"}; + char add_event_str[PAPI_MAX_STR_LEN]; + double instructions_error; + long long old_instructions; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + 
+ /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Create an empty event set */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + /* add the events named above */ + retval = PAPI_add_named_event( EventSet, event_names[0] ); + if ( retval != PAPI_OK ) { + sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[0] ); + if (!quiet) printf("Trouble %s\n",add_event_str); + test_skip( __FILE__, __LINE__, add_event_str, retval ); + } + + retval = PAPI_add_named_event( EventSet, event_names[1] ); + if ( retval != PAPI_OK ) { + sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[1] ); + test_fail( __FILE__, __LINE__, add_event_str, retval ); + } + + /* Start PAPI */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + /* our test code */ + do_flops( NUM_FLOPS ); + + /* Stop PAPI */ + retval = PAPI_stop( EventSet, values1 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + + old_instructions=values1[1]; + + if ( !quiet ) { + + printf( "========================\n" ); + + /* cycles is first, other event second */ + sprintf( add_event_str, "%-12s : \t", event_names[0] ); + printf( TAB1, add_event_str, values1[0] ); + sprintf( add_event_str, "%-12s : \t", event_names[1] ); + printf( TAB1, add_event_str, values1[1] ); + } + + + /* remove PAPI_TOT_CYC */ + retval = PAPI_remove_named_event( EventSet, event_names[0] ); + if ( retval != PAPI_OK ) { + sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[0] ); + test_fail( __FILE__, __LINE__, add_event_str, retval ); + } + + + /* Start PAPI */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, 
"PAPI_start", retval ); + } + + /* our test code */ + do_flops( NUM_FLOPS ); + + /* Stop PAPI */ + retval = PAPI_stop( EventSet, values2 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + + /* test if after removing the event, the second event */ + /* still points to the proper native event */ + + /* this only works if IPC != 1 */ + + if ( !quiet ) { + + printf( "==========================\n" ); + printf( "After removing PAP_TOT_CYC\n"); + sprintf( add_event_str, "%-12s : \t", event_names[1] ); + printf( TAB1, add_event_str, values2[0] ); + + instructions_error=((double)old_instructions - (double)values2[0])/ + (double)old_instructions; + if (instructions_error>10.0) { + printf("Error of %.2f%%\n",instructions_error); + test_fail( __FILE__, __LINE__, "validation", 0 ); + } + + } + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/reset.c b/src/ctests/reset.c new file mode 100644 index 0000000..bea74f0 --- /dev/null +++ b/src/ctests/reset.c @@ -0,0 +1,301 @@ +/* This file performs the following test: start, read, stop and again functionality + + - It attempts to use the following three counters. It may use less depending on + hardware counter resource limitations. These are counted in the default counting + domain and default granularity, depending on the platform. Usually this is + the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). 
+ + PAPI_FP_INS or PAPI_TOT_INS if PAPI_FP_INS doesn't exist + + PAPI_TOT_CYC + + 1 + - Start counters + - Do flops + - Stop counters + + 2 + - Start counters + - Do flops + - Stop counters (should duplicate above) + + 3 + - Reset counters (should be redundant if stop works properly) + - Start counters + - Do flops + - Stop counters + + 4 + - Start counters + - Do flops/2 + - Read counters (flops/2;counters keep counting) + + 5 + - Do flops/2 + - Read counters (2flops/2; counters keep counting) + + 6 + - Do flops/2 + - Read counters (3*flops/2; counters keep counting) + - Accum counters (2*(3*flops.2); counters clear and counting) + + 7 + - Do flops/2 + - Read counters (flops/2; counters keep counting) + + 8 + - Reset (counters set to zero; still counting) + - Stop counters (flops/2; counters stopped) + + 9 + - Reset (counters set to zero; still counting) + - Do flops/2 + - Stop counters (flops/2; counters stopped) + + 9 + - Reset (counters set to zero and stopped) + - Read counters (should be zero) +*/ + +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int retval, num_tests = 9, num_events, tmp, i; + long long **values; + int EventSet = PAPI_NULL; + int PAPI_event, mask; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* add PAPI_TOT_CYC and one of the events in + PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, + depending on the availability of the event + on the platform */ + EventSet = add_two_events( &num_events, &PAPI_event, &mask ); + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + 
sprintf( add_event_str, "PAPI_add_event[%s]", event_name ); + + values = allocate_test_space( num_tests, num_events ); + + /*===== Test 1: Start/Stop =======================*/ + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[0] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /*===== Test 2 Start/Stop =======================*/ + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[1] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /*===== Test 3: Reset/Start/Stop =======================*/ + + retval = PAPI_reset( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[2] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /*===== Test 4: Start/Read =======================*/ + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_read( EventSet, values[3] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /*===== Test 5: Read =======================*/ + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_read( EventSet, values[4] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /*===== Test 6: Read/Accum =======================*/ + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_read( EventSet, values[5] ); + 
if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + retval = PAPI_accum( EventSet, values[5] ); + + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_accum", retval ); + } + + /*===== Test 7: Read =======================*/ + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_read( EventSet, values[6] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /*===== Test 8 Reset/Stop =======================*/ + retval = PAPI_reset( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); + } + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_stop( EventSet, values[7] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /*===== Test 9: Reset/Read =======================*/ + retval = PAPI_reset( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); + } + + retval = PAPI_read( EventSet, values[8] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + remove_test_events( &EventSet, mask ); + + if (!quiet) { + printf( "Test case: Start/Stop/Read/Accum/Reset.\n" ); + printf( "----------------------------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-------------------------------------------------------------------------\n" ); + + sprintf( add_event_str, "%s:", event_name ); + printf( " PAPI_TOT_CYC %s\n", event_name ); + printf( "1. start,ops,stop %10lld %10lld\n", values[0][0], + values[0][1] ); + printf( "2. start,ops,stop %10lld %10lld\n", values[1][0], + values[1][1] ); + printf( "3. 
reset,start,ops,stop %10lld %10lld\n", values[2][0], + values[2][1] ); + printf( "4. start,ops/2,read %10lld %10lld\n", values[3][0], + values[3][1] ); + printf( "5. ops/2,read %10lld %10lld\n", values[4][0], + values[4][1] ); + printf( "6. ops/2,accum %10lld %10lld\n", values[5][0], + values[5][1] ); + printf( "7. ops/2,read %10lld %10lld\n", values[6][0], + values[6][1] ); + printf( "8. reset,ops/2,stop %10lld %10lld\n", values[7][0], + values[7][1] ); + printf( "9. reset,read %10lld %10lld\n", values[8][0], + values[8][1] ); + printf( "-------------------------------------------------------------------------\n" ); + printf( "Verification:\n" ); + printf( "Row 1 approximately equals rows 2 and 3 \n" ); + printf( "Row 4 approximately equals 1/2 of row 3\n" ); + printf( "Row 5 approximately equals twice row 4\n" ); + printf( "Row 6 approximately equals 6 times row 4\n" ); + printf( "Rows 7 and 8 approximately equal row 4\n" ); + printf( "Row 9 equals 0\n" ); + printf( "%% difference between %s 1 & 2: %.2f\n", "PAPI_TOT_CYC", + 100.0 * ( float ) values[0][0] / ( float ) values[1][0] ); + printf( "%% difference between %s 1 & 2: %.2f\n", add_event_str, + 100.0 * ( float ) values[0][1] / ( float ) values[1][1] ); + } + + for ( i = 0; i <= 1; i++ ) { + if ( !approx_equals + ( ( double ) values[0][i], ( double ) values[1][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[1][i], ( double ) values[2][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[2][i], ( double ) values[3][i] * 2.0 ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[2][i], ( double ) values[4][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? 
"PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[5][i], ( double ) values[3][i] * 6.0 ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[6][i], ( double ) values[3][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[7][i], ( double ) values[3][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( values[8][i] != 0LL ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/reset_multiplex.c b/src/ctests/reset_multiplex.c new file mode 100644 index 0000000..1896e19 --- /dev/null +++ b/src/ctests/reset_multiplex.c @@ -0,0 +1,268 @@ +/* This file performs the same tests as the reset test + but does it with the events multiplexed. + + This is mostly to test perf_event, where resetting + multiplexed events is handled differently than grouped events. 
+ +*/ + +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int retval, num_tests = 9, num_events, tmp, i; + long long **values; + int EventSet = PAPI_NULL; + int PAPI_event, mask; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + retval = PAPI_multiplex_init( ); + if ( retval == PAPI_ENOSUPP) { + test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); + } + else if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_multiplex_init", retval ); + } + + + /* add PAPI_TOT_CYC and one of the events in + PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, + depending on the availability of the event + on the platform */ + EventSet = add_two_events( &num_events, &PAPI_event, &mask ); + + /* Set multiplexing on the eventset */ + + retval = PAPI_set_multiplex( EventSet ); + if ( retval != PAPI_OK) { + test_fail(__FILE__, __LINE__, "Setting multiplex", retval); + } + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + sprintf( add_event_str, "PAPI_add_event[%s]", event_name ); + + values = allocate_test_space( num_tests, num_events ); + + /*===== Test 1: Start/Stop =======================*/ + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[0] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /*===== Test 2 Start/Stop =======================*/ + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { 
+ test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[1] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /*===== Test 3: Reset/Start/Stop =======================*/ + + retval = PAPI_reset( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); + } + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet, values[2] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /*===== Test 4: Start/Read =======================*/ + + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_read( EventSet, values[3] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /*===== Test 5: Read =======================*/ + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_read( EventSet, values[4] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /*===== Test 6: Read/Accum =======================*/ + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_read( EventSet, values[5] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + retval = PAPI_accum( EventSet, values[5] ); + + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_accum", retval ); + } + + /*===== Test 7: Read =======================*/ + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_read( EventSet, values[6] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + /*===== Test 8 Reset/Stop =======================*/ + retval = PAPI_reset( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, 
__LINE__, "PAPI_reset", retval ); + } + + do_flops( NUM_FLOPS / 2 ); + + retval = PAPI_stop( EventSet, values[7] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /*===== Test 9: Reset/Read =======================*/ + retval = PAPI_reset( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); + } + + retval = PAPI_read( EventSet, values[8] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_read", retval ); + } + + remove_test_events( &EventSet, mask ); + + if (!quiet) { + printf( "Test case: Start/Stop/Read/Accum/Reset.\n" ); + printf( "----------------------------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-------------------------------------------------------------------------\n" ); + + sprintf( add_event_str, "%s:", event_name ); + printf( " PAPI_TOT_CYC %s\n", event_name ); + printf( "1. start,ops,stop %10lld %10lld\n", values[0][0], + values[0][1] ); + printf( "2. start,ops,stop %10lld %10lld\n", values[1][0], + values[1][1] ); + printf( "3. reset,start,ops,stop %10lld %10lld\n", values[2][0], + values[2][1] ); + printf( "4. start,ops/2,read %10lld %10lld\n", values[3][0], + values[3][1] ); + printf( "5. ops/2,read %10lld %10lld\n", values[4][0], + values[4][1] ); + printf( "6. ops/2,accum %10lld %10lld\n", values[5][0], + values[5][1] ); + printf( "7. ops/2,read %10lld %10lld\n", values[6][0], + values[6][1] ); + printf( "8. reset,ops/2,stop %10lld %10lld\n", values[7][0], + values[7][1] ); + printf( "9. 
reset,read %10lld %10lld\n", values[8][0], + values[8][1] ); + printf( "-------------------------------------------------------------------------\n" ); + printf( "Verification:\n" ); + printf( "Row 1 approximately equals rows 2 and 3 \n" ); + printf( "Row 4 approximately equals 1/2 of row 3\n" ); + printf( "Row 5 approximately equals twice row 4\n" ); + printf( "Row 6 approximately equals 6 times row 4\n" ); + printf( "Rows 7 and 8 approximately equal row 4\n" ); + printf( "Row 9 equals 0\n" ); + printf( "%% difference between %s 1 & 2: %.2f\n", "PAPI_TOT_CYC", + 100.0 * ( float ) values[0][0] / ( float ) values[1][0] ); + printf( "%% difference between %s 1 & 2: %.2f\n", add_event_str, + 100.0 * ( float ) values[0][1] / ( float ) values[1][1] ); + } + + for ( i = 0; i <= 1; i++ ) { + if ( !approx_equals + ( ( double ) values[0][i], ( double ) values[1][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[1][i], ( double ) values[2][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[2][i], ( double ) values[3][i] * 2.0 ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[2][i], ( double ) values[4][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[5][i], ( double ) values[3][i] * 6.0 ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[6][i], ( double ) values[3][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( !approx_equals + ( ( double ) values[7][i], ( double ) values[3][i] ) ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? 
"PAPI_TOT_CYC" : add_event_str ), 1 ); + if ( values[8][i] != 0LL ) + test_fail( __FILE__, __LINE__, + ( ( i == 0 ) ? "PAPI_TOT_CYC" : add_event_str ), 1 ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/sdsc-mpx.c b/src/ctests/sdsc-mpx.c new file mode 100644 index 0000000..5a32597 --- /dev/null +++ b/src/ctests/sdsc-mpx.c @@ -0,0 +1,324 @@ +/* + * Test example for multiplex functionality, originally + * provided by Timothy Kaiser, SDSC. It was modified to fit the + * PAPI test suite by Nils Smeds, . + * + * This example verifies the accuracy of multiplexed events + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "testcode.h" + +#define REPEATS 5 +#define MAXEVENTS 14 +#define SLEEPTIME 100 +#define MINCOUNTS 100000 +#define MPX_TOLERANCE 0.20 +#define NUM_FLOPS 20000000 + +void +check_values( int eventset, int *events, int nevents, long long *values, + long long *refvalues ) +{ + double spread[MAXEVENTS]; + int i = nevents, j = 0; + + if ( !TESTS_QUIET ) { + printf( "\nRelative accuracy:\n" ); + for ( j = 0; j < nevents; j++ ) + printf( " Event %.2d", j + 1 ); + printf( "\n" ); + } + + for ( j = 0; j < nevents; j++ ) { + spread[j] = abs( (int) ( refvalues[j] - values[j] ) ); + if ( values[j] ) + spread[j] /= ( double ) values[j]; + if ( !TESTS_QUIET ) + printf( "%10.3g ", spread[j] ); + /* Make sure that NaN get counted as errors */ + if ( spread[j] < MPX_TOLERANCE ) { + i--; + } + else if ( refvalues[j] < MINCOUNTS ) { /* Neglect inprecise results with low counts */ + i--; + } + else { + char buff[BUFSIZ]; + if (!TESTS_QUIET) { + printf("reference = %lld, value = %lld, diff = %lld\n", + refvalues[j],values[j],refvalues[j] - values[j] ); + } + sprintf(buff,"Error on %d, spread %lf > threshold %lf AND count %lld > minimum size threshold %d\n",j,spread[j],MPX_TOLERANCE, + refvalues[j],MINCOUNTS); + + test_fail( __FILE__, __LINE__, buff, 1 ); + } + } + if (!TESTS_QUIET) printf( "\n\n" ); 
+#if 0 + if ( !TESTS_QUIET ) { + for ( j = 0; j < nevents; j++ ) { + PAPI_get_event_info( events[j], &info ); + printf( "Event %.2d: ref=", j ); + printf( LLDFMT10, refvalues[j] ); + printf( ", diff/ref=%7.2g -- %s\n", spread[j], info.short_descr ); + printf( "\n" ); + } + printf( "\n" ); + } +#else + ( void ) eventset; + ( void ) events; +#endif + + +} + +void +ref_measurements( int iters, int *eventset, int *events, int nevents, + long long *refvalues ) +{ + PAPI_event_info_t info; + int i, retval; + double x = 1.1, y; + long long t1, t2; + + if (!TESTS_QUIET) printf( "PAPI reference measurements:\n" ); + + if ( ( retval = PAPI_create_eventset( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + for ( i = 0; i < nevents; i++ ) { + if ( ( retval = PAPI_add_event( *eventset, events[i] ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + x = 1.0; + + t1 = PAPI_get_real_usec( ); + if ( ( retval = PAPI_start( *eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + y = do_flops3( x, iters, 1 ); + if ( ( retval = PAPI_stop( *eventset, &refvalues[i] ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + t2 = PAPI_get_real_usec( ); + + if (!TESTS_QUIET) { + printf( "\tOperations= %.1f Mflop", y * 1e-6 ); + printf( "\t(%g Mflop/s)\n\n", ( ( float ) y / ( t2 - t1 ) ) ); + } + + PAPI_get_event_info( events[i], &info ); + if (!TESTS_QUIET) { + printf( "%20s = ", info.short_descr ); + printf( LLDFMT, refvalues[i] ); + printf( "\n" ); + } + + if ( ( retval = PAPI_cleanup_eventset( *eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + } + if ( ( retval = PAPI_destroy_eventset( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + *eventset = PAPI_NULL; +} + + + +int +main( int argc, char **argv ) +{ + PAPI_event_info_t info; + int i, j, retval; + int iters = NUM_FLOPS; + double x = 1.1, y; + long long t1, t2; + long long 
values[MAXEVENTS], refvalues[MAXEVENTS]; + int sleep_time = SLEEPTIME; + int nevents = MAXEVENTS; + int eventset = PAPI_NULL; + int events[MAXEVENTS]; + int quiet; + + quiet = tests_quiet( argc, argv ); + + if ( argc > 1 ) { + if ( !strcmp( argv[1], "TESTS_QUIET" ) ) { + } + else { + sleep_time = atoi( argv[1] ); + if ( sleep_time <= 0 ) + sleep_time = SLEEPTIME; + } + } + + events[0] = PAPI_FP_INS; + events[1] = PAPI_TOT_INS; + events[2] = PAPI_INT_INS; + events[3] = PAPI_TOT_CYC; + events[4] = PAPI_STL_CCY; + events[5] = PAPI_BR_INS; + events[6] = PAPI_SR_INS; + events[7] = PAPI_LD_INS; + events[8] = PAPI_TOT_IIS; + events[9] = PAPI_FAD_INS; + events[10] = PAPI_BR_TKN; + events[11] = PAPI_BR_MSP; + events[12] = PAPI_L1_ICA; + events[13] = PAPI_L1_DCA; + + for ( i = 0; i < MAXEVENTS; i++ ) { + values[i] = 0; + } + + + if ( !quiet ) { + printf( "\nAccuracy check of multiplexing routines.\n" ); + printf( "Comparing a multiplex measurement with separate measurements.\n\n" ); + } + + /* Initialize PAPI */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Iterate through event list and remove those that aren't suitable */ + nevents = MAXEVENTS; + for ( i = 0; i < nevents; i++ ) { + if (( PAPI_get_event_info( events[i], &info ) == PAPI_OK ) && + (info.count && (strcmp( info.derived, "NOT_DERIVED")==0))) { + if (!quiet) printf( "Added %s\n", info.symbol ); + } + else { + for ( j = i; j < MAXEVENTS-1; j++ ) { + events[j] = events[j + 1]; + } + nevents--; + i--; + } + } + + /* Skip test if not enough events available */ + if ( nevents < 2 ) { + test_skip( __FILE__, __LINE__, "Not enough events to multiplex...", 0 ); + } + + if (!quiet) printf( "Using %d events\n\n", nevents ); + + retval = PAPI_multiplex_init( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI multiplex init fail\n", retval ); + } + + /* Find a reasonable number of 
iterations (each + * event active 20 times) during the measurement + */ + /* Target: 10000 usec/multiplex, 20 repeats */ + t2 = 10000 * 20 * nevents; + if ( t2 > 30e6 ) { + test_skip( __FILE__, __LINE__, "This test takes too much time", + retval ); + } + + /* Warmup? */ + y = do_flops3( x, iters, 1 ); + + /* Measure time of one run */ + t1 = PAPI_get_real_usec( ); + y = do_flops3( x, iters, 1 ); + t1 = PAPI_get_real_usec( ) - t1; + + if (t1==0) { + test_fail(__FILE__, __LINE__, + "do_flops3 takes no time to run!\n", retval); + } + + /* Scale up execution time to match t2 */ + if ( t2 > t1 ) { + iters = iters * ( int ) ( t2 / t1 ); + if (!quiet) { + printf( "Modified iteration count to %d\n\n", iters ); + } + } + + if (!quiet) fprintf(stdout,"y=%lf\n",y); + + /* Now loop through the items one at a time */ + + ref_measurements( iters, &eventset, events, nevents, refvalues ); + + /* Now check multiplexed */ + + if ( ( retval = PAPI_create_eventset( &eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + + /* In Component PAPI, EventSets must be assigned a component index + before you can fiddle with their internals. 
+ 0 is always the cpu component */ + retval = PAPI_assign_eventset_component( eventset, 0 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", + retval ); + + if ( ( retval = PAPI_set_multiplex( eventset ) ) ) { + if ( retval == PAPI_ENOSUPP) { + test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); + } + + test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); + } + + if ( ( retval = PAPI_add_events( eventset, events, nevents ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_add_events", retval ); + + if (!quiet) printf( "\nPAPI multiplexed measurements:\n" ); + x = 1.0; + t1 = PAPI_get_real_usec( ); + if ( ( retval = PAPI_start( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + y = do_flops3( x, iters, 1 ); + if ( ( retval = PAPI_stop( eventset, values ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + t2 = PAPI_get_real_usec( ); + + for ( j = 0; j < nevents; j++ ) { + PAPI_get_event_info( events[j], &info ); + if ( !quiet ) { + printf( "%20s = ", info.short_descr ); + printf( LLDFMT, values[j] ); + printf( "\n" ); + } + } + + check_values( eventset, events, nevents, values, refvalues ); + + if ( ( retval = PAPI_remove_events( eventset, events, nevents ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_remove_events", retval ); + if ( ( retval = PAPI_cleanup_eventset( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + if ( ( retval = PAPI_destroy_eventset( &eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + eventset = PAPI_NULL; + + /* Now loop through the items one at a time */ + + ref_measurements( iters, &eventset, events, nevents, refvalues ); + + check_values( eventset, events, nevents, values, refvalues ); + + test_pass( __FILE__ ); + + return 0; +} + + diff --git a/src/ctests/sdsc2.c b/src/ctests/sdsc2.c new file mode 100644 index 0000000..20bf2b5 --- /dev/null +++ b/src/ctests/sdsc2.c @@ -0,0 +1,272 @@ 
+/* + * Test example for multiplex functionality, originally + * provided by Timothy Kaiser, SDSC. It was modified to fit the + * PAPI test suite by Nils Smeds, . + * + * This example verifies the PAPI_reset function for + * multiplexed events + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "testcode.h" + +#define REPEATS 5 +#define MAXEVENTS 9 +#define SLEEPTIME 100 +#define MINCOUNTS 100000 +#define MPX_TOLERANCE 0.20 +#define NUM_FLOPS 20000000 + + +int +main( int argc, char **argv ) +{ + PAPI_event_info_t info; + int i, j, retval; + int iters = NUM_FLOPS; + double x = 1.1, y, dtmp; + long long t1, t2; + long long values[MAXEVENTS]; + int sleep_time = SLEEPTIME; +#ifdef STARTSTOP + long long dummies[MAXEVENTS]; +#endif + double valsample[MAXEVENTS][REPEATS]; + double valsum[MAXEVENTS]; + double avg[MAXEVENTS]; + double spread[MAXEVENTS]; + int nevents = MAXEVENTS; + int eventset = PAPI_NULL; + int events[MAXEVENTS]; + int fails; + int quiet; + + /* Set the quiet variable */ + quiet = tests_quiet( argc, argv ); + + /* Parse command line */ + if ( argc > 1 ) { + if ( !strcmp( argv[1], "TESTS_QUIET" ) ) { + } + else { + sleep_time = atoi( argv[1] ); + if ( sleep_time <= 0 ) + sleep_time = SLEEPTIME; + } + } + + events[0] = PAPI_FP_INS; + events[1] = PAPI_TOT_INS; + events[2] = PAPI_INT_INS; + events[3] = PAPI_TOT_CYC; + events[4] = PAPI_STL_CCY; + events[5] = PAPI_BR_INS; + events[6] = PAPI_SR_INS; + events[7] = PAPI_LD_INS; + events[8] = PAPI_TOT_IIS; + + for ( i = 0; i < MAXEVENTS; i++ ) { + values[i] = 0; + valsum[i] = 0; + } + + + if ( !quiet ) { + printf( "\nAccuracy check of multiplexing routines.\n" ); + printf( "Investigating the variance of multiplexed measurements.\n\n" ); + } + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + +#ifdef MPX + retval = PAPI_multiplex_init( ); + if ( retval != 
PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI multiplex init fail\n", retval ); + } +#endif + + if ( ( retval = PAPI_create_eventset( &eventset ) ) ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + +#ifdef MPX + + /* In Component PAPI, EventSets must be assigned a component index + before you can fiddle with their internals. + 0 is always the cpu component */ + retval = PAPI_assign_eventset_component( eventset, 0 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", + retval ); + } + + if ( ( retval = PAPI_set_multiplex( eventset ) ) ) { + if ( retval == PAPI_ENOSUPP) { + test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); + } + test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); + } +#endif + + /* Iterate through event list and remove those that aren't available */ + nevents = MAXEVENTS; + for ( i = 0; i < nevents; i++ ) { + if ( ( retval = PAPI_add_event( eventset, events[i] ) ) ) { + for ( j = i; j < MAXEVENTS-1; j++ ) { + events[j] = events[j + 1]; + } + nevents--; + i--; + } + } + + /* Skip test if not enough events available */ + if ( nevents < 2 ) { + test_skip( __FILE__, __LINE__, "Not enough events left...", 0 ); + } + + /* Find a reasonable number of iterations (each + * event active 20 times) during the measurement + */ + + /* Target: 10000 usec/multiplex, 20 repeats */ + t2 = 10000 * 20 * nevents; + if ( t2 > 30e6 ) { + test_skip( __FILE__, __LINE__, "This test takes too much time", + retval ); + } + + /* Measure time of one iteration */ + t1 = PAPI_get_real_usec( ); + y = do_flops3( x, iters, 1 ); + t1 = PAPI_get_real_usec( ) - t1; + + /* Scale up execution time to match t2 */ + if ( t2 > t1 ) { + iters = iters * ( int ) ( t2 / t1 ); + } + /* Make sure execution time is < 30s per repeated test */ + else if ( t1 > 30e6 ) { + test_skip( __FILE__, __LINE__, "This test takes too much time", + retval ); + } + + if ( ( retval = PAPI_start( eventset ) ) ) { + 
test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + for ( i = 1; i <= REPEATS; i++ ) { + x = 1.0; + +#ifndef STARTSTOP + if ( ( retval = PAPI_reset( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); +#else + if ( ( retval = PAPI_stop( eventset, dummies ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + if ( ( retval = PAPI_start( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); +#endif + + if ( !quiet ) { + printf( "\nTest %d (of %d):\n", i, REPEATS ); + } + + t1 = PAPI_get_real_usec( ); + y = do_flops3( x, iters, 1 ); + PAPI_read( eventset, values ); + t2 = PAPI_get_real_usec( ); + + if ( !quiet ) { + printf( "\n(calculated independent of PAPI)\n" ); + printf( "\tOperations= %.1f Mflop", y * 1e-6 ); + printf( "\t(%g Mflop/s)\n\n", + ( y / ( double ) ( t2 - t1 ) ) ); + printf( "PAPI measurements:\n" ); + + for ( j = 0; j < nevents; j++ ) { + PAPI_get_event_info( events[j], &info ); + printf( "%20s = ", info.short_descr ); + printf( "%lld", values[j] ); + printf( "\n" ); + } + printf( "\n" ); + } + + /* Calculate values */ + for ( j = 0; j < nevents; j++ ) { + dtmp = ( double ) values[j]; + valsum[j] += dtmp; + valsample[j][i - 1] = dtmp; + } + } + + if ( ( retval = PAPI_stop( eventset, values ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + if ( !quiet ) { + printf( "\n\nEstimated variance relative " + "to average counts:\n" ); + for ( j = 0; j < nevents; j++ ) + printf( " Event %.2d", j ); + printf( "\n" ); + } + + fails = nevents; + /* Due to limited precision of floating point cannot really use + typical standard deviation compuation for large numbers with + very small variations. Instead compute the std devation + problems with precision. 
+ */ + for ( j = 0; j < nevents; j++ ) { + avg[j] = valsum[j] / REPEATS; + spread[j] = 0; + for ( i = 0; i < REPEATS; ++i ) { + double diff = ( valsample[j][i] - avg[j] ); + spread[j] += diff * diff; + } + spread[j] = sqrt( spread[j] / REPEATS ) / avg[j]; + if ( !quiet ) + printf( "%9.2g ", spread[j] ); + /* Make sure that NaN get counted as errors */ + if ( spread[j] < MPX_TOLERANCE ) { + --fails; + } + /* Neglect inprecise results with low counts */ + else if ( valsum[j] < MINCOUNTS ) { + --fails; + } + } + + if ( !quiet ) { + printf( "\n\n" ); + for ( j = 0; j < nevents; j++ ) { + PAPI_get_event_info( events[j], &info ); + printf( "Event %.2d: mean=%10.0f, " + "sdev/mean=%7.2g nrpt=%2d -- %s\n", + j, avg[j], spread[j], + REPEATS, info.short_descr ); + } + printf( "\n\n" ); + } + + if ( fails ) { + test_fail( __FILE__, __LINE__, + "Values outside threshold", fails ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/sdsc4-mpx.c b/src/ctests/sdsc4-mpx.c new file mode 100644 index 0000000..6126cbf --- /dev/null +++ b/src/ctests/sdsc4-mpx.c @@ -0,0 +1,432 @@ +/* + * Test example for multiplex functionality, originally + * provided by Timothy Kaiser, SDSC. It was modified to fit the + * PAPI test suite by Nils Smeds, . + * + * This example verifies the adding and removal of multiplexed + * events in an event set. 
+ */ + +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "testcode.h" + +#define MAXEVENTS 9 +#define REPEATS (MAXEVENTS * 4) +#define SLEEPTIME 100 +#define MINCOUNTS 100000 +#define MPX_TOLERANCE 0.20 +#define NUM_FLOPS 20000000 + +int +main( int argc, char **argv ) +{ + PAPI_event_info_t info; + char name2[PAPI_MAX_STR_LEN]; + int i, j, retval, idx, repeats; + int iters = NUM_FLOPS; + double x = 1.1, y, dtmp; + long long t1, t2; + long long values[MAXEVENTS], refvals[MAXEVENTS]; + int nsamples[MAXEVENTS], truelist[MAXEVENTS], ntrue; +#ifdef STARTSTOP + long long dummies[MAXEVENTS]; +#endif + int sleep_time = SLEEPTIME; + double valsample[MAXEVENTS][REPEATS]; + double valsum[MAXEVENTS]; + double avg[MAXEVENTS]; + double spread[MAXEVENTS]; + int nevents = MAXEVENTS, nev1; + int eventset = PAPI_NULL; + int events[MAXEVENTS]; + int eventidx[MAXEVENTS]; + int eventmap[MAXEVENTS]; + int fails; + int quiet; + + quiet = tests_quiet( argc, argv ); + + if ( argc > 1 ) { + if ( !strcmp( argv[1], "quiet" ) ) { + } + else { + sleep_time = atoi( argv[1] ); + if ( sleep_time <= 0 ) + sleep_time = SLEEPTIME; + } + } + + events[0] = PAPI_FP_INS; + events[1] = PAPI_TOT_CYC; + events[2] = PAPI_TOT_INS; + events[3] = PAPI_TOT_IIS; + events[4] = PAPI_INT_INS; + events[5] = PAPI_STL_CCY; + events[6] = PAPI_BR_INS; + events[7] = PAPI_SR_INS; + events[8] = PAPI_LD_INS; + + for ( i = 0; i < MAXEVENTS; i++ ) { + values[i] = 0; + valsum[i] = 0; + nsamples[i] = 0; + } + + /* Print test summary */ + if ( !quiet ) { + printf( "\nFunctional check of multiplexing routines.\n" ); + printf( "Adding and removing events from an event set.\n\n" ); + } + + /* Init the library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Enable multiplexing */ +#ifdef MPX + retval = PAPI_multiplex_init( ); + if ( retval != PAPI_OK ) { + 
test_fail( __FILE__, __LINE__, "PAPI multiplex init fail\n", retval ); + } +#endif + + /* Create an eventset */ + if ( ( retval = PAPI_create_eventset( &eventset ) ) ) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + /* Enable multiplexing on the eventset */ +#ifdef MPX + + /* In Component PAPI, EventSets must be assigned a component index + before you can fiddle with their internals. + 0 is always the cpu component */ + retval = PAPI_assign_eventset_component( eventset, 0 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", + retval ); + + if ( ( retval = PAPI_set_multiplex( eventset ) ) ) { + if ( retval == PAPI_ENOSUPP) { + test_skip(__FILE__, __LINE__, "Multiplex not supported", 1); + } + test_fail( __FILE__, __LINE__, "PAPI_set_multiplex", retval ); + } +#endif + + /* See which events are available and remove the ones that aren't */ + nevents = MAXEVENTS; + for ( i = 0; i < nevents; i++ ) { + if ( ( retval = PAPI_add_event( eventset, events[i] ) ) ) { + for ( j = i; j < MAXEVENTS-1; j++ ) + events[j] = events[j + 1]; + nevents--; + i--; + } + } + + /* We want at least three events? */ + /* Seems arbitrary. 
Might be because intel machines used to */ + /* Only have two event slots */ + if ( nevents < 3 ) { + test_skip( __FILE__, __LINE__, "Not enough events left...", 0 ); + } + + /* Find a reasonable number of iterations (each + * event active 20 times) during the measurement + */ + + /* TODO: find Linux multiplex interval */ + /* not sure if 10ms is close or not */ + /* Target: 10000 usec/multiplex, 20 repeats */ + t2 = 10000 * 20 * nevents; + if ( t2 > 30e6 ) { + test_skip( __FILE__, __LINE__, + "This test takes too much time", retval ); + } + + /* Measure one run */ + t1 = PAPI_get_real_usec( ); + y = do_flops3( x, iters, 1 ); + t1 = PAPI_get_real_usec( ) - t1; + + /* Scale up execution time to match t2 */ + if ( t2 > t1 ) { + iters = iters * ( int ) ( t2 / t1 ); + } + /* Make sure execution time is < 30s per repeated test */ + else if ( t1 > 30e6 ) { + test_skip( __FILE__, __LINE__, + "This test takes too much time", retval ); + } + + /* Split the events up by odd and even? */ + j = nevents; + for ( i = 1; i < nevents; i = i + 2 ) + eventidx[--j] = i; + for ( i = 0; i < nevents; i = i + 2 ) + eventidx[--j] = i; + assert( j == 0 ); + + /* put event mapping in eventmap? 
*/ + for ( i = 0; i < nevents; i++ ) + eventmap[i] = i; + + x = 1.0; + + /* Make a reference run */ + if ( !quiet ) { + printf( "\nReference run:\n" ); + } + + t1 = PAPI_get_real_usec( ); + if ( ( retval = PAPI_start( eventset ) ) ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + y = do_flops3( x, iters, 1 ); + PAPI_read( eventset, refvals ); + t2 = PAPI_get_real_usec( ); + + /* Print results */ + ntrue = nevents; + PAPI_list_events( eventset, truelist, &ntrue ); + if ( !quiet ) { + printf( "\tOperations= %.1f Mflop", y * 1e-6 ); + printf( "\t(%g Mflop/s)\n\n", ( y / ( double ) ( t2 - t1 ) ) ); + printf( "%20s %16s %-15s %-15s\n", "PAPI measurement:", + "Acquired count", "Expected event", "PAPI_list_events" ); + + for ( j = 0; j < nevents; j++ ) { + PAPI_get_event_info( events[j], &info ); + PAPI_event_code_to_name( truelist[j], name2 ); + printf( "%20s = %16lld %-15s %-15s %s\n", + info.short_descr, refvals[j], + info.symbol, name2, + strcmp( info.symbol,name2 ) ? + "*** MISMATCH ***" : "" ); + } + printf( "\n" ); + } + + /* Make repeated runs while removing/readding events */ + + nev1 = nevents; + repeats = nevents * 4; + + /* Repeat four times for each event? */ + + for ( i = 0; i < repeats; i++ ) { + + /* What's going on here? as example, nevents=4, repeats=16*/ + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 == i*/ + /* 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 == i%nevents */ + /* 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 == (i%nevents)+1 */ + /* 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 */ + /* so we skip nevery NEVENTS time through the loop? 
*/ + if ( ( i % nevents ) + 1 == nevents ) continue; + + if ( !quiet ) { + printf( "\nTest %d (of %d):\n", + i + 1 - (i / nevents), repeats - 4 ); + } + + /* Stop the counter, it's been left running */ + if ( ( retval = PAPI_stop( eventset, values ) ) ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /* We run through a 4-way pattern */ + /* 1st quarter, remove events */ + /* 2nd quarter, add back events */ + /* 3rd quarter, remove events again */ + /* 4th wuarter, re-add events */ + j = eventidx[i % nevents]; + if ( ( i / nevents ) % 2 == 0 ) { + + /* Remove event */ + PAPI_get_event_info( events[j], &info ); + if ( !quiet ) { + printf( "Removing event[%d]: %s\n", + j, info.short_descr ); + } + + retval = PAPI_remove_event( eventset, events[j] ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_remove_event", retval ); + } + + /* Update the complex event mapping */ + nev1--; + for ( idx = 0; eventmap[idx] != j; idx++ ); + for ( j = idx; j < nev1; j++ ) + eventmap[j] = eventmap[j + 1]; + } else { + + /* Add an event back in */ + PAPI_get_event_info( events[j], &info ); + if ( !quiet ) { + printf( "Adding event[%d]: %s\n", + j, info.short_descr ); + } + retval = PAPI_add_event( eventset, events[j] ); + if (retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, + "PAPI_add_event", retval ); + } + + eventmap[nev1] = j; + nev1++; + } + + if ( ( retval = PAPI_start( eventset ) ) ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + x = 1.0; + + // This startstop is leftover from sdsc2? 
*/ +#ifndef STARTSTOP + if ( ( retval = PAPI_reset( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_reset", retval ); +#else + if ( ( retval = PAPI_stop( eventset, dummies ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + if ( ( retval = PAPI_start( eventset ) ) ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); +#endif + + /* Run the actual workload */ + t1 = PAPI_get_real_usec( ); + y = do_flops3( x, iters, 1 ); + PAPI_read( eventset, values ); + t2 = PAPI_get_real_usec( ); + + /* Print approximate flops plus header */ + if ( !quiet ) { + printf( "\n(calculated independent of PAPI)\n" ); + printf( "\tOperations= %.1f Mflop", y * 1e-6 ); + printf( "\t(%g Mflop/s)\n\n", + ( y / ( double ) ( t2 - t1 ) ) ); + + printf( "%20s %16s %-15s %-15s\n", + "PAPI measurement:", + "Acquired count", + "Expected event", + "PAPI_list_events" ); + + + ntrue = nev1; + PAPI_list_events( eventset, truelist, &ntrue ); + for ( j = 0; j < nev1; j++ ) { + idx = eventmap[j]; + /* printf("Mapping: Counter %d -> slot %d.\n",j,idx); */ + PAPI_get_event_info( events[idx], &info ); + PAPI_event_code_to_name( truelist[j], name2 ); + printf( "%20s = %16lld %-15s %-15s %s\n", + info.short_descr, values[j], + info.symbol, name2, + strcmp( info.symbol, name2 ) ? 
+ "*** MISMATCH ***" : "" ); + } + printf( "\n" ); + } + + /* Calculate results */ + for ( j = 0; j < nev1; j++ ) { + idx = eventmap[j]; + dtmp = ( double ) values[j]; + valsum[idx] += dtmp; + valsample[idx][nsamples[idx]] = dtmp; + nsamples[idx]++; + } + } + + /* Stop event for good */ + if ( ( retval = PAPI_stop( eventset, values ) ) ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + if ( !quiet ) { + printf( "\n\nEstimated variance relative " + "to average counts:\n" ); + for ( j = 0; j < nev1; j++ ) { + printf( " Event %.2d", j ); + } + printf( "\n" ); + } + + fails = nevents; + + /* Due to limited precision of floating point cannot really use + typical standard deviation compuation for large numbers with + very small variations. Instead compute the std devation + problems with precision. + */ + + /* Update so that if our event count is small (<1000 or so) */ + /* Then don't fail with high variation. Since we're multiplexing */ + /* it's hard to capture such small counts, and it makes the test */ + /* fail on machines such as Haswell and the PAPI_SR_INS event */ + + for ( j = 0; j < nev1; j++ ) { + + avg[j] = valsum[j] / nsamples[j]; + spread[j] = 0; + for ( i = 0; i < nsamples[j]; ++i ) { + double diff = ( valsample[j][i] - avg[j] ); + spread[j] += diff * diff; + } + spread[j] = sqrt( spread[j] / nsamples[j] ) / avg[j]; + if ( !quiet ) { + printf( "%9.2g ", spread[j] ); + } + } + + for ( j = 0; j < nev1; j++ ) { + + /* Make sure that NaN get counted as errors */ + if ( spread[j] < MPX_TOLERANCE ) { + if (!quiet) printf("Event %d tolerance good\n",j); + fails--; + } + /* Neglect inprecise results with low counts */ + else if ( avg[j] < MINCOUNTS ) { + if (!quiet) printf("Event %d too small to fail\n",j); + fails--; + } + else { + if (!quiet) printf("Event %d failed!\n",j); + } + } + + if ( !quiet ) { + printf( "\n\n" ); + for ( j = 0; j < nev1; j++ ) { + PAPI_get_event_info( events[j], &info ); + printf( "Event %.2d: mean=%10.0f, " + 
"sdev/mean=%7.2g nrpt=%2d -- %s\n", + j, avg[j], spread[j], + nsamples[j], info.short_descr ); + } + printf( "\n\n" ); + } + + if ( fails ) { + test_fail( __FILE__, __LINE__, "Values differ from reference", fails ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/second.c b/src/ctests/second.c new file mode 100644 index 0000000..f0bc59f --- /dev/null +++ b/src/ctests/second.c @@ -0,0 +1,585 @@ +/* This file performs the following test: counter domain testing + + - It attempts to use the following two counters. It may use less depending on + hardware counter resource limitations. + + PAPI_TOT_INS + + PAPI_TOT_CYC + - Start system domain counters + - Do flops + - Stop and read system domain counters + - Start kernel domain counters + - Do flops + - Stop and read kernel domain counters + - Start user domain counters + - Do flops + - Stop and read user domain counters +*/ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define TAB_DOM "%s%12lld%15lld%17lld\n" + +#define CASE2 0 +#define CREATE 1 +#define ADD 2 +#define MIDDLE 3 +#define CHANGE 4 +#define SUPERVISOR 5 + +void +dump_and_verify( int test_case, long long **values ) +{ + long long min, max, min2, max2; + + if (!TESTS_QUIET) { + printf( "-----------------------------------------------------------------\n" ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-------------------------------------------------------------\n" ); + } + if ( test_case == CASE2 ) { + if (!TESTS_QUIET) { + printf( "Test type : Before Create Before Add Between Adds\n" ); + printf( TAB_DOM, "PAPI_TOT_INS: ", ( values[0] )[0], ( values[1] )[0], + ( values[2] )[0] ); + printf( TAB_DOM, "PAPI_TOT_CYC: ", ( values[0] )[1], ( values[1] )[1], + ( values[2] )[1] ); + printf( "-------------------------------------------------------------\n" ); + printf( "Verification:\n" ); + printf( "Both rows equal 'n N N' where n << N\n" ); + return; + } + 
} else if ( test_case == CHANGE ) { + min = ( long long ) ( ( double ) values[0][0] * ( 1 - TOLERANCE ) ); + max = ( long long ) ( ( double ) values[0][0] * ( 1 + TOLERANCE ) ); + if ( values[1][0] > max || values[1][0] < min ) + test_fail( __FILE__, __LINE__, "PAPI_TOT_INS", 1 ); + + min = ( long long ) ( ( double ) values[1][1] * ( 1 - TOLERANCE ) ); + max = ( long long ) ( ( double ) values[1][1] * ( 1 + TOLERANCE ) ); + if ( ( values[2][1] + values[0][1] ) > max || + ( values[2][1] + values[0][1] ) < min ) + test_fail( __FILE__, __LINE__, "PAPI_TOT_CYC", 1 ); + + if (!TESTS_QUIET) { + printf( "Test type : PAPI_DOM_ALL PAPI_DOM_KERNEL PAPI_DOM_USER\n" ); + printf( TAB_DOM, "PAPI_TOT_INS: ", ( values[1] )[0], ( values[2] )[0], + ( values[0] )[0] ); + printf( TAB_DOM, "PAPI_TOT_CYC: ", ( values[1] )[1], ( values[2] )[1], + ( values[0] )[1] ); + printf( "-------------------------------------------------------------\n" ); + + printf( "Verification:\n" ); + printf( "Both rows approximately equal '(N+n) n N', where n << N\n" ); + printf( "Column 1 approximately equals column 2 plus column 3\n" ); + } + } else if ( test_case == SUPERVISOR ) { + if (!TESTS_QUIET) { + printf( "Test type : PAPI_DOM_ALL All-minus-supervisor Supervisor-only\n" ); + printf( TAB_DOM, "PAPI_TOT_INS: ", ( values[0] )[0], ( values[1] )[0], + ( values[2] )[0] ); + printf( TAB_DOM, "PAPI_TOT_CYC: ", ( values[0] )[1], ( values[1] )[1], + ( values[2] )[1] ); + printf( "-------------------------------------------------------------\n" ); + printf( "Verification:\n" ); + printf( "Both rows approximately equal '(N+n) n N', where n << N\n" ); + printf( "Column 1 approximately equals column 2 plus column 3\n" ); + } + } else { + min = ( long long ) ( ( double ) values[2][0] * ( 1 - TOLERANCE ) ); + max = ( long long ) ( ( double ) values[2][0] * ( 1 + TOLERANCE ) ); + + min2 = ( long long ) ( ( double ) values[0][1] * ( 1 - TOLERANCE ) ); + max2 = + ( long long ) ( ( double ) ( double ) values[0][1] * + ( 
1 + TOLERANCE ) ); + + if (!TESTS_QUIET) { + printf( "Test type : PAPI_DOM_ALL PAPI_DOM_KERNEL PAPI_DOM_USER\n" ); + printf( TAB_DOM, "PAPI_TOT_INS: ", ( values[0] )[0], ( values[1] )[0], + ( values[2] )[0] ); + printf( TAB_DOM, "PAPI_TOT_CYC: ", ( values[0] )[1], ( values[1] )[1], + ( values[2] )[1] ); + printf( "-------------------------------------------------------------\n" ); + printf( "Verification:\n" ); + printf( "Both rows approximately equal '(N+n) n N', where n << N\n" ); + printf( "Column 1 approximately equals column 2 plus column 3\n" ); + } + if ( values[0][0] > max || values[0][0] < min ) + test_fail( __FILE__, __LINE__, "PAPI_TOT_INS", 1 ); + + if ( ( values[1][1] + values[2][1] ) > max2 || + ( values[1][1] + values[2][1] ) < min2 ) + test_fail( __FILE__, __LINE__, "PAPI_TOT_CYC", 1 ); + } + + if ( values[0][0] == 0 || values[0][1] == 0 || + values[1][0] == 0 || values[1][1] == 0 ) + test_fail( __FILE__, __LINE__, + "Verify non-zero count for all domain types", 1 ); + + if ( values[2][0] == 0 || values[2][1] == 0 ) { + if ( test_case == SUPERVISOR ) { + if (!TESTS_QUIET) printf( "WARNING: No events counted in supervisor context. 
This is expected in a non-virtualized environment.\n" ); + } else { + test_fail( __FILE__, __LINE__, + "Verify non-zero count for all domain types", 1 ); + } + } +} + +/* Do the set_domain on the eventset before adding events */ + +void +case1( int num ) +{ + int retval, num_tests = 3; + long long **values; + int EventSet1 = PAPI_NULL, EventSet2 = PAPI_NULL, EventSet3 = PAPI_NULL; + PAPI_option_t options; + const PAPI_component_info_t *cmpinfo; + + memset( &options, 0x0, sizeof ( options ) ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + /* get info from cpu component */ + cmpinfo = PAPI_get_component_info( 0 ); + if ( cmpinfo == NULL ) { + test_fail( __FILE__, __LINE__,"PAPI_get_component_info", PAPI_ECMP); + } + + if ( ( retval = PAPI_query_event( PAPI_TOT_INS ) ) != PAPI_OK ) + test_skip( __FILE__, __LINE__, "PAPI_query_event", retval ); + + if ( ( retval = PAPI_query_event( PAPI_TOT_CYC ) ) != PAPI_OK ) + test_skip( __FILE__, __LINE__, "PAPI_query_event", retval ); + + retval = PAPI_create_eventset( &EventSet1 ); + if ( retval == PAPI_OK ) + retval = PAPI_create_eventset( &EventSet2 ); + if ( retval == PAPI_OK ) + retval = PAPI_create_eventset( &EventSet3 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + /* In Component PAPI, EventSets must be assigned a component index + before you can fiddle with their internals. 
0 is always the cpu component */ + retval = PAPI_assign_eventset_component( EventSet1, 0 ); + if ( retval == PAPI_OK ) + retval = PAPI_assign_eventset_component( EventSet2, 0 ); + if ( retval == PAPI_OK ) + retval = PAPI_assign_eventset_component( EventSet3, 0 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_assign_eventset_component", + retval ); + + if ( num == CREATE ) { + if (!TESTS_QUIET) printf( "\nTest case CREATE: Call PAPI_set_opt(PAPI_DOMAIN) on EventSet before add\n" ); + options.domain.eventset = EventSet1; + options.domain.domain = PAPI_DOM_ALL; + + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + + options.domain.eventset = EventSet2; + options.domain.domain = PAPI_DOM_KERNEL; + + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + + options.domain.eventset = EventSet3; + options.domain.domain = PAPI_DOM_USER; + + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + } + + retval = PAPI_add_event( EventSet1, PAPI_TOT_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_INS)", retval ); + + retval = PAPI_add_event( EventSet1, PAPI_TOT_CYC ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_CYC)", retval ); + + retval = PAPI_add_event( EventSet2, PAPI_TOT_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_INS)", retval ); + + retval = PAPI_add_event( EventSet2, PAPI_TOT_CYC ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_CYC)", retval ); + + retval = PAPI_add_event( EventSet3, PAPI_TOT_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_INS)", retval ); + + if ( num == MIDDLE ) { + if 
(!TESTS_QUIET) printf( "\nTest case MIDDLE: Call PAPI_set_opt(PAPI_DOMAIN) on EventSet between adds\n" ); + options.domain.eventset = EventSet1; + options.domain.domain = PAPI_DOM_ALL; + + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK && retval != PAPI_ECMP ) { + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + } + + options.domain.eventset = EventSet2; + options.domain.domain = PAPI_DOM_KERNEL; + + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + + options.domain.eventset = EventSet3; + options.domain.domain = PAPI_DOM_USER; + + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + } + + retval = PAPI_add_event( EventSet3, PAPI_TOT_CYC ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_CYC)", retval ); + + if ( num == ADD ) { + if (!TESTS_QUIET) printf( "\nTest case ADD: Call PAPI_set_opt(PAPI_DOMAIN) on EventSet after add\n" ); + options.domain.eventset = EventSet1; + options.domain.domain = PAPI_DOM_ALL; + + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK && retval != PAPI_ECMP ) { + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + } + options.domain.eventset = EventSet2; + options.domain.domain = PAPI_DOM_KERNEL; + + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + + options.domain.eventset = EventSet3; + options.domain.domain = PAPI_DOM_USER; + + retval = PAPI_set_opt( PAPI_DOMAIN, &options ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_opt", retval ); + } + + /* 2 events */ + + values = allocate_test_space( num_tests, 2 ); + + if ( num == CHANGE ) { + /* This testcase is dependent on the CREATE testcase running immediately before it, using + * domain settings of "All", 
"Kernel" and "User", on event sets 1, 2, and 3, respectively. + */ + PAPI_option_t option; + if (!TESTS_QUIET) printf( "\nTest case CHANGE 1: Change domain on EventSet between runs, using generic domain options:\n" ); + PAPI_start( EventSet1 ); + PAPI_stop( EventSet1, values[0] ); + + // change EventSet1 domain from All to User + option.domain.domain = PAPI_DOM_USER; + option.domain.eventset = EventSet1; + retval = PAPI_set_opt( PAPI_DOMAIN, &option ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); + + PAPI_start( EventSet2 ); + PAPI_stop( EventSet2, values[1] ); + + // change EventSet2 domain from Kernel to All + option.domain.domain = PAPI_DOM_ALL; + option.domain.eventset = EventSet2; + retval = PAPI_set_opt( PAPI_DOMAIN, &option ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); + + PAPI_start( EventSet3 ); + PAPI_stop( EventSet3, values[2] ); + + // change EventSet3 domain from User to Kernel + option.domain.domain = PAPI_DOM_KERNEL; + option.domain.eventset = EventSet3; + retval = PAPI_set_opt( PAPI_DOMAIN, &option ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); + + free_test_space( values, num_tests ); + values = allocate_test_space( num_tests, 2 ); + + } + + if ( num == SUPERVISOR && + ( cmpinfo->available_domains & PAPI_DOM_SUPERVISOR ) ) { + PAPI_option_t option; + + if (!TESTS_QUIET) printf( "\nTest case CHANGE 2: Change domain on EventSets to include/exclude supervisor events:\n" ); + + option.domain.domain = PAPI_DOM_ALL; + option.domain.eventset = EventSet1; + retval = PAPI_set_opt( PAPI_DOMAIN, &option ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain ALL ", retval ); + + option.domain.domain = PAPI_DOM_ALL ^ PAPI_DOM_SUPERVISOR; + option.domain.eventset = EventSet2; + retval = PAPI_set_opt( PAPI_DOMAIN, &option ); + if ( retval != PAPI_OK ) { + + /* DOM_ALL is special-cased as domains_available 
*/ + /* in papi.c . Some machines don't like DOM_OTHER */ + /* so try that if the above case fails. */ + option.domain.domain ^= PAPI_DOM_OTHER; + option.domain.eventset = EventSet2; + retval = PAPI_set_opt( PAPI_DOMAIN, &option ); + + if (retval != PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_set_domain ALL^SUPERVISOR ", retval ); + } + } + + option.domain.domain = PAPI_DOM_SUPERVISOR; + option.domain.eventset = EventSet3; + retval = PAPI_set_opt( PAPI_DOMAIN, &option ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain SUPERVISOR ", retval ); + + free_test_space( values, num_tests ); + values = allocate_test_space( num_tests, 2 ); + } + /* Warm it up dude */ + + PAPI_start( EventSet1 ); + do_flops( NUM_FLOPS ); + PAPI_stop( EventSet1, NULL ); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_start( EventSet2 ); + + do_flops( NUM_FLOPS ); + + if ( retval == PAPI_OK ) { + retval = PAPI_stop( EventSet2, values[1] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } else { + values[1][0] = retval; + values[1][1] = retval; + } + + retval = PAPI_start( EventSet3 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet3, values[2] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_cleanup_eventset( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup", retval ); + + retval = PAPI_destroy_eventset( &EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy", retval ); + + retval = PAPI_cleanup_eventset( EventSet2 ); + if ( retval != PAPI_OK ) + test_fail( 
__FILE__, __LINE__, "PAPI_cleanup", retval ); + + retval = PAPI_destroy_eventset( &EventSet2 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy", retval ); + + retval = PAPI_cleanup_eventset( EventSet3 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup", retval ); + + retval = PAPI_destroy_eventset( &EventSet3 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy", retval ); + + dump_and_verify( num, values ); + + free(values); + + PAPI_shutdown( ); +} + +void +case2( int num, int domain, long long *values ) +{ + int retval; + int EventSet1 = PAPI_NULL; + PAPI_option_t options; + + memset( &options, 0x0, sizeof ( options ) ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + if ( ( retval = PAPI_query_event( PAPI_TOT_INS ) ) != PAPI_OK ) + test_skip( __FILE__, __LINE__, "PAPI_query_event", retval ); + + if ( ( retval = PAPI_query_event( PAPI_TOT_CYC ) ) != PAPI_OK ) + test_skip( __FILE__, __LINE__, "PAPI_query_event", retval ); + + if ( num == CREATE ) { + if (!TESTS_QUIET) { + printf( "\nTest case 2, CREATE: Call PAPI_set_domain(%s) before create\n", + stringify_domain( domain ) ); + printf( "This should override the domain setting for this EventSet.\n" ); + } + retval = PAPI_set_domain( domain ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); + } + + retval = PAPI_create_eventset( &EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + if ( num == ADD ) { + if (!TESTS_QUIET) { + printf( "\nTest case 2, ADD: Call PAPI_set_domain(%s) before add\n", + stringify_domain( domain ) ); + printf( "This should have no effect on the domain setting for this EventSet.\n" ); + } + + retval = PAPI_set_domain( domain ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval 
); + } + + retval = PAPI_add_event( EventSet1, PAPI_TOT_INS ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_INS)", retval ); + + if ( num == MIDDLE ) { + if (!TESTS_QUIET) { + printf( "\nTest case 2, MIDDLE: Call PAPI_set_domain(%s) between adds\n", + stringify_domain( domain ) ); + printf( "This should have no effect on the domain setting for this EventSet.\n" ); + } + + retval = PAPI_set_domain( domain ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_set_domain", retval ); + } + + retval = PAPI_add_event( EventSet1, PAPI_TOT_CYC ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event(PAPI_TOT_CYC)", retval ); + + + /* Warm it up dude */ + + PAPI_start( EventSet1 ); + do_flops( NUM_FLOPS ); + PAPI_stop( EventSet1, NULL ); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet1, values ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_cleanup_eventset( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup", retval ); + + retval = PAPI_destroy_eventset( &EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy", retval ); + + PAPI_shutdown( ); +} + +void +case2_driver( void ) +{ + long long **values; + + /* 3 tests, 2 events */ + + values = allocate_test_space( 3, 2 ); + + case2( CREATE, PAPI_DOM_KERNEL, values[0] ); + case2( ADD, PAPI_DOM_KERNEL, values[1] ); + case2( MIDDLE, PAPI_DOM_KERNEL, values[2] ); + + dump_and_verify( CASE2, values ); + + free(values); +} + +void +case1_driver( void ) +{ + case1( ADD ); + case1( MIDDLE ); + case1( CREATE ); + case1( CHANGE ); + case1( SUPERVISOR ); +} + +int +main( int argc, char **argv ) +{ + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + +#if defined(sgi) && defined(host_mips) + uid_t id; 
+ id = getuid( ); + if ( id != 0 ) { + printf( "IRIX requires root for PAPI_DOM_KERNEL and PAPI_DOM_ALL.\n" ); + test_skip( __FILE__, __LINE__, "", 1 ); + } +#endif + + if (!TESTS_QUIET) { + printf( "Test second.c: set domain of eventset via PAPI_set_domain and PAPI_set_opt.\n\n" ); + printf( "* PAPI_set_domain(DOMAIN) sets the default domain \napplied to subsequently created EventSets.\n" ); + printf( "It should have no effect on existing EventSets.\n\n" ); + printf( "* PAPI_set_opt(DOMAIN,xxx) sets the domain for a specific EventSet.\n" ); + printf( "It should always override the default setting for that EventSet.\n" ); + } + + case2_driver( ); + case1_driver( ); + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/shlib.c b/src/ctests/shlib.c new file mode 100644 index 0000000..bc8bd11 --- /dev/null +++ b/src/ctests/shlib.c @@ -0,0 +1,191 @@ +/* +* File: profile.c +* Author: Philip Mucci +* mucci@cs.utk.edu +*/ + +#include +#include +#include +#include + +#if (!defined(NO_DLFCN) && !defined(_BGL) && !defined(_BGP)) +#include +#endif + +#include "papi.h" +#include "papi_test.h" + +void print_shlib_info_map(const PAPI_shlib_info_t *shinfo, int quiet) +{ + PAPI_address_map_t *map = shinfo->map; + int i; + if (NULL == map) { + test_fail(__FILE__, __LINE__, "PAPI_get_shared_lib_info", 1); + } + + if (!quiet) for ( i = 0; i < shinfo->count; i++ ) { + printf( "Library: %s\n", map->name ); + printf( "Text start: %p, Text end: %p\n", map->text_start, + map->text_end ); + printf( "Data start: %p, Data end: %p\n", map->data_start, + map->data_end ); + printf( "Bss start: %p, Bss end: %p\n", map->bss_start, map->bss_end ); + + if ( strlen( &(map->name[0]) ) == 0 ) + test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); + if ( ( map->text_start == 0x0 ) || ( map->text_end == 0x0 ) || + ( map->text_start >= map->text_end ) ) + test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); +/* + if ((map->data_start == 0x0) || (map->data_end == 0x0) 
|| + (map->data_start >= map->data_end)) + test_fail(__FILE__, __LINE__, "PAPI_get_shared_lib_info",1); + if (((map->bss_start) && (!map->bss_end)) || + ((!map->bss_start) && (map->bss_end)) || + (map->bss_start > map->bss_end)) + test_fail(__FILE__, __LINE__, "PAPI_get_shared_lib_info",1); +*/ + + map++; + } +} + +void display( char *msg ) +{ + int i; + for (i=0; i<64; i++) + { + printf( "%1d", (msg[i] ? 1 : 0) ); + } + printf("\n"); +} + +int +main( int argc, char **argv ) +{ + int retval,quiet; + + const PAPI_shlib_info_t *shinfo; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if ( ( shinfo = PAPI_get_shared_lib_info( ) ) == NULL ) { + test_skip( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); + } + + if ( ( shinfo->count == 0 ) && ( shinfo->map ) ) { + test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); + } + + print_shlib_info_map(shinfo, quiet); + + /* Needed for debugging, so you can ^Z and stop the process, */ + /* inspect /proc to see if it's right */ + sleep( 1 ); + +#ifndef NO_DLFCN + { + + const char *_libname = "libcrypt.so"; + void *handle; + void ( *setkey) (const char *key); + void ( *encrypt) (char block[64], int edflag); + char key[64]={ + 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, + 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, + 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, + 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, + }; /* bit pattern for key */ + char orig[64]; /* bit pattern for messages */ + char txt[64]; /* bit pattern for messages */ + + int oldcount; + + handle = dlopen( _libname, RTLD_NOW ); + if ( !handle ) { + printf( "dlopen: %s\n", dlerror( ) ); + if (!quiet) printf( "Did you forget to set the environmental " + "variable LIBPATH (in AIX) or " + "LD_LIBRARY_PATH (in linux) ?\n" ); + test_fail( __FILE__, __LINE__, "dlopen", 1 ); + } + + setkey = dlsym( handle, "setkey" ); + 
encrypt = dlsym( handle, "encrypt" ); + if ( setkey == NULL || encrypt == NULL) { + if (!quiet) printf( "dlsym: %s\n", dlerror( ) ); + test_fail( __FILE__, __LINE__, "dlsym", 1 ); + } + + memset(orig,0,64); + memcpy(txt,orig,64); + setkey(key); + + if (!quiet) { + printf("original "); + display(txt); + } + + encrypt(txt, 0); /* encode */ + + if (!quiet) { + printf("encrypted "); + display(txt); + } + + if (!memcmp(txt,orig,64)) { + test_fail( __FILE__, __LINE__, "encode", 1 ); + } + + encrypt(txt, 1); /* decode */ + + if (!quiet) { + printf("decrypted "); + display(txt); + } + + if (memcmp(txt,orig,64)) { + test_fail( __FILE__, __LINE__, "decode", 1 ); + } + + oldcount = shinfo->count; + + if ( ( shinfo = PAPI_get_shared_lib_info( ) ) == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); + } + + /* Needed for debugging, so you can ^Z and stop the process, */ + /* inspect /proc to see if it's right */ + sleep( 1 ); + + if ( ( shinfo->count == 0 ) && ( shinfo->map ) ) { + test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); + } + + if ( shinfo->count <= oldcount ) { + test_fail( __FILE__, __LINE__, "PAPI_get_shared_lib_info", 1 ); + } + + print_shlib_info_map(shinfo, quiet); + + /* Needed for debugging, so you can ^Z and stop the process, */ + /* inspect /proc to see if it's right */ + sleep( 1 ); + + dlclose( handle ); + + } +#endif + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/sprofile.c b/src/ctests/sprofile.c new file mode 100644 index 0000000..9d74ceb --- /dev/null +++ b/src/ctests/sprofile.c @@ -0,0 +1,164 @@ +/* +* File: sprofile.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: Maynard Johnson +* maynardj@us.ibm.com +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" +#include "prof_utils.h" + +#include "do_loops.h" + +/* These architectures use Function Descriptors as Function Pointers */ + +#if (defined(linux) && defined(__ia64__)) || (defined(_AIX)) \ + || 
((defined(__powerpc64__) && (_CALL_ELF != 2))) +/* PPC64 Big Endian is ELF version 1 which uses function descriptors */ +#define DO_READS (unsigned long)(*(void **)do_reads) +#define DO_FLOPS (unsigned long)(*(void **)do_flops) +#else +/* PPC64 Little Endian is ELF version 2 which does not use + * function descriptors + */ +#define DO_READS (unsigned long)(do_reads) +#define DO_FLOPS (unsigned long)(do_flops) +#endif + +/* This file performs the following test: sprofile */ + + +int +main( int argc, char **argv ) +{ + int i, num_events, num_tests = 6, mask = 0x1; + int EventSet = PAPI_NULL; + unsigned short **buf = ( unsigned short ** ) profbuf; + unsigned long length, blength; + int num_buckets; + PAPI_sprofil_t sprof[3]; + int retval; + const PAPI_exe_info_t *prginfo; + caddr_t start, end; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if (retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if ( ( prginfo = PAPI_get_executable_info( ) ) == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_executable_info", 1 ); + } + + start = prginfo->address_info.text_start; + end = prginfo->address_info.text_end; + if ( start > end ) { + test_fail( __FILE__, __LINE__, "Profile length < 0!", PAPI_ESYS ); + } + length = ( unsigned long ) ( end - start ); + if (!quiet) { + prof_print_address( "Test case sprofile: POSIX compatible profiling over multiple regions.\n", + prginfo ); + } + + blength = prof_size( length, FULL_SCALE, PAPI_PROFIL_BUCKET_16, + &num_buckets ); + prof_alloc( 3, blength ); + + /* First half */ + sprof[0].pr_base = buf[0]; + sprof[0].pr_size = ( unsigned int ) blength; + sprof[0].pr_off = ( caddr_t ) DO_FLOPS; +#if defined(linux) && defined(__ia64__) + if ( !quiet ) + fprintf( stderr, "do_flops is at %p %p\n", &do_flops, sprof[0].pr_off ); +#endif + sprof[0].pr_scale = FULL_SCALE; + /* Second half */ + 
sprof[1].pr_base = buf[1]; + sprof[1].pr_size = ( unsigned int ) blength; + sprof[1].pr_off = ( caddr_t ) DO_READS; +#if defined(linux) && defined(__ia64__) + if ( !quiet ) + fprintf( stderr, "do_reads is at %p %p\n", &do_reads, sprof[1].pr_off ); +#endif + sprof[1].pr_scale = FULL_SCALE; + /* Overflow bin */ + sprof[2].pr_base = buf[2]; + sprof[2].pr_size = 1; + sprof[2].pr_off = 0; + sprof[2].pr_scale = 0x2; + + EventSet = add_test_events( &num_events, &mask, 1 ); + + values = allocate_test_space( num_tests, num_events ); + + retval = PAPI_sprofil( sprof, 3, EventSet, PAPI_TOT_CYC, THRESHOLD, + PAPI_PROFIL_POSIX | PAPI_PROFIL_BUCKET_16 ); + if (retval != PAPI_OK ) { + if (retval == PAPI_ENOEVNT) { + if (!quiet) printf("Trouble creating events\n"); + test_skip(__FILE__,__LINE__,"PAPI_sprofil",retval); + } + test_fail( __FILE__, __LINE__, "PAPI_sprofil", retval ); + } + + do_stuff( ); + + if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_stuff( ); + + if ( ( retval = PAPI_stop( EventSet, values[1] ) ) != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + /* clear the profile flag before removing the event */ + if ( ( retval = PAPI_sprofil( sprof, 3, EventSet, PAPI_TOT_CYC, 0, + PAPI_PROFIL_POSIX | PAPI_PROFIL_BUCKET_16 ) ) + != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_sprofil", retval ); + + remove_test_events( &EventSet, mask ); + + + + if ( !quiet ) { + printf( "Test case: PAPI_sprofil()\n" ); + printf( "---------Buffer 1--------\n" ); + for ( i = 0; i < ( int ) length / 2; i++ ) { + if ( buf[0][i] ) + printf( "%#lx\t%d\n", DO_FLOPS + 2 * ( unsigned long ) i, + buf[0][i] ); + } + printf( "---------Buffer 2--------\n" ); + for ( i = 0; i < ( int ) length / 2; i++ ) { + if ( buf[1][i] ) + printf( "%#lx\t%d\n", DO_READS + 2 * ( unsigned long ) i, + buf[1][i] ); + } + printf( "-------------------------\n" ); + printf( "%u samples fell outside the regions.\n", *buf[2] ); + } 
+ retval = prof_check( 2, PAPI_PROFIL_BUCKET_16, num_buckets ); + + for ( i = 0; i < 3; i++ ) { + free( profbuf[i] ); + } + if ( retval == 0 ) { + test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/system_child_overflow.c b/src/ctests/system_child_overflow.c new file mode 100644 index 0000000..9aba54e --- /dev/null +++ b/src/ctests/system_child_overflow.c @@ -0,0 +1,206 @@ +/* + * Use "system() to run child_overflow + * Test PAPI with fork() and exec(). + */ + +#include +#include +#include +#include +#include +#include +#include +#include "papi.h" +#include "papi_test.h" + +#define MAX_EVENTS 3 + +static int Event[MAX_EVENTS] = { + PAPI_TOT_CYC, + PAPI_FP_INS, + PAPI_FAD_INS, +}; + +static int Threshold[MAX_EVENTS] = { + 8000000, + 4000000, + 4000000, +}; + +static int num_events = 1; +static int EventSet = PAPI_NULL; +static const char *name = "unknown"; +static struct timeval start, last; +static long count, total; + +static void +my_handler( int EventSet, void *pc, long long ovec, void *context ) +{ + ( void ) EventSet; + ( void ) pc; + ( void ) ovec; + ( void ) context; + + count++; + total++; +} + +static void +zero_count( void ) +{ + gettimeofday( &start, NULL ); + last = start; + count = 0; + total = 0; +} + +static void +print_here( const char *str) { + + if (!TESTS_QUIET) printf("[%d] %s, %s\n", getpid(), name, str); +} + +static void +print_rate( const char *str ) +{ + static int last_count = -1; + struct timeval now; + double st_secs, last_secs; + + gettimeofday( &now, NULL ); + st_secs = ( double ) ( now.tv_sec - start.tv_sec ) + + ( ( double ) ( now.tv_usec - start.tv_usec ) ) / 1000000.0; + last_secs = ( double ) ( now.tv_sec - last.tv_sec ) + + ( ( double ) ( now.tv_usec - last.tv_usec ) ) / 1000000.0; + if ( last_secs <= 0.001 ) + last_secs = 0.001; + + if (!TESTS_QUIET) { + printf( "[%d] %s, time = %.3f, total = %ld, last = %ld, rate = %.1f/sec\n", + getpid( ), 
str, st_secs, total, count, + ( ( double ) count ) / last_secs ); + } + + if ( last_count != -1 ) { + if ( count < .1 * last_count ) { + test_fail( name, __LINE__, "Interrupt rate changed!", 1 ); + exit( 1 ); + } + } + last_count = ( int ) count; + count = 0; + last = now; +} + +static void +do_cycles( int program_time ) +{ + struct timeval start, now; + double x, sum; + + gettimeofday( &start, NULL ); + + for ( ;; ) { + sum = 1.0; + for ( x = 1.0; x < 250000.0; x += 1.0 ) + sum += x; + if ( sum < 0.0 ) + printf( "==>> SUM IS NEGATIVE !! <<==\n" ); + + gettimeofday( &now, NULL ); + if ( now.tv_sec >= start.tv_sec + program_time ) + break; + } +} + +static void +my_papi_start( void ) +{ + int ev; + + EventSet = PAPI_NULL; + + if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) + test_fail( name, __LINE__, "PAPI_create_eventset failed", 1 ); + + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_add_event( EventSet, Event[ev] ) != PAPI_OK ) { + if (!TESTS_QUIET) printf("Trouble adding event\n"); + test_skip( name, __LINE__, "PAPI_add_event failed", 1 ); + } + } + + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_overflow( EventSet, Event[ev], Threshold[ev], 0, my_handler ) + != PAPI_OK ) { + test_fail( name, __LINE__, "PAPI_overflow failed", 1 ); + } + } + + if ( PAPI_start( EventSet ) != PAPI_OK ) + test_fail( name, __LINE__, "PAPI_start failed", 1 ); +} + +static void +run( const char *str, int len ) +{ + int n; + + for ( n = 1; n <= len; n++ ) { + do_cycles( 1 ); + print_rate( str ); + } +} + +int +main( int argc, char **argv ) +{ + char buf[100]; + + int quiet,retval,result=0; + + /* Used to be able to set this via command line */ + num_events=1; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + do_cycles( 1 ); + + zero_count( ); + + /* Init library */ + retval=PAPI_library_init( PAPI_VER_CURRENT ); + if (retval!=PAPI_VER_CURRENT) { + test_fail( name, __LINE__, "PAPI_library_init failed", 1 ); + } + + name = argv[0]; + if (!quiet) 
printf( "[%d] %s, num_events = %d\n", getpid( ), name, num_events ); + sprintf( buf, "%d", num_events ); + my_papi_start( ); + run( name, 3 ); + + print_here( "system(./child_overflow)" ); + + if ( access( "./child_overflow", X_OK ) == 0 ) { + if ( quiet) result=system( "./child_overflow TESTS_QUIET" ); + else result=system( "./child_overflow" ); + } + else if ( access( "./ctests/child_overflow", X_OK ) == 0 ) { + if ( quiet) result=system( "./ctests/child_overflow TESTS_QUIET" ); + else result=system( "./ctests/child_overflow" ); + } + + if (result<0) { + test_fail(__FILE__,__LINE__,"system failed\n",1); + } + + if (!quiet) printf("Successfully returned from system\n"); + + // Rely on test_pass from child_overflow + // otherwise the run_tests.sh output is ugly + //test_pass(__FILE__); + + return 0; +} diff --git a/src/ctests/system_overflow.c b/src/ctests/system_overflow.c new file mode 100644 index 0000000..4f318bd --- /dev/null +++ b/src/ctests/system_overflow.c @@ -0,0 +1,193 @@ +/* + * Test PAPI with fork() and exec(). 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "papi.h" +#include "papi_test.h" + +#define MAX_EVENTS 3 + +static int Event[MAX_EVENTS] = { + PAPI_TOT_CYC, + PAPI_FP_INS, + PAPI_FAD_INS, +}; + +static int Threshold[MAX_EVENTS] = { + 8000000, + 4000000, + 4000000, +}; + +static int num_events = 1; +static int EventSet = PAPI_NULL; +static const char *name = "unknown"; +static struct timeval start, last; +static long count, total; + +static void +my_handler( int EventSet, void *pc, long long ovec, void *context ) +{ + ( void ) EventSet; + ( void ) pc; + ( void ) ovec; + ( void ) context; + + count++; + total++; +} + +static void +zero_count( void ) +{ + gettimeofday( &start, NULL ); + last = start; + count = 0; + total = 0; +} + +static void +print_here( const char *str) { + + if (!TESTS_QUIET) printf("[%d] %s, %s\n", getpid(), name, str); +} + +static void +print_rate( const char *str ) +{ + static int last_count = -1; + struct timeval now; + double st_secs, last_secs; + + gettimeofday( &now, NULL ); + st_secs = ( double ) ( now.tv_sec - start.tv_sec ) + + ( ( double ) ( now.tv_usec - start.tv_usec ) ) / 1000000.0; + last_secs = ( double ) ( now.tv_sec - last.tv_sec ) + + ( ( double ) ( now.tv_usec - last.tv_usec ) ) / 1000000.0; + if ( last_secs <= 0.001 ) + last_secs = 0.001; + + if (!TESTS_QUIET) { + printf( "[%d] %s, time = %.3f, total = %ld, last = %ld, rate = %.1f/sec\n", + getpid( ), str, st_secs, total, count, + ( ( double ) count ) / last_secs ); + } + + if ( last_count != -1 ) { + if ( count < .1 * last_count ) { + test_fail( name, __LINE__, "Interrupt rate changed!", 1 ); + exit( 1 ); + } + } + last_count = ( int ) count; + count = 0; + last = now; +} + +static void +do_cycles( int program_time ) +{ + struct timeval start, now; + double x, sum; + + gettimeofday( &start, NULL ); + + for ( ;; ) { + sum = 1.0; + for ( x = 1.0; x < 250000.0; x += 1.0 ) + sum += x; + if ( sum < 0.0 ) + printf( "==>> SUM IS NEGATIVE !! 
<<==\n" ); + + gettimeofday( &now, NULL ); + if ( now.tv_sec >= start.tv_sec + program_time ) + break; + } +} + +static void +my_papi_start( void ) +{ + int ev; + + EventSet = PAPI_NULL; + + if ( PAPI_create_eventset( &EventSet ) != PAPI_OK ) + test_fail( name, __LINE__, "PAPI_create_eventset failed", 1 ); + + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_add_event( EventSet, Event[ev] ) != PAPI_OK ) { + if (!TESTS_QUIET) printf("Trouble adding event\n"); + test_skip( name, __LINE__, "PAPI_add_event failed", 1 ); + } + } + + for ( ev = 0; ev < num_events; ev++ ) { + if ( PAPI_overflow( EventSet, Event[ev], Threshold[ev], 0, my_handler ) + != PAPI_OK ) { + test_fail( name, __LINE__, "PAPI_overflow failed", 1 ); + } + } + + if ( PAPI_start( EventSet ) != PAPI_OK ) + test_fail( name, __LINE__, "PAPI_start failed", 1 ); +} + +static void +run( const char *str, int len ) +{ + int n; + + for ( n = 1; n <= len; n++ ) { + do_cycles( 1 ); + print_rate( str ); + } +} + +int +main( int argc, char **argv ) +{ + char buf[100]; + + int quiet,retval; + + /* Used to be able to set this via command line */ + num_events=1; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + do_cycles( 1 ); + + zero_count( ); + + + retval=PAPI_library_init( PAPI_VER_CURRENT ); + if (retval!=PAPI_VER_CURRENT) { + test_fail( name, __LINE__, "PAPI_library_init failed", 1 ); + } + + name = argv[0]; + if (!quiet) printf( "[%d] %s, num_events = %d\n", getpid( ), name, num_events ); + sprintf( buf, "%d", num_events ); + my_papi_start( ); + run( name, 3 ); + + print_here( "system(./burn)" ); + if ( access( "./burn", X_OK ) == 0 ) + ( quiet ? system( "./burn TESTS_QUIET" ) : system( "./burn" ) ); + else if ( access( "./ctests/burn", X_OK ) == 0 ) + ( quiet ? 
system( "./ctests/burn TESTS_QUIET" ) : + system( "./ctests/burn" ) ); + + test_pass(__FILE__); + + return 0; +} diff --git a/src/ctests/tenth.c b/src/ctests/tenth.c new file mode 100644 index 0000000..af20fd0 --- /dev/null +++ b/src/ctests/tenth.c @@ -0,0 +1,241 @@ +/* +* File: tenth.c +* Mods: Maynard Johnson +* maynardj@us.ibm.com +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#define ITERS 100 + +/* This file performs the following test: start, stop and timer functionality for + PAPI_L1_TCM derived event + + - They are counted in the default counting domain and default + granularity, depending on the platform. Usually this is + the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. +*/ + + +#if defined(sun) && defined(sparc) +#define CACHE_LEVEL "PAPI_L2_TCM" +#define EVT1 PAPI_L2_TCM +#define EVT2 PAPI_L2_TCA +#define EVT3 PAPI_L2_TCH +#define EVT1_STR "PAPI_L2_TCM" +#define EVT2_STR "PAPI_L2_TCA" +#define EVT3_STR "PAPI_L2_TCH" +#define MASK1 MASK_L2_TCM +#define MASK2 MASK_L2_TCA +#define MASK3 MASK_L2_TCH +#else +#if defined(__powerpc__) +#define CACHE_LEVEL "PAPI_L1_DCA" +#define EVT1 PAPI_L1_DCA +#define EVT2 PAPI_L1_DCW +#define EVT3 PAPI_L1_DCR +#define EVT1_STR "PAPI_L1_DCA" +#define EVT2_STR "PAPI_L1_DCW" +#define EVT3_STR "PAPI_L1_DCR" +#define MASK1 MASK_L1_DCA +#define MASK2 MASK_L1_DCW +#define MASK3 MASK_L1_DCR +#else +#define CACHE_LEVEL "PAPI_L1_TCM" +#define EVT1 PAPI_L1_TCM +#define EVT2 PAPI_L1_ICM +#define EVT3 PAPI_L1_DCM +#define EVT1_STR "PAPI_L1_TCM" +#define EVT2_STR "PAPI_L1_ICM" +#define EVT3_STR "PAPI_L1_DCM" +#define MASK1 MASK_L1_TCM +#define MASK2 MASK_L1_ICM +#define MASK3 MASK_L1_DCM +#endif +#endif + + +int +main( int argc, char **argv ) +{ + int retval, num_tests = 30, tmp; + int EventSet1 = PAPI_NULL; + int EventSet2 = PAPI_NULL; + int EventSet3 = PAPI_NULL; + int mask1 = MASK1; + 
int mask2 = MASK2; + int mask3 = MASK3; + int num_events1; + int num_events2; + int num_events3; + long long **values; + int i, j; + long long min[3]; + long long max[3]; + long long sum[3]; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Make sure that required resources are available */ + /* Skip (don't fail!) if they are not */ + retval = PAPI_query_event( EVT1 ); + if ( retval != PAPI_OK ) { + test_skip( __FILE__, __LINE__, EVT1_STR, retval ); + } + + retval = PAPI_query_event( EVT2 ); + if ( retval != PAPI_OK ) { + test_skip( __FILE__, __LINE__, EVT2_STR, retval ); + } + + retval = PAPI_query_event( EVT3 ); + if ( retval != PAPI_OK ) { + test_skip( __FILE__, __LINE__, EVT3_STR, retval ); + } + + EventSet1 = add_test_events( &num_events1, &mask1, 1 ); + EventSet2 = add_test_events( &num_events2, &mask2, 1 ); + EventSet3 = add_test_events( &num_events3, &mask3, 1 ); + + values = allocate_test_space( num_tests, 1 ); + + /* Warm me up */ + do_l1misses( ITERS ); + do_misses( 1, 1024 * 1024 * 4 ); + + for ( i = 0; i < 10; i++ ) { + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_l1misses( ITERS ); + do_misses( 1, 1024 * 1024 * 4 ); + + retval = PAPI_stop( EventSet1, values[( i * 3 ) + 0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_start( EventSet2 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_l1misses( ITERS ); + do_misses( 1, 1024 * 1024 * 4 ); + + retval = PAPI_stop( EventSet2, values[( i * 3 ) + 1] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_start( EventSet3 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, 
"PAPI_start", retval ); + + do_l1misses( ITERS ); + do_misses( 1, 1024 * 1024 * 4 ); + + retval = PAPI_stop( EventSet3, values[( i * 3 ) + 2] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + remove_test_events( &EventSet1, mask1 ); + remove_test_events( &EventSet2, mask2 ); + remove_test_events( &EventSet3, mask3 ); + + for ( j = 0; j < 3; j++ ) { + /* Seed the minimum from the first sample: a fixed 65535 cap would be + reported as the minimum whenever every count is larger */ + min[j] = values[j][0]; + max[j] = sum[j] = 0; + } + for ( i = 0; i < 10; i++ ) { + for ( j = 0; j < 3; j++ ) { + if ( min[j] > values[( i * 3 ) + j][0] ) + min[j] = values[( i * 3 ) + j][0]; + if ( max[j] < values[( i * 3 ) + j][0] ) + max[j] = values[( i * 3 ) + j][0]; + sum[j] += values[( i * 3 ) + j][0]; + } + } + + if ( !quiet ) { + printf( "Test case 10: start, stop for derived event %s.\n", + CACHE_LEVEL ); + printf( "--------------------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", ITERS ); + printf( "Repeated 10 times\n" ); + printf + ( "-------------------------------------------------------------------------\n" ); +/* + for (i=0;i<10;i++) { + printf("Test type : %12s%13s%13s\n", "1", "2", "3"); + printf(TAB3, EVT1_STR, values[(i*3)+0][0], (long long)0, (long long)0); + printf(TAB3, EVT2_STR, (long long)0, values[(i*3)+1][0], (long long)0); + printf(TAB3, EVT3_STR, (long long)0, (long long)0, values[(i*3)+2][0]); + printf + ("-------------------------------------------------------------------------\n"); + } +*/ + printf( "Test type : %12s%13s%13s\n", "min", "max", "sum" ); + printf( TAB3, EVT1_STR, min[0], max[0], sum[0] ); + printf( TAB3, EVT2_STR, min[1], max[1], 
sum[1] ); + printf( TAB3, EVT3_STR, min[2], max[2], sum[2] ); + printf + ( 
"-------------------------------------------------------------------------\n" ); + printf( "Verification:\n" ); +#if defined(sun) && defined(sparc) + printf( TAB1, "Sum 1 approximately equals sum 2 - sum 3 or", + ( sum[1] - sum[2] ) ); +#else + printf( TAB1, "Sum 1 approximately equals sum 2 + sum 3 or", + ( sum[1] + sum[2] ) ); +#endif + } + + { + long long tmin, tmax; + +#if defined(sun) && defined(sparc) + tmax = ( long long ) ( sum[1] - sum[2] ); +#else + tmax = ( long long ) ( sum[1] + sum[2] ); +#endif + + if (!quiet) { + /* Take the absolute difference in long long; casting it to int + for abs() truncates large counts */ + printf( "percent error: %f\n", + (( float ) ( tmax > sum[0] ? tmax - sum[0] : sum[0] - tmax ) / + (float) sum[0] ) * 100.0 ); + } + tmin = ( long long ) ( ( double ) tmax * 0.8 ); + tmax = ( long long ) ( ( double ) tmax * 1.2 ); + if ( sum[0] > tmax || sum[0] < tmin ) { + test_fail( __FILE__, __LINE__, CACHE_LEVEL, 1 ); + } + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/thrspecific.c b/src/ctests/thrspecific.c new file mode 100644 index 0000000..e454b11 --- /dev/null +++ b/src/ctests/thrspecific.c @@ -0,0 +1,176 @@ +/* This file performs the following test: start, stop and timer +functionality for 2 slave pthreads */ + +/* No it doesn't, that description is *completely* wrong */ + +/* I think this is trying to test the pthread thread-specific */ +/* implementation but it is unclear and the git commit history */ +/* does not help at all here */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +static volatile int processing = 1; + +void * +Thread( void *arg ) +{ + int retval; + void *arg2; + int i; + + retval = PAPI_register_thread( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); + } + + if (!TESTS_QUIET) { + printf( "Thread %#x started, specific data is at %p\n", + ( int ) pthread_self( ), arg ); + } + + retval = PAPI_set_thr_specific( 
PAPI_USR1_TLS, arg ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_set_thr_specific", 
retval ); + } + + retval = PAPI_get_thr_specific( PAPI_USR1_TLS, &arg2 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_get_thr_specific", retval ); + } + + if ( arg != arg2 ) { + test_fail( __FILE__, __LINE__, "set vs get specific", 0 ); + } + + while ( processing ) { + if ( *( ( int * ) arg ) == 500000 ) { + sleep( 1 ); + + PAPI_all_thr_spec_t data; + data.num = 10; + data.id = ( unsigned long * ) malloc( ( size_t ) data.num * + sizeof ( unsigned long ) ); + data.data = ( void ** ) malloc( ( size_t ) data.num * sizeof ( void * ) ); + /* Both arrays are handed to PAPI_get_thr_specific() below and then + indexed, so stop here if either allocation failed */ + if ( data.id == NULL || data.data == NULL ) { + test_fail( __FILE__, __LINE__, "malloc", PAPI_ENOMEM ); + } + + retval = PAPI_get_thr_specific( PAPI_USR1_TLS | PAPI_TLS_ALL_THREADS, + ( void ** ) &data ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_get_thr_specific", + retval ); + } + + if ( data.num != 5 ) { + test_fail( __FILE__, __LINE__, "data.num != 5", 0 ); + } + + if (!TESTS_QUIET) for ( i = 0; i < data.num; i++ ) { + printf( "Entry %d, Thread %#lx, Data Pointer %p, Value %d\n", + i, data.id[i], data.data[i], *( int * ) data.data[i] ); + } + /* The arrays are only needed for the report above; release them */ + free( data.data ); + free( data.id ); + processing = 0; + } + } + + retval = PAPI_unregister_thread( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); + } + + return NULL; +} + +int +main( int argc, char **argv ) +{ + pthread_t e_th, f_th, g_th, h_th; + int flops1, flops2, flops3, flops4, flops5; + int retval, rc; + pthread_attr_t attr; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + if (!quiet) printf("Testing threads\n"); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); 
+ } + + retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) + ( pthread_self ) ); + if ( retval != PAPI_OK ) { + if ( retval == PAPI_ECMP ) { + test_skip( __FILE__, __LINE__, + "PAPI_thread_init", retval ); + } + else { + test_fail( __FILE__, __LINE__, + "PAPI_thread_init", retval ); + } + } + + pthread_attr_init( &attr ); 
+ +#ifdef PTHREAD_CREATE_UNDETACHED + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); +#endif + +#ifdef PTHREAD_SCOPE_SYSTEM + retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); + if ( retval != 0 ) { + test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); + } +#endif + + flops1 = 1000000; + rc = pthread_create( &e_th, &attr, Thread, ( void * ) &flops1 ); + if ( rc ) { + retval = PAPI_ESYS; + test_fail( __FILE__, __LINE__, "pthread_create", retval ); + } + + flops2 = 2000000; + rc = pthread_create( &f_th, &attr, Thread, ( void * ) &flops2 ); + if ( rc ) { + retval = PAPI_ESYS; + test_fail( __FILE__, __LINE__, "pthread_create", retval ); + } + + flops3 = 4000000; + rc = pthread_create( &g_th, &attr, Thread, ( void * ) &flops3 ); + if ( rc ) { + retval = PAPI_ESYS; + test_fail( __FILE__, __LINE__, "pthread_create", retval ); + } + + flops4 = 8000000; + rc = pthread_create( &h_th, &attr, Thread, ( void * ) &flops4 ); + if ( rc ) { + retval = PAPI_ESYS; + test_fail( __FILE__, __LINE__, "pthread_create", retval ); + } + + pthread_attr_destroy( &attr ); + + flops5 = 500000; + Thread( &flops5 ); + + pthread_join( h_th, NULL ); + pthread_join( g_th, NULL ); + pthread_join( f_th, NULL ); + pthread_join( e_th, NULL ); + + test_pass( __FILE__ ); + + pthread_exit( NULL ); + + return 1; +} diff --git a/src/ctests/timer_overflow.c b/src/ctests/timer_overflow.c new file mode 100644 index 0000000..0e8a6fe --- /dev/null +++ b/src/ctests/timer_overflow.c @@ -0,0 +1,66 @@ +/* + * File: timer_overflow.c + * Author: Kevin London + * london@cs.utk.edu + * Mods: + * + */ + +/* This file looks for possible timer overflows. 
*/ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#define TIMER_THRESHOLD 100 + +int +main( int argc, char **argv ) +{ + int sleep_time = TIMER_THRESHOLD; + int retval, i; + long long timer; + + if ( argc > 1 ) { + if ( !strcmp( argv[1], "TESTS_QUIET" ) ) + tests_quiet( argc, argv ); + else { + sleep_time = atoi( argv[1] ); + if ( sleep_time <= 0 ) + sleep_time = TIMER_THRESHOLD; + } + } + + if ( TESTS_QUIET ) { + /* Skip the test in TESTS_QUIET so that the main script doesn't + * run this as it takes a long time to check for overflow + */ + printf( "%-40s SKIPPED\nLine # %d\n", __FILE__, __LINE__ ); + printf( "timer_overflow takes a long time to run, run separately.\n" ); + exit( 0 ); + } + + printf( "This test will take about: %f minutes.\n", + ( float ) ( 20 * ( sleep_time / 60.0 ) ) ); + if ( ( retval = + PAPI_library_init( PAPI_VER_CURRENT ) ) != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + timer = PAPI_get_real_usec( ); + for ( i = 0; i <= 20; i++ ) { + if ( timer < 0 ) + break; + sleep( ( unsigned int ) sleep_time ); + timer = PAPI_get_real_usec( ); + } + if ( timer < 0 ) + test_fail( __FILE__, __LINE__, "PAPI_get_real_usec: overflow", 1 ); + else + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/val_omp.c b/src/ctests/val_omp.c new file mode 100644 index 0000000..4f56d98 --- /dev/null +++ b/src/ctests/val_omp.c @@ -0,0 +1,178 @@ +/* This file performs the following test: each OMP thread measures flops +for its provided tasks, and compares this to expected flop counts, each +thread having been provided with a random amount of work, such that the +time and order that they complete their measurements varies. +Specifically tested is the case where the value returned for some threads +actually corresponds to that for another thread reading its counter values +at the same time. + + - It is based on zero_omp.c but ignored much of its functionality. 
+ - It attempts to use the following two counters. It may use less +depending on hardware counter resource limitations. These are counted +in the default counting domain and default granularity, depending on +the platform. Usually this is the user domain (PAPI_DOM_USER) and +thread context (PAPI_GRN_THR). + + + PAPI_FP_INS + + PAPI_TOT_CYC + +Each thread inside the Thread routine: + - Do prework (MAX_FLOPS - flops) + - Get cyc. + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. + - Get cyc. + - Return flops +*/ + +#include "papi_test.h" + +#ifdef _OPENMP +#include +#else +#error "This compiler does not understand OPENMP" +#endif + +const int MAX_FLOPS = NUM_FLOPS; + +extern int TESTS_QUIET; /* Declared in test_utils.c */ +const PAPI_hw_info_t *hw_info = NULL; + +long long +Thread( int n ) +{ + int retval, num_tests = 1; + int EventSet1 = PAPI_NULL; + int PAPI_event, mask1; + int num_events1; + long long flops; + long long **values; + long long elapsed_us, elapsed_cyc; + char event_name[PAPI_MAX_STR_LEN]; + + /* printf("Thread(n=%d) %#x started\n", n, omp_get_thread_num()); */ + num_events1 = 2; + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depending on the availability of the event on the + platform */ + EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + values = allocate_test_space( num_tests, num_events1 ); + + do_flops( MAX_FLOPS - n ); /* prework for balance */ + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( n ); + + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + 
+ flops = ( values[0] )[0]; + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + remove_test_events( &EventSet1, mask1 ); + + if ( !TESTS_QUIET ) { + /*printf("Thread %#x %-12s : \t%lld\t%d\n", omp_get_thread_num(), event_name, + (values[0])[0], n); */ +#if 0 + printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", omp_get_thread_num( ), + values[0][0] ); + printf( "Thread %#x Real usec : \t%lld\n", omp_get_thread_num( ), + elapsed_us ); + printf( "Thread %#x Real cycles : \t%lld\n", omp_get_thread_num( ), + elapsed_cyc ); +#endif + } + + /* It is illegal for the threads to exit in OpenMP */ + /* test_pass(__FILE__,0,0); */ + free_test_space( values, num_tests ); + + PAPI_unregister_thread( ); + /* printf("Thread %#x finished\n", omp_get_thread_num()); */ + return flops; +} + +int +main( int argc, char **argv ) +{ + int tid, retval; + int maxthr = omp_get_max_threads( ); + int flopper = 0; + long long *flops = calloc( maxthr, sizeof ( long long ) ); + long long *flopi = calloc( maxthr, sizeof ( long long ) ); + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + if ( maxthr < 2 ) + test_skip( __FILE__, __LINE__, "omp_get_num_threads < 2", PAPI_EINVAL ); + + if ( ( flops == NULL ) || ( flopi == NULL ) ) + test_fail( __FILE__, __LINE__, "calloc", PAPI_ENOMEM ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + + retval = + PAPI_thread_init( ( unsigned + long ( * )( void ) ) ( omp_get_thread_num ) ); + if ( retval != PAPI_OK ) + if ( retval == PAPI_ECMP ) + test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); + else + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + + flopper = Thread( 65536 ) / 65536; + printf( "flopper=%d\n", flopper ); + + for ( int i = 
0; i < 100000; i++ ) +#pragma omp parallel private(tid) + { + tid = omp_get_thread_num( ); + /* Widen before multiplying: rand( ) * 3 overflows int once + rand( ) exceeds INT_MAX / 3 */ + flopi[tid] = ( long long ) rand( ) * 3; + flops[tid] = Thread( ( flopi[tid] / flopper ) % MAX_FLOPS ); +#pragma omp barrier +#pragma omp master + if ( flops[tid] < flopi[tid] ) { + printf( "test iteration=%d\n", i ); + for ( int j = 0; j < omp_get_num_threads( ); j++ ) { + printf( "Thread %#x Value %6lld %c %6lld", j, flops[j], + ( flops[j] < flopi[j] ) ? '<' : '=', flopi[j] ); + for ( int k = 0; k < omp_get_num_threads( ); k++ ) + if ( ( k != j ) && ( flops[k] == flops[j] ) ) + printf( " == Thread %#x!", k ); + printf( "\n" ); + } + test_fail( __FILE__, __LINE__, "value returned for thread", + PAPI_EBUG ); + } + } + + /* test_pass() is called with only the file name, as in the other ctests */ + test_pass( __FILE__ ); + exit( 0 ); +} diff --git a/src/ctests/version.c b/src/ctests/version.c new file mode 100644 index 0000000..3205a84 --- /dev/null +++ b/src/ctests/version.c @@ -0,0 +1,66 @@ +/* This file performs the following test: */ +/* compare and report versions from papi.h and the papi library */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int main( int argc, char **argv ) { + + int init_version, lib_version; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + init_version = PAPI_library_init( PAPI_VER_CURRENT ); + if ( init_version != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, + "PAPI_library_init", init_version ); + } + + lib_version = PAPI_get_opt( PAPI_LIB_VERSION, NULL ); + if (lib_version == PAPI_EINVAL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_opt", PAPI_EINVAL ); + } + + if ( !quiet) { + printf( "Version.c: Compare and report versions from papi.h and the papi library.\n" ); + printf( "-------------------------------------------------------------------------\n" ); + printf( " MAJOR MINOR REVISION 
INCREMENT\n" ); + printf( "-------------------------------------------------------------------------\n" ); + + printf( "PAPI_VER_CURRENT : %4d %6d %7d %10d\n", + 
PAPI_VERSION_MAJOR( PAPI_VER_CURRENT ), + PAPI_VERSION_MINOR( PAPI_VER_CURRENT ), + PAPI_VERSION_REVISION( PAPI_VER_CURRENT ), + PAPI_VERSION_INCREMENT( PAPI_VER_CURRENT ) ); + printf( "PAPI_library_init: %4d %6d %7d %10d\n", + PAPI_VERSION_MAJOR( init_version ), + PAPI_VERSION_MINOR( init_version ), + PAPI_VERSION_REVISION( init_version ), + PAPI_VERSION_INCREMENT( init_version ) ); + printf( "PAPI_VERSION : %4d %6d %7d %10d\n", + PAPI_VERSION_MAJOR( PAPI_VERSION ), + PAPI_VERSION_MINOR( PAPI_VERSION ), + PAPI_VERSION_REVISION( PAPI_VERSION ), + PAPI_VERSION_INCREMENT (PAPI_VERSION) ); + printf( "PAPI_get_opt : %4d %6d %7d %10d\n", + PAPI_VERSION_MAJOR( lib_version ), + PAPI_VERSION_MINOR( lib_version ), + PAPI_VERSION_REVISION( lib_version ), + PAPI_VERSION_INCREMENT( lib_version) ); + + printf( "-------------------------------------------------------------------------\n" ); + } + + if ( lib_version != PAPI_VERSION ) { + test_fail( __FILE__, __LINE__, "Version Mismatch", PAPI_EINVAL ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/virttime.c b/src/ctests/virttime.c new file mode 100644 index 0000000..29cb407 --- /dev/null +++ b/src/ctests/virttime.c @@ -0,0 +1,66 @@ +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +int +main( int argc, char **argv ) +{ + int retval; + long long elapsed_us, elapsed_cyc; + const PAPI_hw_info_t *hw_info; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + + elapsed_us = PAPI_get_virt_usec( ); + elapsed_cyc = PAPI_get_virt_cyc( ); + + if (!TESTS_QUIET) { + printf( "Testing virt time clock. 
(CPU Max %d MHz, CPU Min %d MHz)\n", + hw_info->cpu_max_mhz, hw_info->cpu_min_mhz ); + printf( "Sleeping for 10 seconds.\n" ); + } + + sleep( 10 ); + + elapsed_us = PAPI_get_virt_usec( ) - elapsed_us; + elapsed_cyc = PAPI_get_virt_cyc( ) - elapsed_cyc; + + if (!TESTS_QUIET) { + printf( "%lld us. %lld cyc.\n", elapsed_us, elapsed_cyc ); + } + +/* Elapsed microseconds and elapsed cycles are not as unambiguous as they appear. + On Pentium III and 4, for example, cycles is a measured value, while useconds + is computed from cycles and mhz. MHz is read from /proc/cpuinfo (on linux). + Thus, any error in MHz is propagated to useconds. + Conversely, on ultrasparc useconds are extracted from a system call (gethrtime()) + and cycles are computed from useconds. Also, MHz comes from a scan of system info, + Thus any error in gethrtime() propagates to both cycles and useconds, and cycles + can be further impacted by errors in reported MHz. + Without knowing the error bars on these system values, we can't really specify + error ranges for our reported values, but we *DO* know that errors for at least + one instance of Pentium 4 (torc17@utk) are on the order of one part per thousand. +*/ + + /* We'll accept 1.5 part per thousand error here (to allow Pentium 4 + and Alpha to pass) */ + if ( elapsed_us > 100000 ) + test_fail( __FILE__, __LINE__, "Virt time greater than .1 seconds!", + PAPI_EMISC ); + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/zero.c b/src/ctests/zero.c new file mode 100644 index 0000000..268c587 --- /dev/null +++ b/src/ctests/zero.c @@ -0,0 +1,176 @@ +/* zero.c */ + +/* This is possibly the most important PAPI tests, and is the one */ +/* that is often used as a quick test that PAPI is working. */ +/* We should make sure that it always passes, if possible. */ + +/* Traditionally it used FLOPS, due to the importance of this to HPC. 
*/ +/* This has been changed to use Instructions/Cycles as some recent */ +/* major Intel chips do not have good floating point events and would fail. */ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "testcode.h" + +#define NUM_EVENTS 2 + +#define NUM_LOOPS 200 + +int main( int argc, char **argv ) { + + int retval, tmp, result, i; + int EventSet1 = PAPI_NULL; + long long values[NUM_EVENTS]; + long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; + double ipc; + int quiet=0; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Initialize the EventSet */ + retval=PAPI_create_eventset(&EventSet1); + if (retval!=PAPI_OK) { + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + } + + /* Add PAPI_TOT_CYC */ + retval=PAPI_add_named_event(EventSet1,"PAPI_TOT_CYC"); + if (retval!=PAPI_OK) { + if (!quiet) printf("Trouble adding PAPI_TOT_CYC\n"); + test_skip( __FILE__, __LINE__, "adding PAPI_TOT_CYC", retval ); + } + + /* Add PAPI_TOT_INS */ + retval=PAPI_add_named_event(EventSet1,"PAPI_TOT_INS"); + if (retval!=PAPI_OK) { + test_fail( __FILE__, __LINE__, "adding PAPI_TOT_INS", retval ); + } + + /* warm up the processor to pull it out of idle state */ + for(i=0;i<100;i++) { + result=instructions_million(); + } + + if (result==CODE_UNIMPLEMENTED) { + if (!quiet) printf("Instructions testcode not available\n"); + test_skip( __FILE__, __LINE__, "No instructions code", retval ); + } + + /* Gather before stats */ + elapsed_us = PAPI_get_real_usec( ); + elapsed_cyc = PAPI_get_real_cyc( ); + elapsed_virt_us = PAPI_get_virt_usec( ); + elapsed_virt_cyc = PAPI_get_virt_cyc( ); + + /* Start PAPI */ + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); 
+ } + + /* our work code */ + for(i=0;i (1000000*NUM_LOOPS)) { + printf("%s Error of %.2f%%\n", "PAPI_TOT_INS", (100.0 * (double)(values[1] - (1000000*NUM_LOOPS)))/(1000000*NUM_LOOPS)); + test_fail( __FILE__, __LINE__, "Instruction validation", 0 ); + } + + /* Check that TOT_CYC is non-zero */ + if(values[0]==0) { + printf("Cycles is zero\n"); + test_fail( __FILE__, __LINE__, "Cycles validation", 0 ); + } + + /* Unless you have an amazing processor, IPC should be < 100 */ + if ((ipc <=0.01 ) || (ipc >=100.0)) { + printf("Unlikely IPC of %.2f%%\n", ipc); + test_fail( __FILE__, __LINE__, "IPC validation", 0 ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/zero_attach.c b/src/ctests/zero_attach.c new file mode 100644 index 0000000..327242e --- /dev/null +++ b/src/ctests/zero_attach.c @@ -0,0 +1,226 @@ +/* This file performs the following test: start, stop and timer functionality for + attached processes. + + - It attempts to use the following two counters. It may use less depending on + hardware counter resource limitations. These are counted in the default counting + domain and default granularity, depending on the platform. Usually this is + the user domain (PAPI_DOM_USER) and thread context (PAPI_GRN_THR). + + PAPI_FP_INS + + PAPI_TOT_CYC + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. 
+*/ + +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#ifdef _AIX +#define _LINUX_SOURCE_COMPAT +#endif + +#if defined(__FreeBSD__) +# define PTRACE_ATTACH PT_ATTACH +# define PTRACE_CONT PT_CONTINUE +#endif + +int +wait_for_attach_and_loop( void ) +{ + kill( getpid( ), SIGSTOP ); + do_flops( NUM_FLOPS ); + kill( getpid( ), SIGSTOP ); + return 0; +} + +int +main( int argc, char **argv ) +{ + int status, retval, num_tests = 1, tmp; + int EventSet1 = PAPI_NULL; + int PAPI_event, mask1; + int num_events1; + long long **values; + long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + const PAPI_component_info_t *cmpinfo; + pid_t pid; + + /* Set TESTS_QUIET variable */ + tests_quiet( argc, argv ); + + /* Initialize the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if ( ( cmpinfo = PAPI_get_component_info( 0 ) ) == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_component_info", 0 ); + } + + if ( cmpinfo->attach == 0 ) { + test_skip( __FILE__, __LINE__, + "Platform does not support attaching", 0 ); + } + + pid = fork( ); + if ( pid < 0 ) { + test_fail( __FILE__, __LINE__, "fork()", PAPI_ESYS ); + } + if ( pid == 0 ) { + exit( wait_for_attach_and_loop( ) ); + } + + /* add PAPI_TOT_CYC and one of the events in + PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, + depending on the availability of the event + on the platform */ + EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); + + if ( cmpinfo->attach_must_ptrace ) { + if ( ptrace( PTRACE_ATTACH, pid, NULL, NULL ) == -1 ) { + perror( "ptrace(PTRACE_ATTACH)" ); + return 1; + } + if ( waitpid( pid, &status, 0 ) == -1 ) { + perror( "waitpid()" ); + exit( 1 ); + } + if ( WIFSTOPPED( status ) == 0 ) + test_fail( __FILE__, 
__LINE__, + "Child process didnt return true to WIFSTOPPED", 0 ); + } + + retval = PAPI_attach( EventSet1, ( unsigned long ) pid ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_attach", retval ); + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + sprintf( add_event_str, "PAPI_add_event[%s]", event_name ); + + /* num_events1 is greater than num_events2 so don't worry. */ + + values = allocate_test_space( num_tests, num_events1 ); + + elapsed_us = PAPI_get_real_usec( ); + elapsed_cyc = PAPI_get_real_cyc( ); + elapsed_virt_us = PAPI_get_virt_usec( ); + elapsed_virt_cyc = PAPI_get_virt_cyc( ); + + /* Wait for the SIGSTOP. */ + if ( cmpinfo->attach_must_ptrace ) { + if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { + perror( "ptrace(PTRACE_CONT)" ); + return 1; + } + if ( waitpid( pid, &status, 0 ) == -1 ) { + perror( "waitpid()" ); + exit( 1 ); + } + if ( WIFSTOPPED( status ) == 0 ) { + test_fail( __FILE__, __LINE__, + "Child process didn't return true to WIFSTOPPED", 0 ); + } + if ( WSTOPSIG( status ) != SIGSTOP ) { + test_fail( __FILE__, __LINE__, + "Child process didn't stop on SIGSTOP", 0 ); + } + } + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + /* Wait for the SIGSTOP. 
*/ + if ( cmpinfo->attach_must_ptrace ) { + if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { + perror( "ptrace(PTRACE_CONT)" ); + return 1; + } + if ( waitpid( pid, &status, 0 ) == -1 ) { + perror( "waitpid()" ); + exit( 1 ); + } + if ( WIFSTOPPED( status ) == 0 ) { + test_fail( __FILE__, __LINE__, + "Child process didn't return true to WIFSTOPPED", 0 ); + } + if ( WSTOPSIG( status ) != SIGSTOP ) { + test_fail( __FILE__, __LINE__, + "Child process didn't stop on SIGSTOP", 0 ); + } + } + + elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; + elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + remove_test_events( &EventSet1, mask1 ); + + if ( cmpinfo->attach_must_ptrace ) { + if ( ptrace( PTRACE_CONT, pid, NULL, NULL ) == -1 ) { + perror( "ptrace(PTRACE_CONT)" ); + return 1; + } + } + + if ( waitpid( pid, &status, 0 ) == -1 ) { + perror( "waitpid()" ); + exit( 1 ); + } + if ( WIFEXITED( status ) == 0 ) { + test_fail( __FILE__, __LINE__, + "Child process didn't return true to WIFEXITED", 0 ); + } + + if (!TESTS_QUIET) { + printf( "Test case: 3rd party attach start, stop.\n" ); + printf( "-----------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-------------------------------------------------------------------------\n" ); + + printf( "Test type : \t 1\n" ); + + sprintf( add_event_str, "%-12s : \t", event_name ); + printf( TAB1, add_event_str, values[0][1] ); + printf( TAB1, "PAPI_TOT_CYC : 
\t", values[0][0] ); + printf( TAB1, "Real usec : \t", elapsed_us ); + printf( TAB1, "Real cycles : \t", elapsed_cyc ); + printf( TAB1, "Virt usec : \t", elapsed_virt_us ); + printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc ); + + printf( "-------------------------------------------------------------------------\n" ); + + printf( "Verification: none\n" ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/zero_flip.c b/src/ctests/zero_flip.c new file mode 100644 index 0000000..69385ea --- /dev/null +++ b/src/ctests/zero_flip.c @@ -0,0 +1,167 @@ +/* This file performs the following test: start, stop and timer functionality + + - It attempts to use the following two counters. + It may use less depending on hardware counter resource limitations. + These are counted in the default counting domain and default granularity, + depending on the platform. + Usually this is the user domain (PAPI_DOM_USER) and + thread context (PAPI_GRN_THR). + + PAPI_FP_INS + + PAPI_TOT_CYC + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. 
+*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int retval, eventcnt, events[2], i, tmp; + int EventSet1 = PAPI_NULL, EventSet2 = PAPI_NULL; + int PAPI_event; + long long values1[2], values2[2]; + long long elapsed_us, elapsed_cyc; + char event_name[PAPI_MAX_STR_LEN], add_event_str[PAPI_MAX_STR_LEN]; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + + /* query and set up the right instruction to monitor */ + if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) + PAPI_event = PAPI_FP_OPS; + else + PAPI_event = PAPI_TOT_INS; + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + sprintf( add_event_str, "PAPI_add_event[%s]", event_name ); + + retval = PAPI_create_eventset( &EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + /* Add the events */ + if (!quiet) printf( "Adding: %s\n", event_name ); + retval = PAPI_add_event( EventSet1, PAPI_event ); + if ( retval != PAPI_OK ) { + if (!quiet) printf("Trouble adding event\n"); + test_skip( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + retval = PAPI_add_event( EventSet1, PAPI_TOT_CYC ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + + /* Add them reversed to EventSet2 */ + + retval = PAPI_create_eventset( &EventSet2 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval ); + + eventcnt = 2; + retval = PAPI_list_events( EventSet1, events, &eventcnt ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_list_events", retval ); + + for ( i = eventcnt - 1; i >= 0; i-- ) { + retval = 
PAPI_event_code_to_name( events[i], event_name ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + retval = PAPI_add_event( EventSet2, events[i] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_add_event", retval ); + } + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet1, values1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + retval = PAPI_start( EventSet2 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( NUM_FLOPS ); + + retval = PAPI_stop( EventSet2, values2 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + retval = PAPI_cleanup_eventset( EventSet1 ); /* JT */ + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset( &EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + retval = PAPI_cleanup_eventset( EventSet2 ); /* JT */ + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval ); + + retval = PAPI_destroy_eventset( &EventSet2 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval ); + + if ( !quiet ) { + printf( "Test case 0: start, stop.\n" ); + printf( "-----------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + 
stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf + ( "-------------------------------------------------------------------------\n" ); + + printf( "Test type : \t 1\t 2\n" ); + + sprintf( add_event_str, "%-12s : \t", event_name ); + printf( TAB2, add_event_str, values1[0], values2[1] ); + printf( TAB2, "PAPI_TOT_CYC : \t", values1[1], values2[0] ); + printf( TAB1, "Real usec : \t", elapsed_us ); + printf( TAB1, "Real cycles : \t", elapsed_cyc ); + + printf + ( "-------------------------------------------------------------------------\n" ); + + printf( "Verification: none\n" ); + } + + test_pass( __FILE__ ); + + return 0; + +} diff --git a/src/ctests/zero_fork.c b/src/ctests/zero_fork.c new file mode 100644 index 0000000..cb336c1 --- /dev/null +++ b/src/ctests/zero_fork.c @@ -0,0 +1,166 @@ +/* +* File: zero_fork.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: +* +*/ + +/* This file performs the following test: + + PAPI_library_init() + Add two events + PAPI_start() + fork() + / \ + parent child + | PAPI_library_init() + | Add two events + | PAPI_start() + | PAPI_stop() + | + fork()-----\ + | child + parent PAPI_library_init() + | Add two events + | PAPI_start() + | PAPI_stop() + | + wait() + wait() + | + PAPI_stop() + + No validation is done + */ + +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int EventSet1 = PAPI_NULL; +int PAPI_event, mask1; +int num_events1 = 2; +long long elapsed_us, elapsed_cyc; +long long **values; +char event_name[PAPI_MAX_STR_LEN]; +int retval, num_tests = 1; + +void +process_init( void ) +{ + if (!TESTS_QUIET) printf( "Process %d \n", ( int ) getpid( ) ); + + /* Initialize PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* add PAPI_TOT_CYC and one of the events in + PAPI_FP_INS, PAPI_FP_OPS 
or PAPI_TOT_INS, + depends on the availability of the event + on the platform */ + EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); + + values = allocate_test_space( num_tests, num_events1 ); + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + elapsed_us = PAPI_get_real_usec( ); + elapsed_cyc = PAPI_get_real_cyc( ); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } +} + +void +process_fini( void ) +{ + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + remove_test_events( &EventSet1, mask1 ); + + if (!TESTS_QUIET) { + printf( "Process %d %-12s : \t%lld\n", ( int ) getpid( ), event_name, + values[0][1] ); + printf( "Process %d PAPI_TOT_CYC : \t%lld\n", ( int ) getpid( ), + values[0][0] ); + printf( "Process %d Real usec : \t%lld\n", ( int ) getpid( ), + elapsed_us ); + printf( "Process %d Real cycles : \t%lld\n", ( int ) getpid( ), + elapsed_cyc ); + } + + free_test_space( values, num_tests ); + +} + +int +main( int argc, char **argv ) +{ + int flops1; + int retval; + + tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ +# if (defined(__ALPHA) && defined(__osf__)) + test_skip( __FILE__, __LINE__, "main: fork not supported.", 0 ); +#endif + + if (!TESTS_QUIET) { + printf( "This tests if PAPI_library_init(),2*fork(),PAPI_library_init() works.\n" ); + } + /* Initialize PAPI for this process */ + process_init( ); + flops1 = 1000000; + if ( fork( ) == 0 ) { + /* Initialize PAPI for the child process */ + process_init( ); + /* Let the child process do work */ + do_flops( flops1 ); + /* Measure the child process */ + process_fini( ); + exit( 0 ); + } + flops1 = 2000000; 
+ if ( fork( ) == 0 ) { + /* Initialize PAPI for the child process */ + process_init( ); + /* Let the child process do work */ + do_flops( flops1 ); + /* Measure the child process */ + process_fini( ); + exit( 0 ); + } + /* Let this process do work */ + flops1 = 4000000; + do_flops( flops1 ); + + /* Wait for child to finish */ + wait( &retval ); + /* Wait for child to finish */ + wait( &retval ); + + /* Measure this process */ + process_fini( ); + + test_pass( __FILE__ ); + return 0; +} diff --git a/src/ctests/zero_named.c b/src/ctests/zero_named.c new file mode 100644 index 0000000..40d655d --- /dev/null +++ b/src/ctests/zero_named.c @@ -0,0 +1,144 @@ +/* This test exercises the PAPI_{query, add, remove}_event APIs + for PRESET events. + It more or less duplicates the functionality of the classic "zero" test. +*/ + +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +int +main( int argc, char **argv ) +{ + int retval, num_tests = 1, tmp; + int EventSet = PAPI_NULL; + int num_events = 2; + long long **values; + long long elapsed_us, elapsed_cyc, elapsed_virt_us, elapsed_virt_cyc; + const char *event_names[] = {"PAPI_TOT_CYC","PAPI_TOT_INS"}; + char add_event_str[PAPI_MAX_STR_LEN]; + double cycles_error; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + /* Init the PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + /* Verify that the named events exist */ + retval = PAPI_query_named_event(event_names[0]); + if ( retval == PAPI_OK) { + retval = PAPI_query_named_event(event_names[1]); + } + if ( retval != PAPI_OK ) { + if (!quiet) printf("Trouble querying events\n"); + test_skip( __FILE__, __LINE__, "PAPI_query_named_event", retval ); + } + + /* Create an empty event set */ + retval = PAPI_create_eventset( &EventSet ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, 
"PAPI_create_eventset", retval ); + + /* add the events named above */ + retval = PAPI_add_named_event( EventSet, event_names[0] ); + if ( retval != PAPI_OK ) { + sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[0] ); + test_fail( __FILE__, __LINE__, add_event_str, retval ); + } + + retval = PAPI_add_named_event( EventSet, event_names[1] ); + if ( retval != PAPI_OK ) { + sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[1] ); + test_fail( __FILE__, __LINE__, add_event_str, retval ); + } + + values = allocate_test_space( num_tests, num_events ); + + /* Gather before stats */ + elapsed_us = PAPI_get_real_usec( ); + elapsed_cyc = PAPI_get_real_cyc( ); + elapsed_virt_us = PAPI_get_virt_usec( ); + elapsed_virt_cyc = PAPI_get_virt_cyc( ); + + /* Start PAPI */ + retval = PAPI_start( EventSet ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + /* our test code */ + do_flops( NUM_FLOPS ); + + /* Stop PAPI */ + retval = PAPI_stop( EventSet, values[0] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + /* Calculate total values */ + elapsed_virt_us = PAPI_get_virt_usec( ) - elapsed_virt_us; + elapsed_virt_cyc = PAPI_get_virt_cyc( ) - elapsed_virt_cyc; + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + /* remove PAPI_TOT_CYC and PAPI_TOT_INS */ + retval = PAPI_remove_named_event( EventSet, event_names[0] ); + if ( retval != PAPI_OK ) { + sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[0] ); + test_fail( __FILE__, __LINE__, add_event_str, retval ); + } + + retval = PAPI_remove_named_event( EventSet, event_names[1] ); + if ( retval != PAPI_OK ) { + sprintf( add_event_str, "PAPI_add_named_event[%s]", event_names[1] ); + test_fail( __FILE__, __LINE__, add_event_str, retval ); + } + + if ( !quiet ) { + printf( "PAPI_{query, add, remove}_named_event API test.\n" ); + printf( 
"-----------------------------------------------\n" ); + tmp = PAPI_get_opt( PAPI_DEFDOM, NULL ); + printf( "Default domain is: %d (%s)\n", tmp, + stringify_all_domains( tmp ) ); + tmp = PAPI_get_opt( PAPI_DEFGRN, NULL ); + printf( "Default granularity is: %d (%s)\n", tmp, + stringify_granularity( tmp ) ); + printf( "Using %d iterations of c += a*b\n", NUM_FLOPS ); + printf( "-------------------------------------------------------------------------\n" ); + + printf( "Test type : \t 1\n" ); + + /* cycles is first, other event second */ + sprintf( add_event_str, "%-12s : \t", event_names[0] ); + printf( TAB1, add_event_str, values[0][0] ); + sprintf( add_event_str, "%-12s : \t", event_names[1] ); + printf( TAB1, add_event_str, values[0][1] ); + + printf( TAB1, "Real usec : \t", elapsed_us ); + printf( TAB1, "Real cycles : \t", elapsed_cyc ); + printf( TAB1, "Virt usec : \t", elapsed_virt_us ); + printf( TAB1, "Virt cycles : \t", elapsed_virt_cyc ); + + printf( "-------------------------------------------------------------------------\n" ); + + printf( "Verification: PAPI_TOT_CYC should be roughly real_cycles\n" ); + cycles_error=100.0*((double)values[0][0] - (double)elapsed_cyc)/ + (double)values[0][0]; + if (cycles_error>10.0) { + printf("Error of %.2f%%\n",cycles_error); + test_fail( __FILE__, __LINE__, "validation", 0 ); + } + + } + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/zero_omp.c b/src/ctests/zero_omp.c new file mode 100644 index 0000000..aa3e4d3 --- /dev/null +++ b/src/ctests/zero_omp.c @@ -0,0 +1,198 @@ +/* +* File: zero_omp.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: Nils Smeds +* smeds@pdc.kth.se +* Anders Nilsson +* anni@pdc.kth.se +*/ + +/* This file performs the following test: start, stop and timer +functionality for 2 slave OMP threads + + - It attempts to use the following two counters. It may use less +depending on hardware counter resource limitations. 
These are counted +in the default counting domain and default granularity, depending on +the platform. Usually this is the user domain (PAPI_DOM_USER) and +thread context (PAPI_GRN_THR). + + + PAPI_FP_INS + + PAPI_TOT_CYC + +Each thread inside the Thread routine: + - Get cyc. + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. + - Get cyc. + +Master serial thread: + - Get us. + - Get cyc. + - Run parallel for loop + - Get us. + - Get cyc. +*/ + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#ifdef _OPENMP +#include +#else +#error "This compiler does not understand OPENMP" +#endif + +const PAPI_hw_info_t *hw_info = NULL; + +void +Thread( int n ) +{ + int retval, num_tests = 1; + int EventSet1 = PAPI_NULL; + int PAPI_event, mask1; + int num_events1; + long long **values; + long long elapsed_us, elapsed_cyc; + char event_name[PAPI_MAX_STR_LEN]; + + if (!TESTS_QUIET) { + printf( "Thread %#x started\n", omp_get_thread_num( ) ); + } + + /* add PAPI_TOT_CYC and one of the events in + PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, + depending on the availability of the event + on the platform */ + EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); + if (num_events1==0) { + if (!TESTS_QUIET) printf("No events added!\n"); + test_fail(__FILE__,__LINE__,"No events",0); + } + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + values = allocate_test_space( num_tests, num_events1 ); + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + do_flops( n ); + + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + 
elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + remove_test_events( &EventSet1, mask1 ); + + if ( !TESTS_QUIET ) { + printf( "Thread %#x %-12s : \t%lld\n", omp_get_thread_num( ), + event_name, values[0][1] ); + printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", omp_get_thread_num( ), + values[0][0] ); + printf( "Thread %#x Real usec : \t%lld\n", omp_get_thread_num( ), + elapsed_us ); + printf( "Thread %#x Real cycles : \t%lld\n", omp_get_thread_num( ), + elapsed_cyc ); + } + + /* It is illegal for the threads to exit in OpenMP */ + /* test_pass(__FILE__,0,0); */ + free_test_space( values, num_tests ); + + PAPI_unregister_thread( ); + if (!TESTS_QUIET) { + printf( "Thread %#x finished\n", omp_get_thread_num( ) ); + } +} + +int +main( int argc, char **argv ) +{ + int retval; + long long elapsed_us, elapsed_cyc; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + hw_info = PAPI_get_hardware_info( ); + if ( hw_info == NULL ) { + test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); + } + + if (PAPI_query_event(PAPI_TOT_INS)!=PAPI_OK) { + if (!quiet) printf("Can't find PAPI_TOT_INS\n"); + test_skip(__FILE__,__LINE__,"Event missing",1); + } + + if (PAPI_query_event(PAPI_TOT_CYC)!=PAPI_OK) { + if (!quiet) printf("Can't find PAPI_TOT_CYC\n"); + test_skip(__FILE__,__LINE__,"Event missing",1); + } + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + + retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) + ( omp_get_thread_num ) ); + if ( retval != PAPI_OK ) { + if ( retval == PAPI_ECMP ) { + if (!quiet) printf("Trouble init threads\n"); + test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } + else { + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } + } +#pragma omp parallel + { + Thread( 1000000 * ( 
omp_get_thread_num( ) + 1 ) ); + } + omp_set_num_threads( 1 ); + Thread( 1000000 * ( omp_get_thread_num( ) + 1 ) ); + omp_set_num_threads( omp_get_max_threads( ) ); +#pragma omp parallel + { + Thread( 1000000 * ( omp_get_thread_num( ) + 1 ) ); + } + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + if ( !TESTS_QUIET ) { + printf( "Master real usec : \t%lld\n", elapsed_us ); + printf( "Master real cycles : \t%lld\n", elapsed_cyc ); + } + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/ctests/zero_pthreads.c b/src/ctests/zero_pthreads.c new file mode 100644 index 0000000..9e08d4d --- /dev/null +++ b/src/ctests/zero_pthreads.c @@ -0,0 +1,222 @@ +/* This file performs the following test: start, stop and timer +functionality for 2 slave pthreads + + - It attempts to use the following two counters. It may use less +depending on hardware counter resource limitations. These are counted +in the default counting domain and default granularity, depending on +the platform. Usually this is the user domain (PAPI_DOM_USER) and +thread context (PAPI_GRN_THR). + + + PAPI_FP_INS + + PAPI_TOT_CYC + +Each of 2 slave pthreads: + - Get cyc. + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. + - Get cyc. + +Master pthread: + - Get us. + - Get cyc. + - Fork threads + - Wait for threads to exit + - Get us. + - Get cyc. 
+*/ + +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +void * +Thread( void *arg ) +{ + int retval, num_tests = 1; + int EventSet1 = PAPI_NULL; + int PAPI_event, mask1; + int num_events1; + long long **values; + long long elapsed_us, elapsed_cyc; + char event_name[PAPI_MAX_STR_LEN]; + + retval = PAPI_register_thread( ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_register_thread", retval ); + } + + if (!TESTS_QUIET) { + printf( "Thread %#x started\n", ( int ) pthread_self( ) ); + } + + /* add PAPI_TOT_CYC and one of the events in + PAPI_FP_INS, PAPI_FP_OPS or PAPI_TOT_INS, + depending on the availability of the event + on the platform */ + EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); + if (!TESTS_QUIET) { + printf("Events %d\n",num_events1); + } + if (num_events1<2) { + test_fail( __FILE__, __LINE__, "Not enough events", retval ); + } + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + } + + values = allocate_test_space( num_tests, num_events1 ); + + elapsed_us = PAPI_get_real_usec( ); + elapsed_cyc = PAPI_get_real_cyc( ); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + } + + do_flops( *( int * ) arg ); + + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + } + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + remove_test_events( &EventSet1, mask1 ); + + if ( !TESTS_QUIET ) { + printf( "Thread %#x %-12s : \t%lld\n", ( int ) pthread_self( ), + event_name, values[0][1] ); + printf( "Thread %#x PAPI_TOT_CYC : \t%lld\n", (int) pthread_self(), + values[0][0] ); + printf( "Thread %#x Real usec : \t%lld\n", + ( int ) pthread_self( ), + elapsed_us ); + 
printf( "Thread %#x Real cycles : \t%lld\n", (int) pthread_self(), + elapsed_cyc ); + } + + free_test_space( values, num_tests ); + + retval = PAPI_unregister_thread( ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_unregister_thread", retval ); + return NULL; +} + +int +main( int argc, char **argv ) +{ + pthread_t e_th, f_th, g_th, h_th; + int flops1, flops2, flops3, flops4; + int retval, rc; + pthread_attr_t attr; + long long elapsed_us, elapsed_cyc; + int quiet; + + /* Set TESTS_QUIET variable */ + quiet = tests_quiet( argc, argv ); + + /* Init PAPI library */ + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + if (PAPI_query_event(PAPI_TOT_INS)!=PAPI_OK) { + if (!quiet) printf("Can't find PAPI_TOT_INS\n"); + test_skip(__FILE__,__LINE__,"Event missing",1); + } + + if (PAPI_query_event(PAPI_TOT_CYC)!=PAPI_OK) { + if (!quiet) printf("Can't find PAPI_TOT_CYC\n"); + test_skip(__FILE__,__LINE__,"Event missing",1); + } + + retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) + ( pthread_self ) ); + + if ( retval != PAPI_OK ) { + if ( retval == PAPI_ECMP ) { + test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } + else { + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } + } + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + pthread_attr_init( &attr ); +#ifdef PTHREAD_CREATE_UNDETACHED + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_UNDETACHED ); +#endif +#ifdef PTHREAD_SCOPE_SYSTEM + retval = pthread_attr_setscope( &attr, PTHREAD_SCOPE_SYSTEM ); + if ( retval != 0 ) + test_skip( __FILE__, __LINE__, "pthread_attr_setscope", retval ); +#endif + + flops1 = 1000000; + rc = pthread_create( &e_th, &attr, Thread, ( void * ) &flops1 ); + if ( rc ) { + retval = PAPI_ESYS; + test_fail( __FILE__, __LINE__, "pthread_create", retval ); + } + flops2 = 2000000; + rc = pthread_create( 
&f_th, &attr, Thread, ( void * ) &flops2 ); + if ( rc ) { + retval = PAPI_ESYS; + test_fail( __FILE__, __LINE__, "pthread_create", retval ); + } + + flops3 = 4000000; + rc = pthread_create( &g_th, &attr, Thread, ( void * ) &flops3 ); + if ( rc ) { + retval = PAPI_ESYS; + test_fail( __FILE__, __LINE__, "pthread_create", retval ); + } + + flops4 = 8000000; + rc = pthread_create( &h_th, &attr, Thread, ( void * ) &flops4 ); + if ( rc ) { + retval = PAPI_ESYS; + test_fail( __FILE__, __LINE__, "pthread_create", retval ); + } + + pthread_attr_destroy( &attr ); + flops1 = 500000; + Thread( &flops1 ); + pthread_join( h_th, NULL ); + pthread_join( g_th, NULL ); + pthread_join( f_th, NULL ); + pthread_join( e_th, NULL ); + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + if ( !quiet ) { + printf( "Master real usec : \t%lld\n", elapsed_us ); + printf( "Master real cycles : \t%lld\n", elapsed_cyc ); + } + + test_pass( __FILE__ ); + + pthread_exit( NULL ); + + return 0; +} diff --git a/src/ctests/zero_shmem.c b/src/ctests/zero_shmem.c new file mode 100644 index 0000000..4b6c123 --- /dev/null +++ b/src/ctests/zero_shmem.c @@ -0,0 +1,101 @@ +/* This code attempts to test that SHMEM works with PAPI */ +/* SHMEM was developed by Cray and supported by various */ +/* other vendors. */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +void +Thread( int n ) +{ + int retval, num_tests = 1; + int EventSet1 = PAPI_NULL; + int mask1 = 0x5; + int num_events1; + long long **values; + long long elapsed_us, elapsed_cyc; + + EventSet1 = add_test_events( &num_events1, &mask1, 1 ); + + /* num_events1 is greater than num_events2 so don't worry. 
*/ + + values = allocate_test_space( num_tests, num_events1 ); + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + retval = PAPI_start( EventSet1 ); + + /* we should indicate failure somehow, not just exit */ + if ( retval != PAPI_OK ) + exit( 1 ); + + do_flops( n ); + + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) + exit( 1 ); + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + remove_test_events( &EventSet1, mask1 ); + + printf( "Thread %#x PAPI_FP_INS : \t%lld\n", n / 1000000, + ( values[0] )[0] ); + printf( "Thread %#x PAPI_TOT_CYC: \t%lld\n", n / 1000000, + ( values[0] )[1] ); + printf( "Thread %#x Real usec : \t%lld\n", n / 1000000, + elapsed_us ); + printf( "Thread %#x Real cycles : \t%lld\n", n / 1000000, + elapsed_cyc ); + + free_test_space( values, num_tests ); +} + +int +main( int argc, char **argv ) +{ + int quiet; + long long elapsed_us, elapsed_cyc; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + +#ifdef HAVE_OPENSHMEM + /* Start 2 processing elements (SHMEM call) */ + start_pes( 2 ); + Thread( 1000000 * ( _my_pe( ) + 1 ) ); +#else + if (!quiet) { + printf("No OpenSHMEM support\n"); + } + test_skip( __FILE__, __LINE__, "OpenSHMEM support not found, skipping.", 0); +#endif + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + printf( "Master real usec : \t%lld\n", elapsed_us ); + printf( "Master real cycles : \t%lld\n", elapsed_cyc ); + + return 0; +} diff --git a/src/ctests/zero_smp.c b/src/ctests/zero_smp.c new file mode 100644 index 0000000..061bf14 --- /dev/null +++ b/src/ctests/zero_smp.c @@ -0,0 +1,176 @@ +/* This file performs the following test: start, stop and timer +functionality for 2 slave native SMP threads + + - It attempts to use the following two counters. 
It may use less +depending on hardware counter resource limitations. These are counted +in the default counting domain and default granularity, depending on +the platform. Usually this is the user domain (PAPI_DOM_USER) and +thread context (PAPI_GRN_THR). + + + PAPI_FP_INS + + PAPI_TOT_CYC + +Each of 2 slave pthreads: + - Get cyc. + - Get us. + - Start counters + - Do flops + - Stop and read counters + - Get us. + - Get cyc. + +Master pthread: + - Get us. + - Get cyc. + - Fork threads + - Wait for threads to exit + - Get us. + - Get cyc. +*/ + + +#include +#include + +#include "papi.h" +#include "papi_test.h" + +#include "do_loops.h" + +#if defined(sun) && defined(sparc) +#include +#elif defined(mips) && defined(sgi) && defined(unix) +#include +#elif defined(_AIX) || defined(__linux__) +#include +#endif + + +void +Thread( int t, int n ) +{ + int retval, num_tests = 1; + int EventSet1 = PAPI_NULL; + int PAPI_event, mask1; + int num_events1; + long long **values; + long long elapsed_us, elapsed_cyc; + char event_name[PAPI_MAX_STR_LEN]; + + /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or + PAPI_TOT_INS, depending on the availability of the event on the + platform */ + EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); + + retval = PAPI_event_code_to_name( PAPI_event, event_name ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); + + values = allocate_test_space( num_tests, num_events1 ); + + retval = PAPI_start( EventSet1 ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_start", retval ); + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + + do_flops( n ); + + elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; + + retval = PAPI_stop( EventSet1, values[0] ); + if ( retval != PAPI_OK ) + test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); + + remove_test_events( &EventSet1, mask1 ); 
+ + if ( !TESTS_QUIET ) { + printf( "Thread %#x %-12s : \t%lld\n", t, event_name, + values[0][1] ); + printf( "Thread %#x PAPI_TOT_CYC : \t%lld\n", t, + values[0][0] ); + } + + free_test_space( values, num_tests ); + if ( !TESTS_QUIET ) { + printf( "Thread %#x Real usec : \t%lld\n", t, elapsed_us ); + printf( "Thread %#x Real cycles : \t%lld\n", t, elapsed_cyc ); + } + PAPI_unregister_thread( ); +} + +int +main( int argc, char **argv ) +{ + int i, retval, quiet; + long long elapsed_us, elapsed_cyc; + + /* Set TESTS_QUIET variable */ + quiet=tests_quiet( argc, argv ); + + retval = PAPI_library_init( PAPI_VER_CURRENT ); + if ( retval != PAPI_VER_CURRENT ) { + test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); + } + + elapsed_us = PAPI_get_real_usec( ); + + elapsed_cyc = PAPI_get_real_cyc( ); + +#if defined(_AIX) || defined(__linux__) + retval = + PAPI_thread_init( ( unsigned long ( * )( void ) ) ( pthread_self ) ); + if ( retval != PAPI_OK ) { + if ( retval == PAPI_ECMP ) + test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); + else + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } +#if defined(_AIX) +#pragma ibm parallel_loop +#endif + +#elif defined(sgi) && defined(mips) + retval = + PAPI_thread_init( ( unsigned long ( * )( void ) ) ( mp_my_threadnum ) ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } +#pragma parallel +#pragma local(i) +#pragma pfor +#elif defined(sun) && defined(sparc) + retval = PAPI_thread_init( ( unsigned long ( * )( void ) ) ( thr_self ) ); + if ( retval != PAPI_OK ) { + test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); + } +#pragma MP taskloop private(i) +#else /* none of the thread-id providers handled above is available */ + if (!quiet) { + printf("This test only runs on AIX/Linux/IRIX/Solaris\n"); + } + test_skip(__FILE__, __LINE__, "Architecture not included in this test file yet.", 0); +#endif + for ( i = 1; i < 3; i++ ) { + Thread( i, 10000000 * i ); + } + + elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; +
+ elapsed_us = PAPI_get_real_usec( ) - elapsed_us; + + if ( !quiet ) { + printf( "Master real usec : \t%lld\n", elapsed_us ); + printf( "Master real cycles : \t%lld\n", elapsed_cyc ); + } + + // FIXME: we don't really validate anything here + + test_pass( __FILE__ ); + + return 0; +} diff --git a/src/darwin-common.c b/src/darwin-common.c new file mode 100644 index 0000000..d7ff0af --- /dev/null +++ b/src/darwin-common.c @@ -0,0 +1,422 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" + +#include "darwin-memory.h" +#include "darwin-common.h" + +#include "x86_cpuid_info.h" + +PAPI_os_info_t _papi_os_info; + +/* The locks used by Darwin */ + +#if defined(USE_PTHREAD_MUTEXES) +pthread_mutex_t _papi_hwd_lock_data[PAPI_MAX_LOCK]; +#else +volatile unsigned int _papi_hwd_lock_data[PAPI_MAX_LOCK]; +#endif + + +static int _darwin_init_locks(void) { + + int i; + + for ( i = 0; i < PAPI_MAX_LOCK; i++ ) { +#if defined(USE_PTHREAD_MUTEXES) + pthread_mutex_init(&_papi_hwd_lock_data[i],NULL); +#else + _papi_hwd_lock_data[i] = MUTEX_OPEN; +#endif + } + + return PAPI_OK; +} + + +int +_darwin_detect_hypervisor(char *virtual_vendor_name) { + + int retval=0; + +#if defined(__i386__)||defined(__x86_64__) + retval=_x86_detect_hypervisor(virtual_vendor_name); +#else + (void) virtual_vendor_name; +#endif + + return retval; +} + + +#define _PATH_SYS_SYSTEM "/sys/devices/system" +#define _PATH_SYS_CPU0 _PATH_SYS_SYSTEM "/cpu/cpu0" + +static char pathbuf[PATH_MAX] = "/"; + + +static char * +search_cpu_info( FILE * f, char *search_str, char *line ) +{ + /* This function courtesy of Rudolph Berrendorf! */ + /* See the home page for the German version of PAPI. 
*/ + char *s; + + while ( fgets( line, 256, f ) != NULL ) { + if ( strstr( line, search_str ) != NULL ) { + /* ignore all characters in line up to : */ + for ( s = line; *s && ( *s != ':' ); ++s ); + if ( *s ) + return s; + } + } + return NULL; +} + + +/* Fill hwinfo with CPU vendor, model name, stepping, family, model, min/max frequency and CPU count read through sysctl. Returns PAPI_OK, or PAPI_ESYS when any lookup fails. cpuinfo_mhz is not written here. */ +int +_darwin_get_cpu_info( PAPI_hw_info_t *hwinfo, int *cpuinfo_mhz ) +{ + + int mib[4]; + size_t len; + char buffer[BUFSIZ]; + long long ll; + int ival = 0; /* receives integer-valued sysctl results */ + /* "sysctl -a" shows lots of info we can get on OSX */ + + /**********/ + /* Vendor */ + /**********/ + len = 3; + if (sysctlnametomib("machdep.cpu.vendor", mib, &len) == -1) return PAPI_ESYS; /* mib[] would be uninitialised otherwise */ + + len = BUFSIZ; + if (sysctl(mib, 3, &buffer, &len, NULL, 0) == -1) { + return PAPI_ESYS; + } + strncpy( hwinfo->vendor_string,buffer,len); + + hwinfo->vendor = PAPI_VENDOR_INTEL; + + + /**************/ + /* Model Name */ + /**************/ + len = 3; + if (sysctlnametomib("machdep.cpu.brand_string", mib, &len) == -1) return PAPI_ESYS; + + len = BUFSIZ; + if (sysctl(mib, 3, &buffer, &len, NULL, 0) == -1) { + return PAPI_ESYS; + } + strncpy( hwinfo->model_string,buffer,len); + + /************/ + /* Revision */ + /************/ + len = 3; + if (sysctlnametomib("machdep.cpu.stepping", mib, &len) == -1) return PAPI_ESYS; + + len = sizeof(ival); /* integer sysctl: read it as an int, not as the first byte of a char buffer */ + if (sysctl(mib, 3, &ival, &len, NULL, 0) == -1) { + return PAPI_ESYS; + } + + hwinfo->cpuid_stepping=ival; + hwinfo->revision=(float)(hwinfo->cpuid_stepping); + + /**********/ + /* Family */ + /**********/ + len = 3; + if (sysctlnametomib("machdep.cpu.family", mib, &len) == -1) return PAPI_ESYS; + + len = sizeof(ival); + if (sysctl(mib, 3, &ival, &len, NULL, 0) == -1) { + return PAPI_ESYS; + } + + hwinfo->cpuid_family=ival; + + /**********/ + /* Model */ + /**********/ + len = 3; + if (sysctlnametomib("machdep.cpu.model", mib, &len) == -1) return PAPI_ESYS; + + len = sizeof(ival); /* models >= 128 used to turn negative through plain char */ + if (sysctl(mib, 3, &ival, &len, NULL, 0) == -1) { + return PAPI_ESYS; + } + + hwinfo->cpuid_model=ival; + hwinfo->model=hwinfo->cpuid_model; + + /*************/ + /* Frequency */ + /*************/ + len = 2; + if (sysctlnametomib("hw.cpufrequency_max", mib, &len) == -1) return PAPI_ESYS; + + len = 8; + if (sysctl(mib, 2, &ll, 
&len, NULL, 0) == -1) { + return PAPI_ESYS; + } + + hwinfo->cpu_max_mhz=(int)(ll/(1000*1000)); + + len = 2; + if (sysctlnametomib("hw.cpufrequency_min", mib, &len) == -1) return PAPI_ESYS; + + len = 8; + if (sysctl(mib, 2, &ll, &len, NULL, 0) == -1) { + return PAPI_ESYS; + } + + hwinfo->cpu_min_mhz=(int)(ll/(1000*1000)); + + /**********/ + /* ncpu */ + /**********/ + len = 2; + if (sysctlnametomib("hw.ncpu", mib, &len) == -1) return PAPI_ESYS; + + len = sizeof(ival); /* CPU counts above 127 no longer wrap through plain char */ + if (sysctl(mib, 2, &ival, &len, NULL, 0) == -1) { + return PAPI_ESYS; + } + + hwinfo->totalcpus=ival; + + + return PAPI_OK; +} + + +int +_darwin_get_system_info( papi_mdi_t *mdi ) { + + int retval; + + char maxargs[PAPI_HUGE_STR_LEN]; + pid_t pid; + + int cpuinfo_mhz,sys_min_khz,sys_max_khz; + + /* Software info */ + + /* Path and args */ + + pid = getpid( ); + if ( pid < 0 ) { + PAPIERROR( "getpid() returned < 0" ); + return PAPI_ESYS; + } + mdi->pid = pid; + +#if 0 + sprintf( maxargs, "/proc/%d/exe", ( int ) pid ); + if ( readlink( maxargs, mdi->exe_info.fullname, PAPI_HUGE_STR_LEN ) < 0 ) { + PAPIERROR( "readlink(%s) returned < 0", maxargs ); + return PAPI_ESYS; + } + + /* Careful, basename can modify its argument */ + + strcpy( maxargs, mdi->exe_info.fullname ); + strcpy( mdi->exe_info.address_info.name, basename( maxargs ) ); + + SUBDBG( "Executable is %s\n", mdi->exe_info.address_info.name ); + SUBDBG( "Full Executable is %s\n", mdi->exe_info.fullname ); + + /* Executable regions, may require reading /proc/pid/maps file */ + + retval = _darwin_update_shlib_info( mdi ); + SUBDBG( "Text: Start %p, End %p, length %d\n", + mdi->exe_info.address_info.text_start, + mdi->exe_info.address_info.text_end, + ( int ) ( mdi->exe_info.address_info.text_end - + mdi->exe_info.address_info.text_start ) ); + SUBDBG( "Data: Start %p, End %p, length %d\n", + mdi->exe_info.address_info.data_start, + mdi->exe_info.address_info.data_end, + ( int ) ( mdi->exe_info.address_info.data_end - + mdi->exe_info.address_info.data_start ) ); + SUBDBG( "Bss: Start %p, End %p, 
length %d\n", + mdi->exe_info.address_info.bss_start, + mdi->exe_info.address_info.bss_end, + ( int ) ( mdi->exe_info.address_info.bss_end - + mdi->exe_info.address_info.bss_start ) ); +#endif + /* PAPI_preload_option information */ + + strcpy( mdi->preload_info.lib_preload_env, "LD_PRELOAD" ); + mdi->preload_info.lib_preload_sep = ' '; + strcpy( mdi->preload_info.lib_dir_env, "LD_LIBRARY_PATH" ); + mdi->preload_info.lib_dir_sep = ':'; + + /* Hardware info */ + + retval = _darwin_get_cpu_info( &mdi->hw_info, &cpuinfo_mhz ); + if ( retval ) { + return retval; + } + + /* Set Up Memory */ + + retval = _darwin_get_memory_info( &mdi->hw_info, mdi->hw_info.model ); + if ( retval ) + return retval; + + SUBDBG( "Found %d %s(%d) %s(%d) CPUs at %d Mhz.\n", + mdi->hw_info.totalcpus, + mdi->hw_info.vendor_string, + mdi->hw_info.vendor, + mdi->hw_info.model_string, + mdi->hw_info.model, + mdi->hw_info.cpu_max_mhz); + + /* Get virtualization info */ + mdi->hw_info.virtualized=_darwin_detect_hypervisor(mdi->hw_info.virtual_vendor_string); + + return PAPI_OK; +} + +int +_papi_hwi_init_os(void) { + + int major=0,minor=0,sub=0; + char *ptr; + struct utsname uname_buffer; + + /* Initialize the locks */ + _darwin_init_locks(); + + /* Get the kernel info */ + uname(&uname_buffer); + + SUBDBG("Native kernel version %s\n",uname_buffer.release); + + strncpy(_papi_os_info.name,uname_buffer.sysname,PAPI_MAX_STR_LEN); + + strncpy(_papi_os_info.version,uname_buffer.release,PAPI_MAX_STR_LEN); + + ptr=strtok(_papi_os_info.version,"."); + if (ptr!=NULL) major=atoi(ptr); + + ptr=strtok(NULL,"."); + if (ptr!=NULL) minor=atoi(ptr); + + ptr=strtok(NULL,"."); + if (ptr!=NULL) sub=atoi(ptr); + + // _papi_os_info.os_version=LINUX_VERSION(major,minor,sub); + + _papi_os_info.itimer_sig = PAPI_INT_MPX_SIGNAL; + _papi_os_info.itimer_num = PAPI_INT_ITIMER; + _papi_os_info.itimer_ns = PAPI_INT_MPX_DEF_US * 1000; + _papi_os_info.itimer_res_ns = 1; + _papi_os_info.clock_ticks = sysconf( _SC_CLK_TCK ); + + /* 
Get Darwin-specific system info */ + _darwin_get_system_info( &_papi_hwi_system_info ); + + return PAPI_OK; +} + + +static inline long long +get_cycles( void ) +{ + long long ret = 0; +#ifdef __x86_64__ + do { + unsigned int a, d; + asm volatile ( "rdtsc":"=a" ( a ), "=d"( d ) ); + ( ret ) = ( ( long long ) a ) | ( ( ( long long ) d ) << 32 ); + } + while ( 0 ); +#else + __asm__ __volatile__( "rdtsc":"=A"( ret ): ); +#endif + return ret; +} + +long long +_darwin_get_real_cycles( void ) +{ + long long retval; + + retval = get_cycles( ); + + return retval; +} + + +long long +_darwin_get_real_usec_gettimeofday( void ) +{ + + long long retval; + + struct timeval buffer; + gettimeofday( &buffer, NULL ); + retval = ( long long ) buffer.tv_sec * ( long long ) 1000000; + retval += ( long long ) ( buffer.tv_usec ); + + return retval; +} + + +long long +_darwin_get_virt_usec_times( void ) +{ + + long long retval; + + struct tms buffer; + + times( &buffer ); + + SUBDBG( "user %d system %d\n", ( int ) buffer.tms_utime, + ( int ) buffer.tms_stime ); + retval = ( long long ) ( ( buffer.tms_utime + buffer.tms_stime ) * + 1000000 / sysconf( _SC_CLK_TCK )); + + /* NOT CLOCKS_PER_SEC as in the headers! */ + + return retval; +} + + + + + +papi_os_vector_t _papi_os_vector = { + .get_memory_info = _darwin_get_memory_info, + .get_dmem_info = _darwin_get_dmem_info, + .get_real_cycles = _darwin_get_real_cycles, + .update_shlib_info = _darwin_update_shlib_info, + .get_system_info = _darwin_get_system_info, + + .get_real_usec = _darwin_get_real_usec_gettimeofday, + .get_virt_usec = _darwin_get_virt_usec_times, + +}; diff --git a/src/darwin-common.h b/src/darwin-common.h new file mode 100644 index 0000000..4f2c322 --- /dev/null +++ b/src/darwin-common.h @@ -0,0 +1,20 @@ +#ifndef _DARWIN_COMMON_H +#define _DARWIN_COMMON_H + +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? 
_min1 : _min2; }) + +static inline pid_t +mygettid( void ) +{ + return pthread_self(); +} + +long long _darwin_get_real_cycles( void ); +long long _darwin_get_virt_usec_times( void ); +long long _darwin_get_real_usec_gettimeofday( void ); + +#endif diff --git a/src/darwin-context.h b/src/darwin-context.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/darwin-context.h diff --git a/src/darwin-lock.h b/src/darwin-lock.h new file mode 100644 index 0000000..ffdc635 --- /dev/null +++ b/src/darwin-lock.h @@ -0,0 +1,48 @@ +#ifndef _DARWIN_LOCK_H +#define _DARWIN_LOCK_H + +#include "mb.h" + +/* Locking functions */ + +#if defined(USE_PTHREAD_MUTEXES) + +#include + +extern pthread_mutex_t _papi_hwd_lock_data[PAPI_MAX_LOCK]; + +#define _papi_hwd_lock(lck) \ +do \ +{ \ + pthread_mutex_lock (&_papi_hwd_lock_data[lck]); \ +} while(0) +#define _papi_hwd_unlock(lck) \ +do \ +{ \ + pthread_mutex_unlock(&_papi_hwd_lock_data[lck]); \ +} while(0) + + +#else + +extern volatile unsigned int _papi_hwd_lock_data[PAPI_MAX_LOCK]; +#define MUTEX_OPEN 0 +#define MUTEX_CLOSED 1 + +#define _papi_hwd_lock(lck) \ +do \ +{ \ + unsigned int res = 0; \ + do { \ + __asm__ __volatile__ ("lock ; " "cmpxchg %1,%2" : "=a"(res) : "q"(MUTEX_CLOSED), "m"(_papi_hwd_lock_data[lck]), "0"(MUTEX_OPEN) : "memory"); \ + } while(res != (unsigned int)MUTEX_OPEN); \ +} while(0) +#define _papi_hwd_unlock(lck) \ +do \ +{ \ + unsigned int res = 0; \ + __asm__ __volatile__ ("xchg %0,%1" : "=r"(res) : "m"(_papi_hwd_lock_data[lck]), "0"(MUTEX_OPEN) : "memory"); \ +} while(0) + +#endif +#endif diff --git a/src/darwin-memory.c b/src/darwin-memory.c new file mode 100644 index 0000000..807d50d --- /dev/null +++ b/src/darwin-memory.c @@ -0,0 +1,81 @@ +#include +#include + +#include "papi.h" +#include "papi_internal.h" +#include "papi_memory.h" /* papi_calloc() */ + +#include "x86_cpuid_info.h" + +#include "darwin-lock.h" + +int +_darwin_get_dmem_info( PAPI_dmem_info_t * d ) +{ + + int mib[4]; + size_t len; 
+ char buffer[BUFSIZ]; + long long ll; + + /**********/ + /* memory */ + /**********/ + len = 2; + sysctlnametomib("hw.memsize", mib, &len); + + len = 8; + if (sysctl(mib, 2, &ll, &len, NULL, 0) == -1) { + return PAPI_ESYS; + } + + d->size=ll; + + d->pagesize = getpagesize( ); + + return PAPI_OK; +} + +/* + * Architecture-specific cache detection code + */ + + +#if defined(__i386__)||defined(__x86_64__) +static int +x86_get_memory_info( PAPI_hw_info_t * hw_info ) +{ + int retval = PAPI_OK; + + switch ( hw_info->vendor ) { + case PAPI_VENDOR_AMD: + case PAPI_VENDOR_INTEL: + retval = _x86_cache_info( &hw_info->mem_hierarchy ); + break; + default: + PAPIERROR( "Unknown vendor in memory information call for x86." ); + return PAPI_ENOIMPL; + } + return retval; +} +#endif + + +int +_darwin_get_memory_info( PAPI_hw_info_t * hwinfo, int cpu_type ) +{ + ( void ) cpu_type; /*unused */ + int retval = PAPI_OK; + + x86_get_memory_info( hwinfo ); + + return retval; +} + +int +_darwin_update_shlib_info( papi_mdi_t *mdi ) +{ + + + return PAPI_OK; +} diff --git a/src/darwin-memory.h b/src/darwin-memory.h new file mode 100644 index 0000000..3108dc5 --- /dev/null +++ b/src/darwin-memory.h @@ -0,0 +1,4 @@ +int _darwin_get_dmem_info( PAPI_dmem_info_t * d ); +int _darwin_get_memory_info( PAPI_hw_info_t * hwinfo, int cpu_type ); +int _darwin_update_shlib_info( papi_mdi_t *mdi ); + diff --git a/src/event_data/power4/events b/src/event_data/power4/events new file mode 100644 index 0000000..654e798 --- /dev/null +++ b/src/event_data/power4/events @@ -0,0 +1,2207 @@ +{ **************************** +{ THIS IS OPEN SOURCE CODE +{ **************************** +{ (C) COPYRIGHT International Business Machines Corp. 2005 +{ This file is licensed under the University of Tennessee license. +{ See LICENSE.txt. 
+{ +{ File: events/power4/events +{ Author: Maynard Johnson +{ maynardj@us.ibm.com +{ Mods: +{ + +{ counter 1 } +#0,v,g,n,n,PM_BIQ_IDU_FULL_CYC,Cycles BIQ or IDU full +##0224,0824 +This signal will be asserted each time either the IDU is full or the BIQ is full. +#1,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full +##0105,0605 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). +#2,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##0104,0604 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#3,u,g,n,n,PM_DC_PREF_L2_CLONE_L3,L2 prefetch cloned with L3 +##0C27 +A prefetch request was made to the L2 with a cloned request sent to the L3 +#4,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##0907 +A new Prefetch Stream was allocated +#5,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##0905 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve +#6,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##0904 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#7,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##0101,0601 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#8,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction +##0003 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. 
This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#9,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##0020 +This signal is active for one cycle when one of the operands is denormalized. +#10,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##0000 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#11,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##0001 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#12,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##0002 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#13,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##0103,0603 +The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped +#14,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##0023 +This signal is active for one cycle when fp0 is executing single precision instruction. +#15,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##0021 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#16,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##0022 +This signal is active for one cycle when fp0 is executing a store instruction. +#17,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction +##0007 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. 
This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#18,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##0024 +This signal is active for one cycle when one of the operands is denormalized. +#19,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##0004 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#20,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##0005 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#21,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##0006 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##0107,0607 +The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped +#23,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##0027 +This signal is active for one cycle when fp1 is executing single precision instruction. +#24,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##0025 +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#25,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##0026 +This signal is active for one cycle when fp1 is executing a store instruction. +#26,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##0100,0600 +The ISU sends a signal indicating the gct is full. +#27,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##0124,0624 +A group that previously attempted dispatch was rejected. 
+#28,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##0123,0623 +Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. +#29,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer +##0225,0825 +This signal is asserted when a prefetch buffer entry (line) is allocated but the request is not a demand fetch. +#30,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##0226,0826 +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#31,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##0227,0827 +This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. This should be a fairly accurate count of ERAT misses (best available). +#32,v,g,n,n,PM_INST_DISP,Instructions dispatched +##0121,0621 +The ISU sends the number of instructions dispatched. +#33,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##0223,0823 +Asserted each cycle when the IFU sends at least one instruction to the IDU. +#34,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##0901 +A SLB miss for an instruction fetch has occurred +#35,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##0900 +A TLB miss for an Instruction Fetch has occurred +#36,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##0C64 +The data source information is valid +#37,v,g,n,n,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##4007 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A,B,and C. +#38,v,g,n,n,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##4006 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#39,v,g,n,n,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##4005 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#40,v,g,n,n,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##4004 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#41,v,g,n,n,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##4023 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#42,v,g,n,n,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##4022 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. 
+#43,v,g,n,n,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##4021 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#44,v,g,n,n,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##4020 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#45,v,g,n,n,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##4027 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#46,v,g,n,n,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##4026 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#47,v,g,n,n,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##4025 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. 
+#48,v,g,n,n,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##4024 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#49,v,g,n,n,PM_L3B0_DIR_MIS,L3 bank 0 directory misses +##4001 +A reference was made to the local L3 directory by a local CPU and it missed in the L3. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#50,v,g,n,n,PM_L3B0_DIR_REF,L3 bank 0 directory references +##4000 +A reference was made to the local L3 directory by a local CPU. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#51,v,g,n,n,PM_L3B1_DIR_MIS,L3 bank 1 directory misses +##4003 +A reference was made to the local L3 directory by a local CPU and it missed in the L3. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#52,v,g,n,n,PM_L3B1_DIR_REF,L3 bank 1 directory references +##4002 +A reference was made to the local L3 directory by a local CPU. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#53,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##0106,0606 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. 
+#54,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##0902 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#55,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##0C02 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#56,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##0C03 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#57,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##0C00 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#58,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##0C01 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#59,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##0C20 +Data from a store instruction was forwarded to a load on unit 0 +#60,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##0906 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#61,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##0C06 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#62,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##0C07 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#63,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##0C04 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#64,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##0C05 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#65,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##0C24 +Data from a store instruction was forwarded to a load on unit 1 +#66,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##0927 +The LMQ was full +#67,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##0926 +A dcache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#68,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##0C26 +LRQ slot zero was allocated +#69,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##0C22 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#70,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##0C25 +SRQ Slot zero was allocated +#71,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##0C21 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#72,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##0922 +A DL1 reload occurred due to marked load +#73,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##0920 +A marked load, executing on unit 0, missed the dcache +#74,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##0924 +A marked load, executing on unit 1, missed the dcache +#75,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##0925 +A marked stcx (stwcx or stdcx) failed +#76,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##0923 +A marked store missed the dcache +#77,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE +##0903 +A TLB miss for a data request occurred.
Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#78,v,g,n,n,PM_STCX_FAIL,STCX failed +##0921 +A stcx (stwcx or stdcx) failed +#79,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##0C23 +A store missed the dcache +#80,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##0102,0602 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#81,v,g,n,n,PM_CYC,Processor cycles +##800F +Processor cycles +#82,v,g,n,n,PM_DATA_FROM_L3,Data loaded from L3 +##8C66 +DL1 was reloaded from the local L3 due to a demand load +#83,v,g,n,n,PM_FPU_DENORM,FPU received denormalized data +##8020 +This signal is active for one cycle when one of the operands is denormalized. Combined Unit 0 + Unit 1 +#84,v,g,n,n,PM_FPU_FDIV,FPU executed FDIV instruction +##8000 +This signal is active for one cycle at the end of the microcode executed when FPU is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. Combined Unit 0 + Unit 1 +#85,u,g,n,n,PM_GCT_EMPTY_CYC,Cycles GCT empty +##8004 +The Global Completion Table is completely empty +#86,c,g,n,n,PM_INST_CMPL,Instructions completed +##8001 +Number of Eligible Instructions that completed. +#87,v,g,n,n,PM_INST_FROM_MEM,Instruction fetched from memory +##8227 +An instruction fetch group was fetched from memory. 
Fetch Groups can contain up to 8 instructions +#88,v,g,n,n,PM_LSU_FLUSH_ULD,LRQ unaligned load flushes +##8C00 +A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#89,c,g,n,n,PM_LSU_SRQ_STFWD,SRQ store forwarded +##8C20 +Data from a store instruction was forwarded to a load +#90,v,g,n,n,PM_MRK_DATA_FROM_L3,Marked data loaded from L3 +##8C76 +DL1 was reloaded from the local L3 due to a marked demand load +#91,v,g,n,n,PM_MRK_GRP_DISP,Marked group dispatched +##8002 +A group containing a sampled instruction was dispatched +#92,v,g,n,n,PM_MRK_LD_MISS_L1,Marked L1 D cache load misses +##8920 +Marked L1 D cache load misses +#93,v,g,n,n,PM_MRK_ST_CMPL,Marked store instruction completed +##8003 +A sampled store has completed (data home) +#94,v,g,n,n,PM_RUN_CYC,Run cycles +##8005 +Processor Cycles gated by the run latch +$$$$ + +{ counter 2 } +#0,v,g,n,n,PM_BIQ_IDU_FULL_CYC,Cycles BIQ or IDU full +##0224,0824 +This signal will be asserted each time either the IDU is full or the BIQ is full. +#1,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full +##0105,0605 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). +#2,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##0104,0604 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#3,u,g,n,n,PM_DC_PREF_L2_CLONE_L3,L2 prefetch cloned with L3 +##0C27 +A prefetch request was made to the L2 with a cloned request sent to the L3 +#4,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##0907 +A new Prefetch Stream was allocated +#5,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##0905 +A SLB miss for a data request occurred. 
SLB misses trap to the operating system to resolve +#6,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##0904 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#7,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##0101,0601 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#8,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction +##0003 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#9,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##0020 +This signal is active for one cycle when one of the operands is denormalized. +#10,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##0000 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#11,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##0001 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#12,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##0002 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#13,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##0103,0603 +The issue queue for FPU unit 0 cannot accept any more instructions. 
Issue is stopped +#14,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##0023 +This signal is active for one cycle when fp0 is executing single precision instruction. +#15,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##0021 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#16,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##0022 +This signal is active for one cycle when fp0 is executing a store instruction. +#17,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction +##0007 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#18,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##0024 +This signal is active for one cycle when one of the operands is denormalized. +#19,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##0004 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#20,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##0005 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#21,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##0006 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##0107,0607 +The issue queue for FPU unit 1 cannot accept any more instructions. 
Issue is stopped +#23,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##0027 +This signal is active for one cycle when fp1 is executing single precision instruction. +#24,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##0025 +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#25,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##0026 +This signal is active for one cycle when fp1 is executing a store instruction. +#26,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##0100,0600 +The ISU sends a signal indicating the gct is full. +#27,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##0124,0624 +A group that previously attempted dispatch was rejected. +#28,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##0123,0623 +Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. +#29,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer +##0225,0825 +This signal is asserted when a prefetch buffer entry (line) is allocated but the request is not a demand fetch. +#30,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##0226,0826 +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#31,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##0227,0827 +This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available). 
+#32,v,g,n,n,PM_INST_DISP,Instructions dispatched +##0121,0621 +The ISU sends the number of instructions dispatched. +#33,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##0223,0823 +Asserted each cycle when the IFU sends at least one instruction to the IDU. +#34,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##0901 +A SLB miss for an instruction fetch has occurred +#35,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##0900 +A TLB miss for an Instruction Fetch has occurred +#36,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##0C64 +The data source information is valid +#37,v,g,n,n,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##4007 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#38,v,g,n,n,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##4006 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#39,v,g,n,n,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##4005 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#40,v,g,n,n,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##4004 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state.
This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#41,v,g,n,n,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##4023 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#42,v,g,n,n,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##4022 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#43,v,g,n,n,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##4021 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#44,v,g,n,n,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##4020 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#45,v,g,n,n,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##4027 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. 
The event is provided on each of the three slices A,B,and C. +#46,v,g,n,n,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##4026 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#47,v,g,n,n,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##4025 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#48,v,g,n,n,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##4024 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#49,v,g,n,n,PM_L3B0_DIR_MIS,L3 bank 0 directory misses +##4001 +A reference was made to the local L3 directory by a local CPU and it missed in the L3. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#50,v,g,n,n,PM_L3B0_DIR_REF,L3 bank 0 directory references +##4000 +A reference was made to the local L3 directory by a local CPU. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#51,v,g,n,n,PM_L3B1_DIR_MIS,L3 bank 1 directory misses +##4003 +A reference was made to the local L3 directory by a local CPU and it missed in the L3. 
Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#52,v,g,n,n,PM_L3B1_DIR_REF,L3 bank 1 directory references +##4002 +A reference was made to the local L3 directory by a local CPU. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#53,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##0106,0606 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#54,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##0902 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#55,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##0C02 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#56,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##0C03 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#57,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##0C00 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#58,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##0C01 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#59,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##0C20 +Data from a store instruction was forwarded to a load on unit 0 +#60,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##0906 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#61,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##0C06 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#62,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##0C07 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#63,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##0C04 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#64,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##0C05 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#65,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##0C24 +Data from a store instruction was forwarded to a load on unit 1 +#66,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##0927 +The LMQ was full +#67,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##0926 +A dcache miss occured for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. 
+#68,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##0C26 +LRQ slot zero was allocated +#69,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##0C22 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#70,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##0C25 +SRQ Slot zero was allocated +#71,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##0C21 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#72,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##0922 +A DL1 reload occurred due to marked load +#73,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##0920 +A marked load, executing on unit 0, missed the dcache +#74,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##0924 +A marked load, executing on unit 1, missed the dcache +#75,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##0925 +A marked stcx (stwcx or stdcx) failed +#76,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##0923 +A marked store missed the dcache +#77,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE +##0903 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#78,v,g,n,n,PM_STCX_FAIL,STCX failed +##0921 +A stcx (stwcx or stdcx) failed +#79,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##0C23 +A store missed the dcache +#80,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##0102,0602 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.
+#81,v,g,n,n,PM_CYC,Processor cycles +##800F +Processor cycles +#82,v,g,n,n,PM_DATA_FROM_MEM,Data loaded from memory +##8C66 +DL1 was reloaded from memory due to a demand load +#83,v,g,n,n,PM_FPU_FMA,FPU executed multiply-add instruction +##8000 +This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 +#84,v,g,n,n,PM_FPU_STALL3,FPU stalled in pipe3 +##8020 +FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. Combined Unit 0 + Unit 1 +#85,v,g,n,n,PM_GRP_DISP,Group dispatches +##8004 +A group was dispatched +#86,c,g,n,n,PM_INST_FROM_L25_L275,Instruction fetched from L2.5/L2.75 +##8227 +An instruction fetch group was fetched from the L2 of another chip. Fetch Groups can contain up to 8 instructions +#87,v,g,n,n,PM_LSU_FLUSH_UST,SRQ unaligned store flushes +##8C00 +A store was flushed because it was unaligned +#88,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty +##8002 +Cycles when both the LMQ and SRQ are empty (LSU is idle) +#89,v,g,n,n,PM_MRK_BRU_FIN,Marked instruction BRU processing finished +##8005 +The branch unit finished a marked instruction. Instructions that finish may not necessarily complete +#90,v,g,n,n,PM_MRK_DATA_FROM_MEM,Marked data loaded from memory +##8C76 +DL1 was reloaded from memory due to a marked demand load +#91,v,g,t,n,PM_THRESH_TIMEO,Threshold timeout +##8003 +The threshold timer expired +#92,v,g,n,n,PM_WORK_HELD,Work held +##8001 +RAS Unit has signaled completion to stop and there are groups waiting to complete +$$$$ + +{ counter 3 } +#0,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##0450 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer.
Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#1,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##0451 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#2,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##0452 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#3,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##0453 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue.
+#4,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##0454 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#5,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##0455 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#6,v,g,n,n,PM_7INST_CLB_CYC,Cycles 7 instructions in CLB +##0456 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#7,v,g,n,n,PM_8INST_CLB_CYC,Cycles 8 instructions in CLB +##0457 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue.
This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#8,v,g,n,n,PM_BR_ISSUED,Branches issued +##0230,0830 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#9,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due CR bit setting +##0231,0831 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#10,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##0232,0832 +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. +#11,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##0111,0611 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). +#12,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##0936 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#13,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##0C17 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#14,v,g,n,n,PM_DC_PREF_OUT_STREAMS,Out of prefetch streams +##0C36 +A new prefetch stream was detected, but no more stream entries were available +#15,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##0133,0633 +The number of Cycles MSR(EE) bit was off. 
+#16,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##0137,0637 +Cycles MSR(EE) bit off and external interrupt pending +#17,v,g,n,n,PM_FAB_CMD_ISSUED,Fabric command issued +##4016 +A bus command was issued on the MCM to MCM fabric from the local (this chip's) Fabric Bus Controller. This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2. +#18,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##4017 +A bus command on the MCM to MCM fabric was retried. This event is the total count of all retried fabric commands for the local MCM (all four chips report the same value). This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2. +#19,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##0930 +A floating point load was executed from LSU unit 0 +#20,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##0934 +A floating point load was executed from LSU unit 1 +#21,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##0012 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#22,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##0013 +fp0 finished, produced a result This only indicates finish, not completion. +#23,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##0010 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#24,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##0030 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. 
mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#25,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##0011 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#26,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##0016 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#27,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##0017 +fp1 finished, produced a result. This only indicates finish, not completion. +#28,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##0014 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#29,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##0015 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#30,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##0110,0610 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped +#31,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##0132,0632 +The Fixed Point unit 0 finished an instruction and produced a result +#32,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##0136,0636 +The Fixed Point unit 1 finished an instruction and produced a result +#33,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##0135,0635 +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.
+#34,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##0131,0631 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. +#35,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##0C35 +A request to prefetch data into the L1 was made +#36,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##0233,0833 +This signal is asserted each cycle a cache write is active. +#37,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##4011 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#38,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##4010 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#39,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##4013 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#40,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##4012 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#41,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##4015 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#42,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##4014 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. 
+#43,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##0C34 +A request to prefetch data into L2 was made +#44,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##0C73 +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#45,c,g,n,n,PM_LARX_LSU1,Larx executed on LSU1 +##0C77 +Invalid event, larx instructions are never executed on unit 1 +#46,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##0C12 +A load, executing on unit 0, missed the dcache +#47,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##0C16 +A load, executing on unit 1, missed the dcache +#48,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##0C10 +A load executed on unit 0 +#49,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##0C14 +A load executed on unit 1 +#50,v,g,n,n,PM_LSU0_BUSY,LSU0 busy +##0C33 +LSU unit 0 is busy rejecting instructions +#51,v,g,n,n,PM_LSU1_BUSY,LSU1 busy +##0C37 +LSU unit 1 is busy rejecting instructions +#52,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##0935 +The first entry in the LMQ was allocated. +#53,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##0931 +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#54,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##0112,0612 +The isu sends this signal when the lrq is full. +#55,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##0113,0613 +The isu sends this signal when the srq is full. +#56,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##0932 +This signal is asserted every cycle when a sync is in the SRQ.
+#57,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##0C74 +The source information is valid and is for a marked load +#58,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##0912 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#59,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##0913 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#60,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##0910 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#61,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##0911 +A marked store was flushed from unit 0 because it was unaligned +#62,c,g,n,n,PM_MRK_LSU0_INST_FIN,LSU0 finished a marked instruction +##0C31 +LSU unit 0 finished a marked instruction +#63,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##0916 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#64,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##0917 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#65,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##0914 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#66,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##0915 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#67,c,g,n,n,PM_MRK_LSU1_INST_FIN,LSU1 finished a marked instruction +##0C32 +LSU unit 1 finished a marked instruction +#68,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##0933 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#69,v,g,n,n,PM_STCX_PASS,Stcx passes +##0C75 +A stcx (stwcx or stdcx) instruction was successful +#70,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##0C13 +A store missed the dcache +#71,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##0C11 +A store executed on unit 0 +#72,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##0C15 +A store executed on unit 1 +#73,v,g,n,n,PM_CYC,Processor cycles +##800F +Processor cycles +#74,v,g,n,n,PM_DATA_FROM_L35,Data loaded from L3.5 +##8C66 +DL1 was reloaded from the L3 of another MCM due to a demand load +#75,v,g,n,n,PM_FPU_FEST,FPU executed FEST instruction +##8010 +This signal is active for one cycle when executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1. +#76,v,g,n,n,PM_FXU_FIN,FXU produced a result +##8130 +The fixed point unit (Unit 0 + Unit 1) finished a marked instruction. Instructions that finish may not necessarily complete. +#77,v,g,n,n,PM_FXU_FIN,FXU produced a result +##8630 +The fixed point unit (Unit 0 + Unit 1) finished a marked instruction. Instructions that finish may not necessarily complete. +#78,v,g,n,n,PM_INST_FROM_L2,Instructions fetched from L2 +##8227 +An instruction fetch group was fetched from L2.
Fetch Groups can contain up to 8 instructions +#79,v,g,n,n,PM_LD_MISS_L1,L1 D cache load misses +##8C10 +Total DL1 Load references that miss the DL1 +#80,v,g,n,n,PM_MRK_DATA_FROM_L35,Marked data loaded from L3.5 +##8C76 +DL1 was reloaded from the L3 of another MCM due to a marked demand load +#81,v,g,n,n,PM_MRK_LSU_FLUSH_LRQ,Marked LRQ flushes +##8910 +A marked load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#82,v,g,n,n,PM_MRK_ST_CMPL_INT,Marked store completed with intervention +##8003 +A marked store previously sent to the memory subsystem completed (data home) after requiring intervention +#83,v,g,n,n,PM_STOP_COMPLETION,Completion stopped +##8001 +RAS Unit has signaled completion to stop +#84,v,g,n,n,PM_HV_CYC,Hypervisor Cycles +##8004 +Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0) +#85,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##0114,0614 +The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped +$$$$ + +{ counter 4 } +#0,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##0450 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#1,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##0451 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer.
Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#2,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##0452 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#3,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##0453 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#4,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##0454 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue.
+#5,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##0455 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#6,v,g,n,n,PM_7INST_CLB_CYC,Cycles 7 instructions in CLB +##0456 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#7,v,g,n,n,PM_8INST_CLB_CYC,Cycles 8 instructions in CLB +##0457 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#8,v,g,n,n,PM_BR_ISSUED,Branches issued +##0230,0830 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#9,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due CR bit setting +##0231,0831 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value.
This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#10,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##0232,0832 +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. +#11,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##0111,0611 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). +#12,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##0936 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#13,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##0C17 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#14,v,g,n,n,PM_DC_PREF_OUT_STREAMS,Out of prefetch streams +##0C36 +A new prefetch stream was detected, but no more stream entries were available +#15,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##0133,0633 +The number of Cycles MSR(EE) bit was off. +#16,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##0137,0637 +Cycles MSR(EE) bit off and external interrupt pending +#17,v,g,n,n,PM_FAB_CMD_ISSUED,Fabric command issued +##4016 +A bus command was issued on the MCM to MCM fabric from the local (this chip's) Fabric Bus Controller. This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2. +#18,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##4017 +A bus command on the MCM to MCM fabric was retried. 
This event is the total count of all retried fabric commands for the local MCM (all four chips report the same value). This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2. +#19,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##0930 +A floating point load was executed from LSU unit 0 +#20,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##0934 +A floating point load was executed from LSU unit 1 +#21,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##0012 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#22,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##0013 +fp0 finished, produced a result This only indicates finish, not completion. +#23,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##0010 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#24,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##0030 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#25,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##0011 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#26,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##0016 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#27,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##0017 +fp1 finished, produced a result.
This only indicates finish, not completion. +#28,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##0014 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#29,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##0015 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#30,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##0110,0610 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped +#31,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##0132,0632 +The Fixed Point unit 0 finished an instruction and produced a result +#32,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##0136,0636 +The Fixed Point unit 1 finished an instruction and produced a result +#33,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##0135,0635 +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#34,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##0131,0631 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. +#35,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##0C35 +A request to prefetch data into the L1 was made +#36,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##0233,0833 +This signal is asserted each cycle a cache write is active. +#37,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##4011 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C.
+#38,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##4010 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#39,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##4013 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#40,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##4012 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#41,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##4015 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#42,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##4014 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. 
+#43,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##0C34 +A request to prefetch data into L2 was made +#44,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##0C73 +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#45,c,g,n,n,PM_LARX_LSU1,Larx executed on LSU1 +##0C77 +Invalid event, larx instructions are never executed on unit 1 +#46,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##0C12 +A load, executing on unit 0, missed the dcache +#47,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##0C16 +A load, executing on unit 1, missed the dcache +#48,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##0C10 +A load executed on unit 0 +#49,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##0C14 +A load executed on unit 1 +#50,v,g,n,n,PM_LSU0_BUSY,LSU0 busy +##0C33 +LSU unit 0 is busy rejecting instructions +#51,v,g,n,n,PM_LSU1_BUSY,LSU1 busy +##0C37 +LSU unit 1 is busy rejecting instructions +#52,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##0935 +The first entry in the LMQ was allocated. +#53,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##0931 +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#54,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##0112,0612 +The isu sends this signal when the lrq is full. +#55,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##0113,0613 +The isu sends this signal when the srq is full. +#56,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##0932 +This signal is asserted every cycle when a sync is in the SRQ.
+#57,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##0C74 +The source information is valid and is for a marked load +#58,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##0912 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#59,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##0913 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#60,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##0910 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#61,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##0911 +A marked store was flushed from unit 0 because it was unaligned +#62,c,g,n,n,PM_MRK_LSU0_INST_FIN,LSU0 finished a marked instruction +##0C31 +LSU unit 0 finished a marked instruction +#63,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##0916 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#64,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##0917 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#65,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##0914 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#66,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##0915 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#67,c,g,n,n,PM_MRK_LSU1_INST_FIN,LSU1 finished a marked instruction +##0C32 +LSU unit 1 finished a marked instruction +#68,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##0933 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#69,v,g,n,n,PM_STCX_PASS,Stcx passes +##0C75 +A stcx (stwcx or stdcx) instruction was successful +#70,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##0C13 +A store missed the dcache +#71,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##0C11 +A store executed on unit 0 +#72,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##0C15 +A store executed on unit 1 +#73,v,g,n,n,PM_CYC,Processor cycles +##800F +Processor cycles +#74,v,g,n,n,PM_DATA_FROM_L2,Data loaded from L2 +##8C66 +DL1 was reloaded from the local L2 due to a demand load +#75,v,g,n,n,PM_FPU_FIN,FPU produced a result +##8010 +FPU finished, produced a result This only indicates finish, not completion. Combined Unit 0 + Unit 1 +#76,u,g,n,n,PM_FXU1_BUSY_FXU0_IDLE,FXU1 busy FXU0 idle +##8002 +FXU0 was idle while FXU1 was busy +#77,c,g,n,n,PM_INST_CMPL,Instructions completed +##8001 +Number of Eligible Instructions that completed. +#78,v,g,n,n,PM_INST_FROM_L35,Instructions fetched from L3.5 +##8227 +An instruction fetch group was fetched from the L3 of another module. Fetch Groups can contain up to 8 instructions +#79,v,g,n,n,PM_LARX,Larx executed +##8C70 +A Larx (lwarx or ldarx) was executed. 
This is the combined count from LSU0 + LSU1, but these instructions only execute on LSU0 +#80,v,g,n,n,PM_LSU_BUSY,LSU busy +##8C30 +LSU (unit 0 + unit 1) is busy rejecting instructions +#81,u,g,n,n,PM_LSU_SRQ_EMPTY_CYC,Cycles SRQ empty +##8003 +The Store Request Queue is empty +#82,v,g,n,n,PM_MRK_CRU_FIN,Marked instruction CRU processing finished +##8005 +The Condition Register Unit finished a marked instruction. Instructions that finish may not necessarily complete +#83,v,g,n,n,PM_MRK_DATA_FROM_L2,Marked data loaded from L2 +##8C76 +DL1 was reloaded from the local L2 due to a marked demand load +#84,v,g,n,n,PM_MRK_GRP_CMPL,Marked group completed +##8004 +A group containing a sampled instruction completed. Microcoded instructions that span multiple groups will generate this event once per group. +#85,v,g,n,n,PM_MRK_LSU_FLUSH_SRQ,Marked SRQ flushes +##8910 +A marked store was flushed because younger load hits an older store that is already in the SRQ or in the same group. +#86,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##0114,0614 +The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped +$$$$ + +{ counter 5 } +#0,v,g,n,n,PM_BIQ_IDU_FULL_CYC,Cycles BIQ or IDU full +##0224,0824 +This signal will be asserted each time either the IDU is full or the BIQ is full. +#1,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full +##0105,0605 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more groups (queue is full of groups). +#2,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##0104,0604 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. 
+#3,u,g,n,n,PM_DC_PREF_L2_CLONE_L3,L2 prefetch cloned with L3 +##0C27 +A prefetch request was made to the L2 with a cloned request sent to the L3 +#4,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##0907 +A new Prefetch Stream was allocated +#5,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##0905 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve +#6,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##0904 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#7,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##0101,0601 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#8,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction +##0003 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#9,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##0020 +This signal is active for one cycle when one of the operands is denormalized. +#10,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##0000 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#11,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##0001 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
+#12,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##0002 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#13,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##0103,0603 +The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped +#14,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##0023 +This signal is active for one cycle when fp0 is executing single precision instruction. +#15,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##0021 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#16,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##0022 +This signal is active for one cycle when fp0 is executing a store instruction. +#17,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction +##0007 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#18,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##0024 +This signal is active for one cycle when one of the operands is denormalized. +#19,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##0004 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#20,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##0005 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
+#21,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##0006 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##0107,0607 +The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped +#23,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##0027 +This signal is active for one cycle when fp1 is executing single precision instruction. +#24,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##0025 +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#25,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##0026 +This signal is active for one cycle when fp1 is executing a store instruction. +#26,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##0100,0600 +The ISU sends a signal indicating the gct is full. +#27,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##0124,0624 +A group that previously attempted dispatch was rejected. +#28,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##0123,0623 +Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. +#29,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer +##0225,0825 +This signal is asserted when a prefetch buffer entry (line) is allocated but the request is not a demand fetch. +#30,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##0226,0826 +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#31,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##0227,0827 +This signal will be asserted each time the I-ERAT is written. 
This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. This should be a fairly accurate count of ERAT missed (best available). +#32,v,g,n,n,PM_INST_DISP,Instructions dispatched +##0121,0621 +The ISU sends the number of instructions dispatched. +#33,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##0223,0823 +Asserted each cycle when the IFU sends at least one instruction to the IDU. +#34,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##0901 +A SLB miss for an instruction fetch has occurred +#35,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##0900 +A TLB miss for an Instruction Fetch has occurred +#36,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##0C64 +The data source information is valid +#37,v,g,n,n,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##4007 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#38,v,g,n,n,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##4006 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#39,v,g,n,n,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##4005 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. 
NOTE: For this event to be useful the tablewalk duration event should also be counted. +#40,v,g,n,n,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##4004 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#41,v,g,n,n,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##4023 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#42,v,g,n,n,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##4022 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#43,v,g,n,n,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##4021 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#44,v,g,n,n,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##4020 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. 
+#45,v,g,n,n,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##4027 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#46,v,g,n,n,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##4026 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#47,v,g,n,n,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##4025 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#48,v,g,n,n,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##4024 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#49,v,g,n,n,PM_L3B0_DIR_MIS,L3 bank 0 directory misses +##4001 +A reference was made to the local L3 directory by a local CPU and it missed in the L3. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#50,v,g,n,n,PM_L3B0_DIR_REF,L3 bank 0 directory references +##4000 +A reference was made to the local L3 directory by a local CPU. 
Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#51,v,g,n,n,PM_L3B1_DIR_MIS,L3 bank 1 directory misses +##4003 +A reference was made to the local L3 directory by a local CPU and it missed in the L3. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#52,v,g,n,n,PM_L3B1_DIR_REF,L3 bank 1 directory references +##4002 +A reference was made to the local L3 directory by a local CPU. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#53,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##0106,0606 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#54,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##0902 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#55,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##0C02 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#56,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##0C03 +A store was flushed because younger load hits an older store that is already in the SRQ or in the same group. 
+#57,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##0C00 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#58,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##0C01 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#59,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##0C20 +Data from a store instruction was forwarded to a load on unit 0 +#60,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##0906 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#61,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##0C06 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#62,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##0C07 +A store was flushed because younger load hits an older store that is already in the SRQ or in the same group. +#63,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##0C04 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#64,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##0C05 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#65,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##0C24 +Data from a store instruction was forwarded to a load on unit 1 +#66,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##0927 +The LMQ was full +#67,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##0926 +A dcache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. 
+#68,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##0C26 +LRQ slot zero was allocated +#69,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##0C22 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#70,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##0C25 +SRQ Slot zero was allocated +#71,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##0C21 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#72,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##0922 +A DL1 reload occurred due to marked load +#73,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##0920 +A marked load, executing on unit 0, missed the dcache +#74,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##0924 +A marked load, executing on unit 1, missed the dcache +#75,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##0925 +A marked stcx (stwcx or stdcx) failed +#76,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##0923 +A marked store missed the dcache +#77,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE +##0903 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#78,v,g,n,n,PM_STCX_FAIL,STCX failed +##0921 +A stcx (stwcx or stdcx) failed +#79,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##0C23 +A store missed the dcache +#80,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##0102,0602 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. 
+#81,v,g,n,n,PM_1PLUS_PPC_CMPL,One or more PPC instruction completed +##8003 +A group containing at least one PPC instruction completed. For microcoded instructions that span multiple groups, this will only occur once. +#82,v,g,n,n,PM_CYC,Processor cycles +##800F +Processor cycles +#83,v,g,n,n,PM_DATA_FROM_L25_SHR,Data loaded from L2.5 shared +##8C66 +DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a demand load +#84,v,g,n,n,PM_FPU_ALL,FPU executed add, mult, sub, cmp or sel instruction +##8000 +This signal is active for one cycle when FPU is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo. Combined Unit 0 + Unit 1 +#85,c,g,n,n,PM_FPU_FULL_CYC,Cycles FPU issue queue full +##8100 +Cycles when one or both FPU issue queues are full +#86,c,g,n,n,PM_FPU_FULL_CYC,Cycles FPU issue queue full +##8600 +Cycles when one or both FPU issue queues are full +#87,v,g,n,n,PM_FPU_SINGLE,FPU executed single precision instruction +##8020 +FPU is executing single precision instruction. Combined Unit 0 + Unit 1 +#88,u,g,n,n,PM_FXU_IDLE,FXU idle +##8002 +FXU0 and FXU1 are both idle +#89,v,g,n,n,PM_GRP_DISP_SUCCESS,Group dispatch success +##8001 +Number of groups successfully dispatched (not rejected) +#90,v,g,n,n,PM_GRP_MRK,Group marked in IDU +##8004 +A group was sampled (marked) +#91,v,g,n,n,PM_INST_FROM_L3,Instruction fetched from L3 +##8227 +An instruction fetch group was fetched from L3. Fetch Groups can contain up to 8 instructions +#92,u,g,n,n,PM_LSU_FLUSH_SRQ,SRQ flushes +##8C00 +A store was flushed because younger load hits an older store that is already in the SRQ or in the same group. 
+#93,v,g,n,n,PM_MRK_DATA_FROM_L25_SHR,Marked data loaded from L2.5 shared +##8C76 +DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a marked demand load +#94,v,g,n,n,PM_MRK_GRP_TIMEO,Marked group completion timeout +##8005 +The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor +$$$$ + +{ counter 6 } +#0,v,g,n,n,PM_BIQ_IDU_FULL_CYC,Cycles BIQ or IDU full +##0224,0824 +This signal will be asserted each time either the IDU is full or the BIQ is full. +#1,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full +##0105,0605 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). +#2,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##0104,0604 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#3,u,g,n,n,PM_DC_PREF_L2_CLONE_L3,L2 prefetch cloned with L3 +##0C27 +A prefetch request was made to the L2 with a cloned request sent to the L3 +#4,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##0907 +A new Prefetch Stream was allocated +#5,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##0905 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve +#6,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##0904 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#7,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##0101,0601 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. 
Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#8,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction +##0003 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#9,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##0020 +This signal is active for one cycle when one of the operands is denormalized. +#10,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##0000 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#11,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##0001 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#12,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##0002 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#13,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##0103,0603 +The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped +#14,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##0023 +This signal is active for one cycle when fp0 is executing single precision instruction. +#15,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##0021 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. 
+#16,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##0022 +This signal is active for one cycle when fp0 is executing a store instruction. +#17,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction +##0007 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#18,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##0024 +This signal is active for one cycle when one of the operands is denormalized. +#19,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##0004 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#20,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##0005 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#21,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##0006 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##0107,0607 +The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped +#23,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##0027 +This signal is active for one cycle when fp1 is executing single precision instruction. +#24,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##0025 +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. 
+#25,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##0026 +This signal is active for one cycle when fp1 is executing a store instruction. +#26,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##0100,0600 +The ISU sends a signal indicating the gct is full. +#27,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##0124,0624 +A group that previously attempted dispatch was rejected. +#28,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##0123,0623 +Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. +#29,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer +##0225,0825 +This signal is asserted when a prefetch buffer entry (line) is allocated but the request is not a demand fetch. +#30,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##0226,0826 +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#31,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##0227,0827 +This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available). +#32,v,g,n,n,PM_INST_DISP,Instructions dispatched +##0121,0621 +The ISU sends the number of instructions dispatched. +#33,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##0223,0823 +Asserted each cycle when the IFU sends at least one instruction to the IDU. 
+#34,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##0901 +A SLB miss for an instruction fetch has occurred +#35,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##0900 +A TLB miss for an Instruction Fetch has occurred +#36,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##0C64 +The data source information is valid +#37,v,g,n,n,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##4007 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#38,v,g,n,n,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##4006 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#39,v,g,n,n,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##4005 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#40,v,g,n,n,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##4004 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. 
+#41,v,g,n,n,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##4023 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#42,v,g,n,n,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##4022 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#43,v,g,n,n,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##4021 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#44,v,g,n,n,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##4020 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#45,v,g,n,n,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##4027 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. 
+#46,v,g,n,n,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##4026 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#47,v,g,n,n,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##4025 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#48,v,g,n,n,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##4024 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#49,v,g,n,n,PM_L3B0_DIR_MIS,L3 bank 0 directory misses +##4001 +A reference was made to the local L3 directory by a local CPU and it missed in the L3. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#50,v,g,n,n,PM_L3B0_DIR_REF,L3 bank 0 directory references +##4000 +A reference was made to the local L3 directory by a local CPU. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#51,v,g,n,n,PM_L3B1_DIR_MIS,L3 bank 1 directory misses +##4003 +A reference was made to the local L3 directory by a local CPU and it missed in the L3. Only requests from on-MCM CPUs are counted. 
This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#52,v,g,n,n,PM_L3B1_DIR_REF,L3 bank 1 directory references +##4002 +A reference was made to the local L3 directory by a local CPU. Only requests from on-MCM CPUs are counted. This event is scaled to the L3 speed and the count must be scaled. i.e. if the L3 is running 3:1, divide the count by 3 +#53,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##0106,0606 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#54,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##0902 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#55,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##0C02 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#56,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##0C03 +A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group.
+#57,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##0C00 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#58,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##0C01 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#59,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##0C20 +Data from a store instruction was forwarded to a load on unit 0 +#60,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##0906 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#61,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##0C06 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#62,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##0C07 +A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. +#63,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##0C04 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#64,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##0C05 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#65,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##0C24 +Data from a store instruction was forwarded to a load on unit 1 +#66,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##0927 +The LMQ was full +#67,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##0926 +A dcache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry.
+#68,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##0C26 +LRQ slot zero was allocated +#69,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##0C22 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin. +#70,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##0C25 +SRQ Slot zero was allocated +#71,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##0C21 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#72,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##0922 +A DL1 reload occurred due to marked load +#73,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##0920 +A marked load, executing on unit 0, missed the dcache +#74,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##0924 +A marked load, executing on unit 1, missed the dcache +#75,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##0925 +A marked stcx (stwcx or stdcx) failed +#76,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##0923 +A marked store missed the dcache +#77,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE +##0903 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#78,v,g,n,n,PM_STCX_FAIL,STCX failed +##0921 +A stcx (stwcx or stdcx) failed +#79,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##0C23 +A store missed the dcache +#80,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##0102,0602 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.
+#81,v,g,n,n,PM_CYC,Processor cycles +##800F +Processor cycles +#82,v,g,n,n,PM_DATA_FROM_L275_SHR,Data loaded from L2.75 shared +##8C66 +DL1 was reloaded with shared (T) data from the L2 of another MCM due to a demand load +#83,v,g,n,n,PM_FPU_FSQRT,FPU executed FSQRT instruction +##8000 +This signal is active for one cycle at the end of the microcode executed when FPU is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 +#84,v,g,n,n,PM_FPU_STF,FPU executed store instruction +##8020 +FPU is executing a store instruction. Combined Unit 0 + Unit 1 +#85,u,g,n,n,PM_FXU_BUSY,FXU busy +##8002 +FXU0 and FXU1 are both busy +#86,c,g,n,n,PM_INST_CMPL,Instructions completed +##8001 +Number of Eligible Instructions that completed. +#87,v,g,n,n,PM_INST_FROM_L1,Instruction fetched from L1 +##8227 +An instruction fetch group was fetched from L1. Fetch Groups can contain up to 8 instructions +#88,v,g,n,n,PM_LSU_DERAT_MISS,DERAT misses +##8900 +Total D-ERAT Misses (Unit 0 + Unit 1). Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. +#89,v,g,n,n,PM_LSU_FLUSH_LRQ,LRQ flushes +##8C00 +A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#90,v,g,n,n,PM_MRK_DATA_FROM_L275_SHR,Marked data loaded from L2.75 shared +##8C76 +DL1 was reloaded with shared (T) data from the L2 of another MCM due to a marked demand load +#91,v,g,n,n,PM_MRK_FXU_FIN,Marked instruction FXU processing finished +##8004 +One of the Fixed Point Units finished a marked instruction. 
Instructions that finish may not necessarily complete +#92,v,g,n,n,PM_MRK_GRP_ISSUED,Marked group issued +##8005 +A sampled instruction was issued +#93,v,g,n,n,PM_MRK_ST_GPS,Marked store sent to GPS +##8003 +A sampled store has been sent to the memory subsystem +$$$$ + +{ counter 7 } +#0,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##0450 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#1,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##0451 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#2,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##0452 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#3,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##0453 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer.
Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#4,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##0454 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#5,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##0455 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#6,v,g,n,n,PM_7INST_CLB_CYC,Cycles 7 instructions in CLB +##0456 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue.
+#7,v,g,n,n,PM_8INST_CLB_CYC,Cycles 8 instructions in CLB +##0457 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#8,v,g,n,n,PM_BR_ISSUED,Branches issued +##0230,0830 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#9,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due CR bit setting +##0231,0831 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#10,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##0232,0832 +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. +#11,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##0111,0611 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). +#12,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##0936 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried.
+#13,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##0C17 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#14,v,g,n,n,PM_DC_PREF_OUT_STREAMS,Out of prefetch streams +##0C36 +A new prefetch stream was detected, but no more stream entries were available +#15,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##0133,0633 +The number of Cycles MSR(EE) bit was off. +#16,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##0137,0637 +Cycles MSR(EE) bit off and external interrupt pending +#17,v,g,n,n,PM_FAB_CMD_ISSUED,Fabric command issued +##4016 +A bus command was issued on the MCM to MCM fabric from the local (this chip's) Fabric Bus Controller. This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2. +#18,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##4017 +A bus command on the MCM to MCM fabric was retried. This event is the total count of all retried fabric commands for the local MCM (all four chips report the same value). This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2. +#19,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##0930 +A floating point load was executed from LSU unit 0 +#20,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##0934 +A floating point load was executed from LSU unit 1 +#21,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##0012 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#22,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##0013 +fp0 finished, produced a result This only indicates finish, not completion. 
+#23,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##0010 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#24,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##0030 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#25,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##0011 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#26,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##0016 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#27,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##0017 +fp1 finished, produced a result. This only indicates finish, not completion. +#28,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##0014 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#29,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##0015 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#30,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##0110,0610 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions.
Issue is stopped +#31,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##0132,0632 +The Fixed Point unit 0 finished an instruction and produced a result +#32,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##0136,0636 +The Fixed Point unit 1 finished an instruction and produced a result +#33,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##0135,0635 +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#34,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##0131,0631 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. +#35,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##0C35 +A request to prefetch data into the L1 was made +#36,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##0233,0833 +This signal is asserted each cycle a cache write is active. +#37,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##4011 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#38,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##4010 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#39,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##4013 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#40,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##4012 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#41,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##4015 +A store request made from the core hit in the L2 directory. 
This event is provided on each of the three L2 slices A,B, and C. +#42,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##4014 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#43,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##0C34 +A request to prefetch data into L2 was made +#44,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##0C73 +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#45,c,g,n,n,PM_LARX_LSU1,Larx executed on LSU1 +##0C77 +Invalid event, larx instructions are never executed on unit 1 +#46,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##0C12 +A load, executing on unit 0, missed the dcache +#47,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##0C16 +A load, executing on unit 1, missed the dcache +#48,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##0C10 +A load executed on unit 0 +#49,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##0C14 +A load executed on unit 1 +#50,v,g,n,n,PM_LSU0_BUSY,LSU0 busy +##0C33 +LSU unit 0 is busy rejecting instructions +#51,v,g,n,n,PM_LSU1_BUSY,LSU1 busy +##0C37 +LSU unit 1 is busy rejecting instructions +#52,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##0935 +The first entry in the LMQ was allocated. +#53,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##0931 +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#54,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##0112,0612 +The isu sends this signal when the lrq is full. +#55,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##0113,0613 +The isu sends this signal when the srq is full. +#56,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##0932 +This signal is asserted every cycle when a sync is in the SRQ.
+#57,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##0C74 +The source information is valid and is for a marked load +#58,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##0912 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#59,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##0913 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#60,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##0910 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#61,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##0911 +A marked store was flushed from unit 0 because it was unaligned +#62,c,g,n,n,PM_MRK_LSU0_INST_FIN,LSU0 finished a marked instruction +##0C31 +LSU unit 0 finished a marked instruction +#63,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##0916 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#64,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##0917 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#65,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##0914 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#66,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##0915 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#67,c,g,n,n,PM_MRK_LSU1_INST_FIN,LSU1 finished a marked instruction +##0C32 +LSU unit 1 finished a marked instruction +#68,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##0933 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#69,v,g,n,n,PM_STCX_PASS,Stcx passes +##0C75 +A stcx (stwcx or stdcx) instruction was successful +#70,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##0C13 +A store missed the dcache +#71,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##0C11 +A store executed on unit 0 +#72,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##0C15 +A store executed on unit 1 +#73,v,g,n,n,PM_CYC,Processor cycles +##800F +Processor cycles +#74,v,g,n,n,PM_DATA_FROM_L275_MOD,Data loaded from L2.75 modified +##8C66 +DL1 was reloaded with modified (M) data from the L2 of another MCM due to a demand load. +#75,v,g,n,n,PM_FPU_FRSP_FCONV,FPU executed FRSP or FCONV instructions +##8010 +This signal is active for one cycle when executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 +#76,u,g,n,n,PM_FXU0_BUSY_FXU1_IDLE,FXU0 busy FXU1 idle +##8002 +FXU0 is busy while FXU1 was idle +#77,v,g,n,n,PM_GRP_CMPL,Group completed +##8003 +A group completed. Microcoded instructions that span multiple groups will generate this event once per group. +#78,c,g,n,n,PM_INST_CMPL,Instructions completed +##8001 +Number of Eligible Instructions that completed. 
+#79,v,g,n,n,PM_INST_FROM_PREF,Instructions fetched from prefetch +##8227 +An instruction fetch group was fetched from the prefetch buffer. Fetch Groups can contain up to 8 instructions +#80,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD,Marked data loaded from L2.75 modified +##8C76 +DL1 was reloaded with modified (M) data from the L2 of another MCM due to a marked demand load. +#81,v,g,n,n,PM_MRK_FPU_FIN,Marked instruction FPU processing finished +##8004 +One of the Floating Point Units finished a marked instruction. Instructions that finish may not necessarily complete +#82,v,g,n,n,PM_MRK_INST_FIN,Marked instruction finished +##8005 +One of the execution units finished a marked instruction. Instructions that finish may not necessarily complete +#83,v,g,n,n,PM_MRK_LSU_FLUSH_UST,Marked unaligned store flushes +##8910 +A marked store was flushed because it was unaligned +#84,v,g,n,n,PM_ST_REF_L1,L1 D cache store references +##8C10 +Total DL1 Store references +#85,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##0114,0614 +The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped +$$$$ + +{ counter 8 } +#0,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##0450 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#1,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##0451 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer.
Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#2,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##0452 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#3,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##0453 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#4,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##0454 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue.
+#5,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##0455 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#6,v,g,n,n,PM_7INST_CLB_CYC,Cycles 7 instructions in CLB +##0456 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#7,v,g,n,n,PM_8INST_CLB_CYC,Cycles 8 instructions in CLB +##0457 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#8,v,g,n,n,PM_BR_ISSUED,Branches issued +##0230,0830 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#9,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due CR bit setting +##0231,0831 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value.
This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#10,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##0232,0832 +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. +#11,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##0111,0611 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). +#12,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##0936 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#13,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##0C17 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#14,v,g,n,n,PM_DC_PREF_OUT_STREAMS,Out of prefetch streams +##0C36 +A new prefetch stream was detected, but no more stream entries were available +#15,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##0133,0633 +The number of Cycles MSR(EE) bit was off. +#16,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##0137,0637 +Cycles MSR(EE) bit off and external interrupt pending +#17,v,g,n,n,PM_FAB_CMD_ISSUED,Fabric command issued +##4016 +A bus command was issued on the MCM to MCM fabric from the local (this chip's) Fabric Bus Controller. This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2. +#18,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##4017 +A bus command on the MCM to MCM fabric was retried. 
This event is the total count of all retried fabric commands for the local MCM (all four chips report the same value). This event is scaled to the fabric frequency and must be adjusted for a true count. i.e. if the fabric is running 2:1, divide the count by 2. +#19,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##0930 +A floating point load was executed from LSU unit 0 +#20,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##0934 +A floating point load was executed from LSU unit 1 +#21,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##0012 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#22,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##0013 +fp0 finished, produced a result. This only indicates finish, not completion. +#23,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##0010 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs*, fres* or frsqrte* where XYZ* means XYZ or XYZ +#24,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##0030 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#25,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##0011 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#26,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##0016 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#27,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##0017 +fp1 finished, produced a result. 
This only indicates finish, not completion. +#28,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##0014 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs*, fres* or frsqrte* where XYZ* means XYZ or XYZ +#29,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##0015 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#30,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##0110,0610 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped +#31,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##0132,0632 +The Fixed Point unit 0 finished an instruction and produced a result +#32,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##0136,0636 +The Fixed Point unit 1 finished an instruction and produced a result +#33,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##0135,0635 +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#34,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##0131,0631 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. +#35,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##0C35 +A request to prefetch data into the L1 was made +#36,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##0233,0833 +This signal is asserted each cycle a cache write is active. +#37,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##4011 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. 
+#38,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##4010 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#39,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##4013 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#40,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##4012 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#41,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##4015 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#42,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##4014 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. 
+#43,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##0C34 +A request to prefetch data into L2 was made +#44,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##0C73 +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#45,c,g,n,n,PM_LARX_LSU1,Larx executed on LSU1 +##0C77 +Invalid event, larx instructions are never executed on unit 1 +#46,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##0C12 +A load, executing on unit 0, missed the dcache +#47,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##0C16 +A load, executing on unit 1, missed the dcache +#48,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##0C10 +A load executed on unit 0 +#49,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##0C14 +A load executed on unit 1 +#50,v,g,n,n,PM_LSU0_BUSY,LSU0 busy +##0C33 +LSU unit 0 is busy rejecting instructions +#51,v,g,n,n,PM_LSU1_BUSY,LSU1 busy +##0C37 +LSU unit 1 is busy rejecting instructions +#52,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##0935 +The first entry in the LMQ was allocated. +#53,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##0931 +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#54,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##0112,0612 +The isu sends this signal when the lrq is full. +#55,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##0113,0613 +The isu sends this signal when the srq is full. +#56,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##0932 +This signal is asserted every cycle when a sync is in the SRQ. 
+#57,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##0C74 +The source information is valid and is for a marked load +#58,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##0912 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#59,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##0913 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#60,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##0910 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#61,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##0911 +A marked store was flushed from unit 0 because it was unaligned +#62,c,g,n,n,PM_MRK_LSU0_INST_FIN,LSU0 finished a marked instruction +##0C31 +LSU unit 0 finished a marked instruction +#63,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##0916 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#64,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##0917 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#65,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##0914 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#66,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##0915 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#67,c,g,n,n,PM_MRK_LSU1_INST_FIN,LSU1 finished a marked instruction +##0C32 +LSU unit 1 finished a marked instruction +#68,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##0933 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#69,v,g,n,n,PM_STCX_PASS,Stcx passes +##0C75 +A stcx (stwcx or stdcx) instruction was successful +#70,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##0C13 +A store missed the dcache +#71,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##0C11 +A store executed on unit 0 +#72,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##0C15 +A store executed on unit 1 +#73,v,g,n,n,PM_0INST_FETCH,No instructions fetched +##8227 +No instructions were fetched this cycle (due to IFU hold, redirect, or icache miss) +#74,v,g,n,n,PM_CYC,Processor cycles +##800F +Processor cycles +#75,v,g,n,n,PM_DATA_FROM_L25_MOD,Data loaded from L2.5 modified +##8C66 +DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a demand load +#76,v,g,n,n,PM_EXT_INT,External interrupts +##8002 +An external interrupt occurred +#77,v,g,n,n,PM_FPU_FMOV_FEST,FPU executing FMOV or FEST instructions +##8010 +This signal is active for one cycle when executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs*, fres* or frsqrte* where XYZ* means XYZ or XYZ . 
Combined Unit 0 + Unit 1 +#78,v,g,n,n,PM_LSU_LDF,LSU executed Floating Point load instruction +##8930 +LSU executed Floating Point load instruction +#79,c,g,n,n,PM_FXLS_FULL_CYC,Cycles FXLS queue is full +##8110 +Cycles when one or both FXU/LSU issue queue are full +#80,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##8003 +A group that previously attempted dispatch was rejected. +#81,c,g,n,n,PM_INST_CMPL,Instructions completed +##8001 +Number of Eligible Instructions that completed. +#82,v,g,n,n,PM_LD_REF_L1,L1 D cache load references +##8C10 +Total DL1 Load references +#83,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD,Marked data loaded from L2.5 modified +##8C76 +DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a marked demand load +#84,c,g,n,n,PM_MRK_LSU_FIN,Marked instruction LSU processing finished +##8004 +One of the Load/Store Units finished a marked instruction. Instructions that finish may not necessary complete +#85,v,g,n,n,PM_MRK_LSU_FLUSH_ULD,Marked unaligned load flushes +##8910 +A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#86,u,g,n,n,PM_TB_BIT_TRANS,Time Base bit transition +##8005 +When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 +#87,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##0114,0614 +The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped diff --git a/src/event_data/power4/groups b/src/event_data/power4/groups new file mode 100644 index 0000000..3d0d681 --- /dev/null +++ b/src/event_data/power4/groups @@ -0,0 +1,332 @@ +{ **************************** +{ THIS IS OPEN SOURCE CODE +{ **************************** +{ (C) COPYRIGHT International Business Machines Corp. 2005 +{ This file is licensed under the University of Tennessee license. +{ See LICENSE.txt. 
+{ +{ File: events/power4/groups +{ Author: Maynard Johnson +{ maynardj@us.ibm.com +{ Mods: +{ + +{ Number of groups + 63 + +{ Group descriptions + +#0,94,81,83,77,81,81,77,80,pm_slice0,Time Slice 0 +##8005,800F,8001,8001,8003,800F,8003,8003 +00000D0E,00000000,4A5675AC,00022000 +Time Slice 0 + +#1,81,81,79,13,32,86,84,82,pm_eprof,Group for use with eprof +##800F,800F,8C10,0C17,0621,8001,8C10,8C10 +0000070E,10034000,45F29420,00002001 +Group for use with eprof + +#2,86,81,79,13,32,86,84,82,pm_basic,Basic performance indicators +##8001,800F,8C10,0C17,0621,8001,8C10,8C10 +0000090E,10034000,45F29420,00002000 +Basic performance indicators + +#3,86,0,8,9,33,81,10,36,pm_ifu,IFU events +##8001,0224,0230,0231,0223,800F,0232,0233 +00000938,80000000,C6767D6C,00022000 +IFU events + +#4,7,1,33,77,86,26,73,79,pm_isu,ISU Queue full events +##0601,0605,0635,8001,8600,0600,800F,8110 +0000112A,50041000,EA5103A0,00002000 +ISU Queue full events + +#5,82,82,74,74,83,82,74,75,pm_lsource,Information on data source +##8C66,8C66,8C66,8C66,8C66,8C66,8C66,8C66 +00000E1C,0010C000,739CE738,00002000 +Information on data source + +#6,87,86,78,78,91,87,79,73,pm_isource,Instruction Source information +##8227,8227,8227,8227,8227,8227,8227,8227 +00000F1E,80000000,7BDEF7BC,00022000 +Instruction Source information + +#7,88,87,73,77,92,89,84,82,pm_lsu,Information on the Load Store Unit +##8C00,8C00,800F,8001,8C00,8C00,8C10,8C10 +00000810,000F0000,3A508420,00002000 +Information on the Load Store Unit + +#8,35,6,12,53,31,88,78,74,pm_xlate1,Translation Events +##0900,0904,0936,0931,0227,8900,8001,800F +00001028,81082000,F67E849C,00022000 +Translation Events + +#9,34,5,56,52,31,88,78,74,pm_xlate2,Translation Events +##0901,0905,0932,0935,0227,8900,8001,800F +0000112A,81082000,D77E849C,00022000 +Translation Events + +#10,50,49,17,18,52,51,78,74,pm_gps1,L3 Events +##4000,4001,4016,4017,4002,4003,8001,800F +00001022,00000C00,B5E5349C,00022000 +L3 Events + +#11,38,39,38,37,40,37,78,74,pm_l2a,L2 SliceA events 
+##4006,4005,4010,4011,4004,4007,8001,800F +0000162A,00000C00,8469749C,00022000 +L2 SliceA events + +#12,42,43,40,39,44,41,78,74,pm_l2b,L2 SliceB events +##4022,4021,4012,4013,4020,4023,8001,800F +00001A32,00000600,94F1B49C,00022000 +L2 SliceB events + +#13,46,47,42,41,48,45,78,74,pm_l2c,L2 SliceC events +##4026,4025,4014,4015,4024,4027,8001,800F +00001E3A,00000600,A579F49C,00022000 +L2 SliceC events + +#14,84,83,75,75,82,83,78,77,pm_fpu1,Floating Point events +##8000,8000,8010,8010,800F,8000,8001,8010 +00000810,00000000,420E84A0,00002000 +Floating Point events + +#15,83,84,73,77,84,84,75,78,pm_fpu2,Floating Point events +##8020,8020,800F,8001,8000,8020,8010,8930 +00000810,010020E8,3A508420,00002000 +Floating Point events + +#16,86,81,0,1,81,81,2,3,pm_idu1,Instruction Decode Unit events +##8001,800F,0450,0451,8003,800F,0452,0453 +0000090E,04010000,8456794C,00022000 +Instruction Decode Unit events + +#17,86,81,4,5,89,81,6,7,pm_idu2,Instruction Decode Unit events +##8001,800F,0454,0455,8001,800F,0456,0457 +0000090E,04010000,A5527B5C,00022000 +Instruction Decode Unit events + +#18,80,2,11,34,53,32,78,74,pm_isu_rename,ISU Rename Pool Events +##0602,0604,0611,0631,0606,0621,8001,800F +00001228,10055000,8E6D949C,00022000 +ISU Rename Pool Events + +#19,13,22,30,30,82,86,54,55,pm_isu_queues1,ISU Queue Full Events +##0603,0607,0610,0614,800F,8001,0612,0613 +0000132E,10050000,850E994C,00022000 +ISU Queue Full Events + +#20,32,81,31,32,28,27,78,74,pm_isu_flow,ISU Instruction Flow Events +##0621,800F,0632,0636,0623,0624,8001,800F +0000190E,10005000,D7B7C49C,00022000 +ISU Instruction Flow Events + +#21,85,92,83,16,82,86,15,76,pm_isu_work,ISU Indicators of Work Blockage +##8004,8001,8001,0637,800F,8001,0633,8002 +00000C12,10001000,4FCE9DA8,00002000 +ISU Indicators of Work Blockage + +#22,77,78,69,73,81,86,44,45,pm_serialize,LSU Serializing Events +##0903,0921,0C75,800F,8003,8001,0C73,0C77 +00001332,0118B000,E9D69DFC,00022000 +LSU Serializing Events + 
+#23,71,70,50,51,69,68,78,74,pm_lsubusy,LSU Busy Events +##0C21,0C25,0C33,0C37,0C22,0C26,8001,800F +0000193A,0000F000,DFF5E49C,00022000 +LSU Busy Events + +#24,86,36,73,74,83,82,74,75,pm_lsource2,Information on data source +##8001,0C64,800F,8C66,8C66,8C66,8C66,8C66 +00000938,0010C000,3B9CE738,00002000 +Information on data source + +#25,82,82,74,74,36,81,74,81,pm_lsource3,Information on data source +##8C66,8C66,8C66,8C66,0C64,800F,8C66,8001 +00000E1C,0010C000,73B87724,00022000 +Information on data source + +#26,86,81,78,78,91,87,79,73,pm_isource2,Instruction Source information +##8001,800F,8227,8227,8227,8227,8227,8227 +0000090E,80000000,7BDEF7BC,00022000 +Instruction Source information + +#27,87,86,78,78,91,87,73,81,pm_isource3,Instruction Source information +##8227,8227,8227,8227,8227,8227,800F,8001 +00000F1E,80000000,7BDEF3A4,00022000 +Instruction Source information + +#28,10,19,25,29,11,20,78,74,pm_fpu3,Floating Point events by unit +##0000,0004,0011,0015,0001,0005,8001,800F +00001028,00000000,8D63549C,00022000 +Floating Point events by unit + +#29,12,21,22,27,8,17,78,74,pm_fpu4,Floating Point events by unit +##0002,0006,0013,0017,0003,0007,8001,800F +0000122C,00000000,9DE7749C,00022000 +Floating Point events by unit + +#30,9,18,23,28,82,86,21,26,pm_fpu5,Floating Point events by unit +##0020,0024,0010,0014,800F,8001,0012,0016 +00001838,00000000,850E9958,00002000 +Floating Point events by unit + +#31,14,23,19,20,16,25,73,81,pm_fpu6,Floating Point events by unit +##0023,0027,0930,0934,0022,0026,800F,8001 +00001B3E,01002000,C735E3A4,00022000 +Floating Point events by unit + +#32,15,24,22,27,82,86,73,24,pm_fpu7,Floating Point events by unit +##0021,0025,0013,0017,800F,8001,800F,0030 +0000193A,00000000,9DCE93E0,00002000 +Floating Point events by unit + +#33,86,81,76,76,88,85,76,79,pm_fxu,Fix Point Unit events +##8001,800F,8130,8002,8002,8002,8002,8110 +0000090E,40000002,4294A520,00002000 +Fix Point Unit events + +#34,67,66,52,53,82,86,56,12,pm_lsu_lmq,LSU Load Miss 
Queue Events +##0926,0927,0935,0931,800F,8001,0932,0936 +00001E3E,0100A000,EE4E9D78,00002000 +LSU Load Miss Queue Events + +#35,55,61,73,73,56,62,78,74,pm_lsu_flush,LSU Flush Events +##0C02,0C06,800F,800F,0C03,0C07,8001,800F +0000122C,000C0000,39E7749C,00022000 +LSU Flush Events + +#36,57,63,48,49,82,86,46,47,pm_lsu_load1,LSU Load Events +##0C00,0C04,0C10,0C14,800F,8001,0C12,0C16 +00001028,000F0000,850E9958,00002000 +LSU Load Events + +#37,58,64,71,72,82,86,70,13,pm_lsu_store1,LSU Store Events +##0C01,0C05,0C11,0C15,800F,8001,0C13,0C17 +0000112A,000F0000,8D4E99DC,00022000 +LSU Store Events + +#38,59,65,71,72,79,81,78,74,pm_lsu_store2,LSU Store Events +##0C20,0C24,0C11,0C15,0C23,800F,8001,800F +00001838,0003C000,8D76749C,00022000 +LSU Store Events + +#39,54,60,73,73,36,81,78,74,pm_lsu7,Information on the Load Store Unit +##0902,0906,800F,800F,0C64,800F,8001,800F +0000122C,0118C000,39F8749C,00022000 +Information on the Load Store Unit + +#40,4,3,43,35,82,86,73,14,pm_dpfetch,Data Prefetch Events +##0907,0C27,0C34,0C35,800F,8001,800F,0C36 +0000173E,0108F000,E74E93F8,00002000 +Data Prefetch Events + +#41,85,88,84,73,81,86,77,86,pm_misc,Misc Events for testing +##8004,8002,8004,800F,8003,8001,8003,8005 +00000C14,00000000,61D695B4,00022000 +Misc Events for testing + +#42,92,91,73,84,90,92,82,81,pm_mark1,Information on marked instructions +##8920,8003,800F,8004,8004,8005,8005,8001 +00000816,01008080,3B18D6A4,00722001 +Information on marked instructions + +#43,91,89,73,82,90,91,81,84,pm_mark2,Marked Instructions Processing Flow +##8002,8005,800F,8005,8004,8004,8004,8004 +00000A1A,00000000,3B58C630,00002001 +Marked Instructions Processing Flow + +#44,93,81,82,84,94,93,68,81,pm_mark3,Marked Stores Processing Flow +##8003,800F,8003,8004,8005,8003,0933,8001 +00000B0E,01002000,5B1ABDA4,00022001 +Marked Stores Processing Flow + +#45,92,81,81,85,94,92,78,85,pm_mark4,Marked Loads Processing FLow +##8920,800F,8910,8910,8005,8005,8001,8910 +0000080E,01028080,421AD4A0,00002001 +Marked 
Loads Processing FLow + +#46,90,90,80,83,93,90,80,83,pm_mark_lsource,Information on marked data source +##8C76,8C76,8C76,8C76,8C76,8C76,8C76,8C76 +00000E1C,00103000,739CE738,00002001 +Information on marked data source + +#47,86,81,57,83,93,90,80,83,pm_mark_lsource2,Information on marked data source +##8001,800F,0C74,8C76,8C76,8C76,8C76,8C76 +0000090E,00103000,E39CE738,00002001 +Information on marked data source + +#48,90,90,80,83,82,86,80,57,pm_mark_lsource3,Information on marked data source +##8C76,8C76,8C76,8C76,800F,8001,8C76,0C74 +00000E1C,00103000,738E9770,00002001 +Information on marked data source + +#49,76,72,60,65,82,86,61,66,pm_lsu_mark1,Load Store Unit Marked Events +##0923,0922,0910,0914,800F,8001,0911,0915 +00001B34,01028000,850E98D4,00022001 +Load Store Unit Marked Events + +#50,73,74,58,63,82,86,59,64,pm_lsu_mark2,Load Store Unit Marked Events +##0920,0924,0912,0916,800F,8001,0913,0917 +00001838,01028000,958E99DC,00022001 +Load Store Unit Marked Events + +#51,75,81,62,67,82,92,82,81,pm_lsu_mark3,Load Store Unit Marked Events +##0925,800F,0C31,0C32,800F,8005,8005,8001 +00001D0E,0100B000,CE8ED6A4,00022001 +Load Store Unit Marked Events + +#52,67,91,53,77,82,92,77,52,pm_threshold,Group for pipeline threshold studies +##0926,8003,0931,8001,800F,8005,8003,0935 +00001E16,0100A000,CA4ED5F4,00722001 +Group for pipeline threshold studies + +#53,84,83,77,75,82,83,78,77,pm_pe_bench1,PE Benchmarker group for FP analysis +##8000,8000,8630,8010,800F,8000,8001,8010 +00000810,10001002,420E84A0,00002000 +PE Benchmarker group for FP analysis + +#54,81,84,22,77,86,84,27,78,pm_pe_bench2,PE Benchmarker group for FP stalls analysis +##800F,8020,0013,8001,8600,8020,0017,8930 +00000710,11042068,9A508BA0,00002000 +PE Benchmarker group for FP stalls analysis + +#55,86,0,8,9,1,81,10,36,pm_pe_bench3,PE Benchmarker group for branch analysis +##8001,0224,0230,0231,0605,800F,0232,0233 +00000938,90040000,C66A7D6C,00022000 +PE Benchmarker group for branch analysis + 
+#56,6,35,79,70,82,86,84,82,pm_pe_bench4,PE Benchmarker group for L1 and TLB analysis +##0904,0900,8C10,0C13,800F,8001,8C10,8C10 +00001420,010B0000,44CE9420,00002000 +PE Benchmarker group for L1 and TLB analysis + +#57,86,81,74,74,83,82,74,75,pm_pe_bench5,PE Benchmarker group for L2 analysis +##8001,800F,8C66,8C66,8C66,8C66,8C66,8C66 +0000090E,0010C000,739CE738,00002000 +PE Benchmarker group for L2 analysis + +#58,82,82,74,74,83,81,78,75,pm_pe_bench6,PE Benchmarker group for L3 analysis +##8C66,8C66,8C66,8C66,8C66,800F,8001,8C66 +00000E1C,0010C000,739C74B8,00002000 +PE Benchmarker group for L3 analysis + +#59,6,88,79,70,82,86,84,82,pm_hpmcount1,Hpmcount group for L1 and TLB behavior analysis +##0904,8002,8C10,0C13,800F,8001,8C10,8C10 +00001414,010B0000,44CE9420,00002000 +Hpmcount group for L1 and TLB behavior analysis + +#60,84,83,22,27,82,84,78,78,pm_hpmcount2,Hpmcount group for computation intensity analysis +##8000,8000,0013,0017,800F,8020,8001,8930 +00000810,01002028,9DCE84A0,00002000 +Hpmcount group for computation intensity analysis + +#61,86,81,79,8,79,81,9,10,pm_l1andbr,L1 misses and branch misspredict analysis +##8001,800F,8C10,0230,0C23,800F,0231,0232 +0000090E,8003C000,46367CE8,00002000 +L1 misses and branch misspredict analysis + +#62,86,81,79,8,82,79,84,82,pm_imix,Instruction mix: loads, stores and branches +##8001,800F,8C10,0230,800F,0C23,8C10,8C10 +0000090E,8003C000,460FB420,00002000 +Instruction mix: loads, stores and branches diff --git a/src/event_data/power5+/events b/src/event_data/power5+/events new file mode 100644 index 0000000..0131ec4 --- /dev/null +++ b/src/event_data/power5+/events @@ -0,0 +1,4311 @@ +{ File: power5+/events +{ Date: 12/13/06 +{ Version: 1.7 +{ Copyright (c) International Business Machines, 2006. +{ Contributed by Eric Kjeldergaard + + +362,356,355,352,1,1 + +{ counter 1 } +#0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB +##400C0 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. 
Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#1,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##400C1 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#2,v,g,n,n,PM_1PLUS_PPC_CMPL,One or more PPC instruction completed +##00013 +A group containing at least one PPC instruction completed. For microcoded instructions that span multiple groups, this will only occur once. +#3,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##400C2 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#4,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##400C3 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. 
These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#5,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##400C4 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#6,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##400C5 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#7,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##400C6 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#8,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full +##100C5 +Cycles when the issue queue that feeds the branch unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. 
+#9,v,g,n,n,PM_BR_ISSUED,Branches issued +##230E4 +A branch instruction was issued to the branch unit. A branch that was incorrectly predicted may issue and execute multiple times. +#10,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##230E5 +A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overridden by a flush of an older instruction. +#11,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##230E6 +A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction. +#12,v,g,n,n,PM_BR_UNCOND,Unconditional branch +##23087 +An unconditional branch was executed. +#13,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty +##410C6 +Cycles when both threads' CLBs are completely empty. +#14,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full +##220E5 +Cycles when both threads' CLBs are full. +#15,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##110C1 +The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#16,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##100C4 +The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#17,v,g,n,s,PM_CYC,Processor cycles +##0000F +Processor cycles +#18,v,g,n,n,PM_DATA_FROM_L2,Data loaded from L2 +##C3087 +The processor's Data Cache was reloaded from the local L2 due to a demand load. 
+#19,v,g,n,n,PM_DATA_FROM_L25_SHR,Data loaded from L2.5 shared +##C3097 +The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load. +#20,v,g,n,n,PM_DATA_FROM_L275_MOD,Data loaded from L2.75 modified +##C30A3 +The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a demand load. +#21,v,g,n,n,PM_DATA_FROM_L3,Data loaded from L3 +##C308E +The processor's Data Cache was reloaded from the local L3 due to a demand load. +#22,v,g,n,n,PM_DATA_FROM_L35_SHR,Data loaded from L3.5 shared +##C309E +The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a demand load. +#23,v,g,n,n,PM_DATA_FROM_L375_MOD,Data loaded from L3.75 modified +##C30A7 +The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load. +#24,v,g,n,n,PM_DATA_FROM_RMEM,Data loaded from remote memory +##C30A1 +The processor's Data Cache was reloaded from memory attached to a different module than this processor is located on. +#25,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##800C7 +Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#26,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C10C7 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#27,v,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of prefetch streams +##C50C2 +A new prefetch stream was detected but no more stream entries were available. +#28,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start +##830E6 +A prefetch stream was started using the DST instruction. 
+#29,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##830E7 +A new Prefetch Stream was allocated. +#30,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##800C5 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve. +#31,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##800C4,C20E0 +Data TLB misses, all page sizes. +#32,v,g,n,n,PM_DTLB_MISS_4K,Data TLB miss for 4K page +##C208D +Data TLB references to 4KB pages that missed the TLB. Page size is determined at TLB reload time. +#33,v,g,n,n,PM_DTLB_REF,Data TLB references +##C20E4 +Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time. +#34,v,g,n,n,PM_DTLB_REF_4K,Data TLB reference for 4K page +##C2086 +Data TLB references for 4KB pages. Includes hits + misses. +#35,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##130E3 +Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked. +#36,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##130E7 +Cycles when an interrupt due to an external exception is pending but external exceptions were masked. +#37,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued +##700C7 +Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. 
The signal is delivered at FBC speed and the count must be scaled. +#38,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##710C7 +Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#39,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued +##720E7 +A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#40,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried +##730E7 +A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#41,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty +##722E7 +Fabric cycles when the Next Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#42,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty +##721E7 +Fabric cycles when the Vertical Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#43,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty +##702C7 +Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#44,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty +##712C7 +Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly.
+#45,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty +##701C7 +Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#46,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty +##711C7 +Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#47,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination +##703C7 +Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled. +#48,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first +##713C7 +Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled. +#49,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination +##723E7 +Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled accordingly. +#50,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first +##733E7 +Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled accordingly. +#51,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty +##731E7 +Fabric cycles when the Middle Bypass sidecar is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. 
+#52,v,g,n,n,PM_FLUSH,Flushes +##110C7 +Flushes occurred including LSU and Branch flushes. +#53,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##110C6 +A flush was caused by a branch mispredict. +#54,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance +##330E3 +This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#55,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation +##330E2 +This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#56,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync +##330E1 +This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes. +#57,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##100C1 +The Floating Point Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#58,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction +##000C3 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#59,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##020E0 +FPU0 has encountered a denormalized operand. +#60,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##000C0 +FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs. +#61,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##010C2 +FPU0 has executed an estimate instruction.
This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#62,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##010C3 +FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. +#63,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##000C1 +The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#64,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##010C0 +FPU0 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. +#65,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##030E0 +FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#66,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##010C1 +FPU0 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#67,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##000C2 +FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#68,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##100C3 +The issue queue for FPU0 cannot accept any more instruction. Dispatch to this issue queue is stopped. +#69,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##020E3 +FPU0 has executed a single precision instruction. +#70,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##020E1 +FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). 
+#71,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##020E2 +FPU0 has executed a Floating Point Store instruction. +#72,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction +##000C7 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#73,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##020E4 +FPU1 has encountered a denormalized operand. +#74,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##000C4 +FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs. +#75,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##010C6 +FPU1 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#76,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##010C7 +FPU1 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. +#77,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##000C5 +The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#78,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executed FMOV or FEST instructions +##010C4 +FPU1 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. +#79,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##010C5 +FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#80,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##000C6 +FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs.
+#81,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##100C7 +The issue queue for FPU1 cannot accept any more instructions. Dispatch to this issue queue is stopped +#82,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##020E7 +FPU1 has executed a single precision instruction. +#83,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##020E5 +FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). +#84,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##020E6 +FPU1 has executed a Floating Point Store instruction. +#85,v,g,n,n,PM_FPU_1FLOP,FPU executed one flop instruction +##00090 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#86,v,g,n,n,PM_FPU_DENORM,FPU received denormalized data +##02088 +The floating point unit has encountered a denormalized operand. Combined Unit 0 + Unit 1. +#87,v,g,n,n,PM_FPU_FDIV,FPU executed FDIV instruction +##00088 +The floating point unit has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs.. Combined Unit 0 + Unit 1. +#88,v,g,n,n,PM_FPU_FEST,FPU executed FEST instruction +##010A8 +The floating point unit has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1. +#89,c,g,n,n,PM_FPU_FULL_CYC,Cycles FPU issue queue full +##10090 +Cycles when one or both FPU issue queues are full. Combined Unit 0 + 1. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full. +#90,v,g,n,n,PM_FPU_SINGLE,FPU executed single precision instruction +##02090 +FPU is executing single precision instruction. Combined Unit 0 + Unit 1. +#91,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##110C0 +The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. 
This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#92,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##110C4 +The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#93,c,g,n,n,PM_FXLS_FULL_CYC,Cycles FXLS queue is full +##110A8 +Cycles when the issue queues for one or both FXU/LSU units are full. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full. +#94,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##130E2 +The Fixed Point unit 0 finished an instruction and produced a result. Instructions that finish may not necessarily complete. +#95,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##130E6 +The Fixed Point unit 1 finished an instruction and produced a result. Instructions that finish may not necessarily complete. +#96,u,g,n,n,PM_FXU_IDLE,FXU idle +##00012 +FXU0 and FXU1 are both idle. +#97,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##100C0 +The Global Completion Table is completely full. +#98,v,g,n,n,PM_GCT_NOSLOT_CYC,Cycles no GCT slot allocated +##00004 +Cycles when the Global Completion Table has no slots from this thread. +#99,v,g,n,s,PM_GCT_USAGE_00to59_CYC,Cycles GCT less than 60% full +##0001F +Cycles when the Global Completion Table has fewer than 60% of its slots used. The GCT has 20 entries shared between threads. +#100,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##130E5 +The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.
+#101,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##120E6 +Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count. +#102,v,g,n,n,PM_GRP_BR_REDIR_NONSPEC,Group experienced non-speculative branch redirect +##12091 +Number of groups, counted at completion, that have encountered a branch redirect. +#103,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##130E1 +A scoreboard operation on a non-renamed resource has blocked dispatch. +#104,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##120E4 +A group that previously attempted dispatch was rejected. +#105,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##120E3 +A group is available for dispatch. This does not mean it was successfully dispatched. +#106,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss +##120E7 +Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count. +#107,c,g,n,n,PM_GRP_IC_MISS_BR_REDIR_NONSPEC,Group experienced non-speculative I cache miss or branch redirect +##120E5 +Group experienced non-speculative I cache miss or branch redirect +#108,v,g,n,n,PM_GRP_IC_MISS_NONSPEC,Group experienced non-speculative I cache miss +##12099 +Number of groups, counted at completion, that have encountered an instruction cache miss. +#109,v,g,n,n,PM_GRP_MRK,Group marked in IDU +##00014 +A group was sampled (marked). The group is called a marked group. One instruction within the group is tagged for detailed monitoring. The sampled instruction is called a marked instructions. Events associated with the marked instruction are annotated with the marked term. 
+#110,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect +##230E0 +A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict). +#111,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect +##230E1 +A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target). +#112,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##220E6 +An instruction prefetch request has been made. +#113,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##220E7 +An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. +#114,v,g,n,n,PM_IERAT_XLATE_WR_LP,Large page translation written to ierat +##210C6 +An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. +#115,v,g,n,n,PM_IOPS_CMPL,Internal operations completed +##00001 +Number of internal operations that completed. +#116,v,g,n,n,PM_INST_DISP_ATTEMPT,Instructions dispatch attempted +##120E1 +Number of PowerPC Instructions dispatched (attempted, not filtered by success). +#117,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##220E4 +Cycles when at least one instruction was sent from the fetch unit to the decode unit. +#118,v,g,n,n,PM_INST_FROM_L2,Instruction fetched from L2 +##22086 +An instruction fetch group was fetched from L2.
Fetch Groups can contain up to 8 instructions +#119,v,g,n,n,PM_INST_FROM_L25_SHR,Instruction fetched from L2.5 shared +##22096 +An instruction fetch group was fetched with shared (T or SL) data from the L2 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions. +#120,v,g,n,n,PM_INST_FROM_L2MISS,Instruction fetched missed L2 +##2209B +An instruction fetch group was fetched from beyond the local L2. +#121,v,g,n,n,PM_INST_FROM_L3,Instruction fetched from L3 +##2208D +An instruction fetch group was fetched from the local L3. Fetch groups can contain up to 8 instructions +#122,v,g,n,n,PM_INST_FROM_L35_SHR,Instruction fetched from L3.5 shared +##2209D +An instruction fetch group was fetched with shared (S) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions +#123,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##800C1 +A SLB miss for an instruction fetch has occurred +#124,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##800C0 +A TLB miss for an Instruction Fetch has occurred +#125,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C30E4 +The data source information is valid,the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads. +#126,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##C70E7 +A request to prefetch data into the L1 was made +#127,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##230E7 +Cycles that a cache line was written to the instruction cache. +#128,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##730E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.
+#129,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##720E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#130,v,g,n,s,PM_L2SA_RCLD_DISP,L2 slice A RC load dispatch attempt +##701C0 +A Read/Claim dispatch for a Load was attempted +#131,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C0 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#132,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 slice A RC load dispatch attempt failed due to other reasons +##731E0 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#133,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 slice A RC load dispatch attempt failed due to all RC full +##721E0 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#134,v,g,n,s,PM_L2SA_RCST_DISP,L2 slice A RC store dispatch attempt +##702C0 +A Read/Claim dispatch for a Store was attempted. +#135,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C0 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#136,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 slice A RC store dispatch attempt failed due to other reasons +##732E0 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. 
+#137,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 slice A RC store dispatch attempt failed due to all RC full +##722E0 +A Read/Claim dispatch for a store failed because all RC machines are busy. +#138,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C0 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#139,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice A RC dispatch attempt failed due to all CO busy +##713C0 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#140,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##710C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#141,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##700C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. 
+#142,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##733E0 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B, and C. +#143,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##723E0 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#144,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##730E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#145,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##720E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#146,v,g,n,s,PM_L2SB_RCLD_DISP,L2 slice B RC load dispatch attempt +##701C1 +A Read/Claim dispatch for a Load was attempted +#147,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C1 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#148,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 slice B RC load dispatch attempt failed due to other reasons +##731E1 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. 
+#149,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 slice B RC load dispatch attempt failed due to all RC full +##721E1 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#150,v,g,n,s,PM_L2SB_RCST_DISP,L2 slice B RC store dispatch attempt +##702C1 +A Read/Claim dispatch for a Store was attempted. +#151,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C1 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#152,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 slice B RC store dispatch attempt failed due to other reasons +##732E1 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#153,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 slice B RC store dispatch attempt failed due to all RC full +##722E2 +A Read/Claim dispatch for a store failed because all RC machines are busy. +#154,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C1 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. 
+#155,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice B RC dispatch attempt failed due to all CO busy +##713C1 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#156,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##710C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#157,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##700C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#158,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##733E1 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C. +#159,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##723E1 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#160,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##730E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. 
+#161,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##720E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#162,v,g,n,s,PM_L2SC_RCLD_DISP,L2 slice C RC load dispatch attempt +##701C2 +A Read/Claim dispatch for a Load was attempted +#163,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C2 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#164,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 slice C RC load dispatch attempt failed due to other reasons +##731E2 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#165,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 slice C RC load dispatch attempt failed due to all RC full +##721E2 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#166,v,g,n,s,PM_L2SC_RCST_DISP,L2 slice C RC store dispatch attempt +##702C2 +A Read/Claim dispatch for a Store was attempted. +#167,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C2 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#168,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 slice C RC store dispatch attempt failed due to other reasons +##732E2 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. 
+#169,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 slice C RC store dispatch attempt failed due to all RC full +##722E1 +A Read/Claim dispatch for a store failed because all RC machines are busy. +#170,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C2 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#171,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice C RC dispatch attempt failed due to all CO busy +##713C2 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#172,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##710C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#173,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##700C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. 
+#174,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##733E2 +A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C. +#175,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##723E2 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#176,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##C50C3 +A request to prefetch data into L2 was made +#177,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy +##721E3 +Cycles All Castin/Castout machines are busy. +#178,v,g,n,s,PM_L3SA_HIT,L3 slice A hits +##711C3 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice +#179,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid +##730E3 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. +#180,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG +##720E3 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#181,v,g,n,s,PM_L3SA_REF,L3 slice A references +##701C3 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice +#182,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid +##710C3 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). 
+#183,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries +##731E3 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#184,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy +##721E4 +Cycles All Castin/Castout machines are busy. +#185,v,g,n,s,PM_L3SB_HIT,L3 slice B hits +##711C4 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice +#186,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid +##730E4 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. +#187,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG +##720E4 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#188,v,g,n,s,PM_L3SB_REF,L3 slice B references +##701C4 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice +#189,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid +##710C4 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#190,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries +##731E4 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#191,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy +##721E5 +Cycles All Castin/Castout machines are busy. +#192,v,g,n,s,PM_L3SC_HIT,L3 slice C hits +##711C5 +Number of attempts made by this chip cores that resulted in an L3 hit. 
Reported per L3 Slice +#193,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid +##730E5 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point. +#194,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG +##720E5 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#195,v,g,n,s,PM_L3SC_REF,L3 slice C references +##701C5 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice. +#196,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid +##710C5 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#197,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries +##731E5 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#198,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##820E7 +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#199,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C10C2 +Load references that miss the Level 1 Data cache, by unit 0. +#200,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C10C5,C10C6 +Load references that miss the Level 1 Data cache, by unit 1. +#201,v,g,n,n,PM_LD_REF_L1,L1 D cache load references +##C10A8 +Load references to the Level 1 Data Cache. Combined unit 0 + 1. +#202,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C10C0 +Load references to Level 1 Data Cache, by unit 0. 
+#203,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction +##230E3 +The target address of a branch instruction was predicted. +#204,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##100C6 +The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#205,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject +##C20E1 +Total cycles the Load Store Unit 0 is busy rejecting instructions. +#206,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##800C2 +Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. +#207,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C00C2 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#208,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ lhs flushes +##C00C3 +A store was flushed by unit 0 because younger load hits and older store that is already in the SRQ or in the same group. +#209,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C00C0 +A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1) +#210,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C00C1 +A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary). +#211,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##C50C0 +A floating point load was executed by LSU0 +#212,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads +##C50C1 +A non-cacheable load was executed by unit 0. 
+#213,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C40C3 +Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. +#214,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C40C1 +Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. +#215,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C40C2 +Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. +#216,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ lhs rejects +##C40C0 +Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. +#217,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C60E1 +Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load hits L1 but becomes a store forward, then it's not treated as a load miss.
+#218,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject +##C20E5 +Total cycles the Load Store Unit 1 is busy rejecting instructions. +#219,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##800C6 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#220,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C00C6 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#221,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ lhs flushes +##C00C7 +A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. +#222,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C00C4 +A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1). +#223,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C00C5 +A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary) +#224,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##C50C4 +A floating point load was executed by LSU1 +#225,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads +##C50C5 +A non-cacheable load was executed by Unit 1. +#226,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C40C7 +Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. +#227,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C40C5 +Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected.
+#228,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C40C6 +Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. +#229,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ lhs rejects +##C40C4 +Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. +#230,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C60E5 +Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load hits L1 but becomes a store forward, then it's not treated as a load miss. +#231,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##110C5 +A flush was initiated by the Load Store Unit +#232,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full +##320E7 +This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#233,u,g,n,n,PM_LSU_FLUSH_SRQ,SRQ flushes +##C0090 +A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. Combined Unit 0 + 1.
+#234,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full +##330E0 +This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#235,v,g,n,n,PM_LSU_FLUSH_ULD,LRQ unaligned load flushes +##C0088 +A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1). Combined Unit 0 + 1. +#236,v,g,n,n,PM_LSU_LDF,LSU executed Floating Point load instruction +##C50A8 +LSU executed Floating Point load instruction. Combined Unit 0 + 1. +#237,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C30E7 +The Load Miss Queue was full. +#238,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C70E5 +A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#239,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C30E6 +The first entry in the LMQ was allocated. +#240,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C30E5 +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ has eight entries that are allocated FIFO +#241,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##110C2 +Cycles when the LRQ is full. +#242,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C60E7 +LRQ slot zero was allocated +#243,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C60E6 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each). +#244,v,g,n,n,PM_LSU_REJECT_ERAT_MISS,LSU reject due to ERAT miss +##C4090 +Total cycles the Load Store Unit is busy rejecting instructions due to an ERAT miss. Combined unit 0 + 1. Requests that miss the Derat are rejected and retried until the request hits in the Erat.
+#245,v,g,n,n,PM_LSU_REJECT_SRQ,LSU SRQ lhs rejects +##C4088 +Total cycles the Load Store Unit is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. Combined Unit 0 + 1. +#246,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##110C3 +Cycles the Store Request Queue is full. +#247,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C20E7 +SRQ Slot zero was allocated +#248,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C20E6 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each). +#249,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##830E5 +Cycles that a sync instruction is active in the Store Request Queue. +#250,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch +##130E0 +Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response. +#251,c,g,n,n,PM_MEM_FAST_PATH_RD_DISP,Fast path memory read dispatched +##731E6 +Fast path memory read dispatched +#252,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer +##210C7 +A prefetch buffer entry (line) is allocated but the request is not a demand fetch. +#253,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed +##726E6 +A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
+#254,v,g,n,s,PM_MEM_NONSPEC_RD_CANCEL,Non speculative memory read cancelled +##711C6 +A non-speculative read was cancelled because the combined response indicated it was sourced from another L2 or L3. This event is sent from the Memory Controller clock domain and must be scaled accordingly +#255,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed +##736E6 +A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly +#256,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched +##704C6 +Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#257,v,g,n,n,PM_MEM_PWQ_DISP_Q2or3,Memory partial-write queue dispatched to Write Queue 2 or 3 +##734E6 +Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#258,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed +##724E6 +Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#259,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered +##714C6 +Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#260,v,g,n,n,PM_MEM_RQ_DISP_Q0to3,Memory read queue dispatched to queues 0-3 +##702C6 +A memory operation was dispatched to read queue 0,1,2, or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#261,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched +##701C6 +A memory read was dispatched. This event is sent from the Memory Controller clock domain and must be scaled accordingly.
+#262,v,g,n,n,PM_MEM_RQ_DISP_Q4to7,Memory read queue dispatched to queues 4-7 +##712C6 +A memory operation was dispatched to read queue 4,5,6 or 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#263,v,g,n,n,PM_MEM_RQ_DISP_Q8to11,Memory read queue dispatched to queues 8-11 +##722E6 +A memory operation was dispatched to read queue 8,9,10 or 11. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#264,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read cancelled +##721E6 +Speculative memory read cancelled (i.e. cresp = sourced by L2/L3) +#265,v,g,n,n,PM_MEM_WQ_DISP_Q0to7,Memory write queue dispatched to queues 0-7 +##723E6 +A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#266,v,g,n,n,PM_MEM_WQ_DISP_Q8to15,Memory write queue dispatched to queues 8-15 +##733E6 +A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#267,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush +##713C6 +A memory dclaim or flush operation was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#268,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write +##703C6 +A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#269,v,g,n,n,PM_MRK_DATA_FROM_L2,Marked data loaded from L2 +##C7087 +The processor's Data Cache was reloaded from the local L2 due to a marked load. 
+#270,v,g,n,n,PM_MRK_DATA_FROM_L25_SHR,Marked data loaded from L2.5 shared +##C7097 +The processor's Data Cache was reloaded with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a marked load. +#271,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD,Marked data loaded from L2.75 modified +##C70A3 +The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a marked load. +#272,v,g,n,n,PM_MRK_DATA_FROM_L3,Marked data loaded from L3 +##C708E +The processor's Data Cache was reloaded from the local L3 due to a marked load. +#273,v,g,n,n,PM_MRK_DATA_FROM_L35_SHR,Marked data loaded from L3.5 shared +##C709E +The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on the same module as this processor is located due to a marked load. +#274,v,g,n,n,PM_MRK_DATA_FROM_L375_MOD,Marked data loaded from L3.75 modified +##C70A7 +The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on a different module than this processor is located due to a marked load. +#275,v,g,n,n,PM_MRK_DATA_FROM_RMEM,Marked data loaded from remote memory +##C70A1 +The processor's Data Cache was reloaded due to a marked load from memory attached to a different module than this processor is located on. +#276,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses +##C50C7 +A Data SLB miss was caused by a marked instruction. +#277,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses +##C50C6,C60E0 +Data TLB references by a marked instruction that missed the TLB (all page sizes). +#278,v,g,n,n,PM_MRK_DTLB_MISS_4K,Marked Data TLB misses for 4K page +##C608D +Data TLB references to 4KB pages by a marked instruction that missed the TLB. Page size is determined at TLB reload time. +#279,v,g,n,n,PM_MRK_DTLB_REF,Marked Data TLB reference +##C60E4 +Total number of Data TLB references by a marked instruction for all page sizes.
Page size is determined at TLB reload time. +#280,v,g,n,n,PM_MRK_DTLB_REF_4K,Marked Data TLB reference for 4K page +##C6086 +Data TLB references by a marked instruction for 4KB pages. +#281,v,g,n,n,PM_MRK_GRP_DISP,Marked group dispatched +##00002 +A group containing a sampled instruction was dispatched +#282,v,g,n,n,PM_MRK_GRP_ISSUED,Marked group issued +##00015 +A sampled instruction was issued. +#283,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##820E2 +A DL1 reload occurred due to marked load +#284,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C70E4 +The source information is valid and is for a marked load +#285,v,g,n,n,PM_MRK_LD_MISS_L1,Marked L1 D cache load misses +##82088 +Marked L1 D cache load misses +#286,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 marked L1 D cache load misses +##820E0 +Load references that miss the Level 1 Data cache, by LSU0. +#287,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 marked L1 D cache load misses +##820E4 +Load references that miss the Level 1 Data cache, by LSU1. +#288,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##810C2 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#289,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ lhs flushes +##810C3 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#290,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##810C1 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#291,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##810C0 +A marked store was flushed from unit 0 because it was unaligned +#292,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##810C6 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#293,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ lhs flushes +##810C7 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#294,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##810C4 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#295,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##810C5 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#296,v,g,n,n,PM_MRK_LSU_FLUSH_ULD,Marked unaligned load flushes +##810A8 +A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#297,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C70E6 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#298,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##820E6 +A marked stcx (stwcx or stdcx) failed +#299,v,g,n,n,PM_MRK_ST_CMPL,Marked store instruction completed +##00003 +A sampled store has completed (data home) +#300,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##820E3 +A marked store missed the dcache +#301,v,g,n,n,PM_PMC4_OVERFLOW,PMC4 Overflow +##0000A +Overflows from PMC4 are 
counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. +#302,v,g,n,n,PM_PMC5_OVERFLOW,PMC5 Overflow +##0001A +Overflows from PMC5 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. +#303,v,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of PowerPC instructions that completed. +#304,v,g,n,n,PM_PTEG_FROM_L2,PTEG loaded from L2 +##83087 +A Page Table Entry was loaded into the TLB from the local L2 due to a demand load +#305,v,g,n,n,PM_PTEG_FROM_L25_SHR,PTEG loaded from L2.5 shared +##83097 +A Page Table Entry was loaded into the TLB with shared (T or SL) data from the L2 of a chip on the same module as this processor is located due to a demand load. +#306,v,g,n,n,PM_PTEG_FROM_L275_MOD,PTEG loaded from L2.75 modified +##830A3 +A Page Table Entry was loaded into the TLB with modified (M) data from the L2 on a different module than this processor is located due to a demand load. +#307,v,g,n,n,PM_PTEG_FROM_L3,PTEG loaded from L3 +##8308E +A Page Table Entry was loaded into the TLB from the local L3 due to a demand load. +#308,v,g,n,n,PM_PTEG_FROM_L35_SHR,PTEG loaded from L3.5 shared +##8309E +A Page Table Entry was loaded into the TLB with shared (S) data from the L3 of a chip on the same module as this processor is located, due to a demand load. +#309,v,g,n,n,PM_PTEG_FROM_L375_MOD,PTEG loaded from L3.75 modified +##830A7 +A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on a different module than this processor is located, due to a demand load. +#310,v,g,n,n,PM_PTEG_FROM_RMEM,PTEG loaded from remote memory +##830A1 +A Page Table Entry was loaded into the TLB from memory attached to a different module than this processor is located on.
+#311,v,g,n,n,PM_PTEG_RELOAD_VALID,PTEG reload valid +##830E4 +A Page Table Entry was loaded into the TLB. +#312,v,g,n,n,PM_RUN_CYC,Run cycles +##00005 +Processor Cycles gated by the run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. Gating by the run latch filters out the idle loop. +#313,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full +##720E6 +The memory controller A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#314,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full +##730E6 +A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#315,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue +##707C6 +A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#316,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue +##717C6 +A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#317,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full +##700C6 +A snoop request for a read from memory was retried because the read queues were full. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#318,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue +##705C6 +A snoop request for a read from memory was retried because it matched the cache line of an active read. The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#319,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue +##715C6 +A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#320,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision +##725E6 +Snoop retry due to one ahead collision +#321,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE +##800C3 +A tlbie was snooped from another processor. +#322,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop write retry due to write queue full +##710C6 +A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#323,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue +##706C6 +A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active read.
This event is sent from the Memory Controller clock domain and must be scaled accordingly +#324,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue +##716C6 +A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#325,v,g,n,n,PM_STCX_FAIL,STCX failed +##820E1 +A stcx (stwcx or stdcx) failed +#326,v,g,n,n,PM_STCX_PASS,Stcx passes +##820E5 +A stcx (stwcx or stdcx) instruction was successful +#327,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C10C3 +A store missed the dcache. Combined Unit 0 + 1. +#328,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C10C1 +Store references to the Data Cache by LSU0. +#329,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C10C4 +Store references to the Data Cache by LSU1. +#330,v,g,n,n,PM_SUSPENDED,Suspended +##00000 +The counter is suspended (does not count). +#331,u,g,n,n,PM_TB_BIT_TRANS,Time Base bit transition +##00018 +When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 +#332,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses +##410C7 +Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used. +#333,v,g,n,s,PM_THRD_ONE_RUN_CYC,One of the threads in run cycles +##0000B +At least one thread has set its run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. This event does not respect FCWAIT. 
+#334,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 +##420E0 +Cycles this thread was running at priority level 1. Priority level 1 is the lowest and indicates the thread is sleeping. +#335,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 +##420E1 +Cycles this thread was running at priority level 2. +#336,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 +##420E2 +Cycles this thread was running at priority level 3. +#337,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 +##420E3 +Cycles this thread was running at priority level 4. +#338,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 +##420E4 +Cycles this thread was running at priority level 5. +#339,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 +##420E5 +Cycles this thread was running at priority level 6. +#340,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 +##420E6 +Cycles this thread was running at priority level 7. +#341,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference +##430E3 +Cycles when this thread's priority is equal to the other thread's priority. +#342,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 +##430E4 +Cycles when this thread's priority is higher than the other thread's priority by 1 or 2. +#343,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 +##430E5 +Cycles when this thread's priority is higher than the other thread's priority by 3 or 4. +#344,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 +##430E6 +Cycles when this thread's priority is higher than the other thread's priority by 5 or 6. +#345,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 +##430E2 +Cycles when this thread's priority is lower than the other thread's priority by 1 or 2. 
+#346,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 +##430E1 +Cycles when this thread's priority is lower than the other thread's priority by 3 or 4. +#347,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 +##430E0 +Cycles when this thread's priority is lower than the other thread's priority by 5 or 6. +#348,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty +##410C2 +Thread selection was overridden because one thread's CLB was empty. +#349,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance +##410C4 +Thread selection was overridden because of a GCT imbalance. +#350,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds +##410C5 +Thread selection was overridden because of an ISU hold. +#351,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses +##410C3 +Thread selection was overridden because one thread had an L2 miss pending. +#352,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 +##410C0 +Thread selection picked thread 0 for decode. +#353,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 +##410C1 +Thread selection picked thread 1 for decode. +#354,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected +##330E7 +A hung thread was detected +#355,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch +##130E4 +Cycles a TLBIE instruction was held at dispatch. +#356,v,g,n,n,PM_TLB_MISS,TLB misses +##80088 +Total of Data TLB misses + Instruction TLB misses +#357,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##100C2 +The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#358,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction +##230E2 +A conditional branch instruction was predicted as taken or not taken.
+#359,v,g,n,n,PM_MEM_RQ_DISP_Q12to15,Memory read queue dispatched to queues 12-15 +##732E6 +A memory operation was dispatched to read queue 12,13,14 or 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#360,v,g,n,n,PM_MEM_RQ_DISP_Q16to19,Memory read queue dispatched to queues 16-19 +##727E6 +A memory operation was dispatched to read queue 16,17,18 or 19. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#361,v,g,n,n,PM_SNOOP_RETRY_AB_COLLISION,Snoop retry due to a b collision +##735E6 +Snoop retry due to a b collision + +$$$$$$$$ + +{ counter 2 } +#0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB +##400C0 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#1,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##400C1 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#2,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##400C2 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. 
These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#3,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##400C3 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#4,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##400C4 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#5,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##400C5 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#6,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##400C6 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. 
These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#7,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full +##100C5 +Cycles when the issue queue that feeds the branch unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#8,v,g,n,n,PM_BR_ISSUED,Branches issued +##230E4 +A branch instruction was issued to the branch unit. A branch that was incorrectly predicted may issue and execute multiple times. +#9,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##230E5 +A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overridden by a flush of an older instruction. +#10,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##230E6 +A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction. +#11,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction +##23087,230E3 +The target address of a branch instruction was predicted. +#12,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty +##410C6 +Cycles when both threads' CLBs are completely empty. +#13,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full +##220E5 +Cycles when both threads' CLBs are full. +#14,v,g,n,n,PM_CMPLU_STALL_DCACHE_MISS,Completion stall caused by D cache miss +##1109A +Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a Data Cache Miss.
Data Cache Miss has higher priority than any other Load/Store delay, so if an instruction encounters multiple delays only the Data Cache Miss will be reported and the entire delay period will be charged to Data Cache Miss. This is a subset of PM_CMPLU_STALL_LSU. +#15,v,g,n,n,PM_CMPLU_STALL_FDIV,Completion stall caused by FDIV or FQRT instruction +##1109B +Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point divide or square root instruction. This is a subset of PM_CMPLU_STALL_FPU. +#16,v,g,n,n,PM_CMPLU_STALL_FXU,Completion stall caused by FXU instruction +##11099 +Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point instruction. +#17,v,g,n,n,PM_CMPLU_STALL_LSU,Completion stall caused by LSU instruction +##11098 +Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a load/store instruction. +#18,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##110C1 +The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#19,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##100C4 +The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#20,v,g,n,s,PM_CYC,Processor cycles +##0000F +Processor cycles +#21,v,g,n,n,PM_DATA_FROM_L25_MOD,Data loaded from L2.5 modified +##C3097 +The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load. 
+#22,v,g,n,n,PM_DATA_FROM_L35_MOD,Data loaded from L3.5 modified +##C309E +The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load. +#23,v,g,n,n,PM_DATA_FROM_LMEM,Data loaded from local memory +##C3087 +The processor's Data Cache was reloaded from memory attached to the same module this proccessor is located on. +#24,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##800C7 +Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#25,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C10C7 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#26,v,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of prefetch streams +##C50C2 +A new prefetch stream was detected but no more stream entries were available. +#27,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start +##830E6 +A prefetch stream was started using the DST instruction. +#28,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##830E7 +A new Prefetch Stream was allocated. +#29,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##800C5 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve. +#30,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##800C4,C20E0 +Data TLB misses, all page sizes. +#31,v,g,n,n,PM_DTLB_MISS_64K,Data TLB miss for 64K page +##C208D +Data TLB references to 64KB pages that missed the TLB. Page size is determined at TLB reload time. +#32,v,g,n,n,PM_DTLB_REF,Data TLB references +##C20E4 +Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time. +#33,v,g,n,n,PM_DTLB_REF_64K,Data TLB reference for 64K page +##C2086 +Data TLB references for 64KB pages. Includes hits + misses. 
+#34,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##130E3 +Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked. +#35,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##130E7 +Cycles when an interrupt due to an external exception is pending but external exceptions were masked. +#36,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued +##700C7 +Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled. +#37,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##710C7 +Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#38,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued +##720E7 +A DCLAIM command was issued. Each chip reports its own counts. 
The signal is delivered at FBC speed and the count must be scaled accordingly. +#39,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried +##730E7 +A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#40,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty +##722E7 +Fabric cycles when the Next Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#41,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty +##721E7 +Fabric cycles when the Vertical Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#42,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty +##702C7 +Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#43,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty +##712C7 +Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#44,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty +##701C7 +Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#45,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty +##711C7 +Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#46,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination +##703C7 +Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar.
The signal is delivered at FBC speed and the count must be scaled. +#47,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first +##713C7 +Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled. +#48,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination +##723E7 +Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled accordingly. +#49,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first +##733E7 +Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled accordingly. +#50,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty +##731E7 +Fabric cycles when the Middle Bypass sidecar is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#51,v,g,n,n,PM_FLUSH,Flushes +##110C7 +Flushes occurred including LSU and Branch flushes. +#52,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##110C6 +A flush was caused by a branch mispredict. +#53,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance +##330E3 +This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#54,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation +##330E2 +This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. 
This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#55,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync +##330E1 +This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes. +#56,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##100C1 +The Floating Point Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#57,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction +##000C3 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#58,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##020E0 +FPU0 has encountered a denormalized operand. +#59,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##000C0 +FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv., fdivs. +#60,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##010C2 +FPU0 has executed an estimate instruction. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#61,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##010C3 +FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. +#62,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##000C1 +The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#63,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##010C0 +FPU0 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.
+#64,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##030E0 +FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#65,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##010C1 +FPU0 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#66,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##000C2 +FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#67,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##100C3 +The issue queue for FPU0 cannot accept any more instruction. Dispatch to this issue queue is stopped. +#68,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##020E3 +FPU0 has executed a single precision instruction. +#69,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##020E1 +FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). +#70,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##020E2 +FPU0 has executed a Floating Point Store instruction. +#71,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction +##000C7 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#72,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##020E4 +FPU1 has encountered a denormalized operand. +#73,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##000C4 +FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#74,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##010C6 +FPU1 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. 
+#75,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##010C7 +FPU1 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. +#76,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##000C5 +The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#77,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executed FMOV or FEST instructions +##010C4 +FPU1 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. +#78,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##010C5 +FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#79,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##000C6 +FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#80,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##100C7 +The issue queue for FPU1 cannot accept any more instructions. Dispatch to this issue queue is stopped. +#81,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##020E7 +FPU1 has executed a single precision instruction. +#82,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##020E5 +FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). +#83,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##020E6 +FPU1 has executed a Floating Point Store instruction. +#84,v,g,n,n,PM_FPU_FMA,FPU executed multiply-add instruction +##00088 +This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs.
Combined Unit 0 + Unit 1. +#85,v,g,n,n,PM_FPU_FRSP_FCONV,FPU executed FRSP or FCONV instructions +##010A8 +The floating point unit has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1. +#86,v,g,n,n,PM_FPU_FSQRT,FPU executed FSQRT instruction +##00090 +The floating point unit has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1. +#87,v,g,n,n,PM_FPU_STALL3,FPU stalled in pipe3 +##02088 +FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. Combined Unit 0 + Unit 1. +#88,v,g,n,n,PM_FPU_STF,FPU executed store instruction +##02090 +FPU has executed a store instruction. Combined Unit 0 + Unit 1. +#89,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##110C0 +The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#90,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##110C4 +The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#91,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##130E2 +The Fixed Point unit 0 finished an instruction and produced a result. Instructions that finish may not necessary complete. +#92,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##130E6 +The Fixed Point unit 1 finished an instruction and produced a result. Instructions that finish may not necessary complete. +#93,u,g,n,n,PM_FXU_BUSY,FXU busy +##00012 +Cycles when both FXU0 and FXU1 are busy. 
+#94,v,g,n,n,PM_MRK_FXU_FIN,Marked instruction FXU processing finished +##00014 +One of the Fixed Point Units finished a marked instruction. Instructions that finish may not necessarily complete. +#95,v,g,n,s,PM_GCT_EMPTY_CYC,Cycles GCT empty +##00004 +The Global Completion Table is completely empty. +#96,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##100C0 +The Global Completion Table is completely full. +#97,v,g,n,n,PM_GCT_NOSLOT_IC_MISS,No slot in GCT caused by I cache miss +##1009C +Cycles when the Global Completion Table has no slots from this thread because of an Instruction Cache miss. +#98,v,g,n,s,PM_GCT_USAGE_60to79_CYC,Cycles GCT 60-79% full +##0001F +Cycles when the Global Completion Table has between 60% and 79% of its slots used. The GCT has 20 entries shared between threads. +#99,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##130E5 +The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#100,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##120E6 +Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count. +#101,c,g,n,n,PM_GRP_IC_MISS_BR_REDIR_NONSPEC,Group experienced non-speculative I cache miss or branch redirect +##120E5 +Group experienced non-speculative I cache miss or branch redirect +#102,v,g,n,n,PM_GRP_DISP,Group dispatches +##00002 +A group was dispatched +#103,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##130E1 +A scoreboard operation on a non-renamed resource has blocked dispatch. +#104,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##120E4 +A group that previously attempted dispatch was rejected. +#105,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##120E3 +A group is available for dispatch.
This does not mean it was successfully dispatched. +#106,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss +##120E7 +Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count. +#107,v,g,n,n,PM_HV_CYC,Hypervisor Cycles +##0000B +Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0) +#108,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect +##230E0 +A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict). +#109,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect +##230E1 +A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target). +#110,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##220E6 +An instruction prefetch request has been made. +#111,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##220E7 +An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. +#112,v,g,n,n,PM_IERAT_XLATE_WR_LP,Large page translation written to ierat +##210C6 +An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. +#113,v,g,n,n,PM_IOPS_CMPL,Internal operations completed +##00001 +Number of internal operations that completed. +#114,v,g,n,n,PM_INST_DISP_ATTEMPT,Instructions dispatch attempted +##120E1 +Number of PowerPC Instructions dispatched (attempted, not filtered by success).
+#115,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##220E4 +Cycles when at least one instruction was sent from the fetch unit to the decode unit. +#116,v,g,n,n,PM_INST_FROM_L1,Instruction fetched from L1 +##2208D +An instruction fetch group was fetched from L1. Fetch Groups can contain up to 8 instructions +#117,v,g,n,n,PM_INST_FROM_L25_MOD,Instruction fetched from L2.5 modified +##22096 +An instruction fetch group was fetched with modified (M) data from the L2 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions. +#118,v,g,n,n,PM_INST_FROM_L35_MOD,Instruction fetched from L3.5 modified +##2209D +An instruction fetch group was fetched with modified (M) data from the L3 of a chip on the same module as this processor is located. Fetch groups can contain up to 8 instructions +#119,v,g,n,n,PM_INST_FROM_LMEM,Instruction fetched from local memory +##22086 +An instruction fetch group was fetched from memory attached to the same module this processor is located on. Fetch groups can contain up to 8 instructions +#120,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##800C1 +A SLB miss for an instruction fetch has occurred +#121,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##800C0 +A TLB miss for an Instruction Fetch has occurred +#122,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C30E4 +The data source information is valid, the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads. +#123,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##C70E7 +A request to prefetch data into the L1 was made +#124,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##230E7 +Cycles that a cache line was written to the instruction cache.
+#125,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##730E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#126,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##720E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#127,v,g,n,s,PM_L2SA_RCLD_DISP,L2 slice A RC load dispatch attempt +##701C0 +A Read/Claim dispatch for a Load was attempted +#128,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C0 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#129,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 slice A RC load dispatch attempt failed due to other reasons +##731E0 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#130,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 slice A RC load dispatch attempt failed due to all RC full +##721E0 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#131,v,g,n,s,PM_L2SA_RCST_DISP,L2 slice A RC store dispatch attempt +##702C0 +A Read/Claim dispatch for a Store was attempted. +#132,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C0 +A Read/Claim dispatch for a store failed because of an address conflict. 
Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#133,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 slice A RC store dispatch attempt failed due to other reasons +##732E0 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#134,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 slice A RC store dispatch attempt failed due to all RC full +##722E0 +A Read/Claim dispatch for a store failed because all RC machines are busy. +#135,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C0 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#136,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice A RC dispatch attempt failed due to all CO busy +##713C0 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#137,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##710C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. 
+#138,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##700C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#139,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##733E0 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B, and C. +#140,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##723E0 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#141,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##730E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#142,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##720E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#143,v,g,n,s,PM_L2SB_RCLD_DISP,L2 slice B RC load dispatch attempt +##701C1 +A Read/Claim dispatch for a Load was attempted +#144,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C1 +A Read/Claim dispatch for a load failed because of an address conflict. 
Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#145,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 slice B RC load dispatch attempt failed due to other reasons +##731E1 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#146,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 slice B RC load dispatch attempt failed due to all RC full +##721E1 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#147,v,g,n,s,PM_L2SB_RCST_DISP,L2 slice B RC store dispatch attempt +##702C1 +A Read/Claim dispatch for a Store was attempted. +#148,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C1 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#149,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 slice B RC store dispatch attempt failed due to other reasons +##732E1 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#150,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 slice B RC store dispatch attempt failed due to all RC full +##722E2 +A Read/Claim dispatch for a store failed because all RC machines are busy. +#151,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C1 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). 
If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#152,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice B RC dispatch attempt failed due to all CO busy +##713C1 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#153,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##710C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#154,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##700C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#155,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##733E1 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C. +#156,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##723E1 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#157,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##730E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. 
This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#158,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##720E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#159,v,g,n,s,PM_L2SC_RCLD_DISP,L2 slice C RC load dispatch attempt +##701C2 +A Read/Claim dispatch for a Load was attempted +#160,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C2 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#161,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 slice C RC load dispatch attempt failed due to other reasons +##731E2 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#162,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 slice C RC load dispatch attempt failed due to all RC full +##721E2 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#163,v,g,n,s,PM_L2SC_RCST_DISP,L2 slice C RC store dispatch attempt +##702C2 +A Read/Claim dispatch for a Store was attempted. +#164,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C2 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. 
+#165,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 slice C RC store dispatch attempt failed due to other reasons +##732E2 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#166,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 slice C RC store dispatch attempt failed due to all RC full +##722E1 +A Read/Claim dispatch for a store failed because all RC machines are busy. +#167,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C2 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#168,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice C RC dispatch attempt failed due to all CO busy +##713C2 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#169,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##710C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. 
+#170,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##700C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#171,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##733E2 +A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C. +#172,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##723E2 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#173,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##C50C3 +A request to prefetch data into L2 was made +#174,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy +##721E3 +Cycles All Castin/Castout machines are busy. +#175,v,g,n,s,PM_L3SA_HIT,L3 slice A hits +##711C3 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice +#176,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid +##730E3 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. +#177,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG +##720E3 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. 
+#178,v,g,n,s,PM_L3SA_REF,L3 slice A references +##701C3 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice +#179,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid +##710C3 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#180,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries +##731E3 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#181,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy +##721E4 +Cycles All Castin/Castout machines are busy. +#182,v,g,n,s,PM_L3SB_HIT,L3 slice B hits +##711C4 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice +#183,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid +##730E4 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. +#184,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG +##720E4 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#185,v,g,n,s,PM_L3SB_REF,L3 slice B references +##701C4 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice +#186,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid +##710C4 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). 
+#187,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries +##731E4 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#188,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy +##721E5 +Cycles All Castin/Castout machines are busy. +#189,v,g,n,s,PM_L3SC_HIT,L3 slice C hits +##711C5 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice +#190,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid +##730E5 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point. +#191,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG +##720E5 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#192,v,g,n,s,PM_L3SC_REF,L3 slice C references +##701C5 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice. +#193,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid +##710C5 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#194,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries +##731E5 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#195,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##820E7 +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#196,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C10C2 +Load references that miss the Level 1 Data cache, by unit 0. 
+#197,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C10C5 +Load references that miss the Level 1 Data cache, by unit 1. +#198,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C10C0 +Load references to Level 1 Data Cache, by unit 0. +#199,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C10C6 +Load references that miss the Level 1 Data cache, by unit 1. +#200,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##100C6 +The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#201,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject +##C20E1 +Total cycles the Load Store Unit 0 is busy rejecting instructions. +#202,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##800C2 +Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. +#203,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C00C2 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#204,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ lhs flushes +##C00C3 +A store was flushed by unit 0 because younger load hits and older store that is already in the SRQ or in the same group. +#205,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C00C0 +A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1) +#206,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C00C1 +A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary). 
+#207,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##C50C0 +A floating point load was executed by LSU0 +#208,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads +##C50C1 +A non-cacheable load was executed by unit 0. +#209,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C40C3 +Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. +#210,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C40C1 +Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. +#211,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C40C2 +Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. +#212,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ lhs rejects +##C40C0 +Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. +#213,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C60E1 +Data from a store instruction was forwarded to a load on unit 0. 
A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load hits L1 but becomes a store forward, then it's not treated as a load miss. +#214,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject +##C20E5 +Total cycles the Load Store Unit 1 is busy rejecting instructions. +#215,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##800C6 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#216,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C00C6 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#217,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ lhs flushes +##C00C7 +A store was flushed because younger load hits an older store that is already in the SRQ or in the same group. +#218,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C00C4 +A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1). +#219,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C00C5 +A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary) +#220,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##C50C4 +A floating point load was executed by LSU1 +#221,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads +##C50C5 +A non-cacheable load was executed by Unit 1. +#222,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C40C7 +Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat.
+#223,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C40C5 +Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. +#224,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C40C6 +Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction the requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. +#225,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ lhs rejects +##C40C4 +Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. +#226,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C60E5 +Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. +#227,v,g,n,n,PM_LSU_BUSY_REJECT,LSU busy due to reject +##C2088 +Total cycles the Load Store Unit is busy rejecting instructions. Combined unit 0 + 1. +#228,v,g,n,n,PM_LSU_DERAT_MISS,DERAT misses +##80090 +Total D-ERAT Misses. Requests that miss the Derat are rejected and retried until the request hits in the Erat. 
This may result in multiple erat misses for the same instruction. Combined Unit 0 + 1. +#229,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##110C5 +A flush was initiated by the Load Store Unit +#230,v,g,n,n,PM_LSU_FLUSH_LRQ,LRQ flushes +##C0090 +A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. Combined Units 0 and 1. +#231,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full +##320E7 +This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#232,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full +##330E0 +This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#233,v,g,n,n,PM_LSU_FLUSH_UST,SRQ unaligned store flushes +##C0088 +A store was flushed because it was unaligned (crossed a 4K boundary). Combined Unit 0 + 1. +#234,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C30E7 +The Load Miss Queue was full. +#235,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C70E5 +A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#236,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C30E6 +The first entry in the LMQ was allocated. +#237,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C30E5 +This signal is asserted every cycle when the first entry in the LMQ is valid. 
The LMQ had eight entries that are allocated FIFO +#238,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty +##00015 +Cycles when both the LMQ and SRQ are empty (LSU is idle) +#239,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##110C2 +Cycles when the LRQ is full. +#240,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C60E7 +LRQ slot zero was allocated +#241,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C60E6 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each). +#242,v,g,n,n,PM_LSU_REJECT_LMQ_FULL,LSU reject due to LMQ full or missed data coming +##C4088 +Total cycles the Load Store Unit is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all the eight entries are full, subsequent load instructions are rejected. Combined unit 0 + 1. +#243,v,g,n,n,PM_LSU_REJECT_RELOAD_CDF,LSU reject due to reload CDF or tag update collision +##C4090 +Total cycles the Load Store Unit is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. Combined Unit 0 + 1. +#244,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##110C3 +Cycles the Store Request Queue is full. +#245,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C20E7 +SRQ Slot zero was allocated +#246,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C20E6 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin.
In SMT mode the SRQ is split between the two threads (16 entries each). +#247,c,g,n,n,PM_LSU_SRQ_STFWD,SRQ store forwarded +##C6088 +Data from a store instruction was forwarded to a load. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load hits L1 but becomes a store forward, then it's not treated as a load miss. Combined Unit 0 + 1. +#248,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##830E5 +Cycles that a sync instruction is active in the Store Request Queue. +#249,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch +##130E0 +Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response. +#250,v,g,n,n,PM_MEM_PWQ_DISP_Q2or3,Memory partial-write queue dispatched to Write Queue 2 or 3 +##734E6 +Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#251,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer +##210C7 +A prefetch buffer entry (line) is allocated but the request is not a demand fetch. +#252,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed +##726E6 +A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#253,v,g,n,s,PM_MEM_NONSPEC_RD_CANCEL,Non speculative memory read cancelled +##711C6 +A non-speculative read was cancelled because the combined response indicated it was sourced from another L2 or L3.
This event is sent from the Memory Controller clock domain and must be scaled accordingly +#254,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed +##736E6 +A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly +#255,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched +##704C6 +Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#256,v,g,n,n,PM_MEM_RQ_DISP_Q0to3,Memory read queue dispatched to queues 0-3 +##702C6 +A memory operation was dispatched to read queue 0,1,2, or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#257,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed +##724E6 +Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#258,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered +##714C6 +Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#259,v,g,n,n,PM_MEM_RQ_DISP_Q4to7,Memory read queue dispatched to queues 4-7 +##712C6 +A memory operation was dispatched to read queue 4,5,6 or 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#260,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched +##701C6 +A memory read was dispatched. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#261,v,g,n,n,PM_MEM_RQ_DISP_Q8to11,Memory read queue dispatched to queues 8-11 +##722E6 +A memory operation was dispatched to read queue 8,9,10 or 11. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#262,v,g,n,n,PM_MEM_RQ_DISP_Q12to15,Memory read queue dispatched to queues 12-15 +##732E6 +A memory operation was dispatched to read queue 12,13,14 or 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#263,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read cancelled +##721E6 +Speculative memory read cancelled (i.e. cresp = sourced by L2/L3) +#264,v,g,n,n,PM_MEM_WQ_DISP_Q0to7,Memory write queue dispatched to queues 0-7 +##723E6 +A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#265,v,g,n,n,PM_MEM_WQ_DISP_Q8to15,Memory write queue dispatched to queues 8-15 +##733E6 +A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#266,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush +##713C6 +A memory dclaim or flush operation was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#267,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write +##703C6 +A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#268,v,g,n,n,PM_MRK_BRU_FIN,Marked instruction BRU processing finished +##00005 +The branch unit finished a marked instruction. Instructions that finish may not necessarily complete. +#269,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD,Marked data loaded from L2.5 modified +##C7097 +The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a marked load.
+#270,v,g,n,n,PM_MRK_DATA_FROM_L25_SHR_CYC,Marked load latency from L2.5 shared +##C70A2 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#271,v,g,n,n,PM_MRK_DATA_FROM_L275_SHR_CYC,Marked load latency from L2.75 shared +##C70A3 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#272,v,g,n,n,PM_MRK_DATA_FROM_L2_CYC,Marked load latency from L2 +##C70A0 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#273,v,g,n,n,PM_MRK_DATA_FROM_L35_MOD,Marked data loaded from L3.5 modified +##C709E +The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a marked load. +#274,v,g,n,n,PM_MRK_DATA_FROM_L35_SHR_CYC,Marked load latency from L3.5 shared +##C70A6 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#275,v,g,n,n,PM_MRK_DATA_FROM_L375_SHR_CYC,Marked load latency from L3.75 shared +##C70A7 +Cycles a marked load waited for data from this level of the storage system. 
Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#276,v,g,n,n,PM_MRK_DATA_FROM_L3_CYC,Marked load latency from L3 +##C70A4 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#277,v,g,n,n,PM_MRK_DATA_FROM_LMEM,Marked data loaded from local memory +##C7087 +The processor's Data Cache was reloaded due to a marked load from memory attached to the same module this processor is located on. +#278,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses +##C50C7 +A Data SLB miss was caused by a marked instruction. +#279,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses +##C50C6,C60E0 +Data TLB references by a marked instruction that missed the TLB (all page sizes). +#280,v,g,n,n,PM_MRK_DTLB_MISS_64K,Marked Data TLB misses for 64K page +##C608D +Data TLB references to 64KB pages by a marked instruction that missed the TLB. Page size is determined at TLB reload time. +#281,v,g,n,n,PM_MRK_DTLB_REF,Marked Data TLB reference +##C60E4 +Total number of Data TLB references by a marked instruction for all page sizes. Page size is determined at TLB reload time. +#282,v,g,n,n,PM_MRK_DTLB_REF_64K,Marked Data TLB reference for 64K page +##C6086 +Data TLB references by a marked instruction for 64KB pages. +#283,v,g,n,n,PM_MRK_GRP_BR_REDIR,Group experienced marked branch redirect +##12091 +A group containing a marked (sampled) instruction experienced a branch redirect.
+#284,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##820E2 +A DL1 reload occurred due to marked load +#285,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C70E4 +The source information is valid and is for a marked load +#286,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 marked L1 D cache load misses +##820E0 +Load references that miss the Level 1 Data cache, by LSU0. +#287,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 marked L1 D cache load misses +##820E4 +Load references that miss the Level 1 Data cache, by LSU1. +#288,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##810C2 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#289,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ lhs flushes +##810C3 +A marked store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. +#290,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##810C1 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#291,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##810C0 +A marked store was flushed from unit 0 because it was unaligned +#292,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##810C6 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#293,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ lhs flushes +##810C7 +A marked store was flushed because a younger load hits an older store that is already in the SRQ or in the same group.
+#294,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##810C4 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#295,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##810C5 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#296,v,g,n,n,PM_MRK_LSU_FLUSH_UST,Marked unaligned store flushes +##810A8 +A marked store was flushed because it was unaligned +#297,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C70E6 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#298,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##820E6 +A marked stcx (stwcx or stdcx) failed +#299,v,g,n,n,PM_MRK_ST_GPS,Marked store sent to GPS +##00003 +A sampled store has been sent to the memory subsystem +#300,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##820E3 +A marked store missed the dcache +#301,v,g,n,n,PM_PMC1_OVERFLOW,PMC1 Overflow +##0000A +Overflows from PMC1 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. +#302,v,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of PowerPC instructions that completed. +#303,v,g,n,n,PM_PTEG_FROM_L25_MOD,PTEG loaded from L2.5 modified +##83097 +A Page Table Entry was loaded into the TLB with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load. +#304,v,g,n,n,PM_PTEG_FROM_L35_MOD,PTEG loaded from L3.5 modified +##8309E +A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on the same module as this processor is located, due to a demand load. 
+#305,v,g,n,n,PM_PTEG_FROM_LMEM,PTEG loaded from local memory +##83087 +A Page Table Entry was loaded into the TLB from memory attached to the same module this processor is located on. +#306,v,g,n,n,PM_PTEG_RELOAD_VALID,PTEG reload valid +##830E4 +A Page Table Entry was loaded into the TLB. +#307,v,g,n,n,PM_SLB_MISS,SLB misses +##80088 +Total of all Segment Lookaside Buffer (SLB) misses, Instructions + Data. +#308,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full +##720E6 +The memory controller A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#309,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full +##730E6 +A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#310,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue +##707C6 +A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#311,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue +##717C6 +A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.
+#312,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full +##700C6 +A snoop request for a read from memory was retried because the read queues were full. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#313,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue +##705C6 +A snoop request for a read from memory was retried because it matched the cache line of an active read. The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#314,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue +##715C6 +A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#315,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision +##725E6 +Snoop retry due to one ahead collision +#316,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE +##800C3 +A tlbie was snooped from another processor. +#317,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop read retry due to read queue full +##710C6 +A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#318,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue +##706C6 +A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly +#319,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue +##716C6 +A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#320,v,g,n,n,PM_STCX_FAIL,STCX failed +##820E1 +A stcx (stwcx or stdcx) failed +#321,v,g,n,n,PM_STCX_PASS,Stcx passes +##820E5 +A stcx (stwcx or stdcx) instruction was successful +#322,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C10C3 +A store missed the dcache. Combined Unit 0 + 1. +#323,v,g,n,n,PM_ST_REF_L1,L1 D cache store references +##C10A8 +Store references to the Data Cache. Combined Unit 0 + 1. +#324,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C10C1 +Store references to the Data Cache by LSU0. +#325,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C10C4 +Store references to the Data Cache by LSU1. +#326,v,g,n,n,PM_SUSPENDED,Suspended +##00000 +The counter is suspended (does not count). +#327,v,g,n,n,PM_THRD_GRP_CMPL_BOTH_CYC,Cycles group completed by both threads +##00013 +Cycles that both threads completed. +#328,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses +##410C7 +Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used. +#329,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 +##420E0 +Cycles this thread was running at priority level 1. Priority level 1 is the lowest and indicates the thread is sleeping. 
+#330,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 +##420E1 +Cycles this thread was running at priority level 2. +#331,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 +##420E2 +Cycles this thread was running at priority level 3. +#332,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 +##420E3 +Cycles this thread was running at priority level 4. +#333,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 +##420E4 +Cycles this thread was running at priority level 5. +#334,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 +##420E5 +Cycles this thread was running at priority level 6. +#335,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 +##420E6 +Cycles this thread was running at priority level 7. +#336,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference +##430E3 +Cycles when this thread's priority is equal to the other thread's priority. +#337,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 +##430E4 +Cycles when this thread's priority is higher than the other thread's priority by 1 or 2. +#338,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 +##430E5 +Cycles when this thread's priority is higher than the other thread's priority by 3 or 4. +#339,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 +##430E6 +Cycles when this thread's priority is higher than the other thread's priority by 5 or 6. +#340,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 +##430E2 +Cycles when this thread's priority is lower than the other thread's priority by 1 or 2. +#341,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 +##430E1 +Cycles when this thread's priority is lower than the other thread's priority by 3 or 4. 
+#342,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 +##430E0 +Cycles when this thread's priority is lower than the other thread's priority by 5 or 6. +#343,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty +##410C2 +Thread selection was overridden because one thread's CLB was empty. +#344,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance +##410C4 +Thread selection was overridden because of a GCT imbalance. +#345,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds +##410C5 +Thread selection was overridden because of an ISU hold. +#346,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses +##410C3 +Thread selection was overridden because one thread had an L2 miss pending. +#347,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 +##410C0 +Thread selection picked thread 0 for decode. +#348,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 +##410C1 +Thread selection picked thread 1 for decode. +#349,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected +##330E7 +A hung thread was detected +#350,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch +##130E4 +Cycles a TLBIE instruction was held at dispatch. +#351,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##100C2 +The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#352,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction +##230E2 +A conditional branch instruction was predicted as taken or not taken. +#353,v,g,n,n,PM_MEM_RQ_DISP_Q16to19,Memory read queue dispatched to queues 16-19 +##727E6 +A memory operation was dispatched to read queue 16,17,18 or 19. This event is sent from the Memory Controller clock domain and must be scaled accordingly.
+#354,c,g,n,n,PM_MEM_FAST_PATH_RD_DISP,Fast path memory read dispatched +##731E6 +Fast path memory read dispatched +#355,v,g,n,n,PM_SNOOP_RETRY_AB_COLLISION,Snoop retry due to a b collision +##735E6 +Snoop retry due to a b collision + +$$$$$$$$ + +{ counter 3 } +#0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB +##400C0 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#1,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##400C1 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#2,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##400C2 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#3,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##400C3 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. 
Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#4,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##400C4 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#5,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##400C5 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#6,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##400C6 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#7,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full +##100C5 +Cycles when the issue queue that feeds the branch unit is full. 
This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#8,v,g,n,n,PM_BR_ISSUED,Branches issued +##230E4 +A branch instruction was issued to the branch unit. A branch that was incorrectly predicted may issue and execute multiple times. +#9,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##230E5 +A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overridden by a flush of an older instruction. +#10,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##230E6 +A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction. +#11,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction +##23087,230E2 +A conditional branch instruction was predicted as taken or not taken. +#12,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty +##410C6 +Cycles when both thread's CLB is completely empty. +#13,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full +##220E5 +Cycles when both thread's CLB is full. +#14,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##110C1 +The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#15,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##100C4 +The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.
+#16,v,g,n,s,PM_CYC,Processor cycles +##0000F +Processor cycles +#17,v,g,n,n,PM_DATA_FROM_L25_MOD,Data loaded from L2.5 modified +##C30A2 +The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load. +#18,v,g,n,n,PM_DATA_FROM_L275_SHR,Data loaded from L2.75 shared +##C3097 +The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a demand load. +#19,v,g,n,n,PM_DATA_FROM_L2MISS,Data loaded missed L2 +##C309B +The processor's Data Cache was reloaded but not from the local L2. +#20,v,g,n,n,PM_DATA_FROM_L3,Data loaded from L3 +##C30AF +The processor's Data Cache was reloaded from the local L3 due to a demand load. +#21,v,g,n,n,PM_DATA_FROM_L35_MOD,Data loaded from L3.5 modified +##C30A6 +The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load. +#22,v,g,n,n,PM_DATA_FROM_L375_SHR,Data loaded from L3.75 shared +##C309E +The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a demand load. +#23,v,g,n,n,PM_DATA_FROM_LMEM,Data loaded from local memory +##C30A0 +The processor's Data Cache was reloaded from memory attached to the same module this processor is located on. +#24,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##800C7 +Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#25,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C10C7 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#26,v,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of prefetch streams +##C50C2 +A new prefetch stream was detected but no more stream entries were available.
+#27,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start +##830E6 +A prefetch stream was started using the DST instruction. +#28,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##830E7 +A new Prefetch Stream was allocated. +#29,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##800C5 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve. +#30,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##800C4,C20E0 +Data TLB misses, all page sizes. +#31,v,g,n,n,PM_DTLB_MISS_16M,Data TLB miss for 16M page +##C208D +Data TLB references to 16MB pages that missed the TLB. Page size is determined at TLB reload time. +#32,v,g,n,n,PM_DTLB_REF,Data TLB references +##C20E4 +Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time. +#33,v,g,n,n,PM_DTLB_REF_16M,Data TLB reference for 16M page +##C2086 +Data TLB references for 16MB pages. Includes hits + misses. +#34,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##130E3 +Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked. +#35,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##130E7 +Cycles when an interrupt due to an external exception is pending but external exceptions were masked. +#36,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued +##700C7 +Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. 
The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled. +#37,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##710C7 +Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#38,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued +##720E7 +A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#39,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried +##730E7 +A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#40,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty +##722E7 +Fabric cycles when the Next Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#41,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty +##721E7 +Fabric cycles when the Vertical Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#42,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty +##702C7 +Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. 
+#43,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty +##712C7 +Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#44,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty +##701C7 +Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#45,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty +##711C7 +Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#46,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination +##703C7 +Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled. +#47,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first +##713C7 +Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled. +#48,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination +##723E7 +Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled accordingly. +#49,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first +##733E7 +Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled accordingly. 
+#50,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty +##731E7 +Fabric cycles when the Middle Bypass sidecar is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#51,v,g,n,n,PM_FLUSH,Flushes +##110C7 +Flushes occurred including LSU and Branch flushes. +#52,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##110C6 +A flush was caused by a branch mispredict. +#53,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance +##330E3 +This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#54,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation +##330E2 +This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#55,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync +##330E1 +This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes. +#56,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##100C1 +The Floating Point Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#57,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction +##000C3 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#58,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##020E0 +FPU0 has encountered a denormalized operand. 
+#59,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##000C0 +FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#60,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##010C2 +FPU0 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#61,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##010C3 +FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. +#62,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##000C1 +The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#63,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##010C0 +FPU0 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. +#64,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##030E0 +FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#65,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##010C1 +FPU0 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#66,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##000C2 +FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#67,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##100C3 +The issue queue for FPU0 cannot accept any more instruction. Dispatch to this issue queue is stopped. +#68,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##020E3 +FPU0 has executed a single precision instruction. 
+#69,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##020E1 +FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). +#70,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##020E2 +FPU0 has executed a Floating Point Store instruction. +#71,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction +##000C7 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#72,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##020E4 +FPU1 has encountered a denormalized operand. +#73,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##000C4 +FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#74,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##010C6 +FPU1 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#75,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##010C7 +FPU1 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. +#76,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##000C5 +The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#77,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executed FMOV or FEST instructions +##010C4 +FPU1 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. +#78,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##010C5 +FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
+#79,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##000C6 +FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#80,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##100C7 +The issue queue for FPU1 cannot accept any more instructions. Dispatch to this issue queue is stopped +#81,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##020E7 +FPU1 has executed a single precision instruction. +#82,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##020E5 +FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). +#83,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##020E6 +FPU1 has executed a Floating Point Store instruction. +#84,v,g,n,n,PM_FPU_FMOV_FEST,FPU executed FMOV or FEST instructions +##01088 +The floating point unit has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ.. Combined Unit 0 + Unit 1. +#85,v,g,n,n,PM_FPU_FRSP_FCONV,FPU executed FRSP or FCONV instructions +##01090 +The floating point unit has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1. +#86,v,g,n,n,PM_FPU_FSQRT,FPU executed FSQRT instruction +##000A8 +The floating point unit has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1. +#87,v,g,n,n,PM_FPU_STF,FPU executed store instruction +##020A8 +FPU has executed a store instruction. Combined Unit 0 + Unit 1. +#88,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##110C0 +The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. 
This event only indicates that the queue was full, not that dispatch was prevented. +#89,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##110C4 +The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#90,u,g,n,n,PM_FXU0_BUSY_FXU1_IDLE,FXU0 busy FXU1 idle +##00012 +FXU0 is busy while FXU1 was idle +#91,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##130E2 +The Fixed Point unit 0 finished an instruction and produced a result. Instructions that finish may not necessarily complete. +#92,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##130E6 +The Fixed Point unit 1 finished an instruction and produced a result. Instructions that finish may not necessarily complete. +#93,v,g,n,n,PM_FXU_FIN,FXU produced a result +##13088 +The fixed point unit (Unit 0 + Unit 1) finished an instruction. Instructions that finish may not necessarily complete. +#94,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##100C0 +The Global Completion Table is completely full. +#95,v,g,n,n,PM_GCT_NOSLOT_SRQ_FULL,No slot in GCT caused by SRQ full +##10084 +Cycles when the Global Completion Table has no slots from this thread because the Store Request Queue (SRQ) is full. This happens when the storage subsystem can not process the stores in the SRQ. Groups can not be dispatched until a SRQ entry is available. +#96,v,g,n,s,PM_GCT_USAGE_80to99_CYC,Cycles GCT 80-99% full +##0001F +Cycles when the Global Completion Table has between 80% and 99% of its slots used. The GCT has 20 entries shared between threads +#97,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##130E5 +The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. 
+#98,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##120E6 +Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count. +#99,c,g,n,n,PM_GRP_IC_MISS_BR_REDIR_NONSPEC,Group experienced non-speculative I cache miss or branch redirect +##120E5 +Group experienced non-speculative I cache miss or branch redirect +#100,v,g,n,n,PM_GRP_CMPL,Group completed +##00013 +A group completed. Microcoded instructions that span multiple groups will generate this event once per group. +#101,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##130E1 +A scoreboard operation on a non-renamed resource has blocked dispatch. +#102,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##120E4 +A group that previously attempted dispatch was rejected. +#103,v,g,n,n,PM_GRP_DISP_SUCCESS,Group dispatch success +##00002 +Number of groups successfully dispatched (not rejected) +#104,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##120E3 +A group is available for dispatch. This does not mean it was successfully dispatched. +#105,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss +##120E7 +Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count. +#106,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect +##230E0 +A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict). +#107,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect +##230E1 +A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target). 
+#108,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer +##210C7 +A prefetch buffer entry (line) is allocated but the request is not a demand fetch. +#109,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##220E6 +An instruction prefetch request has been made. +#110,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##220E7 +An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. +#111,v,g,n,n,PM_IERAT_XLATE_WR_LP,Large page translation written to ierat +##210C6 +An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. +#112,v,g,n,n,PM_IOPS_CMPL,Internal operations completed +##00001 +Number of internal operations that completed. +#113,v,g,n,n,PM_INST_DISP,Instructions dispatched +##00009 +Number of PowerPC instructions successfully dispatched. +#114,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##220E4 +Cycles when at least one instruction was sent from the fetch unit to the decode unit. +#115,v,g,n,n,PM_INST_FROM_L275_SHR,Instruction fetched from L2.75 shared +##22096 +An instruction fetch group was fetched with shared (T) data from the L2 on a different module than this processor is located. Fetch groups can contain up to 8 instructions +#116,v,g,n,n,PM_INST_FROM_L3,Instruction fetched from L3 +##220AE +An instruction fetch group was fetched from the local L3. Fetch groups can contain up to 8 instructions +#117,v,g,n,n,PM_INST_FROM_L375_SHR,Instruction fetched from L3.75 shared +##2209D +An instruction fetch group was fetched with shared (S) data from the L3 of a chip on a different module than this processor is located. 
Fetch groups can contain up to 8 instructions +#118,v,g,n,n,PM_INST_FROM_PREF,Instruction fetched from prefetch +##2208D +An instruction fetch group was fetched from the prefetch buffer. Fetch groups can contain up to 8 instructions +#119,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##800C1 +A SLB miss for an instruction fetch has occurred +#120,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##800C0 +A TLB miss for an Instruction Fetch has occurred +#121,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C30E4 +The data source information is valid, the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads. +#122,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##C70E7 +A request to prefetch data into the L1 was made +#123,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##230E7 +Cycles that a cache line was written to the instruction cache. +#124,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##730E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#125,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##720E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. 
+#126,v,g,n,s,PM_L2SA_RCLD_DISP,L2 slice A RC load dispatch attempt +##701C0 +A Read/Claim dispatch for a Load was attempted +#127,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C0 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#128,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 slice A RC load dispatch attempt failed due to other reasons +##731E0 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#129,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 slice A RC load dispatch attempt failed due to all RC full +##721E0 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#130,v,g,n,s,PM_L2SA_RCST_DISP,L2 slice A RC store dispatch attempt +##702C0 +A Read/Claim dispatch for a Store was attempted. +#131,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C0 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#132,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 slice A RC store dispatch attempt failed due to other reasons +##732E0 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#133,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 slice A RC store dispatch attempt failed due to all RC full +##722E0 +A Read/Claim dispatch for a store failed because all RC machines are busy. 
+#134,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C0 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#135,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice A RC dispatch attempt failed due to all CO busy +##713C0 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#136,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##710C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#137,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##700C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#138,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##733E0 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B, and C. 
+#139,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##723E0 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#140,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##730E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#141,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##720E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#142,v,g,n,s,PM_L2SB_RCLD_DISP,L2 slice B RC load dispatch attempt +##701C1 +A Read/Claim dispatch for a Load was attempted +#143,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C1 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#144,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 slice B RC load dispatch attempt failed due to other reasons +##731E1 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#145,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 slice B RC load dispatch attempt failed due to all RC full +##721E1 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#146,v,g,n,s,PM_L2SB_RCST_DISP,L2 slice B RC store dispatch attempt +##702C1 +A Read/Claim dispatch for a Store was attempted. 
+#147,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C1 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#148,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 slice B RC store dispatch attempt failed due to other reasons +##732E1 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#149,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 slice B RC store dispatch attempt failed due to all RC full +##722E2 +A Read/Claim dispatch for a store failed because all RC machines are busy. +#150,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C1 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#151,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice B RC dispatch attempt failed due to all CO busy +##713C1 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#152,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##710C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. 
This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#153,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##700C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#154,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##733E1 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C. +#155,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##723E1 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#156,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##730E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#157,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##720E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. 
+#158,v,g,n,s,PM_L2SC_RCLD_DISP,L2 slice C RC load dispatch attempt +##701C2 +A Read/Claim dispatch for a Load was attempted +#159,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C2 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#160,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 slice C RC load dispatch attempt failed due to other reasons +##731E2 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#161,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 slice C RC load dispatch attempt failed due to all RC full +##721E2 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#162,v,g,n,s,PM_L2SC_RCST_DISP,L2 slice C RC store dispatch attempt +##702C2 +A Read/Claim dispatch for a Store was attempted. +#163,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C2 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#164,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 slice C RC store dispatch attempt failed due to other reasons +##732E2 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#165,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 slice C RC store dispatch attempt failed due to all RC full +##722E1 +A Read/Claim dispatch for a store failed because all RC machines are busy. 
+#166,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C2 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#167,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice C RC dispatch attempt failed due to all CO busy +##713C2 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#168,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##710C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#169,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##700C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#170,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##733E2 +A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C. 
+#171,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##723E2 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#172,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##C50C3 +A request to prefetch data into L2 was made +#173,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy +##721E3 +Cycles All Castin/Castout machines are busy. +#174,v,g,n,s,PM_L3SA_HIT,L3 slice A hits +##711C3 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice +#175,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid +##730E3 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. +#176,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG +##720E3 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#177,v,g,n,s,PM_L3SA_REF,L3 slice A references +##701C3 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice +#178,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid +##710C3 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#179,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries +##731E3 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#180,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy +##721E4 +Cycles All Castin/Castout machines are busy. 
+#181,v,g,n,s,PM_L3SB_HIT,L3 slice B hits +##711C4 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice +#182,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid +##730E4 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. +#183,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG +##720E4 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#184,v,g,n,s,PM_L3SB_REF,L3 slice B references +##701C4 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice +#185,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid +##710C4 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#186,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries +##731E4 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#187,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy +##721E5 +Cycles All Castin/Castout machines are busy. +#188,v,g,n,s,PM_L3SC_HIT,L3 slice C hits +##711C5 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice +#189,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid +##730E5 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point. 
+#190,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG +##720E5 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#191,v,g,n,s,PM_L3SC_REF,L3 slice C references +##701C5 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice. +#192,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid +##710C5 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#193,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries +##731E5 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#194,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##820E7 +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#195,v,g,n,n,PM_LD_MISS_L1,L1 D cache load misses +##C1088 +Load references that miss the Level 1 Data cache. Combined unit 0 + 1. +#196,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C10C2 +Load references that miss the Level 1 Data cache, by unit 0. +#197,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C10C6 +Load references that miss the Level 1 Data cache, by unit 1. +#198,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C10C0 +Load references to Level 1 Data Cache, by unit 0. +#199,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C10C5 +Load references that miss the Level 1 Data cache, by unit 1. +#200,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##100C6 +The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. 
+#201,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject +##C20E1 +Total cycles the Load Store Unit 0 is busy rejecting instructions. +#202,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##800C2 +Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. +#203,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C00C2 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#204,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ lhs flushes +##C00C3 +A store was flushed by unit 0 because younger load hits and older store that is already in the SRQ or in the same group. +#205,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C00C0 +A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1) +#206,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C00C1 +A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary). +#207,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##C50C0 +A floating point load was executed by LSU0 +#208,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads +##C50C1 +A non-cacheable load was executed by unit 0. +#209,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C40C3 +Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. +#210,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C40C1 +Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. 
+#211,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C40C2 +Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. +#212,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ lhs rejects +##C40C0 +Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. +#213,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C60E1 +Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load hits L1 but becomes a store forward, then it's not treated as a load miss. +#214,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject +##C20E5 +Total cycles the Load Store Unit 1 is busy rejecting instructions. +#215,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##800C6 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.
+#216,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C00C6 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#217,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ lhs flushes +##C00C7 +A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. +#218,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C00C4 +A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1). +#219,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C00C5 +A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary) +#220,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##C50C4 +A floating point load was executed by LSU1 +#221,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads +##C50C5 +A non-cacheable load was executed by unit 1. +#222,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C40C7 +Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. +#223,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C40C5 +Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. +#224,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C40C6 +Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus.
Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. +#225,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ lhs rejects +##C40C4 +Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. +#226,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C60E5 +Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load hits L1 but becomes a store forward, then it's not treated as a load miss. +#227,v,g,n,n,PM_LSU_DERAT_MISS,DERAT misses +##800A8 +Total D-ERAT Misses. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. Combined Unit 0 + 1. +#228,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##110C5 +A flush was initiated by the Load Store Unit +#229,v,g,n,n,PM_LSU_FLUSH_LRQ,LRQ flushes +##C00A8 +A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. Combined Units 0 and 1. +#230,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full +##320E7 +This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled.
+#231,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full +##330E0 +This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#232,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C30E7 +The Load Miss Queue was full. +#233,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C70E5 +A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#234,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C30E6 +The first entry in the LMQ was allocated. +#235,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C30E5 +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#236,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty +##00015 +Cycles when both the LMQ and SRQ are empty (LSU is idle) +#237,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##110C2 +Cycles when the LRQ is full. +#238,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C60E7 +LRQ slot zero was allocated +#239,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C60E6 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each). +#240,v,g,n,n,PM_LSU_REJECT_RELOAD_CDF,LSU reject due to reload CDF or tag update collision +##C40A8 +Total cycles the Load Store Unit is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected.
Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. Combined Unit 0 + 1. +#241,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##110C3 +Cycles the Store Request Queue is full. +#242,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C20E7 +SRQ Slot zero was allocated +#243,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C20E6 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each). +#244,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##830E5 +Cycles that a sync instruction is active in the Store Request Queue. +#245,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch +##130E0 +Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response. +#246,c,g,n,n,PM_MEM_FAST_PATH_RD_DISP,Fast path memory read dispatched +##731E6 +Fast path memory read dispatched +#247,v,g,n,n,PM_MEM_RQ_DISP_Q8to11,Memory read queue dispatched to queues 8-11 +##722E6 +A memory operation was dispatched to read queue 8,9,10 or 11. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#248,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed +##726E6 +A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#249,v,g,n,s,PM_MEM_NONSPEC_RD_CANCEL,Non speculative memory read cancelled +##711C6 +A non-speculative read was cancelled because the combined response indicated it was sourced from another L2 or L3.
This event is sent from the Memory Controller clock domain and must be scaled accordingly +#250,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed +##736E6 +A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly +#251,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched +##704C6 +Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#252,v,g,n,n,PM_MEM_PWQ_DISP_Q2or3,Memory partial-write queue dispatched to Write Queue 2 or 3 +##734E6 +Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#253,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed +##724E6 +Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#254,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered +##714C6 +Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#255,v,g,n,n,PM_MEM_RQ_DISP_Q12to15,Memory read queue dispatched to queues 12-15 +##732E6 +A memory operation was dispatched to read queue 12,13,14 or 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#256,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched +##701C6 +A memory read was dispatched. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#257,v,g,n,n,PM_MEM_RQ_DISP_Q0to3,Memory read queue dispatched to queues 0-3 +##702C6 +A memory operation was dispatched to read queue 0,1,2, or 3. 
This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#258,v,g,n,n,PM_MEM_RQ_DISP_Q4to7,Memory read queue dispatched to queues 4-7 +##712C6 +A memory operation was dispatched to read queue 4,5,6 or 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#259,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read cancelled +##721E6 +Speculative memory read cancelled (i.e. cresp = sourced by L2/L3) +#260,v,g,n,n,PM_MEM_WQ_DISP_Q0to7,Memory write queue dispatched to queues 0-7 +##723E6 +A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#261,v,g,n,n,PM_MEM_WQ_DISP_Q8to15,Memory write queue dispatched to queues 8-15 +##733E6 +A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#262,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush +##713C6 +A memory dclaim or flush operation was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#263,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write +##703C6 +A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#264,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD,Marked data loaded from L2.5 modified +##C70A2 +The processor's Data Cache was reloaded with modified (M) data from the L2 of a chip on the same module as this processor is located due to a marked load. +#265,v,g,n,n,PM_MRK_DATA_FROM_L275_SHR,Marked data loaded from L2.75 shared +##C7097 +The processor's Data Cache was reloaded with shared (T) data from the L2 on a different module than this processor is located due to a marked load. 
+#266,v,g,n,n,PM_MRK_DATA_FROM_L2MISS,Marked data loaded missed L2 +##C709B +DL1 was reloaded from beyond L2 due to a marked demand load. +#267,v,g,n,n,PM_MRK_DATA_FROM_L3,Marked data loaded from L3 +##C70AF +The processor's Data Cache was reloaded from the local L3 due to a marked load. +#268,v,g,n,n,PM_MRK_DATA_FROM_L35_MOD,Marked data loaded from L3.5 modified +##C70A6 +The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a marked load. +#269,v,g,n,n,PM_MRK_DATA_FROM_L375_SHR,Marked data loaded from L3.75 shared +##C709E +The processor's Data Cache was reloaded with shared (S) data from the L3 of a chip on a different module than this processor is located due to a marked load. +#270,v,g,n,n,PM_MRK_DATA_FROM_LMEM,Marked data loaded from local memory +##C70A0 +The processor's Data Cache was reloaded due to a marked load from memory attached to the same module this processor is located on. +#271,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses +##C50C7 +A Data SLB miss was caused by a marked instruction. +#272,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses +##C50C6,C60E0 +Data TLB references by a marked instruction that missed the TLB (all page sizes). +#273,v,g,n,n,PM_MRK_DTLB_MISS_16M,Marked Data TLB misses for 16M page +##C608D +Marked Data TLB misses for 16M page +#274,v,g,n,n,PM_MRK_DTLB_REF,Marked Data TLB reference +##C60E4 +Total number of Data TLB references by a marked instruction for all page sizes. Page size is determined at TLB reload time. +#275,v,g,n,n,PM_MRK_DTLB_REF_16M,Marked Data TLB reference for 16M page +##C6086 +Data TLB references by a marked instruction for 16MB pages. +#276,v,g,n,n,PM_MRK_FPU_FIN,Marked instruction FPU processing finished +##00014 +One of the Floating Point Units finished a marked instruction.
Instructions that finish may not necessarily complete +#277,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##820E2 +A DL1 reload occurred due to marked load +#278,v,g,n,n,PM_MRK_INST_FIN,Marked instruction finished +##00005 +One of the execution units finished a marked instruction. Instructions that finish may not necessarily complete +#279,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C70E4 +The source information is valid and is for a marked load +#280,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 marked L1 D cache load misses +##820E0 +Load references that miss the Level 1 Data cache, by LSU0. +#281,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 marked L1 D cache load misses +##820E4 +Load references that miss the Level 1 Data cache, by LSU1. +#282,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##810C2 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#283,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ lhs flushes +##810C3 +A marked store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. +#284,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##810C1 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#285,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##810C0 +A marked store was flushed from unit 0 because it was unaligned +#286,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##810C6 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte.
+#287,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ lhs flushes +##810C7 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#288,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##810C4 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#289,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##810C5 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#290,v,g,n,n,PM_MRK_LSU_FLUSH_LRQ,Marked LRQ flushes +##81088 +A marked load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#291,v,g,n,n,PM_MRK_LSU_FLUSH_UST,Marked unaligned store flushes +##81090 +A marked store was flushed because it was unaligned +#292,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C70E6 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#293,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##820E6 +A marked stcx (stwcx or stdcx) failed +#294,v,g,n,n,PM_MRK_ST_CMPL_INT,Marked store completed with intervention +##00003 +A marked store previously sent to the memory subsystem completed (data home) after requiring intervention +#295,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##820E3 +A marked store missed the dcache +#296,v,g,n,n,PM_PMC2_OVERFLOW,PMC2 Overflow +##0000A +Overflows from PMC2 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. +#297,v,g,n,n,PM_PMC6_OVERFLOW,PMC6 Overflow +##0001A +Overflows from PMC6 are counted. This effectively widens the PMC. 
The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. +#298,v,g,n,n,PM_PTEG_FROM_L25_MOD,PTEG loaded from L2.5 modified +##830A2 +A Page Table Entry was loaded into the TLB with modified (M) data from the L2 of a chip on the same module as this processor is located due to a demand load. +#299,v,g,n,n,PM_PTEG_FROM_L275_SHR,PTEG loaded from L2.75 shared +##83097 +A Page Table Entry was loaded into the TLB with shared (T) data from the L2 on a different module than this processor is located due to a demand load. +#300,v,g,n,n,PM_PTEG_FROM_L2MISS,PTEG loaded from L2 miss +##8309B +A Page Table Entry was loaded into the TLB but not from the local L2. +#301,v,g,n,n,PM_PTEG_FROM_L3,PTEG loaded from L3 +##830AF +A Page Table Entry was loaded into the TLB from the local L3 due to a demand load. +#302,v,g,n,n,PM_PTEG_FROM_L35_MOD,PTEG loaded from L3.5 modified +##830A6 +A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on the same module as this processor is located, due to a demand load. +#303,v,g,n,n,PM_PTEG_FROM_L375_SHR,PTEG loaded from L3.75 shared +##8309E +A Page Table Entry was loaded into the TLB with shared (S) data from the L3 of a chip on a different module than this processor is located, due to a demand load. +#304,v,g,n,n,PM_PTEG_FROM_LMEM,PTEG loaded from local memory +##830A0 +A Page Table Entry was loaded into the TLB from memory attached to the same module this processor is located on. +#305,v,g,n,n,PM_PTEG_RELOAD_VALID,PTEG reload valid +##830E4 +A Page Table Entry was loaded into the TLB. +#306,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full +##720E6 +The memory controller A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly.
+#307,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full +##730E6 +A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#308,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue +##707C6 +A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#309,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue +##717C6 +A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#310,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full +##700C6 +A snoop request for a read from memory was retried because the read queues were full. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#311,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue +##705C6 +A snoop request for a read from memory was retried because it matched the cache line of an active read. The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
+#312,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue +##715C6 +A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#313,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision +##725E6 +Snoop retry due to one ahead collision +#314,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE +##800C3 +A tlbie was snooped from another processor. +#315,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop write retry due to write queue full +##710C6 +A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#316,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue +##706C6 +A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#317,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue +##716C6 +A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly.
+#318,v,g,n,n,PM_STCX_FAIL,STCX failed +##820E1 +A stcx (stwcx or stdcx) failed +#319,v,g,n,n,PM_STCX_PASS,Stcx passes +##820E5 +A stcx (stwcx or stdcx) instruction was successful +#320,v,g,n,n,PM_STOP_COMPLETION,Completion stopped +##00018 +RAS Unit has signaled completion to stop +#321,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C10C3 +A store missed the dcache. Combined Unit 0 + 1. +#322,v,g,n,n,PM_ST_REF_L1,L1 D cache store references +##C1090 +Store references to the Data Cache. Combined Unit 0 + 1. +#323,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C10C1 +Store references to the Data Cache by LSU0. +#324,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C10C4 +Store references to the Data Cache by LSU1. +#325,v,g,n,n,PM_SUSPENDED,Suspended +##00000 +The counter is suspended (does not count). +#326,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses +##410C7 +Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used. +#327,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 +##420E0 +Cycles this thread was running at priority level 1. Priority level 1 is the lowest and indicates the thread is sleeping. +#328,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 +##420E1 +Cycles this thread was running at priority level 2. +#329,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 +##420E2 +Cycles this thread was running at priority level 3. +#330,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 +##420E3 +Cycles this thread was running at priority level 4. +#331,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 +##420E4 +Cycles this thread was running at priority level 5. 
+#332,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 +##420E5 +Cycles this thread was running at priority level 6. +#333,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 +##420E6 +Cycles this thread was running at priority level 7. +#334,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference +##430E3 +Cycles when this thread's priority is equal to the other thread's priority. +#335,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 +##430E4 +Cycles when this thread's priority is higher than the other thread's priority by 1 or 2. +#336,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 +##430E5 +Cycles when this thread's priority is higher than the other thread's priority by 3 or 4. +#337,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 +##430E6 +Cycles when this thread's priority is higher than the other thread's priority by 5 or 6. +#338,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 +##430E2 +Cycles when this thread's priority is lower than the other thread's priority by 1 or 2. +#339,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 +##430E1 +Cycles when this thread's priority is lower than the other thread's priority by 3 or 4. +#340,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 +##430E0 +Cycles when this thread's priority is lower than the other thread's priority by 5 or 6. +#341,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty +##410C2 +Thread selection was overridden because one thread's CLB was empty. +#342,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance +##410C4 +Thread selection was overridden because of a GCT imbalance. 
+#343,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds +##410C5 +Thread selection was overridden because of an ISU hold. +#344,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses +##410C3 +Thread selection was overridden because one thread had an L2 miss pending. +#345,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 +##410C0 +Thread selection picked thread 0 for decode. +#346,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 +##410C1 +Thread selection picked thread 1 for decode. +#347,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected +##330E7 +A hung thread was detected +#348,v,g,t,n,PM_THRESH_TIMEO,Threshold timeout +##0000B +The threshold timer expired +#349,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch +##130E4 +Cycles a TLBIE instruction was held at dispatch. +#350,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##100C2 +The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#351,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction +##230E3 +The target address of a branch instruction was predicted. +#352,v,g,n,n,PM_MEM_RQ_DISP_Q16to19,Memory read queue dispatched to queues 16-19 +##727E6 +A memory operation was dispatched to read queue 16,17,18 or 19. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#353,v,g,n,n,PM_SNOOP_RETRY_AB_COLLISION,Snoop retry due to a b collision +##735E6 +Snoop retry due to a b collision +#354,v,g,n,n,PM_INST_DISP_ATTEMPT,Instructions dispatch attempted +##120E1 +Number of PowerPC Instructions dispatched (attempted, not filtered by success). + +$$$$$$$$ + +{ counter 4 } +#0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB +##400C0 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer.
Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#1,v,g,n,n,PM_0INST_FETCH,No instructions fetched +##2208D +No instructions were fetched this cycle (due to IFU hold, redirect, or icache miss) +#2,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##400C1 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#3,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##400C2 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#4,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##400C3 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers.
Each thread has its own set of CLB; these events are thread specific. +#5,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##400C4 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#6,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##400C5 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#7,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##400C6 +The cache line buffer (CLB) is a 6-deep, 4-wide instruction buffer. Fullness is reported on a cycle basis with each event representing the number of cycles the CLB had the corresponding number of entries occupied. These events give a real time history of the number of instruction buffers used, but not the number of PowerPC instructions within those buffers. Each thread has its own set of CLB; these events are thread specific. +#8,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full +##100C5 +Cycles when the issue queue that feeds the branch unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#9,v,g,n,n,PM_BR_ISSUED,Branches issued +##230E4 +A branch instruction was issued to the branch unit. 
A branch that was incorrectly predicted may issue and execute multiple times. +#10,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##230E5 +A conditional branch instruction was incorrectly predicted as taken or not taken. The branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This will result in a branch redirect flush if not overridden by a flush of an older instruction. +#11,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##230E6 +A branch instruction target was incorrectly predicted. This will result in a branch mispredict flush unless a flush is detected from an older instruction. +#12,v,g,n,n,PM_BR_PRED_CR_TA,A conditional branch was predicted, CR and target prediction +##23087 +Both the condition (taken or not taken) and the target address of a branch instruction were predicted. +#13,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty +##410C6 +Cycles when both thread's CLB is completely empty. +#14,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full +##220E5 +Cycles when both thread's CLB is full. +#15,v,g,n,n,PM_CMPLU_STALL_DIV,Completion stall caused by DIV instruction +##11099 +Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a fixed point divide instruction. This is a subset of PM_CMPLU_STALL_FXU. +#16,v,g,n,n,PM_CMPLU_STALL_ERAT_MISS,Completion stall caused by ERAT miss +##1109B +Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered an ERAT miss. This is a subset of PM_CMPLU_STALL_REJECT. +#17,v,g,n,n,PM_CMPLU_STALL_FPU,Completion stall caused by FPU instruction +##11098 +Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes was a floating point instruction.
+#18,v,g,n,n,PM_CMPLU_STALL_REJECT,Completion stall caused by reject +##1109A +Following a completion stall (any period when no groups completed) the last instruction to finish before completion resumes suffered a load/store reject. This is a subset of PM_CMPLU_STALL_LSU. +#19,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##110C1 +The issue queue that feeds the Conditional Register unit is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#20,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##100C4 +The Conditional Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#21,v,g,n,s,PM_CYC,Processor cycles +##0000F +Processor cycles +#22,v,g,n,n,PM_DATA_FROM_L275_MOD,Data loaded from L2.75 modified +##C3097 +The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a demand load. +#23,v,g,n,n,PM_DATA_FROM_L375_MOD,Data loaded from L3.75 modified +##C309E +The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on the same module as this processor is located due to a demand load. +#24,v,g,n,n,PM_DATA_FROM_RMEM,Data loaded from remote memory +##C3087 +The processor's Data Cache was reloaded from memory attached to a different module than this processor is located on. +#25,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##800C7 +Cycles a translation tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#26,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C10C7 +A dcache invalidated was received from the L2 because a line in L2 was castout.
+#27,v,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of prefetch streams +##C50C2 +A new prefetch stream was detected but no more stream entries were available. +#28,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start +##830E6 +A prefetch stream was started using the DST instruction. +#29,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##830E7 +A new Prefetch Stream was allocated. +#30,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##800C5 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve. +#31,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##800C4,C20E0 +Data TLB misses, all page sizes. +#32,v,g,n,n,PM_DTLB_MISS_16G,Data TLB miss for 16G page +##C208D +Data TLB references to 16GB pages that missed the TLB. Page size is determined at TLB reload time. +#33,v,g,n,n,PM_DTLB_REF,Data TLB references +##C20E4 +Total number of Data TLB references for all page sizes. Page size is determined at TLB reload time. +#34,v,g,n,n,PM_DTLB_REF_16G,Data TLB reference for 16G page +##C2086 +Data TLB references for 16GB pages. Includes hits + misses. +#35,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##130E3 +Cycles MSR(EE) bit was off indicating that interrupts due to external exceptions were masked. +#36,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##130E7 +Cycles when an interrupt due to an external exception is pending but external exceptions were masked. +#37,v,g,n,n,PM_EXT_INT,External interrupts +##00003 +An interrupt due to an external exception occurred +#38,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued +##700C7 +Incremented when a chip issues a command on its SnoopA address bus. Each of the two address busses (SnoopA and SnoopB) is capable of one transaction per fabric cycle (one fabric cycle = 2 cpu cycles in normal 2:1 mode), but each chip can only drive the SnoopA bus, and can only drive one transaction every two fabric cycles (i.e., every four cpu cycles). 
In MCM-based systems, two chips interleave their accesses to each of the two fabric busses (SnoopA, SnoopB) to reach a peak capability of one transaction per cpu clock cycle. The two chips that drive SnoopB are wired so that the chips refer to the bus as SnoopA but it is connected to the other two chips as SnoopB. Note that this event will only be recorded by the FBC on the chip that sourced the operation. The signal is delivered at FBC speed and the count must be scaled. +#39,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##710C7 +Incremented when a command issued by a chip on its SnoopA address bus is retried for any reason. The overwhelming majority of retries are due to running out of memory controller queues but retries can also be caused by trying to reference addresses that are in a transient cache state -- e.g. a line is transient after issuing a DCLAIM instruction to a shared line but before the associated store completes. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#40,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued +##720E7 +A DCLAIM command was issued. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#41,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried +##730E7 +A DCLAIM command was retried. Each chip reports its own counts. The signal is delivered at FBC speed and the count must be scaled accordingly. +#42,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty +##722E7 +Fabric cycles when the Next Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#43,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty +##721E7 +Fabric cycles when the Vertical Node out hold-buffers are empty. The signal is delivered at FBC speed and the count must be scaled accordingly.
+#44,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty +##702C7 +Fabric cycles when the Minus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#45,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty +##712C7 +Fabric cycles when the Minus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#46,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty +##701C7 +Fabric cycles when the Plus-1 hip/hop sidecars (sidecars for chip to chip data transfer) are empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#47,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty +##711C7 +Fabric cycles when the Plus-1 jump sidecar (sidecars for mcm to mcm data transfer) is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#48,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination +##703C7 +Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound NN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled. +#49,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first +##713C7 +Fabric Data beats that the base chip takes the inbound PN data and forwards it on to the outbound NN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled. +#50,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination +##723E7 +Fabric Data beats that the base chip takes the inbound PN data and passes it through to the outbound VN bus without going into a sidecar. The signal is delivered at FBC speed and the count must be scaled accordingly. 
+#51,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first +##733E7 +Fabric data beats that the base chip takes the inbound PN data and forwards it on to the outbound VN data bus after going into a sidecar first. The signal is delivered at FBC speed and the count must be scaled accordingly. +#52,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty +##731E7 +Fabric cycles when the Middle Bypass sidecar is empty. The signal is delivered at FBC speed and the count must be scaled accordingly. +#53,v,g,n,n,PM_FLUSH,Flushes +##110C7 +Flushes occurred including LSU and Branch flushes. +#54,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##110C6 +A flush was caused by a branch mispredict. +#55,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance +##330E3 +This thread has been flushed at dispatch because it is stalled and a GCT imbalance exists. GCT thresholds are set in the TSCR register. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#56,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation +##330E2 +This thread has been flushed at dispatch because its scoreboard bit is set indicating that a non-renamed resource is being updated. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#57,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync +##330E1 +This thread has been flushed at dispatch due to a sync, lwsync, ptesync, or tlbsync instruction. This allows the other thread to have more machine resources for it to make progress until the sync finishes. +#58,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##100C1 +The Floating Point Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented.
+#59,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction +##000C3 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#60,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##020E0 +FPU0 has encountered a denormalized operand. +#61,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##000C0 +FPU0 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#62,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##010C2 +FPU0 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#63,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##010C3 +FPU0 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads. +#64,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##000C1 +The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#65,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##010C0 +FPU0 has executed a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. +#66,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##030E0 +FPU0 has executed FPSCR move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*, mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#67,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##010C1 +FPU0 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#68,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##000C2 +FPU0 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. 
+#69,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##100C3 +The issue queue for FPU0 cannot accept any more instruction. Dispatch to this issue queue is stopped. +#70,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##020E3 +FPU0 has executed a single precision instruction. +#71,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##020E1 +FPU0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). +#72,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##020E2 +FPU0 has executed a Floating Point Store instruction. +#73,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction +##000C7 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#74,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##020E4 +FPU1 has encountered a denormalized operand. +#75,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##000C4 +FPU1 has executed a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#76,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##010C6 +FPU1 has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#77,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##010C7 +FPU1 finished, produced a result. This only indicates finish, not completion. Floating Point Stores are included in this count but not Floating Point Loads., , +#78,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##000C5 +The floating point unit has executed a multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#79,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executed FMOV or FEST instructions +##010C4 +FPU1 has executed a move kind of instruction or one of the estimate instructions. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ. +#80,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##010C5 +FPU1 has executed a frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#81,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##000C6 +FPU1 has executed a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#82,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##100C7 +The issue queue for FPU1 cannot accept any more instructions. Dispatch to this issue queue is stopped +#83,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##020E7 +FPU1 has executed a single precision instruction. +#84,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##020E5 +FPU1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). +#85,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##020E6 +FPU1 has executed a Floating Point Store instruction. +#86,v,g,n,n,PM_FPU_1FLOP,FPU executed one flop instruction +##000A8 +The floating point unit has executed an add, mult, sub, compare, fsel, fneg, fabs, fnabs, fres, or frsqrte kind of instruction. These are single FLOP operations. +#87,v,g,n,n,PM_FPU_FEST,FPU executed FEST instruction +##01090 +The floating point unit has executed an estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1. +#88,v,g,n,n,PM_FPU_FIN,FPU produced a result +##01088 +FPU finished, produced a result. This only indicates finish, not completion. Combined Unit 0 + Unit 1. Floating Point Stores are included in this count but not Floating Point Loads., , , XYZs +#89,c,g,n,n,PM_FPU_FULL_CYC,Cycles FPU issue queue full +##100A8 +Cycles when one or both FPU issue queues are full. Combined Unit 0 + 1. 
Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full. +#90,v,g,n,n,PM_FPU_SINGLE,FPU executed single precision instruction +##020A8 +FPU is executing single precision instruction. Combined Unit 0 + Unit 1. +#91,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##110C0 +The issue queue that feeds the Fixed Point unit 0 / Load Store Unit 0 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#92,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##110C4 +The issue queue that feeds the Fixed Point unit 1 / Load Store Unit 1 is full. This condition will prevent dispatch groups from being dispatched. This event only indicates that the queue was full, not that dispatch was prevented. +#93,c,g,n,n,PM_FXLS_FULL_CYC,Cycles FXLS queue is full +##11090 +Cycles when the issue queues for one or both FXU/LSU units is full. Use with caution since this is the sum of cycles when Unit 0 was full plus Unit 1 full. It does not indicate when both units were full. +#94,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##130E2 +The Fixed Point unit 0 finished an instruction and produced a result. Instructions that finish may not necessarily complete. +#95,u,g,n,n,PM_FXU1_BUSY_FXU0_IDLE,FXU1 busy FXU0 idle +##00012 +FXU0 was idle while FXU1 was busy. +#96,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##130E6 +The Fixed Point unit 1 finished an instruction and produced a result. Instructions that finish may not necessarily complete. +#97,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##0001F,100C0 +The Global Completion Table is completely full. +#98,v,g,n,n,PM_GCT_NOSLOT_BR_MPRED,No slot in GCT caused by branch mispredict +##1009C +Cycles when the Global Completion Table has no slots from this thread because of a branch misprediction.
+#99,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##130E5 +The General Purpose Register mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#100,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##120E6 +Number of groups, counted at dispatch, that have encountered a branch redirect. Every group constructed from a fetch group that has been redirected will count. +#101,c,g,n,n,PM_GRP_IC_MISS_BR_REDIR_NONSPEC,Group experienced non-speculative I cache miss or branch redirect +##120E5 +Group experienced non-speculative I cache miss or branch redirect +#102,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##130E1 +A scoreboard operation on a non-renamed resource has blocked dispatch. +#103,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##00002,120E4 +A group that previously attempted dispatch was rejected. +#104,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##120E3 +A group is available for dispatch. This does not mean it was successfully dispatched. +#105,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss +##120E7 +Number of groups, counted at dispatch, that have encountered an icache miss redirect. Every group constructed from a fetch group that missed the instruction cache will count. +#106,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect +##230E0 +A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (CR mispredict). +#107,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect +##230E1 +A demand (not prefetch) miss to the instruction cache was sent to the L2 as a result of a branch prediction redirect (either ALL mispredicted or Target). 
+#108,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch buffer +##210C7 +A prefetch buffer entry (line) is allocated but the request is not a demand fetch. +#109,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##220E6 +An instruction prefetch request has been made. +#110,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##220E7 +An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. +#111,v,g,n,n,PM_IERAT_XLATE_WR_LP,Large page translation written to ierat +##210C6 +An entry was written into the IERAT as a result of an IERAT miss. This event can be used to count IERAT misses. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. +#112,v,g,n,n,PM_IOPS_CMPL,Internal operations completed +##00001 +Number of internal operations that completed. +#113,v,g,n,n,PM_INST_DISP,Instructions dispatched +##00009 +Number of PowerPC instructions successfully dispatched. +#114,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##220E4 +Cycles when at least one instruction was sent from the fetch unit to the decode unit. +#115,v,g,n,n,PM_INST_FROM_L275_MOD,Instruction fetched from L2.75 modified +##22096 +An instruction fetch group was fetched with modified (M) data from the L2 on a different module than this processor is located. Fetch groups can contain up to 8 instructions +#116,v,g,n,n,PM_INST_FROM_L375_MOD,Instruction fetched from L3.75 modified +##2209D +An instruction fetch group was fetched with modified (M) data from the L3 of a chip on a different module than this processor is located.
Fetch groups can contain up to 8 instructions +#117,v,g,n,n,PM_INST_FROM_RMEM,Instruction fetched from remote memory +##22086 +An instruction fetch group was fetched from memory attached to a different module than this processor is located on. Fetch groups can contain up to 8 instructions +#118,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##800C1 +A SLB miss for an instruction fetch has occurred +#119,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##800C0 +A TLB miss for an Instruction Fetch has occurred +#120,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C30E4 +The data source information is valid, the data cache has been reloaded. Prior to POWER5+ this included data cache reloads due to prefetch activity. With POWER5+ this now only includes reloads due to demand loads. +#121,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##C70E7 +A request to prefetch data into the L1 was made +#122,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##230E7 +Cycles that a cache line was written to the instruction cache. +#123,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##730E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#124,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##720E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C.
+#125,v,g,n,s,PM_L2SA_RCLD_DISP,L2 slice A RC load dispatch attempt +##701C0 +A Read/Claim dispatch for a Load was attempted +#126,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C0 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#127,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 slice A RC load dispatch attempt failed due to other reasons +##731E0 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#128,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 slice A RC load dispatch attempt failed due to all RC full +##721E0 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#129,v,g,n,s,PM_L2SA_RCST_DISP,L2 slice A RC store dispatch attempt +##702C0 +A Read/Claim dispatch for a Store was attempted. +#130,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C0 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#131,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 slice A RC store dispatch attempt failed due to other reasons +##732E0 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#132,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 slice A RC store dispatch attempt failed due to all RC full +##722E0 +A Read/Claim dispatch for a store failed because all RC machines are busy. 
+#133,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C0 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#134,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice A RC dispatch attempt failed due to all CO busy +##713C0 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#135,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##710C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#136,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##700C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#137,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##733E0 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B, and C. 
+#138,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##723E0 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#139,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##730E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#140,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##720E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#141,v,g,n,s,PM_L2SB_RCLD_DISP,L2 slice B RC load dispatch attempt +##701C1 +A Read/Claim dispatch for a Load was attempted +#142,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C1 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#143,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 slice B RC load dispatch attempt failed due to other reasons +##731E1 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#144,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 slice B RC load dispatch attempt failed due to all RC full +##721E1 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#145,v,g,n,s,PM_L2SB_RCST_DISP,L2 slice B RC store dispatch attempt +##702C1 +A Read/Claim dispatch for a Store was attempted. 
+#146,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C1 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#147,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 slice B RC store dispatch attempt failed due to other reasons +##732E1 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#148,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 slice B RC store dispatch attempt failed due to all RC full +##722E2 +A Read/Claim dispatch for a store failed because all RC machines are busy. +#149,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C1 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#150,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice B RC dispatch attempt failed due to all CO busy +##713C1 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#151,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##710C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. 
This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#152,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##700C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#153,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##733E1 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A, B and C. +#154,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##723E1 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#155,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##730E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. +#156,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##720E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A, B, and C. 
+#157,v,g,n,s,PM_L2SC_RCLD_DISP,L2 slice C RC load dispatch attempt +##701C2 +A Read/Claim dispatch for a Load was attempted +#158,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C2 +A Read/Claim dispatch for a load failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#159,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 slice C RC load dispatch attempt failed due to other reasons +##731E2 +A Read/Claim dispatch for a load failed for some reason other than Full or Collision conditions. +#160,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 slice C RC load dispatch attempt failed due to all RC full +##721E2 +A Read/Claim dispatch for a load failed because all RC machines are busy. +#161,v,g,n,s,PM_L2SC_RCST_DISP,L2 slice C RC store dispatch attempt +##702C2 +A Read/Claim dispatch for a Store was attempted. +#162,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C2 +A Read/Claim dispatch for a store failed because of an address conflict. Two RC machines will never both work on the same line or line in the same congruence class at the same time. +#163,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 slice C RC store dispatch attempt failed due to other reasons +##732E2 +A Read/Claim dispatch for a store failed for some reason other than Full or Collision conditions. Rejected dispatches do not count because they have not yet been attempted. +#164,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 slice C RC store dispatch attempt failed due to all RC full +##722E1 +A Read/Claim dispatch for a store failed because all RC machines are busy. 
+#165,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C2 +A Read/Claim Dispatch was rejected at dispatch because the Castout Machine was busy. In the case of an RC starting up on a miss and the victim is valid, the CO machine must be available for the RC to process the access. If the CO is still busy working on an old castout, then the RC must not-ack the access if it is a miss(re-issued by the CIU). If it is a miss and the CO is available to process the castout, the RC will accept the access. Once the RC has finished, it can restart and process new accesses that result in a hit (or miss that doesn't need a CO) even though the CO is still processing a castout from a previous access. +#166,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 slice C RC dispatch attempt failed due to all CO busy +##713C2 +A Read/Claim dispatch was rejected because all Castout machines were busy. +#167,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##710C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A, B, and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#168,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##700C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A, B, and C. +#169,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##733E2 +A store request made from the core hit in the L2 directory. The event is provided on each of the three slices A, B, and C. 
+#170,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##723E2 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A, B, and C. +#171,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##C50C3 +A request to prefetch data into L2 was made +#172,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy +##721E3 +Cycles All Castin/Castout machines are busy. +#173,v,g,n,s,PM_L3SA_HIT,L3 slice A hits +##711C3 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice +#174,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid +##730E3 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. +#175,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG +##720E3 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case) Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#176,v,g,n,s,PM_L3SA_REF,L3 slice A references +##701C3 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice +#177,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid +##710C3 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#178,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries +##731E3 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#179,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy +##721E4 +Cycles All Castin/Castout machines are busy. 
+#180,v,g,n,s,PM_L3SB_HIT,L3 slice B hits +##711C4 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 slice +#181,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid +##730E4 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I). Mu|Me are not included since they are formed due to a prev read op. Tx is not included since it is considered shared at this point. +#182,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG +##720E4 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#183,v,g,n,s,PM_L3SB_REF,L3 slice B references +##701C4 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice +#184,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid +##710C4 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#185,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries +##731E4 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#186,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy +##721E5 +Cycles All Castin/Castout machines are busy. +#187,v,g,n,s,PM_L3SC_HIT,L3 slice C hits +##711C5 +Number of attempts made by this chip cores that resulted in an L3 hit. Reported per L3 Slice +#188,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid +##730E5 +L3 snooper detects someone doing a store to a line that is truly M in this L3 (i.e. L3 going M=>I) Mu|Me are not included since they are formed due to a previous read op Tx is not included since it is considered shared at this point. 
+#189,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG +##720E5 +L3 snooper detects someone doing a read to a line that is truly M in this L3(i.e. L3 going M->T or M->I(go_Mu case); Mu|Me are not included since they are formed due to a prev read op). Tx is not included since it is considered shared at this point. +#190,v,g,n,s,PM_L3SC_REF,L3 slice C references +##701C5 +Number of attempts made by this chip cores to find data in the L3. Reported per L3 slice. +#191,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid +##710C5 +L3 snooper detects someone doing a store to a line that is Sx in this L3(i.e. invalidate hit SX and dispatched). +#192,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries +##731E5 +Number of times an L3 retried a snoop because it got two in at the same time (one on snp_a, one on snp_b) +#193,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##820E7 +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#194,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C10C2 +Load references that miss the Level 1 Data cache, by unit 0. +#195,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C10C6 +Load references that miss the Level 1 Data cache, by unit 1. +#196,v,g,n,n,PM_LD_REF_L1,L1 D cache load references +##C1090 +Load references to the Level 1 Data Cache. Combined unit 0 + 1. +#197,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C10C0 +Load references to Level 1 Data Cache, by unit 0. +#198,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C10C5 +Load references that miss the Level 1 Data cache, by unit 1. +#199,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##100C6 +The LR/CTR mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. 
+#200,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject +##C20E1 +Total cycles the Load Store Unit 0 is busy rejecting instructions. +#201,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##800C2 +Total D-ERAT Misses by LSU0. Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. +#202,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C00C2 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#203,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ lhs flushes +##C00C3 +A store was flushed by unit 0 because younger load hits and older store that is already in the SRQ or in the same group. +#204,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C00C0 +A load was flushed from unit 0 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1) +#205,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C00C1 +A store was flushed from unit 0 because it was unaligned (crossed a 4K boundary). +#206,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##C50C0 +A floating point load was executed by LSU0 +#207,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads +##C50C1 +A non-cacheable load was executed by unit 0. +#208,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C40C3 +Total cycles the Load Store Unit 0 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. +#209,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C40C1 +Total cycles the Load Store Unit 0 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. 
+#210,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C40C2 +Total cycles the Load Store Unit 0 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus. Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. +#211,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ lhs rejects +##C40C0 +Total cycles the Load Store Unit 0 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. +#212,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C60E1 +Data from a store instruction was forwarded to a load on unit 0. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. +#213,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject +##C20E5 +Total cycles the Load Store Unit 1 is busy rejecting instructions. +#214,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##800C6 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.
+#215,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C00C6 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#216,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ lhs flushes +##C00C7 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#217,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C00C4 +A load was flushed from unit 1 because it was unaligned (crossed a 64 byte boundary, or 32 byte if it missed the L1). +#218,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C00C5 +A store was flushed from unit 1 because it was unaligned (crossed a 4K boundary) +#219,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##C50C4 +A floating point load was executed by LSU1 +#220,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads +##C50C5 +A non-cacheable load was executed by Unit 1. +#221,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C40C7 +Total cycles the Load Store Unit 1 is busy rejecting instructions due to an ERAT miss. Requests that miss the Derat are rejected and retried until the request hits in the Erat. +#222,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C40C5 +Total cycles the Load Store Unit 1 is busy rejecting instructions because the Load Miss Queue was full. The LMQ has eight entries. If all eight entries are full, subsequent load instructions are rejected. +#223,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C40C6 +Total cycles the Load Store Unit 1 is busy rejecting instructions because of Critical Data Forward. When critical data arrives from the storage system it is formatted and immediately forwarded, bypassing the data cache, to the destination register using the result bus.
Any instruction that requires the result bus in the same cycle is rejected. Tag update rejects are caused when an instruction requires access to the Dcache directory or ERAT in the same system when they are being updated. +#224,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ lhs rejects +##C40C4 +Total cycles the Load Store Unit 1 is busy rejecting instructions because of Load Hit Store conditions. Loads are rejected when data is needed from a previous store instruction but store forwarding is not possible because the data is not fully contained in the Store Data Queue or is not yet available in the Store Data Queue. +#225,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C60E5 +Data from a store instruction was forwarded to a load on unit 1. A load that misses L1 but becomes a store forward is treated as a load miss and it causes the DL1 load miss event to be counted. It does not go into the LMQ. If a load that hits L1 but becomes a store forward, then it's not treated as a load miss. +#226,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##110C5 +A flush was initiated by the Load Store Unit +#227,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full +##320E7 +This thread was flushed at dispatch because its Load Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#228,u,g,n,n,PM_LSU_FLUSH_SRQ,SRQ flushes +##C00A8 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. Combined Unit 0 + 1. +#229,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full +##330E0 +This thread was flushed at dispatch because its Store Request Queue was full. This allows the other thread to have more machine resources for it to make progress while this thread is stalled. +#230,v,g,n,n,PM_LSU_LDF,LSU executed Floating Point load instruction +##C5090 +LSU executed Floating Point load instruction. Combined Unit 0 + 1.
+#231,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C30E7 +The Load Miss Queue was full. +#232,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C70E5 +A data cache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#233,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C30E6 +The first entry in the LMQ was allocated. +#234,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C30E5 +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#235,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##110C2 +Cycles when the LRQ is full. +#236,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C60E7 +LRQ slot zero was allocated +#237,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C60E6 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin. In SMT mode the LRQ is split between the two threads (16 entries each). +#238,v,g,n,n,PM_LSU_REJECT_ERAT_MISS,LSU reject due to ERAT miss +##C40A8 +Total cycles the Load Store Unit is busy rejecting instructions due to an ERAT miss. Combined unit 0 + 1. Requests that miss the Derat are rejected and retried until the request hits in the Erat. +#239,u,g,n,n,PM_LSU_SRQ_EMPTY_CYC,Cycles SRQ empty +##00015 +Cycles the Store Request Queue is empty +#240,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##110C3 +Cycles the Store Request Queue is full. +#241,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C20E7 +SRQ Slot zero was allocated +#242,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C20E6 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. In SMT mode the SRQ is split between the two threads (16 entries each).
+#243,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##830E5 +Cycles that a sync instruction is active in the Store Request Queue. +#244,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch +##130E0 +Cycles a LWSYNC instruction was held at dispatch. LWSYNC instructions are held at dispatch until all previous loads are done and all previous stores have issued. LWSYNC enters the Store Request Queue and is sent to the storage subsystem but does not wait for a response. +#245,c,g,n,n,PM_MEM_FAST_PATH_RD_DISP,Fast path memory read dispatched +##731E6 +Fast path memory read dispatched +#246,v,g,n,n,PM_MEM_RQ_DISP_Q16to19,Memory read queue dispatched to queues 16-19 +##727E6 +A memory operation was dispatched to read queue 16,17,18 or 19. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#247,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed +##726E6 +A memory write, which was upgraded to high priority, completed. Writes can be upgraded to high priority to ensure that read traffic does not lock out writes. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#248,v,g,n,n,PM_MEM_RQ_DISP_Q12to15,Memory read queue dispatched to queues 12-15 +##732E6 +A memory operation was dispatched to read queue 12,13,14 or 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#249,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed +##736E6 +A memory write, which was not upgraded to high priority, completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly +#250,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched +##704C6 +Number of Partial Writes dispatched. The MC provides resources to gather partial cacheline writes (Partial line DMA writes & CI-stores) to up to four different cachelines at a time. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
+#251,v,g,n,n,PM_MEM_PWQ_DISP_Q2or3,Memory partial-write queue dispatched to Write Queue 2 or 3 +##734E6 +Memory partial-write queue dispatched to Write Queue 2 or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#252,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed +##724E6 +Number of Partial Writes completed. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#253,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered +##714C6 +Two or more partial-writes have been merged into a single memory write. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#254,v,g,n,n,PM_INST_DISP_ATTEMPT,Instructions dispatch attempted +##120E1 +Number of PowerPC Instructions dispatched (attempted, not filtered by success. +#255,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched +##701C6 +A memory read was dispatched. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#256,v,g,n,n,PM_MEM_RQ_DISP_Q0to3,Memory read queue dispatched to queues 0-3 +##702C6 +A memory operation was dispatched to read queue 0,1,2, or 3. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#257,v,g,n,n,PM_MEM_RQ_DISP_Q4to7,Memory read queue dispatched to queues 4-7 +##712C6 +A memory operation was dispatched to read queue 4,5,6 or 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#258,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read cancelled +##721E6 +Speculative memory read cancelled (i.e. cresp = sourced by L2/L3) +#259,v,g,n,n,PM_MEM_WQ_DISP_Q0to7,Memory write queue dispatched to queues 0-7 +##723E6 +A memory operation was dispatched to a write queue in the range between 0 and 7. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
+#260,v,g,n,n,PM_MEM_WQ_DISP_Q8to15,Memory write queue dispatched to queues 8-15 +##733E6 +A memory operation was dispatched to a write queue in the range between 8 and 15. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#261,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush +##713C6 +A memory dclaim or flush operation was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#262,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write +##703C6 +A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#263,v,g,n,n,PM_MRK_CRU_FIN,Marked instruction CRU processing finished +##00005 +The Condition Register Unit finished a marked instruction. Instructions that finish may not necessary complete. +#264,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD_CYC,Marked load latency from L2.5 modified +##C70A2 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#265,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD,Marked data loaded from L2.75 modified +##C7097 +The processor's Data Cache was reloaded with modified (M) data from the L2 on a different module than this processor is located due to a marked load. +#266,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD_CYC,Marked load latency from L2.75 modified +##C70A3 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. 
+#267,v,g,n,n,PM_MRK_DATA_FROM_L35_MOD_CYC,Marked load latency from L3.5 modified +##C70A6 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#268,v,g,n,n,PM_MRK_DATA_FROM_L375_MOD,Marked data loaded from L3.75 modified +##C709E +The processor's Data Cache was reloaded with modified (M) data from the L3 of a chip on a different module than this processor is located due to a marked load. +#269,v,g,n,n,PM_MRK_DATA_FROM_L375_MOD_CYC,Marked load latency from L3.75 modified +##C70A7 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#270,v,g,n,n,PM_MRK_DATA_FROM_LMEM_CYC,Marked load latency from local memory +##C70A0 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. +#271,v,g,n,n,PM_MRK_DATA_FROM_RMEM,Marked data loaded from remote memory +##C7087 +The processor's Data Cache was reloaded due to a marked load from memory attached to a different module than this proccessor is located on. +#272,v,g,n,n,PM_MRK_DATA_FROM_RMEM_CYC,Marked load latency from remote memory +##C70A1 +Cycles a marked load waited for data from this level of the storage system. Counting begins when a marked load misses the data cache and ends when the data is reloaded into the data cache. To calculate average latency divide this count by the number of marked misses to the same level. 
+#273,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses +##C50C7 +A Data SLB miss was caused by a marked instruction. +#274,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses +##C50C6,C60E0 +Data TLB references by a marked instruction that missed the TLB (all page sizes). +#275,v,g,n,n,PM_MRK_DTLB_MISS_16G,Marked Data TLB misses for 16G page +##C608D +Data TLB references to 16GB pages by a marked instruction that missed the TLB. Page size is determined at TLB reload time. +#276,v,g,n,n,PM_MRK_DTLB_REF,Marked Data TLB reference +##C60E4 +Total number of Data TLB references by a marked instruction for all page sizes. Page size is determined at TLB reload time. +#277,v,g,n,n,PM_MRK_DTLB_REF_16G,Marked Data TLB reference for 16G page +##C6086 +Data TLB references by a marked instruction for 16GB pages. +#278,v,g,n,n,PM_MRK_GRP_CMPL,Marked group completed +##00013 +A group containing a sampled instruction completed. Microcoded instructions that span multiple groups will generate this event once per group. +#279,v,g,n,n,PM_MRK_GRP_IC_MISS,Group experienced marked I cache miss +##12091 +A group containing a marked (sampled) instruction experienced an instruction cache miss. +#280,v,g,n,n,PM_MRK_GRP_TIMEO,Marked group completion timeout +##0000B +The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor +#281,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##820E2 +A DL1 reload occurred due to marked load +#282,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C70E4 +The source information is valid and is for a marked load +#283,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 marked L1 D cache load misses +##820E0 +Load references that miss the Level 1 Data cache, by LSU0. +#284,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 marked L1 D cache load misses +##820E4 +Load references that miss the Level 1 Data cache, by LSU1. 
+#285,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##810C2 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#286,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ lhs flushes +##810C3 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#287,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##810C1 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#288,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##810C0 +A marked store was flushed from unit 0 because it was unaligned +#289,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##810C6 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#290,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ lhs flushes +##810C7 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#291,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##810C4 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#292,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##810C5 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#293,c,g,n,n,PM_MRK_LSU_FIN,Marked instruction LSU processing finished +##00014 +One of the Load/Store Units finished a marked instruction. 
Instructions that finish may not necessary complete +#294,v,g,n,n,PM_MRK_LSU_FLUSH_SRQ,Marked SRQ lhs flushes +##81088 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#295,v,g,n,n,PM_MRK_LSU_FLUSH_ULD,Marked unaligned load flushes +##81090 +A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#296,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C70E6 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#297,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##820E6 +A marked stcx (stwcx or stdcx) failed +#298,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##820E3 +A marked store missed the dcache +#299,v,g,n,n,PM_PMC3_OVERFLOW,PMC3 Overflow +##0000A +Overflows from PMC3 are counted. This effectively widens the PMC. The Overflow from the original PMC will not trigger an exception even if the PMU is configured to generate exceptions on overflow. +#300,v,g,n,n,PM_PTEG_FROM_L275_MOD,PTEG loaded from L2.75 modified +##83097 +A Page Table Entry was loaded into the TLB with modified (M) data from the L2 on a different module than this processor is located due to a demand load. +#301,v,g,n,n,PM_PTEG_FROM_L375_MOD,PTEG loaded from L3.75 modified +##8309E +A Page Table Entry was loaded into the TLB with modified (M) data from the L3 of a chip on a different module than this processor is located, due to a demand load. +#302,v,g,n,n,PM_PTEG_FROM_RMEM,PTEG loaded from remote memory +##83087 +A Page Table Entry was loaded into the TLB from memory attached to a different module than this proccessor is located on. +#303,v,g,n,n,PM_PTEG_RELOAD_VALID,PTEG reload valid +##830E4 +A Page Table Entry was loaded into the TLB. 
+#304,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full +##720E6 +The memory controller A memory write was dispatched to a write queue. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#305,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full +##730E6 +A snoop request for a partial write to memory was retried because the write queues that handle partial writes were full. When this happens the active writes are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#306,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue +##707C6 +A snoop request for a partial write to memory was retried because it matched the cache line of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#307,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue +##717C6 +A snoop request for a partial write to memory was retried because it matched the cache line of an active write or partial write. When this happens the snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#308,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full +##700C6 +A snoop request for a read from memory was retried because the read queues were full. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#309,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue +##705C6 +A snoop request for a read from memory was retried because it matched the cache line of an active read. 
The snoop request is retried because the L2 may be able to source data via intervention for the 2nd read faster than the MC. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#310,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue +##715C6 +A snoop request for a read from memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#311,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision +##725E6 +Snoop retry due to one ahead collision +#312,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE +##800C3 +A tlbie was snooped from another processor. +#313,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop read retry due to read queue full +##710C6 +A snoop request for a write to memory was retried because the write queues were full. When this happens the snoop request is retried and the writes in the write reorder queue are changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#314,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue +##706C6 +A snoop request for a write or dclaim to memory was retried because it matched the cacheline of an active read. This event is sent from the Memory Controller clock domain and must be scaled accordingly +#315,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue +##716C6 +A snoop request for a write or dclaim to memory was retried because it matched the cache line of an active write. The snoop request is retried and the active write is changed to high priority. This event is sent from the Memory Controller clock domain and must be scaled accordingly. 
+#316,v,g,n,n,PM_STCX_FAIL,STCX failed +##820E1 +A stcx (stwcx or stdcx) failed +#317,v,g,n,n,PM_STCX_PASS,Stcx passes +##820E5 +A stcx (stwcx or stdcx) instruction was successful +#318,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C10C3 +A store missed the dcache. Combined Unit 0 + 1. +#319,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C10C1 +Store references to the Data Cache by LSU0. +#320,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C10C4 +Store references to the Data Cache by LSU1. +#321,v,g,n,n,PM_SUSPENDED,Suspended +##00000 +The counter is suspended (does not count). +#322,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses +##41084,410C7 +Cycles that both threads have L2 miss pending. If only one thread has a L2 miss pending the other thread is given priority at decode. If both threads have L2 miss pending decode priority is determined by the number of GCT entries used. +#323,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 +##420E0 +Cycles this thread was running at priority level 1. Priority level 1 is the lowest and indicates the thread is sleeping. +#324,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 +##420E1 +Cycles this thread was running at priority level 2. +#325,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 +##420E2 +Cycles this thread was running at priority level 3. +#326,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 +##420E3 +Cycles this thread was running at priority level 4. +#327,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 +##420E4 +Cycles this thread was running at priority level 5. +#328,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 +##420E5 +Cycles this thread was running at priority level 6. +#329,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 +##420E6 +Cycles this thread was running at priority level 7. 
+#330,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference +##430E3 +Cycles when this thread's priority is equal to the other thread's priority. +#331,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 +##430E4 +Cycles when this thread's priority is higher than the other thread's priority by 1 or 2. +#332,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 +##430E5 +Cycles when this thread's priority is higher than the other thread's priority by 3 or 4. +#333,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 +##430E6 +Cycles when this thread's priority is higher than the other thread's priority by 5 or 6. +#334,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 +##430E2 +Cycles when this thread's priority is lower than the other thread's priority by 1 or 2. +#335,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 +##430E1 +Cycles when this thread's priority is lower than the other thread's priority by 3 or 4. +#336,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 +##430E0 +Cycles when this thread's priority is lower than the other thread's priority by 5 or 6. +#337,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty +##410C2 +Thread selection was overridden because one thread's CLB was empty. +#338,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance +##410C4 +Thread selection was overridden because of a GCT imbalance. +#339,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds +##410C5 +Thread selection was overridden because of an ISU hold. +#340,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses +##410C3 +Thread selection was overridden because one thread was had a L2 miss pending. 
+#341,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 +##410C0 +Thread selection picked thread 0 for decode. +#342,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 +##410C1 +Thread selection picked thread 1 for decode. +#343,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected +##330E7 +A hung thread was detected +#344,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch +##130E4 +Cycles a TLBIE instruction was held at dispatch. +#345,v,g,n,n,PM_WORK_HELD,Work held +##0000C +RAS Unit has signaled completion to stop and there are groups waiting to complete +#346,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##100C2 +The XER mapper cannot accept any more groups. This condition will prevent dispatch groups from being dispatched. This event only indicates that the mapper was full, not that dispatch was prevented. +#347,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction +##230E2 +A conditional branch instruction was predicted as taken or not taken. +#348,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction +##230E3 +The target address of a branch instruction was predicted. +#349,v,g,n,n,PM_MEM_RQ_DISP_Q8to11,Memory read queue dispatched to queues 8-11 +##722E6 +A memory operation was dispatched to read queue 8,9,10 or 11. This event is sent from the Memory Controller clock domain and must be scaled accordingly. +#350,v,g,n,n,PM_SNOOP_RETRY_AB_COLLISION,Snoop retry due to a b collision +##735E6 +Snoop retry due to a b collision +#351,v,g,n,s,PM_MEM_NONSPEC_RD_CANCEL,Non speculative memory read cancelled +##711C6 +A non-speculative read was cancelled because the combined response indicated it was sourced from aother L2 or L3. This event is sent from the Memory Controller clock domain and must be scaled accordingly + +$$$$$$$$ + +{ counter 5 } +#0,v,g,n,n,PM_RUN_INST_CMPL,Run instructions completed +##00009 +Number of run instructions completed. 
+ +$$$$$$$$ + +{ counter 6 } +#0,v,g,n,n,PM_RUN_CYC,Run cycles +##00005 +Processor Cycles gated by the run latch. Operating systems use the run latch to indicate when they are doing useful work. The run latch is typically cleared in the OS idle loop. Gating by the run latch filters out the idle loop. diff --git a/src/event_data/power5+/groups b/src/event_data/power5+/groups new file mode 100644 index 0000000..27b940c --- /dev/null +++ b/src/event_data/power5+/groups @@ -0,0 +1,950 @@ +{ File: power5+/groups +{ Date: 03/15/07 +{ Version: 1.8 +{ (C) Copyright IBM Corporation, 2006, 2007. All Rights Reserved. +{ Contributed by Corey Ashford + +{ Number of groups + 188 + +{ Group descriptions + +#0,312,302,113,21,0,0,pm_utilization,CPI and utilization data +##00005,00009,00009,0000F,00009,00005 +00000000,00000000,0A12121E,00000000 +CPI and utilization data + +#1,2,95,100,21,0,0,pm_completion,Completion and cycle counts +##00013,00004,00013,0000F,00009,00005 +00000000,00000000,2608261E,00000000 +Completion and cycle counts + +#2,105,104,101,113,0,0,pm_group_dispatch,Group dispatch events +##120E3,120E4,130E1,00009,00009,00005 +00000000,4000000E,C6C8C212,00000000 +Group dispatch events + +#3,0,2,12,267,0,0,pm_clb1,CLB fullness +##400C0,400C2,410C6,C70A6,00009,00005 +00000000,015B0001,80848C4C,00000001 +CLB fullness + +#4,6,6,292,112,0,0,pm_clb2,CLB fullness +##400C5,400C6,C70E6,00001,00009,00005 +00000000,01430002,8A8CCC02,00000001 +CLB fullness + +#5,98,97,95,98,0,0,pm_gct_empty,GCT empty reasons +##00004,1009C,10084,1009C,00009,00005 +00000000,40000000,08380838,00000000 +GCT empty reasons + +#6,99,98,96,97,0,0,pm_gct_usage,GCT Usage +##0001F,0001F,0001F,0001F,00009,00005 +00000000,00000000,3E3E3E3E,00000000 +GCT Usage + +#7,242,241,234,234,0,0,pm_lsu1,LSU LRQ and LMQ events +##C60E7,C60E6,C30E6,C30E5,00009,00005 +00000000,020F000F,CECCCCCA,00000000 +LSU LRQ and LMQ events + +#8,247,246,244,240,0,0,pm_lsu2,LSU SRQ events +##C20E7,C20E6,830E5,110C3,00009,00005 
+00000000,400E000E,CECCCA86,00000000 +LSU SRQ events + +#9,238,247,236,239,0,0,pm_lsu3,LSU SRQ and LMQ events +##C70E5,C6088,00015,00015,00009,00005 +00000000,030F0004,EA102A2A,00000000 +LSU SRQ and LMQ events + +#10,237,244,236,239,0,0,pm_lsu4,LSU SRQ and LMQ events +##C30E7,110C3,00015,00015,00009,00005 +00000000,40030000,EEA62A2A,00000000 +LSU SRQ and LMQ events + +#11,120,115,26,29,0,0,pm_prefetch1,Prefetch stream allocation +##2209B,220E4,C50C2,830E7,00009,00005 +00000000,8432000D,36C884CE,00000000 +Prefetch stream allocation + +#12,115,13,122,108,0,0,pm_prefetch2,Prefetch events +##00001,220E5,C70E7,210C7,00009,00005 +00000000,81030006,02CACE8E,00000001 +Prefetch events + +#13,1,227,172,112,0,0,pm_prefetch3,L2 prefetch and misc events +##400C1,C2088,C50C3,00001,00009,00005 +00000000,047C0004,82108602,00000001 +L2 prefetch and misc events + +#14,216,225,27,171,0,0,pm_prefetch4,Misc prefetch and reject events +##C40C0,C40C4,830E6,C50C3,00009,00005 +00000000,0CF20002,8088CC86,00000000 +Misc prefetch and reject events + +#15,244,242,53,294,0,0,pm_lsu_reject1,LSU reject events +##C4090,C4088,330E3,81088,00009,00005 +00000000,C8E00002,2010C610,00000001 +LSU reject events + +#16,215,224,112,122,0,0,pm_lsu_reject2,LSU rejects due to reload CDF or tag update collision +##C40C2,C40C6,00001,230E7,00009,00005 +00000000,88C00001,848C02CE,00000001 +LSU rejects due to reload CDF or tag update collision + +#17,213,222,245,344,0,0,pm_lsu_reject3,LSU rejects due to ERAT, held instuctions +##C40C3,C40C7,130E0,130E4,00009,00005 +00000000,48C00003,868EC0C8,00000000 +LSU rejects due to ERAT, held instuctions + +#18,214,223,112,9,0,0,pm_lsu_reject4,LSU0/1 reject LMQ full +##C40C1,C40C5,00001,230E4,00009,00005 +00000000,88C00001,828A02C8,00000001 +LSU0/1 reject LMQ full + +#19,245,243,228,53,0,0,pm_lsu_reject5,LSU misc reject and flush events +##C4088,C4090,110C5,110C7,00009,00005 +00000000,48C00000,10208A8E,00000000 +LSU misc reject and flush events + 
+#20,115,233,53,26,0,0,pm_flush1,Misc flush events +##00001,C0088,330E3,C10C7,00009,00005 +00000000,C0F00002,0210C68E,00000001 +Misc flush events + +#21,124,113,54,57,0,0,pm_flush2,Flushes due to scoreboard and sync +##800C0,00001,330E2,330E1,00009,00005 +00000000,C0800003,8002C4C2,00000001 +Flushes due to scoreboard and sync + +#22,233,230,112,226,0,0,pm_lsu_flush_srq_lrq,LSU flush by SRQ and LRQ events +##C0090,C0090,00001,110C5,00009,00005 +00000000,40C00000,2020028A,00000001 +LSU flush by SRQ and LRQ events + +#23,207,216,228,112,0,0,pm_lsu_flush_lrq,LSU0/1 flush due to LRQ +##C00C2,C00C6,110C5,00001,00009,00005 +00000000,40C00000,848C8A02,00000001 +LSU0/1 flush due to LRQ + +#24,208,217,112,226,0,0,pm_lsu_flush_srq,LSU0/1 flush due to SRQ +##C00C3,C00C7,00001,110C5,00009,00005 +00000000,40C00000,868E028A,00000001 +LSU0/1 flush due to SRQ + +#25,235,233,8,112,0,0,pm_lsu_flush_unaligned,LSU flush due to unaligned data +##C0088,C0088,230E4,00001,00009,00005 +00000000,80C00002,1010C802,00000001 +LSU flush due to unaligned data + +#26,209,218,228,112,0,0,pm_lsu_flush_uld,LSU0/1 flush due to unaligned load +##C00C0,C00C4,110C5,00001,00009,00005 +00000000,40C00000,80888A02,00000001 +LSU0/1 flush due to unaligned load + +#27,210,219,112,226,0,0,pm_lsu_flush_ust,LSU0/1 flush due to unaligned store +##C00C1,C00C5,00001,110C5,00009,00005 +00000000,40C00000,828A028A,00000001 +LSU0/1 flush due to unaligned store + +#28,232,113,290,229,0,0,pm_lsu_flush_full,LSU flush due to LRQ/SRQ full +##320E7,00001,81088,330E0,00009,00005 +00000000,C0200009,CE0210C0,00000001 +LSU flush due to LRQ/SRQ full + +#29,109,17,112,18,0,0,pm_lsu_stall1,LSU Stalls +##00014,11098,00001,1109A,00009,00005 +00000000,40000000,28300234,00000001 +LSU Stalls + +#30,115,14,16,16,0,0,pm_lsu_stall2,LSU Stalls +##00001,1109A,0000F,1109B,00009,00005 +00000000,40000000,02341E36,00000001 +LSU Stalls + +#31,107,16,112,15,0,0,pm_fxu_stall,FXU Stalls +##120E5,11099,00001,11099,00009,00005 
+00000000,40000008,CA320232,00000001 +FXU Stalls + +#32,89,15,112,17,0,0,pm_fpu_stall,FPU Stalls +##10090,1109B,00001,11098,00009,00005 +00000000,40000000,20360230,00000001 +FPU Stalls + +#33,198,7,237,231,0,0,pm_queue_full,BRQ LRQ LMQ queue full +##820E7,100C5,110C2,C30E7,00009,00005 +00000000,400B0009,CE8A84CE,00000000 +BRQ LRQ LMQ queue full + +#34,68,80,88,92,0,0,pm_issueq_full,FPU FX full +##100C3,100C7,110C0,110C4,00009,00005 +00000000,40000000,868E8088,00000000 +FPU FX full + +#35,16,200,97,19,0,0,pm_mapper_full1,CR CTR GPR mapper full +##100C4,100C6,130E5,110C1,00009,00005 +00000000,40000002,888CCA82,00000000 +CR CTR GPR mapper full + +#36,57,351,266,112,0,0,pm_mapper_full2,FPR XER mapper full +##100C1,100C2,C709B,00001,00009,00005 +00000000,41030002,82843602,00000001 +FPR XER mapper full + +#37,325,321,208,220,0,0,pm_misc_load,Non-cachable loads and stcx events +##820E1,820E5,C50C1,C50C5,00009,00005 +00000000,0438000C,C2CA828A,00000001 +Non-cachable loads and stcx events + +#38,205,214,106,107,0,0,pm_ic_demand,ICache demand from BR redirect +##C20E1,C20E5,230E0,230E1,00009,00005 +00000000,800C000F,C2CAC0C2,00000000 +ICache demand from BR redirect + +#39,113,110,108,1,0,0,pm_ic_pref,ICache prefetch +##220E7,220E6,210C7,2208D,00009,00005 +00000000,8000000D,CECC8E1A,00000000 +ICache prefetch + +#40,108,106,121,112,0,0,pm_ic_miss,ICache misses +##12099,120E7,C30E4,00001,00009,00005 +00000000,4003000E,32CEC802,00000001 +ICache misses + +#41,356,307,9,11,0,0,pm_branch_miss,Branch mispredict, TLB and SLB misses +##80088,80088,230E5,230E6,00009,00005 +00000000,80800003,1010CACC,00000000 +Branch mispredict, TLB and SLB misses + +#42,12,11,11,12,0,0,pm_branch1,Branch operations +##23087,23087,23087,23087,00009,00005 +00000000,8000000F,0E0E0E0E,00000000 +Branch operations + +#43,102,100,52,112,0,0,pm_branch2,Branch operations +##12091,120E6,110C6,00001,00009,00005 +00000000,4000000C,22CC8C02,00000001 +Branch operations + +#44,25,30,195,196,0,0,pm_L1_tlbmiss,L1 load 
and TLB misses +##800C7,800C4,C1088,C1090,00009,00005 +00000000,00B00000,8E881020,00000000 +L1 load and TLB misses + +#45,18,228,322,318,0,0,pm_L1_DERAT_miss,L1 store and DERAT misses +##C3087,80090,C1090,C10C3,00009,00005 +00000000,00B30008,0E202086,00000000 +L1 store and DERAT misses + +#46,30,120,196,195,0,0,pm_L1_slbmiss,L1 load and SLB misses +##800C5,800C1,C10C2,C10C6,00009,00005 +00000000,00B00000,8A82848C,00000000 +L1 load and SLB misses + +#47,34,33,33,34,0,0,pm_dtlbref,Data TLB references +##C2086,C2086,C2086,C2086,00009,00005 +00000000,000C000F,0C0C0C0C,00000000 +Data TLB references + +#48,32,31,31,32,0,0,pm_dtlbmiss,Data TLB misses +##C208D,C208D,C208D,C208D,00009,00005 +00000000,000C000F,1A1A1A1A,00000000 +Data TLB misses + +#49,33,30,16,21,0,0,pm_dtlb,Data TLB references and misses +##C20E4,800C4,0000F,0000F,00009,00005 +00000000,008C0008,C8881E1E,00000000 +Data TLB references and misses + +#50,201,323,195,318,0,0,pm_L1_refmiss,L1 load references and misses and store references and misses +##C10A8,C10A8,C1088,C10C3,00009,00005 +00000000,00300000,50501086,00000000 +L1 load references and misses and store references and misses + +#51,21,23,51,112,0,0,pm_dsource1,L3 cache and memory data access +##C308E,C3087,110C7,00001,00009,00005 +00000000,4003000C,1C0E8E02,00000001 +L3 cache and memory data access + +#52,21,23,19,24,0,0,pm_dsource2,L3 cache and memory data access +##C308E,C3087,C309B,C3087,00009,00005 +00000000,0003000F,1C0E360E,00000000 +L3 cache and memory data access + +#53,19,21,18,22,0,0,pm_dsource_L2,L2 cache data access +##C3097,C3097,C3097,C3097,00009,00005 +00000000,0003000F,2E2E2E2E,00000000 +L2 cache data access + +#54,22,22,22,23,0,0,pm_dsource_L3,L3 cache data access +##C309E,C309E,C309E,C309E,00009,00005 +00000000,0003000F,3C3C3C3C,00000000 +L3 cache data access + +#55,121,116,118,117,0,0,pm_isource1,Instruction source information +##2208D,2208D,2208D,22086,00009,00005 +00000000,8000000F,1A1A1A0C,00000000 +Instruction source information 
+ +#56,118,119,112,1,0,0,pm_isource2,Instruction source information +##22086,22086,00001,2208D,00009,00005 +00000000,8000000D,0C0C021A,00000001 +Instruction source information + +#57,119,117,115,115,0,0,pm_isource_L2,L2 instruction source information +##22096,22096,22096,22096,00009,00005 +00000000,8000000F,2C2C2C2C,00000000 +L2 instruction source information + +#58,122,118,117,116,0,0,pm_isource_L3,L3 instruction source information +##2209D,2209D,2209D,2209D,00009,00005 +00000000,8000000F,3A3A3A3A,00000000 +L3 instruction source information + +#59,305,303,299,300,0,0,pm_pteg_source1,PTEG source information +##83097,83097,83097,83097,00009,00005 +00000000,0002000F,2E2E2E2E,00000000 +PTEG source information + +#60,308,304,303,301,0,0,pm_pteg_source2,PTEG source information +##8309E,8309E,8309E,8309E,00009,00005 +00000000,0002000F,3C3C3C3C,00000000 +PTEG source information + +#61,304,305,300,302,0,0,pm_pteg_source3,PTEG source information +##83087,83087,8309B,83087,00009,00005 +00000000,0002000F,0E0E360E,00000000 +PTEG source information + +#62,307,102,103,26,0,0,pm_pteg_source4,L3 PTEG and group disptach events +##8308E,00002,00002,C10C7,00009,00005 +00000000,00320008,1C04048E,00000000 +L3 PTEG and group disptach events + +#63,130,130,127,127,0,0,pm_L2SA_ld,L2 slice A load events +##701C0,721E0,711C0,731E0,00009,00005 +00000000,30554005,80C080C0,00000000 +L2 slice A load events + +#64,134,134,131,131,0,0,pm_L2SA_st,L2 slice A store events +##702C0,722E0,712C0,732E0,00009,00005 +00000000,30558005,80C080C0,00000000 +L2 slice A store events + +#65,138,140,135,137,0,0,pm_L2SA_st2,L2 slice A store events +##703C0,723E0,713C0,733E0,00009,00005 +00000000,3055C005,80C080C0,00000000 +L2 slice A store events + +#66,146,146,143,143,0,0,pm_L2SB_ld,L2 slice B load events +##701C1,721E1,711C1,731E1,00009,00005 +00000000,30554005,82C282C2,00000000 +L2 slice B load events + +#67,150,150,147,147,0,0,pm_L2SB_st,L2 slice B store events +##702C1,722E2,712C1,732E1,00009,00005 
+00000000,30558005,82C482C2,00000000 +L2 slice B store events + +#68,154,156,151,153,0,0,pm_L2SB_st2,L2 slice B store events +##703C1,723E1,713C1,733E1,00009,00005 +00000000,3055C005,82C282C2,00000000 +L2 slice B store events + +#69,162,162,159,159,0,0,pm_L2SC_ld,L2 slice C load events +##701C2,721E2,711C2,731E2,00009,00005 +00000000,30554005,84C484C4,00000000 +L2 slice C load events + +#70,166,166,163,163,0,0,pm_L2SC_st,L2 slice C store events +##702C2,722E1,712C2,732E2,00009,00005 +00000000,30558005,84C284C4,00000000 +L2 slice C store events + +#71,170,172,167,169,0,0,pm_L2SC_st2,L2 slice C store events +##703C2,723E2,713C2,733E2,00009,00005 +00000000,3055C005,84C484C4,00000000 +L2 slice C store events + +#72,180,113,175,177,0,0,pm_L3SA_trans,L3 slice A state transistions +##720E3,00001,730E3,710C3,00009,00005 +00000000,3015000A,C602C686,00000001 +L3 slice A state transistions + +#73,115,184,182,184,0,0,pm_L3SB_trans,L3 slice B state transistions +##00001,720E4,730E4,710C4,00009,00005 +00000000,30150006,02C8C888,00000001 +L3 slice B state transistions + +#74,115,191,189,191,0,0,pm_L3SC_trans,L3 slice C state transistions +##00001,720E5,730E5,710C5,00009,00005 +00000000,30150006,02CACA8A,00000001 +L3 slice C state transistions + +#75,129,138,124,135,0,0,pm_L2SA_trans,L2 slice A state transistions +##720E0,700C0,730E0,710C0,00009,00005 +00000000,3055000A,C080C080,00000000 +L2 slice A state transistions + +#76,145,154,140,151,0,0,pm_L2SB_trans,L2 slice B state transistions +##720E1,700C1,730E1,710C1,00009,00005 +00000000,3055000A,C282C282,00000000 +L2 slice B state transistions + +#77,161,170,156,167,0,0,pm_L2SC_trans,L2 slice C state transistions +##720E2,700C2,730E2,710C2,00009,00005 +00000000,3055000A,C484C484,00000000 +L2 slice C state transistions + +#78,177,181,179,185,0,0,pm_L3SAB_retry,L3 slice A/B snoop retry and all CI/CO busy +##721E3,721E4,731E3,731E4,00009,00005 +00000000,3005100F,C6C8C6C8,00000000 +L3 slice A/B snoop retry and all CI/CO busy + 
+#79,181,185,174,180,0,0,pm_L3SAB_hit,L3 slice A/B hit and reference +##701C3,701C4,711C3,711C4,00009,00005 +00000000,30501000,86888688,00000000 +L3 slice A/B hit and reference + +#80,191,192,193,187,0,0,pm_L3SC_retry_hit,L3 slice C hit & snoop retry +##721E5,701C5,731E5,711C5,00009,00005 +00000000,3055100A,CA8ACA8A,00000000 +L3 slice C hit & snoop retry + +#81,87,84,84,87,0,0,pm_fpu1,Floating Point events +##00088,00088,01088,01090,00009,00005 +00000000,00000000,10101020,00000000 +Floating Point events + +#82,85,86,85,88,0,0,pm_fpu2,Floating Point events +##00090,00090,01090,01088,00009,00005 +00000000,00000000,20202010,00000000 +Floating Point events + +#83,86,87,61,77,0,0,pm_fpu3,Floating point events +##02088,02088,010C3,010C7,00009,00005 +00000000,0000000C,1010868E,00000000 +Floating point events + +#84,90,88,112,230,0,0,pm_fpu4,Floating point events +##02090,02090,00001,C5090,00009,00005 +00000000,0430000C,20200220,00000001 +Floating point events + +#85,67,79,60,76,0,0,pm_fpu5,Floating point events by unit +##000C2,000C6,010C2,010C6,00009,00005 +00000000,00000000,848C848C,00000000 +Floating point events by unit + +#86,59,72,63,79,0,0,pm_fpu6,Floating point events by unit +##020E0,020E4,010C0,010C4,00009,00005 +00000000,0000000C,C0C88088,00000000 +Floating point events by unit + +#87,60,73,65,80,0,0,pm_fpu7,Floating point events by unit +##000C0,000C4,010C1,010C5,00009,00005 +00000000,00000000,8088828A,00000000 +Floating point events by unit + +#88,70,82,112,66,0,0,pm_fpu8,Floating point events by unit +##020E1,020E5,00001,030E0,00009,00005 +00000000,0000000D,C2CA02C0,00000001 +Floating point events by unit + +#89,69,81,207,219,0,0,pm_fpu9,Floating point events by unit +##020E3,020E7,C50C0,C50C4,00009,00005 +00000000,0430000C,C6CE8088,00000000 +Floating point events by unit + +#90,63,76,112,80,0,0,pm_fpu10,Floating point events by unit +##000C1,000C5,00001,010C5,00009,00005 +00000000,00000000,828A028A,00000001 +Floating point events by unit + 
+#91,58,71,61,112,0,0,pm_fpu11,Floating point events by unit +##000C3,000C7,010C3,00001,00009,00005 +00000000,00000000,868E8602,00000001 +Floating point events by unit + +#92,71,83,207,112,0,0,pm_fpu12,Floating point events by unit +##020E2,020E6,C50C0,00001,00009,00005 +00000000,0430000C,C4CC8002,00000001 +Floating point events by unit + +#93,96,93,90,95,0,0,pm_fxu1,Fixed Point events +##00012,00012,00012,00012,00009,00005 +00000000,00000000,24242424,00000000 +Fixed Point events + +#94,281,283,93,93,0,0,pm_fxu2,Fixed Point events +##00002,12091,13088,11090,00009,00005 +00000000,40000006,04221020,00000001 +Fixed Point events + +#95,4,4,91,96,0,0,pm_fxu3,Fixed Point events +##400C3,400C4,130E2,130E6,00009,00005 +00000000,40400003,8688C4CC,00000000 +Fixed Point events + +#96,337,335,334,331,0,0,pm_smt_priorities1,Thread priority events +##420E3,420E6,430E3,430E4,00009,00005 +00000000,0005000F,C6CCC6C8,00000000 +Thread priority events + +#97,336,334,336,333,0,0,pm_smt_priorities2,Thread priority events +##420E2,420E5,430E5,430E6,00009,00005 +00000000,0005000F,C4CACACC,00000000 +Thread priority events + +#98,335,333,338,335,0,0,pm_smt_priorities3,Thread priority events +##420E1,420E4,430E2,430E1,00009,00005 +00000000,0005000F,C2C8C4C2,00000000 +Thread priority events + +#99,334,107,340,112,0,0,pm_smt_priorities4,Thread priority events +##420E0,0000B,430E0,00001,00009,00005 +00000000,0005000A,C016C002,00000001 +Thread priority events + +#100,333,327,112,322,0,0,pm_smt_both,Thread common events +##0000B,00013,00001,41084,00009,00005 +00000000,00100000,16260208,00000001 +Thread common events + +#101,321,113,345,342,0,0,pm_smt_selection,Thread selection +##800C3,00001,410C0,410C1,00009,00005 +00000000,00900000,86028082,00000001 +Thread selection + +#102,115,0,341,338,0,0,pm_smt_selectover1,Thread selection overide +##00001,400C0,410C2,410C4,00009,00005 +00000000,00500000,02808488,00000001 +Thread selection overide + +#103,115,20,343,340,0,0,pm_smt_selectover2,Thread 
selection overide +##00001,0000F,410C5,410C3,00009,00005 +00000000,00100000,021E8A86,00000001 +Thread selection overide + +#104,37,38,37,41,0,0,pm_fabric1,Fabric events +##700C7,720E7,710C7,730E7,00009,00005 +00000000,30550005,8ECE8ECE,00000000 +Fabric events + +#105,45,41,45,52,0,0,pm_fabric2,Fabric data movement +##701C7,721E7,711C7,731E7,00009,00005 +00000000,30550085,8ECE8ECE,00000000 +Fabric data movement + +#106,47,48,47,51,0,0,pm_fabric3,Fabric data movement +##703C7,723E7,713C7,733E7,00009,00005 +00000000,30550185,8ECE8ECE,00000000 +Fabric data movement + +#107,43,40,34,45,0,0,pm_fabric4,Fabric data movement +##702C7,722E7,130E3,712C7,00009,00005 +00000000,70540106,8ECEC68E,00000000 +Fabric data movement + +#108,317,308,315,305,0,0,pm_snoop1,Snoop retry +##700C6,720E6,710C6,730E6,00009,00005 +00000000,30550005,8CCC8CCC,00000000 +Snoop retry + +#109,318,315,312,112,0,0,pm_snoop2,Snoop read retry +##705C6,725E6,715C6,00001,00009,00005 +00000000,30540A04,8CCC8C02,00000001 +Snoop read retry + +#110,323,252,317,249,0,0,pm_snoop3,Snoop write retry +##706C6,726E6,716C6,736E6,00009,00005 +00000000,30550C05,8CCC8CCC,00000000 +Snoop write retry + +#111,315,353,309,306,0,0,pm_snoop4,Snoop partial write retry +##707C6,727E6,717C6,707C6,00009,00005 +00000000,30540E04,8CCC8CAC,00000000 +Snoop partial write retry + +#112,261,263,249,36,0,0,pm_mem_rq,Memory read queue dispatch +##701C6,721E6,711C6,130E7,00009,00005 +00000000,70540205,8CCC8CCE,00000000 +Memory read queue dispatch + +#113,260,261,258,37,0,0,pm_mem_read,Memory read complete and cancel +##702C6,722E6,712C6,00003,00009,00005 +00000000,30540404,8CCC8C06,00000000 +Memory read complete and cancel + +#114,268,264,262,260,0,0,pm_mem_wq,Memory write queue dispatch +##703C6,723E6,713C6,733E6,00009,00005 +00000000,30550605,8CCC8CCC,00000000 +Memory write queue dispatch + +#115,256,257,254,251,0,0,pm_mem_pwq,Memory partial write queue +##704C6,724E6,714C6,734E6,00009,00005 +00000000,30550805,8CCC8CCC,00000000 +Memory 
partial write queue + +#116,281,284,348,293,0,0,pm_threshold,Thresholding +##00002,820E2,0000B,00014,00009,00005 +00000000,00080004,04C41628,00000001 +Thresholding + +#117,281,300,278,278,0,0,pm_mrk_grp1,Marked group events +##00002,820E3,00005,00013,00009,00005 +00000000,00080004,04C60A26,00000001 +Marked group events + +#118,282,268,279,279,0,0,pm_mrk_grp2,Marked group events +##00015,00005,C70E4,12091,00009,00005 +00000000,41030003,2A0AC822,00000001 +Marked group events + +#119,269,272,264,264,0,0,pm_mrk_dsource1,Marked data from +##C7087,C70A0,C70A2,C70A2,00009,00005 +00000000,010B000F,0E404444,00000001 +Marked data from + +#120,270,270,112,88,0,0,pm_mrk_dsource2,Marked data from +##C7097,C70A2,00001,01088,00009,00005 +00000000,010B000C,2E440210,00000001 +Marked data from + +#121,272,276,268,267,0,0,pm_mrk_dsource3,Marked data from +##C708E,C70A4,C70A6,C70A6,00009,00005 +00000000,010B000F,1C484C4C,00000001 +Marked data from + +#122,275,271,265,272,0,0,pm_mrk_dsource4,Marked data from +##C70A1,C70A3,C7097,C70A1,00009,00005 +00000000,010B000F,42462E42,00000001 +Marked data from + +#123,273,274,270,270,0,0,pm_mrk_dsource5,Marked data from +##C709E,C70A6,C70A0,C70A0,00009,00005 +00000000,010B000F,3C4C4040,00000001 +Marked data from + +#124,271,271,112,266,0,0,pm_mrk_dsource6,Marked data from +##C70A3,C70A3,00001,C70A3,00009,00005 +00000000,010B000D,46460246,00000001 +Marked data from + +#125,274,275,269,269,0,0,pm_mrk_dsource7,Marked data from +##C70A7,C70A7,C709E,C70A7,00009,00005 +00000000,010B000F,4E4E3C4E,00000001 +Marked data from + +#126,280,282,275,277,0,0,pm_mrk_dtlbref,Marked data TLB references +##C6086,C6086,C6086,C6086,00009,00005 +00000000,020C000F,0C0C0C0C,00000001 +Marked data TLB references + +#127,278,280,273,275,0,0,pm_mrk_dtlbmiss,Marked data TLB misses +##C608D,C608D,C608D,C608D,00009,00005 +00000000,020C000F,1A1A1A1A,00000001 +Marked data TLB misses + +#128,279,279,271,21,0,0,pm_mrk_dtlb_dslb,Marked data TLB references and misses and marked 
data SLB misses +##C60E4,C50C6,C50C7,0000F,00009,00005 +00000000,063C0008,C8AC8E1E,00000001 +Marked data TLB references and misses and marked data SLB misses + +#129,280,113,275,273,0,0,pm_mrk_lbref,Marked TLB and SLB references +##C6086,00001,C6086,C50C7,00009,00005 +00000000,063C000A,0C020C8E,00000001 +Marked TLB and SLB references + +#130,285,113,294,263,0,0,pm_mrk_lsmiss,Marked load and store miss +##82088,00001,00003,00005,00009,00005 +00000000,00080008,1002060A,00000001 +Marked load and store miss + +#131,299,300,291,295,0,0,pm_mrk_ulsflush,Mark unaligned load and store flushes +##00003,820E3,81090,81090,00009,00005 +00000000,00280004,06C62020,00000001 +Mark unaligned load and store flushes + +#132,298,299,276,280,0,0,pm_mrk_misc,Misc marked instructions +##820E6,00003,00014,0000B,00009,00005 +00000000,00080008,CC062816,00000001 +Misc marked instructions + +#133,18,116,322,196,0,0,pm_lsref_L1,Load/Store operations and L1 activity +##C3087,2208D,C1090,C1090,00009,00005 +00000000,8033000C,0E1A2020,00000000 +Load/Store operations and L1 activity + +#134,21,23,322,196,0,0,pm_lsref_L2L3,Load/Store operations and L2, L3 activity +##C308E,C3087,C1090,C1090,00009,00005 +00000000,0033000C,1C0E2020,00000000 +Load/Store operations and L2, L3 activity + +#135,124,30,322,196,0,0,pm_lsref_tlbmiss,Load/Store operations and TLB misses +##800C0,800C4,C1090,C1090,00009,00005 +00000000,00B00000,80882020,00000000 +Load/Store operations and TLB misses + +#136,21,23,195,318,0,0,pm_Dmiss,Data cache misses +##C308E,C3087,C1088,C10C3,00009,00005 +00000000,0033000C,1C0E1086,00000000 +Data cache misses + +#137,17,110,122,171,0,0,pm_prefetchX,Prefetch events +##0000F,220E6,C70E7,C50C3,00009,00005 +00000000,85330006,1ECCCE86,00000000 +Prefetch events + +#138,12,11,11,9,0,0,pm_branchX,Branch operations +##23087,23087,23087,230E4,00009,00005 +00000000,8000000F,0E0E0EC8,00000000 +Branch operations + +#139,70,82,61,66,0,0,pm_fpuX1,Floating point events by unit 
+##020E1,020E5,010C3,030E0,00009,00005 +00000000,0000000D,C2CA86C0,00000000 +Floating point events by unit + +#140,63,76,65,80,0,0,pm_fpuX2,Floating point events by unit +##000C1,000C5,010C1,010C5,00009,00005 +00000000,00000000,828A828A,00000000 +Floating point events by unit + +#141,58,71,61,77,0,0,pm_fpuX3,Floating point events by unit +##000C3,000C7,010C3,010C7,00009,00005 +00000000,00000000,868E868E,00000000 +Floating point events by unit + +#142,85,84,322,196,0,0,pm_fpuX4,Floating point and L1 events +##00090,00088,C1090,C1090,00009,00005 +00000000,00300000,20102020,00000000 +Floating point and L1 events + +#143,90,88,61,77,0,0,pm_fpuX5,Floating point events +##02090,02090,010C3,010C7,00009,00005 +00000000,0000000C,2020868E,00000000 +Floating point events + +#144,87,86,85,88,0,0,pm_fpuX6,Floating point events +##00088,00090,01090,01088,00009,00005 +00000000,00000000,10202010,00000000 +Floating point events + +#145,85,84,87,88,0,0,pm_fpuX7,Floating point events +##00090,00088,020A8,01088,00009,00005 +00000000,00000002,20105010,00000000 +Floating point events + +#146,17,94,16,88,0,0,pm_hpmcount8,HPM group for set 9 +##0000F,00014,0000F,01088,00009,00005 +00000000,00000000,1E281E10,00000000 +HPM group for set 9 + +#147,303,88,113,230,0,0,pm_hpmcount2,HPM group for set 2 +##00009,02090,00009,C5090,00009,00005 +00000000,04300004,12201220,00000000 +HPM group for set 2 + +#148,17,114,195,318,0,0,pm_hpmcount3,HPM group for set 3 +##0000F,120E1,C1088,C10C3,00009,00005 +00000000,40300004,1EC21086,00000000 +HPM group for set 3 + +#149,356,20,322,196,0,0,pm_hpmcount4,HPM group for set 7 +##80088,0000F,C1090,C1090,00009,00005 +00000000,00B00000,101E2020,00000000 +HPM group for set 7 + +#150,87,84,86,86,0,0,pm_flop,Floating point operations +##00088,00088,000A8,000A8,00009,00005 +00000000,00000000,10105050,00000000 +Floating point operations + +#151,303,20,195,26,0,0,pm_eprof1,Group for use with eprof +##00009,0000F,C1088,C10C7,00009,00005 
+00000000,00300000,121E108E,00000000 +Group for use with eprof + +#152,303,323,113,196,0,0,pm_eprof2,Group for use with eprof +##00009,C10A8,00009,C1090,00009,00005 +00000000,00300000,12501220,00000000 +Group for use with eprof + +#153,17,84,87,88,0,0,pm_flip,Group for flips +##0000F,00088,020A8,01088,00009,00005 +00000000,00000002,1E105010,00000000 +Group for flips + +#154,17,30,195,196,0,0,pm_hpmcount5,HPM group for set 5 +##0000F,800C4,C1088,C1090,00009,00005 +00000000,00B00000,1E881020,00000000 +HPM group for set 5 + +#155,17,302,322,318,0,0,pm_hpmcount6,HPM group for set 6 +##0000F,00009,C1090,C10C3,00009,00005 +00000000,00300000,1E122086,00000000 +HPM group for set 6 + +#156,303,23,16,24,0,0,pm_hpmcount7,HPM group for set 8 +##00009,C3087,0000F,C3087,00009,00005 +00000000,00030005,120E1E0E,00000000 +HPM group for set 8 + +#157,281,302,348,293,0,0,pm_ep_threshold,Thresholding +##00002,00009,0000B,00014,00009,00005 +00000000,00000000,04121628,00000001 +Thresholding + +#158,281,302,278,278,0,0,pm_ep_mrk_grp1,Marked group events +##00002,00009,00005,00013,00009,00005 +00000000,00000000,04120A26,00000001 +Marked group events + +#159,282,302,279,279,0,0,pm_ep_mrk_grp2,Marked group events +##00015,00009,C70E4,12091,00009,00005 +00000000,41030003,2A12C822,00000001 +Marked group events + +#160,269,302,264,264,0,0,pm_ep_mrk_dsource1,Marked data from +##C7087,00009,C70A2,C70A2,00009,00005 +00000000,010B000B,0E124444,00000001 +Marked data from + +#161,270,302,277,88,0,0,pm_ep_mrk_dsource2,Marked data from +##C7097,00009,820E2,01088,00009,00005 +00000000,010B0008,2E12E410,00000001 +Marked data from + +#162,303,276,268,267,0,0,pm_ep_mrk_dsource3,Marked data from +##00009,C70A4,C70A6,C70A6,00009,00005 +00000000,010B0007,12484C4C,00000001 +Marked data from + +#163,303,271,265,272,0,0,pm_ep_mrk_dsource4,Marked data from +##00009,C70A3,C7097,C70A1,00009,00005 +00000000,010B0007,12462E42,00000001 +Marked data from + +#164,273,302,270,270,0,0,pm_ep_mrk_dsource5,Marked data from 
+##C709E,00009,C70A0,C70A0,00009,00005 +00000000,010B000B,3C124040,00000001 +Marked data from + +#165,303,271,112,266,0,0,pm_ep_mrk_dsource6,Marked data from +##00009,C70A3,00001,C70A3,00009,00005 +00000000,010B0005,12460246,00000001 +Marked data from + +#166,303,275,269,269,0,0,pm_ep_mrk_dsource7,Marked data from +##00009,C70A7,C709E,C70A7,00009,00005 +00000000,010B0007,124E3C4E,00000001 +Marked data from + +#167,303,280,273,275,0,0,pm_ep_mrk_lbmiss,Marked TLB and SLB misses +##00009,C608D,C608D,C608D,00009,00005 +00000000,020C0007,121A1A1A,00000001 +Marked TLB and SLB misses + +#168,303,282,275,277,0,0,pm_ep_mrk_dtlbref,Marked data TLB references +##00009,C6086,C6086,C6086,00009,00005 +00000000,020C0007,120C0C0C,00000001 +Marked data TLB references + +#169,303,280,273,275,0,0,pm_ep_mrk_dtlbmiss,Marked data TLB misses +##00009,C608D,C608D,C608D,00009,00005 +00000000,020C0007,121A1A1A,00000001 +Marked data TLB misses + +#170,280,302,275,273,0,0,pm_ep_mrk_lbref,Marked TLB and SLB references +##C6086,00009,C6086,C50C7,00009,00005 +00000000,063C000A,0C120C8E,00000001 +Marked TLB and SLB references + +#171,285,302,294,263,0,0,pm_ep_mrk_lsmiss,Marked load and store miss +##82088,00009,00003,00005,00009,00005 +00000000,00080008,1012060A,00000001 +Marked load and store miss + +#172,299,302,291,295,0,0,pm_ep_mrk_ulsflush,Mark unaligned load and store flushes +##00003,00009,81090,81090,00009,00005 +00000000,00200000,06122020,00000001 +Mark unaligned load and store flushes + +#173,303,299,276,280,0,0,pm_ep_mrk_misc1,Misc marked instructions +##00009,00003,00014,0000B,00009,00005 +00000000,00000000,12062816,00000001 +Misc marked instructions + +#174,303,270,267,281,0,0,pm_ep_mrk_misc2,Misc marked instructions +##00009,C70A2,C70AF,820E2,00009,00005 +00000000,010B0006,12445EE4,00000001 +Misc marked instructions + +#175,303,274,272,271,0,0,pm_ep_mrk_misc3,Misc marked instructions +##00009,C70A6,C50C6,C7087,00009,00005 +00000000,053B0005,124C8C0E,00000001 +Misc marked 
instructions + +#176,278,302,274,265,0,0,pm_ep_mrk_misc4,Misc marked instructions +##C608D,00009,C60E4,C7097,00009,00005 +00000000,030F0009,1A12E82E,00000001 +Misc marked instructions + +#177,280,302,112,286,0,0,pm_ep_mrk_misc5,Misc marked instructions +##C6086,00009,00001,810C3,00009,00005 +00000000,022C0008,0C120286,00000001 +Misc marked instructions + +#178,278,302,288,292,0,0,pm_ep_mrk_misc6,Misc marked instructions +##C608D,00009,810C4,810C5,00009,00005 +00000000,022C0008,1A12888A,00000001 +Misc marked instructions + +#179,303,272,284,288,0,0,pm_ep_mrk_misc7,Misc marked instructions +##00009,C70A0,810C1,810C0,00009,00005 +00000000,012B0004,12408280,00000001 +Misc marked instructions + +#180,303,268,282,286,0,0,pm_ep_mrk_misc8,Misc marked instructions +##00009,00005,810C2,810C3,00009,00005 +00000000,00200000,120A8486,00000001 +Misc marked instructions + +#181,303,292,287,297,0,0,pm_ep_mrk_misc9,Misc marked instructions +##00009,810C6,810C7,820E6,00009,00005 +00000000,00280000,12AC8EEC,00000001 +Misc marked instructions + +#182,303,286,281,298,0,0,pm_ep_mrk_misc10,Misc marked instructions +##00009,820E0,820E4,820E3,00009,00005 +00000000,00080004,12C0E8E6,00000001 +Misc marked instructions + +#183,303,268,264,268,0,0,pm_ep_mrk_misc11,Misc marked instructions +##00009,00005,C70A2,C709E,00009,00005 +00000000,01030003,120A443C,00000001 +Misc marked instructions + +#184,303,296,290,294,0,0,pm_ep_mrk_misc12,Misc marked instructions +##00009,810A8,81088,81088,00009,00005 +00000000,00200000,12501010,00000001 +Misc marked instructions + +#185,269,302,266,296,0,0,pm_ep_mrk_misc13,Misc marked instructions +##C7087,00009,C709B,C70E6,00009,00005 +00000000,0103000B,0E1236CC,00000001 +Misc marked instructions + +#186,303,94,276,293,0,0,pm_ep_mrk_misc14,Misc marked instructions +##00009,00014,00014,00014,00009,00005 +00000000,00000000,12282828,00000001 +Misc marked instructions + +#187,303,283,278,278,0,0,pm_ep_mrk_misc15,Misc marked instructions 
+##00009,12091,00005,00013,00009,00005 +00000000,40000004,12220A26,00000001 +Misc marked instructions diff --git a/src/event_data/power5/events b/src/event_data/power5/events new file mode 100644 index 0000000..54aa3b8 --- /dev/null +++ b/src/event_data/power5/events @@ -0,0 +1,2443 @@ +{ **************************** +{ THIS IS OPEN SOURCE CODE +{ **************************** +{ (C) COPYRIGHT International Business Machines Corp. 2005 +{ This file is licensed under the University of Tennessee license. +{ See LICENSE.txt. +{ +{ File: events/power5/events +{ Author: Maynard Johnson +{ maynardj@us.ibm.com +{ Mods: +{ + +{ counter 1 } +#0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB +##400C0 +Cycles no instructions in CLB +#1,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##400C1 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#2,v,g,n,n,PM_1PLUS_PPC_CMPL,One or more PPC instruction completed +##00013 +A group containing at least one PPC instruction completed. For microcoded instructions that span multiple groups, this will only occur once. +#3,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##400C2 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. 
+#4,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##400C3 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#5,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##400C4 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#6,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##400C5 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#7,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##400C6 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) correspanding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. 
This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#8,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full +##100C5 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). +#9,v,g,n,n,PM_BR_UNCOND,Unconditional branch +##23087 +Unconditional branch +#10,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full +##220E5 +Cycles CLB full +#11,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##100C4 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#12,v,g,n,s,PM_CYC,Processor cycles +##0000F +Processor cycles +#13,v,g,n,n,PM_DATA_FROM_L2,Data loaded from L2 +##C3087 +DL1 was reloaded from the local L2 due to a demand load +#14,v,g,n,n,PM_DATA_FROM_L25_SHR,Data loaded from L2.5 shared +##C3097 +DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a demand load +#15,v,g,n,n,PM_DATA_FROM_L275_MOD,Data loaded from L2.75 modified +##C30A3 +DL1 was reloaded with modified (M) data from the L2 of another MCM due to a demand load. +#16,v,g,n,n,PM_DATA_FROM_L3,Data loaded from L3 +##C308E +DL1 was reloaded from the local L3 due to a demand load +#17,v,g,n,n,PM_DATA_FROM_L35_SHR,Data loaded from L3.5 shared +##C309E +Data loaded from L3.5 shared +#18,v,g,n,n,PM_DATA_FROM_L375_MOD,Data loaded from L3.75 modified +##C30A7 +Data loaded from L3.75 modified +#19,v,g,n,n,PM_DATA_FROM_RMEM,Data loaded from remote memory +##C30A1 +Data loaded from remote memory +#20,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##800C7 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. 
+#21,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##800C5 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve +#22,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##800C4 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#23,v,g,n,n,PM_DTLB_MISS_16M,Data TLB miss for 16M page +##C40C4 +Data TLB miss for 16M page +#24,v,g,n,n,PM_DTLB_MISS_4K,Data TLB miss for 4K page +##C40C0 +Data TLB miss for 4K page +#25,v,g,n,n,PM_DTLB_REF_16M,Data TLB reference for 16M page +##C40C6 +Data TLB reference for 16M page +#26,v,g,n,n,PM_DTLB_REF_4K,Data TLB reference for 4K page +##C40C2 +Data TLB reference for 4K page +#27,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued +##700C7 +Fabric command issued +#28,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued +##720E7 +dclaim issued +#29,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty +##722E7 +Hold buffer to NN empty +#30,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty +##721E7 +Hold buffer to VN empty +#31,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty +##702C7 +M1 to P1 sidecar empty +#32,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty +##701C7 +P1 to M1 sidecar empty +#33,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination +##703C7 +PN to NN beat went straight to its destination +#34,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination +##723E7 +PN to VN beat went straight to its destination +#35,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##100C1 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. 
+#36,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction +##000C3 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#37,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##020E0 +This signal is active for one cycle when one of the operands is denormalized. +#38,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##000C0 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#39,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##000C1 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#40,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##000C2 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#41,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##100C3 +The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped +#42,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##020E3 +This signal is active for one cycle when fp0 is executing single precision instruction. +#43,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##020E1 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#44,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##020E2 +This signal is active for one cycle when fp0 is executing a store instruction. 
+#45,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction +##000C7 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#46,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##020E4 +This signal is active for one cycle when one of the operands is denormalized. +#47,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##000C4 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#48,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##000C5 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#49,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##000C6 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#50,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##100C7 +The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped +#51,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##020E7 +This signal is active for one cycle when fp1 is executing single precision instruction. +#52,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##020E5 +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#53,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##020E6 +This signal is active for one cycle when fp1 is executing a store instruction. 
+#54,v,g,n,n,PM_FPU_DENORM,FPU received denormalized data +##02088 +This signal is active for one cycle when one of the operands is denormalized. Combined Unit 0 + Unit 1 +#55,v,g,n,n,PM_FPU_FDIV,FPU executed FDIV instruction +##00088 +This signal is active for one cycle at the end of the microcode executed when FPU is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. Combined Unit 0 + Unit 1 +#56,v,g,n,n,PM_FPU_1FLOP,FPU executed one flop instruction +##00090 +This event counts the number of one flop instructions. These could be fadd*, fmul*, fsub*, fneg+, fabs+, fnabs+, fres+, frsqrte+, fcmp**, or fsel where XYZ* means XYZ, XYZs, XYZ., XYZs., XYZ+ means XYZ, XYZ., and XYZ** means XYZu, XYZo. +#57,c,g,n,n,PM_FPU_FULL_CYC,Cycles FPU issue queue full +##10090 +Cycles when one or both FPU issue queues are full +#58,v,g,n,n,PM_FPU_SINGLE,FPU executed single precision instruction +##02090 +FPU is executing single precision instruction. Combined Unit 0 + Unit 1 +#59,u,g,n,n,PM_FXU_IDLE,FXU idle +##00012 +FXU0 and FXU1 are both idle +#60,v,g,n,n,PM_GCT_NOSLOT_CYC,Cycles no GCT slot allocated +##00004 +Cycles this thread does not have any slots allocated in the GCT. +#61,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##100C0 +The ISU sends a signal indicating the gct is full. +#62,v,g,n,s,PM_GCT_USAGE_00to59_CYC,Cycles GCT less than 60% full +##0001F +Cycles GCT less than 60% full +#63,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##120E6 +Group experienced branch redirect +#64,v,g,n,n,PM_GRP_BR_REDIR_NONSPEC,Group experienced non-speculative branch redirect +##120E5 +Group experienced non-speculative branch redirect +#65,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##120E4 +A group that previously attempted dispatch was rejected. +#66,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##120E3 +Dispatch has been attempted for a valid group. Some groups may be rejected. 
The total number of successful dispatches is the number of dispatch valid minus dispatch reject. +#67,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss +##120E7 +Group experienced I cache miss +#68,v,g,n,n,PM_GRP_IC_MISS_BR_REDIR_NONSPEC,Group experienced non-speculative I cache miss or branch redirect +##12091 +Group experienced non-speculative I cache miss or branch redirect +#69,v,g,n,n,PM_GRP_IC_MISS_NONSPEC,Group experienced non-speculative I cache miss +##12099 +Group experienced non-speculative I cache miss +#70,v,g,n,n,PM_GRP_MRK,Group marked in IDU +##00014 +A group was sampled (marked) +#71,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##220E6 +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#72,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##220E7 +This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available). +#73,v,g,n,n,PM_INST_CMPL,Instructions completed +##00001 +Number of Eligible Instructions that completed. +#74,v,g,n,n,PM_INST_DISP,Instructions dispatched +##120E1 +The ISU sends the number of instructions dispatched. +#75,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##220E4 +Asserted each cycle when the IFU sends at least one instruction to the IDU. +#76,v,g,n,n,PM_INST_FROM_L2,Instructions fetched from L2 +##22086 +An instruction fetch group was fetched from L2. Fetch Groups can contain up to 8 instructions +#77,v,g,n,n,PM_INST_FROM_L25_SHR,Instruction fetched from L2.5 shared +##22096 +Instruction fetched from L2.5 shared +#78,v,g,n,n,PM_INST_FROM_L3,Instruction fetched from L3 +##2208D +An instruction fetch group was fetched from L3. 
Fetch Groups can contain up to 8 instructions +#79,v,g,n,n,PM_INST_FROM_L35_SHR,Instruction fetched from L3.5 shared +##2209D +Instruction fetched from L3.5 shared +#80,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##800C1 +A SLB miss for an instruction fetch has occurred +#81,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##800C0 +A TLB miss for an Instruction Fetch has occurred +#82,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##720E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#83,v,g,n,s,PM_L2SA_RCLD_DISP,L2 Slice A RC load dispatch attempt +##701C0 +L2 Slice A RC load dispatch attempt +#84,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 Slice A RC load dispatch attempt failed due to all RC full +##721E0 +L2 Slice A RC load dispatch attempt failed due to all RC full +#85,v,g,n,s,PM_L2SA_RCST_DISP,L2 Slice A RC store dispatch attempt +##702C0 +L2 Slice A RC store dispatch attempt +#86,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 Slice A RC store dispatch attempt failed due to all RC full +##722E0 +L2 Slice A RC store dispatch attempt failed due to all RC full +#87,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 Slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C0 +L2 Slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +#88,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##700C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C.
+#89,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##723E0 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#90,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##720E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#91,v,g,n,s,PM_L2SB_RCLD_DISP,L2 Slice B RC load dispatch attempt +##701C1 +L2 Slice B RC load dispatch attempt +#92,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 Slice B RC load dispatch attempt failed due to all RC full +##721E1 +L2 Slice B RC load dispatch attempt failed due to all RC full +#93,v,g,n,s,PM_L2SB_RCST_DISP,L2 Slice B RC store dispatch attempt +##702C1 +L2 Slice B RC store dispatch attempt +#94,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 Slice B RC store dispatch attempt failed due to all RC full +##722E1 +L2 Slice B RC store dispatch attempt failed due to all RC full +#95,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 Slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C1 +L2 Slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +#96,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##700C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#97,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##723E1 +A store request as seen at the L2 directory has been made from the core. 
Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#98,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##720E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#99,v,g,n,s,PM_L2SC_RCLD_DISP,L2 Slice C RC load dispatch attempt +##701C2 +L2 Slice C RC load dispatch attempt +#100,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 Slice C RC load dispatch attempt failed due to all RC full +##721E2 +L2 Slice C RC load dispatch attempt failed due to all RC full +#101,v,g,n,s,PM_L2SC_RCST_DISP,L2 Slice C RC store dispatch attempt +##702C2 +L2 Slice C RC store dispatch attempt +#102,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 Slice C RC store dispatch attempt failed due to all RC full +##722E2 +L2 Slice C RC store dispatch attempt failed due to all RC full +#103,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 Slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C2 +L2 Slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +#104,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##700C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#105,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##723E2 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. 
+#106,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy +##721E3 +L3 slice A active for every cycle all CI/CO machines busy +#107,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG +##720E3 +L3 slice A transition from modified to TAG +#108,v,g,n,s,PM_L3SA_REF,L3 slice A references +##701C3 +L3 slice A references +#109,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy +##721E4 +L3 slice B active for every cycle all CI/CO machines busy +#110,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG +##720E4 +L3 slice B transition from modified to TAG +#111,v,g,n,s,PM_L3SB_REF,L3 slice B references +##701C4 +L3 slice B references +#112,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy +##721E5 +L3 slice C active for every cycle all CI/CO machines busy +#113,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG +##720E5 +L3 slice C transition from modified to TAG +#114,v,g,n,s,PM_L3SC_REF,L3 slice C references +##701C5 +L3 slice C references +#115,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##820E7 +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#116,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##100C6 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#117,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject +##C20E3 +LSU unit 0 busy due to reject +#118,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##800C2 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.
+#119,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C00C2 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#120,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##C00C3 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#121,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C00C0 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#122,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C00C1 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#123,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C60E3 +LSU0 reject due to ERAT miss +#124,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C60E1 +LSU0 reject due to LMQ full or missed data coming +#125,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C60E2 +LSU0 reject due to reload CDF or tag update collision +#126,v,g,n,n,PM_LSU0_REJECT_SRQ_LHS,LSU0 SRQ rejects +##C60E0 +LSU0 reject due to load hit store +#127,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C20E0 +Data from a store instruction was forwarded to a load on unit 0 +#128,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject +##C20E7 +LSU unit 1 is busy due to reject +#129,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##800C6 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.
+#130,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C00C6 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#131,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##C00C7 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#132,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C00C4 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#133,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C00C5 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#134,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C60E7 +LSU1 reject due to ERAT miss +#135,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C60E5 +LSU1 reject due to LMQ full or missed data coming +#136,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C60E6 +LSU1 reject due to reload CDF or tag update collision +#137,v,g,n,n,PM_LSU1_REJECT_SRQ_LHS,LSU1 SRQ rejects +##C60E4 +LSU1 reject due to load hit store +#138,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C20E4 +Data from a store instruction was forwarded to a load on unit 1 +#139,v,g,n,n,PM_LSU_BUSY_REJECT,LSU busy due to reject +##C2090 +LSU (unit 0 + unit 1) is busy due to reject +#140,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full +##320E7 +Flush caused by LRQ full +#141,u,g,n,n,PM_LSU_FLUSH_SRQ,SRQ flushes +##C0090 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#142,v,g,n,n,PM_LSU_FLUSH_ULD,LRQ unaligned load flushes +##C0088 +A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#143,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C20E6 +LRQ slot zero was allocated +#144,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C20E2 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#145,v,g,n,n,PM_LSU_REJECT_ERAT_MISS,LSU reject due to ERAT miss +##C6090 +LSU reject due to ERAT miss +#146,v,g,n,n,PM_LSU_REJECT_SRQ_LHS,LSU SRQ rejects +##C6088 +LSU reject due to load hit store +#147,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C20E5 +SRQ Slot zero was allocated +#148,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C20E1 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#149,c,g,n,n,PM_LSU_SRQ_STFWD,SRQ store forwarded +##C2088 +Data from a store instruction was forwarded to a load +#150,v,g,n,s,PM_MEM_FAST_PATH_RD_CMPL,Fast path memory read completed +##722E6 +Fast path memory read completed +#151,v,g,n,s,PM_MEM_HI_PRIO_PW_CMPL,High priority partial-write completed +##727E6 +High priority partial-write completed +#152,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed +##726E6 +High priority write completed +#153,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched +##704C6 +Memory partial-write queue dispatched +#154,v,g,n,s,PM_MEM_PWQ_DISP_BUSY2or3,Memory partial-write queue dispatched with 2-3 queues busy +##724E6 +Memory partial-write queue dispatched with 2-3 queues busy +#155,v,g,n,s,PM_MEM_READ_CMPL,Memory read completed or canceled +##702C6 +Memory read completed or canceled +#156,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched +##701C6 +Memory read queue dispatched +#157,v,g,n,s,PM_MEM_RQ_DISP_BUSY8to15,Memory read queue dispatched with 8-15 queues busy 
+##721E6 +Memory read queue dispatched with 8-15 queues busy +#158,v,g,n,s,PM_MEM_WQ_DISP_BUSY1to7,Memory write queue dispatched with 1-7 queues busy +##723E6 +Memory write queue dispatched with 1-7 queues busy +#159,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write +##703C6 +Memory write queue dispatched due to write +#160,v,g,n,n,PM_MRK_DATA_FROM_L2,Marked data loaded from L2 +##C7087 +DL1 was reloaded from the local L2 due to a marked demand load +#161,v,g,n,n,PM_MRK_DATA_FROM_L25_SHR,Marked data loaded from L2.5 shared +##C7097 +DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a marked demand load +#162,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD,Marked data loaded from L2.75 modified +##C70A3 +DL1 was reloaded with modified (M) data from the L2 of another MCM due to a marked demand load. +#163,v,g,n,n,PM_MRK_DATA_FROM_L3,Marked data loaded from L3 +##C708E +DL1 was reloaded from the local L3 due to a marked demand load +#164,v,g,n,n,PM_MRK_DATA_FROM_L35_SHR,Marked data loaded from L3.5 shared +##C709E +Marked data loaded from L3.5 shared +#165,v,g,n,n,PM_MRK_DATA_FROM_L375_MOD,Marked data loaded from L3.75 modified +##C70A7 +Marked data loaded from L3.75 modified +#166,v,g,n,n,PM_MRK_DATA_FROM_RMEM,Marked data loaded from remote memory +##C70A1 +Marked data loaded from remote memory +#167,v,g,n,n,PM_MRK_DTLB_MISS_16M,Marked Data TLB misses for 16M page +##C40C5 +Marked Data TLB misses for 16M page +#168,v,g,n,n,PM_MRK_DTLB_MISS_4K,Marked Data TLB misses for 4K page +##C40C1 +Marked Data TLB misses for 4K page +#169,v,g,n,n,PM_MRK_DTLB_REF_16M,Marked Data TLB reference for 16M page +##C40C7 +Marked Data TLB reference for 16M page +#170,v,g,n,n,PM_MRK_DTLB_REF_4K,Marked Data TLB reference for 4K page +##C40C3 +Marked Data TLB reference for 4K page +#171,v,g,n,n,PM_MRK_GRP_DISP,Marked group dispatched +##00002 +A group containing a sampled instruction was dispatched +#172,v,g,n,n,PM_MRK_GRP_ISSUED,Marked group 
issued +##00015 +A sampled instruction was issued +#173,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##820E2 +A DL1 reload occurred due to marked load +#174,v,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of PPC instructions completed. +#175,v,g,n,n,PM_MRK_LD_MISS_L1,Marked L1 D cache load misses +##82088 +Marked L1 D cache load misses +#176,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##820E0 +A marked load, executing on unit 0, missed the dcache +#177,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##820E4 +A marked load, executing on unit 1, missed the dcache +#178,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##820E6 +A marked stcx (stwcx or stdcx) failed +#179,v,g,n,n,PM_MRK_ST_CMPL,Marked store instruction completed +##00003 +A sampled store has completed (data home) +#180,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##820E3 +A marked store missed the dcache +#181,v,g,n,n,PM_PMC4_OVERFLOW,PMC4 Overflow +##0000A +PMC4 Overflow +#182,v,g,n,n,PM_PMC5_OVERFLOW,PMC5 Overflow +##0001A +PMC5 Overflow +#183,v,g,n,n,PM_PTEG_FROM_L2,PTEG loaded from L2 +##83087 +PTEG loaded from L2 +#184,v,g,n,n,PM_PTEG_FROM_L25_SHR,PTEG loaded from L2.5 shared +##83097 +PTEG loaded from L2.5 shared +#185,v,g,n,n,PM_PTEG_FROM_L275_MOD,PTEG loaded from L2.75 modified +##830A3 +PTEG loaded from L2.75 modified +#186,v,g,n,n,PM_PTEG_FROM_L3,PTEG loaded from L3 +##8308E +PTEG loaded from L3 +#187,v,g,n,n,PM_PTEG_FROM_L35_SHR,PTEG loaded from L3.5 shared +##8309E +PTEG loaded from L3.5 shared +#188,v,g,n,n,PM_PTEG_FROM_L375_MOD,PTEG loaded from L3.75 modified +##830A7 +PTEG loaded from L3.75 modified +#189,v,g,n,n,PM_PTEG_FROM_RMEM,PTEG loaded from remote memory +##830A1 +PTEG loaded from remote memory +#190,v,g,n,n,PM_RUN_CYC,Run cycles +##00005 +Processor Cycles gated by the run latch +#191,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full +##720E6 +Snoop dclaim/flush retry due to write/dclaim
queues full +#192,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue +##707C6 +Snoop partial-write retry due to collision with active read queue +#193,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full +##700C6 +Snoop read retry due to read queue full +#194,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue +##705C6 +Snoop read retry due to collision with active read queue +#195,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision +##725E6 +Snoop retry due to one ahead collision +#196,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE +##800C3 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#197,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue +##706C6 +Snoop write/dclaim retry due to collision with active read queue +#198,v,g,n,n,PM_STCX_FAIL,STCX failed +##820E1 +A stcx (stwcx or stdcx) failed +#199,v,g,n,n,PM_STCX_PASS,Stcx passes +##820E5 +A stcx (stwcx or stdcx) instruction was successful +#200,v,g,n,n,PM_SUSPENDED,Suspended +##00000 +Suspended +#201,u,g,n,n,PM_TB_BIT_TRANS,Time Base bit transition +##00018 +When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 +#202,v,g,n,s,PM_THRD_ONE_RUN_CYC,One of the threads in run cycles +##0000B +One of the threads in run cycles +#203,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 +##420E0 +Cycles thread running at priority level 1 +#204,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 +##420E1 +Cycles thread running at priority level 2 +#205,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 +##420E2 +Cycles thread running at priority level 3 
+#206,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 +##420E3 +Cycles thread running at priority level 4 +#207,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 +##420E4 +Cycles thread running at priority level 5 +#208,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 +##420E5 +Cycles thread running at priority level 6 +#209,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 +##420E6 +Cycles thread running at priority level 7 +#210,v,g,n,n,PM_TLB_MISS,TLB misses +##80088 +TLB misses +#211,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##100C2 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#212,v,g,n,n,PM_INST_FROM_L2MISS,Instructions fetched missed L2 +##2209B +An instruction fetch group was fetched from beyond L2. + +$$$$$$$$ + +{ counter 2 } +#0,v,g,n,n,PM_0INST_CLB_CYC,Cycles no instructions in CLB +##400C0 +Cycles no instructions in CLB +#1,v,g,n,n,PM_1INST_CLB_CYC,Cycles 1 instruction in CLB +##400C1 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#2,v,g,n,n,PM_2INST_CLB_CYC,Cycles 2 instructions in CLB +##400C2 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue.
This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#3,v,g,n,n,PM_3INST_CLB_CYC,Cycles 3 instructions in CLB +##400C3 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#4,v,g,n,n,PM_4INST_CLB_CYC,Cycles 4 instructions in CLB +##400C4 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#5,v,g,n,n,PM_5INST_CLB_CYC,Cycles 5 instructions in CLB +##400C5 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer. Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#6,v,g,n,n,PM_6INST_CLB_CYC,Cycles 6 instructions in CLB +##400C6 +The cache line buffer (CLB) is an 8-deep, 4-wide instruction buffer.
Fullness is indicated in the 8 valid bits associated with each of the 4-wide slots with full(0) corresponding to the number of cycles there are 8 instructions in the queue and full (7) corresponding to the number of cycles there is 1 instruction in the queue. This signal gives a real time history of the number of instruction quads valid in the instruction queue. +#7,u,g,n,s,PM_BRQ_FULL_CYC,Cycles branch queue full +##100C5 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). +#8,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction +##23087 +A conditional branch was predicted, target prediction +#9,v,g,n,n,PM_CLB_FULL_CYC,Cycles CLB full +##220E5 +Cycles CLB full +#10,v,g,n,n,PM_CMPLU_STALL_DCACHE_MISS,Completion stall caused by D cache miss +##1109A +Completion stall caused by D cache miss +#11,v,g,n,n,PM_CMPLU_STALL_FDIV,Completion stall caused by FDIV or FQRT instruction +##1109B +Completion stall caused by FDIV or FQRT instruction +#12,v,g,n,n,PM_CMPLU_STALL_FXU,Completion stall caused by FXU instruction +##11099 +Completion stall caused by FXU instruction +#13,v,g,n,n,PM_CMPLU_STALL_LSU,Completion stall caused by LSU instruction +##11098 +Completion stall caused by LSU instruction +#14,v,g,n,s,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##100C4 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.
+#15,v,g,n,s,PM_CYC,Processor cycles +##0000F +Processor cycles +#16,v,g,n,n,PM_DATA_FROM_L25_MOD,Data loaded from L2.5 modified +##C3097 +DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a demand load +#17,v,g,n,n,PM_DATA_FROM_L35_MOD,Data loaded from L3.5 modified +##C309E +Data loaded from L3.5 modified +#18,v,g,n,n,PM_DATA_FROM_LMEM,Data loaded from local memory +##C3087 +Data loaded from local memory +#19,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##800C7 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#20,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##800C5 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve +#21,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##800C4 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. 
+#22,v,g,n,n,PM_DTLB_MISS_16M,Data TLB miss for 16M page +##C40C4 +Data TLB miss for 16M page +#23,v,g,n,n,PM_DTLB_MISS_4K,Data TLB miss for 4K page +##C40C0 +Data TLB miss for 4K page +#24,v,g,n,n,PM_DTLB_REF_16M,Data TLB reference for 16M page +##C40C6 +Data TLB reference for 16M page +#25,v,g,n,n,PM_DTLB_REF_4K,Data TLB reference for 4K page +##C40C2 +Data TLB reference for 4K page +#26,v,g,n,s,PM_FAB_CMD_ISSUED,Fabric command issued +##700C7 +Fabric command issued +#27,v,g,n,s,PM_FAB_DCLAIM_ISSUED,dclaim issued +##720E7 +dclaim issued +#28,v,g,n,s,PM_FAB_HOLDtoNN_EMPTY,Hold buffer to NN empty +##722E7 +Hold buffer to NN empty +#29,v,g,n,s,PM_FAB_HOLDtoVN_EMPTY,Hold buffer to VN empty +##721E7 +Hold buffer to VN empty +#30,v,g,n,s,PM_FAB_M1toP1_SIDECAR_EMPTY,M1 to P1 sidecar empty +##702C7 +M1 to P1 sidecar empty +#31,v,g,n,s,PM_FAB_P1toM1_SIDECAR_EMPTY,P1 to M1 sidecar empty +##701C7 +P1 to M1 sidecar empty +#32,v,g,n,s,PM_FAB_PNtoNN_DIRECT,PN to NN beat went straight to its destination +##703C7 +PN to NN beat went straight to its destination +#33,v,g,n,s,PM_FAB_PNtoVN_DIRECT,PN to VN beat went straight to its destination +##723E7 +PN to VN beat went straight to its destination +#34,v,g,n,s,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##100C1 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#35,v,g,n,n,PM_FPU0_1FLOP,FPU0 executed add, mult, sub, cmp or sel instruction +##000C3 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#36,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##020E0 +This signal is active for one cycle when one of the operands is denormalized. 
+#37,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##000C0 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#38,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##000C1 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#39,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##000C2 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#40,v,g,n,s,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##100C3 +The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped +#41,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##020E3 +This signal is active for one cycle when fp0 is executing single precision instruction. +#42,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##020E1 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#43,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##020E2 +This signal is active for one cycle when fp0 is executing a store instruction. +#44,v,g,n,n,PM_FPU1_1FLOP,FPU1 executed add, mult, sub, cmp or sel instruction +##000C7 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#45,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##020E4 +This signal is active for one cycle when one of the operands is denormalized. 
+#46,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##000C4 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#47,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##000C5 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#48,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##000C6 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#49,v,g,n,s,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##100C7 +The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped +#50,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##020E7 +This signal is active for one cycle when fp1 is executing single precision instruction. +#51,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##020E5 +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#52,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##020E6 +This signal is active for one cycle when fp1 is executing a store instruction. +#53,v,g,n,n,PM_FPU_FSQRT,FPU executed FSQRT instruction +##00090 +This signal is active for one cycle at the end of the microcode executed when FPU is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 +#54,v,g,n,n,PM_FPU_FMA,FPU executed multiply-add instruction +##00088 +This signal is active for one cycle when FPU is executing multiply-add kind of instruction. 
This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 +#55,v,g,n,n,PM_FPU_STALL3,FPU stalled in pipe3 +##02088 +FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. Combined Unit 0 + Unit 1 +#56,v,g,n,n,PM_FPU_STF,FPU executed store instruction +##02090 +FPU is executing a store instruction. Combined Unit 0 + Unit 1 +#57,u,g,n,n,PM_FXU_BUSY,FXU busy +##00012 +FXU0 and FXU1 are both busy +#58,v,g,n,n,PM_FXU_FIN,FXU produced a result +##00014 +The fixed point unit (Unit 0 + Unit 1) finished a marked instruction. Instructions that finish may not necessary complete. +#59,v,g,n,n,PM_GCT_NOSLOT_IC_MISS,No slot in GCT caused by I cache miss +##1009C +This thread has no slot in the GCT because of an I cache miss +#60,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##100C0 +The ISU sends a signal indicating the gct is full. +#61,v,g,n,s,PM_GCT_USAGE_60to79_CYC,Cycles GCT 60-79% full +##0001F +Cycles GCT 60-79% full +#62,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##120E6 +Group experienced branch redirect +#63,v,g,n,n,PM_GRP_BR_REDIR_NONSPEC,Group experienced non-speculative branch redirect +##120E5 +Group experienced non-speculative branch redirect +#64,v,g,n,n,PM_GRP_DISP,Group dispatches +##00002 +A group was dispatched +#65,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##120E4 +A group that previously attempted dispatch was rejected. +#66,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##120E3 +Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. 
+#67,v,g,n,n,PM_GRP_IC_MISS,Group experienced I cache miss +##120E7 +Group experienced I cache miss +#68,v,g,n,n,PM_HV_CYC,Hypervisor Cycles +##0000B +Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0) +#69,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##220E6 +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#70,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##220E7 +This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available). +#71,v,g,n,n,PM_INST_CMPL,Instructions completed +##00001 +Number of Eligible Instructions that completed. +#72,v,g,n,n,PM_INST_DISP,Instructions dispatched +##120E1 +The ISU sends the number of instructions dispatched. +#73,v,g,n,n,PM_INST_FETCH_CYC,Cycles at least 1 instruction fetched +##220E4 +Asserted each cycle when the IFU sends at least one instruction to the IDU. +#74,v,g,n,n,PM_INST_FROM_L1,Instruction fetched from L1 +##2208D +An instruction fetch group was fetched from L1. 
Fetch Groups can contain up to 8 instructions +#75,v,g,n,n,PM_INST_FROM_L25_MOD,Instruction fetched from L2.5 modified +##22096 +Instruction fetched from L2.5 modified +#76,v,g,n,n,PM_INST_FROM_L35_MOD,Instruction fetched from L3.5 modified +##2209D +Instruction fetched from L3.5 modified +#77,v,g,n,n,PM_INST_FROM_LMEM,Instruction fetched from local memory +##22086 +Instruction fetched from local memory +#78,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##800C1 +A SLB miss for an instruction fetch as occurred +#79,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##800C0 +A TLB miss for an Instruction Fetch has occurred +#80,v,g,n,s,PM_L2SA_MOD_TAG,L2 slice A transition from modified to tagged +##720E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#81,v,g,n,s,PM_L2SA_RCLD_DISP,L2 Slice A RC load dispatch attempt +##701C0 +L2 Slice A RC load dispatch attempt +#82,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_RC_FULL,L2 Slice A RC load dispatch attempt failed due to all RC full +##721E0 +L2 Slice A RC load dispatch attempt failed due to all RC full +#83,v,g,n,s,PM_L2SA_RCST_DISP,L2 Slice A RC store dispatch attempt +##702C0 +L2 Slice A RC store dispatch attempt +#84,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_RC_FULL,L2 Slice A RC store dispatch attempt failed due to all RC full +##722E0 +L2 Slice A RC store dispatch attempt failed due to all RC full +#85,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY,L2 Slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C0 +L2 Slice A RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +#86,v,g,n,s,PM_L2SA_SHR_MOD,L2 slice A transition from shared to modified +##700C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the 
Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. +#87,v,g,n,n,PM_L2SA_ST_REQ,L2 slice A store requests +##723E0 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#88,v,g,n,s,PM_L2SB_MOD_TAG,L2 slice B transition from modified to tagged +##720E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#89,v,g,n,s,PM_L2SB_RCLD_DISP,L2 Slice B RC load dispatch attempt +##701C1 +L2 Slice B RC load dispatch attempt +#90,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_RC_FULL,L2 Slice B RC load dispatch attempt failed due to all RC full +##721E1 +L2 Slice B RC load dispatch attempt failed due to all RC full +#91,v,g,n,s,PM_L2SB_RCST_DISP,L2 Slice B RC store dispatch attempt +##702C1 +L2 Slice B RC store dispatch attempt +#92,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_RC_FULL,L2 Slice B RC store dispatch attempt failed due to all RC full +##722E1 +L2 Slice B RC store dispatch attempt failed due to all RC full +#93,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY,L2 Slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C1 +L2 Slice B RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +#94,v,g,n,s,PM_L2SB_SHR_MOD,L2 slice B transition from shared to modified +##700C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. 
The event is provided on each of the three slices A,B,and C. +#95,v,g,n,n,PM_L2SB_ST_REQ,L2 slice B store requests +##723E1 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#96,v,g,n,s,PM_L2SC_MOD_TAG,L2 slice C transition from modified to tagged +##720E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Tagged state. This transition was caused by a read snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. +#97,v,g,n,s,PM_L2SC_RCLD_DISP,L2 Slice C RC load dispatch attempt +##701C2 +L2 Slice C RC load dispatch attempt +#98,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_RC_FULL,L2 Slice C RC load dispatch attempt failed due to all RC full +##721E2 +L2 Slice C RC load dispatch attempt failed due to all RC full +#99,v,g,n,s,PM_L2SC_RCST_DISP,L2 Slice C RC store dispatch attempt +##702C2 +L2 Slice C RC store dispatch attempt +#100,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_RC_FULL,L2 Slice C RC store dispatch attempt failed due to all RC full +##722E2 +L2 Slice C RC store dispatch attempt failed due to all RC full +#101,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY,L2 Slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +##703C2 +L2 Slice C RC dispatch attempt failed due to RC/CO pair chosen was miss and CO already busy +#102,v,g,n,s,PM_L2SC_SHR_MOD,L2 slice C transition from shared to modified +##700C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L , or Tagged) to the Modified state. This transition was caused by a store from either of the two local CPUs to a cache line in any of the Shared states. The event is provided on each of the three slices A,B,and C. 
+#103,v,g,n,n,PM_L2SC_ST_REQ,L2 slice C store requests +##723E2 +A store request as seen at the L2 directory has been made from the core. Stores are counted after gathering in the L2 store queues. The event is provided on each of the three slices A,B,and C. +#104,v,g,n,s,PM_L3SA_ALL_BUSY,L3 slice A active for every cycle all CI/CO machines busy +##721E3 +L3 slice A active for every cycle all CI/CO machines busy +#105,v,g,n,s,PM_L3SA_MOD_TAG,L3 slice A transition from modified to TAG +##720E3 +L3 slice A transition from modified to TAG +#106,v,g,n,s,PM_L3SA_REF,L3 slice A references +##701C3 +L3 slice A references +#107,v,g,n,s,PM_L3SB_ALL_BUSY,L3 slice B active for every cycle all CI/CO machines busy +##721E4 +L3 slice B active for every cycle all CI/CO machines busy +#108,v,g,n,s,PM_L3SB_MOD_TAG,L3 slice B transition from modified to TAG +##720E4 +L3 slice B transition from modified to TAG +#109,v,g,n,s,PM_L3SB_REF,L3 slice B references +##701C4 +L3 slice B references +#110,v,g,n,s,PM_L3SC_ALL_BUSY,L3 slice C active for every cycle all CI/CO machines busy +##721E5 +L3 slice C active for every cycle all CI/CO machines busy +#111,v,g,n,s,PM_L3SC_MOD_TAG,L3 slice C transition from modified to TAG +##720E5 +L3 slice C transition from modified to TAG +#112,v,g,n,s,PM_L3SC_REF,L3 slice C references +##701C5 +L3 slice C references +#113,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##820E7 +A larx (lwarx or ldarx) was executed on side 0 (there is no coresponding unit 1 event since larx instructions can only execute on unit 0) +#114,u,g,n,s,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##100C6 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. 
+#115,v,g,n,n,PM_LSU0_BUSY_REJECT,LSU0 busy due to reject +##C20E3 +LSU unit 0 busy due to reject +#116,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##800C2 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#117,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C00C2 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#118,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##C00C3 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#119,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C00C0 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#120,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C00C1 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#121,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C60E3 +LSU0 reject due to ERAT miss +#122,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C60E1 +LSU0 reject due to LMQ full or missed data coming +#123,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C60E2 +LSU0 reject due to reload CDF or tag update collision +#124,v,g,n,n,PM_LSU0_REJECT_SRQ_LHS,LSU0 SRQ rejects +##C60E0 +LSU0 reject due to load hit store +#125,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C20E0 +Data from a store instruction was forwarded to a load on unit 0 +#126,v,g,n,n,PM_LSU1_BUSY_REJECT,LSU1 busy due to reject +##C20E7 +LSU unit 1 is busy due to reject +#127,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##800C6 +A data request (load or store) 
from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#128,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C00C6 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#129,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##C00C7 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#130,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C00C4 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#131,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C00C5 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#132,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C60E7 +LSU1 reject due to ERAT miss +#133,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C60E5 +LSU1 reject due to LMQ full or missed data coming +#134,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C60E6 +LSU1 reject due to reload CDF or tag update collision +#135,v,g,n,n,PM_LSU1_REJECT_SRQ_LHS,LSU1 SRQ rejects +##C60E4 +LSU1 reject due to load hit store +#136,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C20E4 +Data from a store instruction was forwarded to a load on unit 1 +#137,v,g,n,n,PM_LSU_DERAT_MISS,DERAT misses +##80090 +Total D-ERAT Misses (Unit 0 + Unit 1). Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. 
+#138,v,g,n,n,PM_LSU_FLUSH_LRQ,LRQ flushes +##C0090 +A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#139,v,g,n,s,PM_LSU_FLUSH_LRQ_FULL,Flush caused by LRQ full +##320E7 +Flush caused by LRQ full +#140,v,g,n,n,PM_LSU_FLUSH_UST,SRQ unaligned store flushes +##C0088 +A store was flushed because it was unaligned +#141,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty +##00015 +Cycles when both the LMQ and SRQ are empty (LSU is idle) +#142,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C20E6 +LRQ slot zero was allocated +#143,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C20E2 +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#144,v,g,n,n,PM_LSU_REJECT_LMQ_FULL,LSU reject due to LMQ full or missed data coming +##C6088 +LSU reject due to LMQ full or missed data coming +#145,v,g,n,n,PM_LSU_REJECT_RELOAD_CDF,LSU reject due to reload CDF or tag update collision +##C6090 +LSU reject due to reload CDF or tag update collision +#146,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C20E5 +SRQ Slot zero was allocated +#147,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C20E1 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. 
+#148,v,g,n,s,PM_MEM_FAST_PATH_RD_CMPL,Fast path memory read completed +##722E6 +Fast path memory read completed +#149,v,g,n,s,PM_MEM_HI_PRIO_PW_CMPL,High priority partial-write completed +##727E6 +High priority partial-write completed +#150,v,g,n,s,PM_MEM_HI_PRIO_WR_CMPL,High priority write completed +##726E6 +High priority write completed +#151,v,g,n,s,PM_MEM_PWQ_DISP,Memory partial-write queue dispatched +##704C6 +Memory partial-write queue dispatched +#152,v,g,n,s,PM_MEM_PWQ_DISP_BUSY2or3,Memory partial-write queue dispatched with 2-3 queues busy +##724E6 +Memory partial-write queue dispatched with 2-3 queues busy +#153,v,g,n,s,PM_MEM_READ_CMPL,Memory read completed or canceled +##702C6 +Memory read completed or canceled +#154,v,g,n,s,PM_MEM_RQ_DISP,Memory read queue dispatched +##701C6 +Memory read queue dispatched +#155,v,g,n,s,PM_MEM_RQ_DISP_BUSY8to15,Memory read queue dispatched with 8-15 queues busy +##721E6 +Memory read queue dispatched with 8-15 queues busy +#156,v,g,n,s,PM_MEM_WQ_DISP_BUSY1to7,Memory write queue dispatched with 1-7 queues busy +##723E6 +Memory write queue dispatched with 1-7 queues busy +#157,v,g,n,s,PM_MEM_WQ_DISP_WRITE,Memory write queue dispatched due to write +##703C6 +Memory write queue dispatched due to write +#158,v,g,n,n,PM_MRK_BRU_FIN,Marked instruction BRU processing finished +##00005 +The branch unit finished a marked instruction. 
Instructions that finish may not necessary complete +#159,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD,Marked data loaded from L2.5 modified +##C7097 +DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a marked demand load +#160,v,g,n,n,PM_MRK_DATA_FROM_L25_SHR_CYC,Marked load latency from L2.5 shared +##C70A2 +Marked load latency from L2.5 shared +#161,v,g,n,n,PM_MRK_DATA_FROM_L275_SHR_CYC,Marked load latency from L2.75 shared +##C70A3 +Marked load latency from L2.75 shared +#162,v,g,n,n,PM_MRK_DATA_FROM_L2_CYC,Marked load latency from L2 +##C70A0 +Marked load latency from L2 +#163,v,g,n,n,PM_MRK_DATA_FROM_L35_MOD,Marked data loaded from L3.5 modified +##C709E +Marked data loaded from L3.5 modified +#164,v,g,n,n,PM_MRK_DATA_FROM_L35_SHR_CYC,Marked load latency from L3.5 shared +##C70A6 +Marked load latency from L3.5 shared +#165,v,g,n,n,PM_MRK_DATA_FROM_L375_SHR_CYC,Marked load latency from L3.75 shared +##C70A7 +Marked load latency from L3.75 shared +#166,v,g,n,n,PM_MRK_DATA_FROM_L3_CYC,Marked load latency from L3 +##C70A4 +Marked load latency from L3 +#167,v,g,n,n,PM_MRK_DATA_FROM_LMEM,Marked data loaded from local memory +##C7087 +Marked data loaded from local memory +#168,v,g,n,n,PM_MRK_DTLB_MISS_16M,Marked Data TLB misses for 16M page +##C40C5 +Marked Data TLB misses for 16M page +#169,v,g,n,n,PM_MRK_DTLB_MISS_4K,Marked Data TLB misses for 4K page +##C40C1 +Marked Data TLB misses for 4K page +#170,v,g,n,n,PM_MRK_DTLB_REF_16M,Marked Data TLB reference for 16M page +##C40C7 +Marked Data TLB reference for 16M page +#171,v,g,n,n,PM_MRK_DTLB_REF_4K,Marked Data TLB reference for 4K page +##C40C3 +Marked Data TLB reference for 4K page +#172,v,g,n,n,PM_MRK_GRP_BR_REDIR,Group experienced marked branch redirect +##12091 +Group experienced marked branch redirect +#173,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##820E2 +A DL1 reload occured due to marked load +#174,v,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of PPC instructions 
completed. +#175,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##820E0 +A marked load, executing on unit 0, missed the dcache +#176,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##820E4 +A marked load, executing on unit 1, missed the dcache +#177,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##820E6 +A marked stcx (stwcx or stdcx) failed +#178,v,g,n,n,PM_MRK_ST_GPS,Marked store sent to GPS +##00003 +A sampled store has been sent to the memory subsystem +#179,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##820E3 +A marked store missed the dcache +#180,v,g,n,n,PM_PMC1_OVERFLOW,PMC1 Overflow +##0000A +PMC1 Overflow +#181,v,g,n,n,PM_PTEG_FROM_L25_MOD,PTEG loaded from L2.5 modified +##83097 +PTEG loaded from L2.5 modified +#182,v,g,n,n,PM_PTEG_FROM_L35_MOD,PTEG loaded from L3.5 modified +##8309E +PTEG loaded from L3.5 modified +#183,v,g,n,n,PM_PTEG_FROM_LMEM,PTEG loaded from local memory +##83087 +PTEG loaded from local memory +#184,v,g,n,n,PM_SLB_MISS,SLB misses +##80088 +SLB misses +#185,v,g,n,s,PM_SNOOP_DCLAIM_RETRY_QFULL,Snoop dclaim/flush retry due to write/dclaim queues full +##720E6 +Snoop dclaim/flush retry due to write/dclaim queues full +#186,v,g,n,s,PM_SNOOP_PW_RETRY_RQ,Snoop partial-write retry due to collision with active read queue +##707C6 +Snoop partial-write retry due to collision with active read queue +#187,v,g,n,s,PM_SNOOP_RD_RETRY_QFULL,Snoop read retry due to read queue full +##700C6 +Snoop read retry due to read queue full +#188,v,g,n,s,PM_SNOOP_RD_RETRY_RQ,Snoop read retry due to collision with active read queue +##705C6 +Snoop read retry due to collision with active read queue +#189,v,g,n,s,PM_SNOOP_RETRY_1AHEAD,Snoop retry due to one ahead collision +##725E6 +Snoop retry due to one ahead collision +#190,u,g,n,s,PM_SNOOP_TLBIE,Snoop TLBIE +##800C3 +A TLB miss for a data request occurred. 
Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#191,v,g,n,s,PM_SNOOP_WR_RETRY_RQ,Snoop write/dclaim retry due to collision with active read queue +##706C6 +Snoop write/dclaim retry due to collision with active read queue +#192,v,g,n,n,PM_STCX_FAIL,STCX failed +##820E1 +A stcx (stwcx or stdcx) failed +#193,v,g,n,n,PM_STCX_PASS,Stcx passes +##820E5 +A stcx (stwcx or stdcx) instruction was successful +#194,v,g,n,n,PM_SUSPENDED,Suspended +##00000 +Suspended +#195,v,g,n,s,PM_GCT_EMPTY_CYC,Cycles GCT empty +##00004 +The Global Completion Table is completely empty +#196,v,g,n,n,PM_THRD_GRP_CMPL_BOTH_CYC,Cycles group completed by both threads +##00013 +Cycles group completed by both threads +#197,v,g,n,n,PM_THRD_PRIO_1_CYC,Cycles thread running at priority level 1 +##420E0 +Cycles thread running at priority level 1 +#198,v,g,n,n,PM_THRD_PRIO_2_CYC,Cycles thread running at priority level 2 +##420E1 +Cycles thread running at priority level 2 +#199,v,g,n,n,PM_THRD_PRIO_3_CYC,Cycles thread running at priority level 3 +##420E2 +Cycles thread running at priority level 3 +#200,v,g,n,n,PM_THRD_PRIO_4_CYC,Cycles thread running at priority level 4 +##420E3 +Cycles thread running at priority level 4 +#201,v,g,n,n,PM_THRD_PRIO_5_CYC,Cycles thread running at priority level 5 +##420E4 +Cycles thread running at priority level 5 +#202,v,g,n,n,PM_THRD_PRIO_6_CYC,Cycles thread running at priority level 6 +##420E5 +Cycles thread running at priority level 6 +#203,v,g,n,n,PM_THRD_PRIO_7_CYC,Cycles thread running at priority level 7 +##420E6 +Cycles thread running at priority level 7 +#204,v,g,n,s,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##100C2 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. 
Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. + +$$$$$$$$ + +{ counter 3 } +#0,v,g,n,n,PM_BR_ISSUED,Branches issued +##230E4 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#1,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##230E5 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#2,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##230E6 +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. +#3,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction +##23087,230E2 +A conditional branch was predicted, CR prediction +#4,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction +##230E3 +A conditional branch was predicted, target prediction +#5,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##110C1 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). 
+#6,v,g,n,s,PM_CYC,Processor cycles +##0000F +Processor cycles +#7,v,g,n,n,PM_DATA_FROM_L25_MOD,Data loaded from L2.5 modified +##C30A2 +DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a demand load +#8,v,g,n,n,PM_DATA_FROM_L275_SHR,Data loaded from L2.75 shared +##C3097 +DL1 was reloaded with shared (T) data from the L2 of another MCM due to a demand load +#9,v,g,n,n,PM_DATA_FROM_L35_MOD,Data loaded from L3.5 modified +##C30A6 +Data loaded from L3.5 modified +#10,v,g,n,n,PM_DATA_FROM_L375_SHR,Data loaded from L3.75 shared +##C309E +Data loaded from L3.75 shared +#11,v,g,n,n,PM_DATA_FROM_LMEM,Data loaded from local memory +##C30A0 +Data loaded from local memory +#12,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C10C7 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#13,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start +##830E6 +DST (Data Stream Touch) stream start +#14,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##830E7 +A new Prefetch Stream was allocated +#15,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##130E3 +The number of Cycles MSR(EE) bit was off. 
+#16,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##130E7 +Cycles MSR(EE) bit off and external interrupt pending +#17,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##710C7 +Fabric command retried +#18,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried +##730E7 +dclaim retried +#19,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty +##712C7 +M1 to VN/NN sidecar empty +#20,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty +##711C7 +P1 to VN/NN sidecar empty +#21,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first +##713C7 +PN to NN beat went to sidecar first +#22,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first +##733E7 +PN to VN beat went to sidecar first +#23,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty +##731E7 +Vertical bypass buffer empty +#24,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##110C6 +Flush caused by branch mispredict +#25,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance +##330E3 +Flush caused by thread GCT imbalance +#26,v,g,n,n,PM_FLUSH,Flushes +##110C7 +Flushes +#27,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation +##330E2 +Flush caused by scoreboard operation +#28,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync +##330E1 +Flush caused by sync +#29,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##010C2 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#30,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##010C3 +fp0 finished, produced a result This only indicates finish, not completion. +#31,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##010C0 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#32,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##030E0 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#33,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##010C1 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#34,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##010C6 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#35,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##010C7 +fp1 finished, produced a result. This only indicates finish, not completion. +#36,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##010C4 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#37,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##010C5 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#38,v,g,n,n,PM_FPU_FMOV_FEST,FPU executing FMOV or FEST instructions +##01088 +This signal is active for one cycle when executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ . Combined Unit 0 + Unit 1 +#39,v,g,n,n,PM_FPU_FRSP_FCONV,FPU executed FRSP or FCONV instructions +##01090 +This signal is active for one cycle when executing frsp or convert kind of instruction. 
This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 +#40,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##110C0 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped +#41,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##110C4 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped +#42,u,g,n,n,PM_FXU0_BUSY_FXU1_IDLE,FXU0 busy FXU1 idle +##00012 +FXU0 is busy while FXU1 was idle +#43,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##130E2 +The Fixed Point unit 0 finished an instruction and produced a result +#44,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##130E6 +The Fixed Point unit 1 finished an instruction and produced a result +#45,v,g,n,n,PM_FXU_FIN,FXU produced a result +##13088 +The fixed point unit (Unit 0 + Unit 1) finished a marked instruction. Instructions that finish may not necessary complete. +#46,v,g,n,n,PM_GCT_NOSLOT_SRQ_FULL,No slot in GCT caused by SRQ full +##10084 +This thread has no slot in the GCT because the SRQ is full +#47,v,g,n,s,PM_GCT_USAGE_80to99_CYC,Cycles GCT 80-99% full +##0001F +Cycles GCT 80-99% full +#48,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##130E5 +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#49,v,g,n,n,PM_GRP_CMPL,Group completed +##00013 +A group completed. Microcoded instructions that span multiple groups will generate this event once per group. +#50,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##130E1 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. 
+#51,v,g,n,n,PM_GRP_DISP_SUCCESS,Group dispatch success +##00002 +Number of groups successfully dispatched (not rejected) +#52,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect +##230E0 +L2 I cache demand request due to BHT redirect +#53,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect +##230E1 +L2 I cache demand request due to branch redirect +#54,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch +##210C7 +New line coming into the prefetch buffer +#55,v,g,n,n,PM_INST_CMPL,Instructions completed +##00001 +Number of Eligible Instructions that completed. +#56,v,g,n,n,PM_INST_DISP,Instructions dispatched +##00009 +The ISU sends the number of instructions dispatched. +#57,v,g,n,n,PM_INST_FROM_L275_SHR,Instruction fetched from L2.75 shared +##22096 +Instruction fetched from L2.75 shared +#58,v,g,n,n,PM_INST_FROM_L375_SHR,Instruction fetched from L3.75 shared +##2209D +Instruction fetched from L3.75 shared +#59,v,g,n,n,PM_INST_FROM_PREF,Instructions fetched from prefetch +##2208D +An instruction fetch group was fetched from the prefetch buffer. Fetch Groups can contain up to 8 instructions +#60,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C30E4 +The data source information is valid +#61,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##C70E7 +A request to prefetch data into the L1 was made +#62,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##230E7 +This signal is asserted each cycle a cache write is active. +#63,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##730E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C.
+#64,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 Slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C0 +L2 Slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +#65,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 Slice A RC load dispatch attempt failed due to other reasons +##731E0 +L2 Slice A RC load dispatch attempt failed due to other reasons +#66,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 Slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C0 +L2 Slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +#67,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 Slice A RC store dispatch attempt failed due to other reasons +##732E0 +L2 Slice A RC store dispatch attempt failed due to other reasons +#68,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 Slice A RC dispatch attempt failed due to all CO busy +##713C0 +L2 Slice A RC dispatch attempt failed due to all CO busy +#69,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##710C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#70,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##733E0 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#71,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##730E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. 
+#72,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 Slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C1 +L2 Slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +#73,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 Slice B RC load dispatch attempt failed due to other reasons +##731E1 +L2 Slice B RC load dispatch attempt failed due to other reasons +#74,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 Slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C1 +L2 Slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +#75,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 Slice B RC store dispatch attempt failed due to other reasons +##732E1 +L2 Slice B RC store dispatch attempt failed due to other reasons +#76,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 Slice B RC dispatch attempt failed due to all CO busy +##713C1 +L2 Slice B RC dispatch attempt failed due to all CO busy +#77,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##710C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#78,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##733E1 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#79,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##730E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. 
+#80,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 Slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C2 +L2 Slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +#81,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 Slice C RC load dispatch attempt failed due to other reasons +##731E2 +L2 Slice C RC load dispatch attempt failed due to other reasons +#82,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 Slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C2 +L2 Slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +#83,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 Slice C RC store dispatch attempt failed due to other reasons +##732E2 +L2 Slice C RC store dispatch attempt failed due to other reasons +#84,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 Slice C RC dispatch attempt failed due to all CO busy +##713C2 +L2 Slice C RC dispatch attempt failed due to all CO busy +#85,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##710C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. 
+#86,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##733E2 +L2 slice C store hits +#87,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##C50C3 +A request to prefetch data into L2 was made +#88,v,g,n,s,PM_L3SA_HIT,L3 slice A hits +##711C3 +L3 slice A hits +#89,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid +##730E3 +L3 slice A transition from modified to invalid +#90,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid +##710C3 +L3 slice A transition from shared to invalid +#91,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries +##731E3 +L3 slice A snoop retries +#92,v,g,n,s,PM_L3SB_HIT,L3 slice B hits +##711C4 +L3 slice B hits +#93,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid +##730E4 +L3 slice B transition from modified to invalid +#94,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid +##710C4 +L3 slice B transition from shared to invalid +#95,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries +##731E4 +L3 slice B snoop retries +#96,v,g,n,s,PM_L3SC_HIT,L3 Slice C hits +##711C5 +L3 Slice C hits +#97,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid +##730E5 +L3 slice C transition from modified to invalid +#98,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid +##710C5 +L3 slice C transition from shared to invalid +#99,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries +##731E5 +L3 slice C snoop retries +#100,v,g,n,n,PM_LD_MISS_L1,L1 D cache load misses +##C1088 +Total DL1 Load references that miss the DL1 +#101,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C10C2 +A load, executing on unit 0, missed the dcache +#102,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C10C6 +A load, executing on unit 1, missed the dcache +#103,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C10C0 +A load executed on unit 0 +#104,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##C10C4 +A load executed on unit 1 
+#105,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##C50C0 +A floating point load was executed from LSU unit 0 +#106,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads +##C50C1 +LSU0 non-cacheable loads +#107,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##C50C4 +A floating point load was executed from LSU unit 1 +#108,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads +##C50C5 +LSU1 non-cacheable loads +#109,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##110C5 +Flush initiated by LSU +#110,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full +##330E0 +Flush caused by SRQ full +#111,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C30E7 +The LMQ was full +#112,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C70E5 +A dcache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#113,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C30E6 +The first entry in the LMQ was allocated. +#114,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C30E5 +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#115,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty +##00015 +Cycles when both the LMQ and SRQ are empty (LSU is idle) +#116,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##110C2 +The ISU sends this signal when the LRQ is full. +#117,u,g,n,n,PM_DC_PREF_STREAM_ALLOC_BLK,D cache out of prefetch streams +##C50C2 +D cache out of prefetch streams +#118,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##110C3 +The ISU sends this signal when the srq is full. +#119,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##830E5 +This signal is asserted every cycle when a sync is in the SRQ.
+#120,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch +##130E0 +LWSYNC held at dispatch +#121,v,g,n,s,PM_MEM_LO_PRIO_PW_CMPL,Low priority partial-write completed +##737E6 +Low priority partial-write completed +#122,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed +##736E6 +Low priority write completed +#123,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed +##734E6 +Memory partial-write completed +#124,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered +##714C6 +Memory partial-write gathered +#125,v,g,n,s,PM_MEM_RQ_DISP_BUSY1to7,Memory read queue dispatched with 1-7 queues busy +##711C6 +Memory read queue dispatched with 1-7 queues busy +#126,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read canceled +##712C6 +Speculative memory read canceled +#127,v,g,n,s,PM_MEM_WQ_DISP_BUSY8to15,Memory write queue dispatched with 8-15 queues busy +##733E6 +Memory write queue dispatched with 8-15 queues busy +#128,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush +##713C6 +Memory write queue dispatched due to dclaim/flush +#129,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD,Marked data loaded from L2.5 modified +##C70A2 +DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a marked demand load +#130,v,g,n,n,PM_MRK_DATA_FROM_L275_SHR,Marked data loaded from L2.75 shared +##C7097 +DL1 was reloaded with shared (T) data from the L2 of another MCM due to a marked demand load +#131,v,g,n,n,PM_MRK_DATA_FROM_L35_MOD,Marked data loaded from L3.5 modified +##C70A6 +Marked data loaded from L3.5 modified +#132,v,g,n,n,PM_MRK_DATA_FROM_L375_SHR,Marked data loaded from L3.75 shared +##C709E +Marked data loaded from L3.75 shared +#133,v,g,n,n,PM_MRK_DATA_FROM_LMEM,Marked data loaded from local memory +##C70A0 +Marked data loaded from local memory +#134,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses +##C50C7 +Marked Data SLB misses +#135,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses +##C50C6 +Marked Data TLB misses 
+#136,v,g,n,n,PM_MRK_FPU_FIN,Marked instruction FPU processing finished +##00014 +One of the Floating Point Units finished a marked instruction. Instructions that finish may not necessarily complete +#137,v,g,n,n,PM_MRK_INST_FIN,Marked instruction finished +##00005 +One of the execution units finished a marked instruction. Instructions that finish may not necessarily complete +#138,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C70E4 +The source information is valid and is for a marked load +#139,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##810C2 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#140,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##810C3 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#141,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##810C1 +A marked store was flushed from unit 0 because it was unaligned +#142,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##810C0 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#143,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##810C6 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#144,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##810C7 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.
+#145,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##810C4 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#146,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##810C5 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#147,v,g,n,n,PM_MRK_LSU_FLUSH_LRQ,Marked LRQ flushes +##81088 +A marked load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#148,v,g,n,n,PM_MRK_LSU_FLUSH_UST,Marked unaligned store flushes +##81090 +A marked store was flushed because it was unaligned +#149,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C70E6 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#150,v,g,n,n,PM_MRK_ST_CMPL_INT,Marked store completed with intervention +##00003 +A marked store previously sent to the memory subsystem completed (data home) after requiring intervention +#151,v,g,n,n,PM_PMC2_OVERFLOW,PMC2 Overflow +##0000A +PMC2 Overflow +#152,v,g,n,n,PM_PMC6_OVERFLOW,PMC6 Overflow +##0001A +PMC6 Overflow +#153,v,g,n,n,PM_PTEG_FROM_L25_MOD,PTEG loaded from L2.5 modified +##830A2 +PTEG loaded from L2.5 modified +#154,v,g,n,n,PM_PTEG_FROM_L275_SHR,PTEG loaded from L2.75 shared +##83097 +PTEG loaded from L2.75 shared +#155,v,g,n,n,PM_PTEG_FROM_L35_MOD,PTEG loaded from L3.5 modified +##830A6 +PTEG loaded from L3.5 modified +#156,v,g,n,n,PM_PTEG_FROM_L375_SHR,PTEG loaded from L3.75 shared +##8309E +PTEG loaded from L3.75 shared +#157,v,g,n,n,PM_PTEG_FROM_LMEM,PTEG loaded from local memory +##830A0 +PTEG loaded from local memory +#158,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full +##730E6 +Snoop partial write retry due to partial-write 
queues full +#159,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue +##717C6 +Snoop partial-write retry due to collision with active write or partial-write queue +#160,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue +##715C6 +Snoop read retry due to collision with active write queue +#161,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop read retry due to read queue full +##710C6 +Snoop read retry due to read queue full +#162,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue +##716C6 +Snoop write/dclaim retry due to collision with active write queue +#163,v,g,n,n,PM_STOP_COMPLETION,Completion stopped +##00018 +RAS Unit has signaled completion to stop +#164,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C10C3 +A store missed the dcache +#165,v,g,n,n,PM_ST_REF_L1,L1 D cache store references +##C1090 +Total DL1 Store references +#166,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C10C1 +A store executed on unit 0 +#167,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C10C5 +A store executed on unit 1 +#168,v,g,n,n,PM_SUSPENDED,Suspended +##00000 +Suspended +#169,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty +##410C6 +Cycles CLB completely empty +#170,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses +##410C7 +Cycles both threads in L2 misses +#171,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference +##430E3 +Cycles no thread priority difference +#172,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 +##430E4 +Cycles thread priority difference is 1 or 2 +#173,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 +##430E5 +Cycles thread priority difference is 3 or 4 +#174,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 +##430E6 +Cycles thread priority difference is 5 or 6 
+#175,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 +##430E2 +Cycles thread priority difference is -1 or -2 +#176,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 +##430E1 +Cycles thread priority difference is -3 or -4 +#177,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 +##430E0 +Cycles thread priority difference is -5 or -6 +#178,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty +##410C2 +Thread selection overrides caused by CLB empty +#179,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance +##410C4 +Thread selection overrides caused by GCT imbalance +#180,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds +##410C5 +Thread selection overrides caused by ISU holds +#181,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses +##410C3 +Thread selection overrides caused by L2 misses +#182,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 +##410C0 +Decode selected thread 0 +#183,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 +##410C1 +Decode selected thread 1 +#184,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected +##330E7 +SMT hang detected +#185,v,g,t,n,PM_THRESH_TIMEO,Threshold timeout +##0000B +The threshold timer expired +#186,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch +##130E4 +TLBIE held at dispatch +#187,v,g,n,n,PM_DATA_FROM_L2MISS,Data loaded missed L2 +##C309B +DL1 was reloaded from beyond L2. +#188,v,g,n,n,PM_MRK_DATA_FROM_L2MISS,Marked data loaded missed L2 +##C709B +DL1 was reloaded from beyond L2 due to a marked demand load.
+#189,v,g,n,n,PM_PTEG_FROM_L2MISS,PTEG loaded from L2 miss +##8309B +PTEG loaded from L2 miss + +$$$$$$$$ + +{ counter 4 } +#0,v,g,n,n,PM_0INST_FETCH,No instructions fetched +##2208D +No instructions were fetched this cycles (due to IFU hold, redirect, or icache miss) +#1,v,g,n,n,PM_BR_ISSUED,Branches issued +##230E4 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#2,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##230E5 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#3,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##230E6 +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. 
+#4,v,g,n,n,PM_BR_PRED_CR,A conditional branch was predicted, CR prediction +##230E2 +A conditional branch was predicted, CR prediction +#5,v,g,n,n,PM_BR_PRED_CR_TA,A conditional branch was predicted, CR and target prediction +##23087 +A conditional branch was predicted, CR and target prediction +#6,v,g,n,n,PM_BR_PRED_TA,A conditional branch was predicted, target prediction +##230E3 +A conditional branch was predicted, target prediction +#7,v,g,n,n,PM_CMPLU_STALL_DIV,Completion stall caused by DIV instruction +##11099 +Completion stall caused by DIV instruction +#8,v,g,n,n,PM_CMPLU_STALL_ERAT_MISS,Completion stall caused by ERAT miss +##1109B +Completion stall caused by ERAT miss +#9,v,g,n,n,PM_CMPLU_STALL_FPU,Completion stall caused by FPU instruction +##11098 +Completion stall caused by FPU instruction +#10,v,g,n,n,PM_CMPLU_STALL_REJECT,Completion stall caused by reject +##1109A +Completion stall caused by reject +#11,u,g,n,s,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##110C1 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). +#12,v,g,n,s,PM_CYC,Processor cycles +##0000F +Processor cycles +#13,v,g,n,n,PM_DATA_FROM_L275_MOD,Data loaded from L2.75 modified +##C3097 +DL1 was reloaded with modified (M) data from the L2 of another MCM due to a demand load. +#14,v,g,n,n,PM_DATA_FROM_L375_MOD,Data loaded from L3.75 modified +##C309E +Data loaded from L3.75 modified +#15,v,g,n,n,PM_DATA_FROM_RMEM,Data loaded from remote memory +##C3087 +Data loaded from remote memory +#16,u,g,n,s,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C10C7 +A dcache invalidated was received from the L2 because a line in L2 was castout. 
+#17,v,g,n,n,PM_DC_PREF_DST,DST (Data Stream Touch) stream start +##830E6 +DST (Data Stream Touch) stream start +#18,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##830E7 +A new Prefetch Stream was allocated +#19,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##130E3 +The number of Cycles MSR(EE) bit was off. +#20,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##130E7 +Cycles MSR(EE) bit off and external interrupt pending +#21,v,g,n,n,PM_EXT_INT,External interrupts +##00003 +An external interrupt occurred +#22,v,g,n,n,PM_FAB_CMD_RETRIED,Fabric command retried +##710C7 +Fabric command retried +#23,v,g,n,s,PM_FAB_DCLAIM_RETRIED,dclaim retried +##730E7 +dclaim retried +#24,v,g,n,s,PM_FAB_M1toVNorNN_SIDECAR_EMPTY,M1 to VN/NN sidecar empty +##712C7 +M1 to VN/NN sidecar empty +#25,v,g,n,s,PM_FAB_P1toVNorNN_SIDECAR_EMPTY,P1 to VN/NN sidecar empty +##711C7 +P1 to VN/NN sidecar empty +#26,v,g,n,s,PM_FAB_PNtoNN_SIDECAR,PN to NN beat went to sidecar first +##713C7 +PN to NN beat went to sidecar first +#27,v,g,n,s,PM_FAB_PNtoVN_SIDECAR,PN to VN beat went to sidecar first +##733E7 +PN to VN beat went to sidecar first +#28,v,g,n,s,PM_FAB_VBYPASS_EMPTY,Vertical bypass buffer empty +##731E7 +Vertical bypass buffer empty +#29,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##110C6 +Flush caused by branch mispredict +#30,v,g,n,s,PM_FLUSH_IMBAL,Flush caused by thread GCT imbalance +##330E3 +Flush caused by thread GCT imbalance +#31,v,g,n,n,PM_FLUSH,Flushes +##110C7 +Flushes +#32,v,g,n,s,PM_FLUSH_SB,Flush caused by scoreboard operation +##330E2 +Flush caused by scoreboard operation +#33,v,g,n,s,PM_FLUSH_SYNC,Flush caused by sync +##330E1 +Flush caused by sync +#34,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##010C2 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. 
+#35,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##010C3 +fp0 finished, produced a result This only indicates finish, not completion. +#36,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##010C0 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#37,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##030E0 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#38,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##010C1 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#39,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##010C6 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#40,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##010C7 +fp1 finished, produced a result. This only indicates finish, not completion. +#41,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##010C4 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#42,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##010C5 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#43,v,g,n,n,PM_FPU_FEST,FPU executed FEST instruction +##01090 +This signal is active for one cycle when executing one of the estimate instructions. 
This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1. +#44,v,g,n,n,PM_FPU_FIN,FPU produced a result +##01088 +FPU finished, produced a result This only indicates finish, not completion. Combined Unit 0 + Unit 1 +#45,v,g,n,s,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##110C0 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped +#46,v,g,n,s,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##110C4 +The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped +#47,c,g,n,n,PM_FXLS_FULL_CYC,Cycles FXLS queue is full +##11090 +Cycles when one or both FXU/LSU issue queue are full +#48,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##130E2 +The Fixed Point unit 0 finished an instruction and produced a result +#49,u,g,n,n,PM_FXU1_BUSY_FXU0_IDLE,FXU1 busy FXU0 idle +##00012 +FXU0 was idle while FXU1 was busy +#50,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##130E6 +The Fixed Point unit 1 finished an instruction and produced a result +#51,v,g,n,n,PM_GCT_NOSLOT_BR_MPRED,No slot in GCT caused by branch mispredict +##1009C +This thread has no slot in the GCT because of branch mispredict +#52,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##0001F +The ISU sends a signal indicating the gct is full. +#53,v,g,n,s,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##130E5 +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#54,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##130E1 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. +#55,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##00002 +A group that previously attempted dispatch was rejected.
+#56,v,g,n,n,PM_IC_DEMAND_L2_BHT_REDIRECT,L2 I cache demand request due to BHT redirect +##230E0 +L2 I cache demand request due to BHT redirect +#57,v,g,n,n,PM_IC_DEMAND_L2_BR_REDIRECT,L2 I cache demand request due to branch redirect +##230E1 +L2 I cache demand request due to branch redirect +#58,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch +##210C7 +New line coming into the prefetch buffer +#59,v,g,n,n,PM_INST_CMPL,Instructions completed +##00001 +Number of Eligible Instructions that completed. +#60,v,g,n,n,PM_INST_DISP,Instructions dispatched +##00009 +The ISU sends the number of instructions dispatched. +#61,v,g,n,n,PM_INST_FROM_L275_MOD,Instruction fetched from L2.75 modified +##22096 +Instruction fetched from L2.75 modified +#62,v,g,n,n,PM_INST_FROM_L375_MOD,Instruction fetched from L3.75 modified +##2209D +Instruction fetched from L3.75 modified +#63,v,g,n,n,PM_INST_FROM_RMEM,Instruction fetched from remote memory +##22086 +Instruction fetched from remote memory +#64,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C30E4 +The data source information is valid +#65,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##C70E7 +A request to prefetch data into the L1 was made +#66,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##230E7 +This signal is asserted each cycle a cache write is active. +#67,v,g,n,s,PM_L2SA_MOD_INV,L2 slice A transition from modified to invalid +##730E0 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. 
+#68,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_ADDR,L2 Slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C0 +L2 Slice A RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +#69,v,g,n,s,PM_L2SA_RCLD_DISP_FAIL_OTHER,L2 Slice A RC load dispatch attempt failed due to other reasons +##731E0 +L2 Slice A RC load dispatch attempt failed due to other reasons +#70,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_ADDR,L2 Slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C0 +L2 Slice A RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +#71,v,g,n,s,PM_L2SA_RCST_DISP_FAIL_OTHER,L2 Slice A RC store dispatch attempt failed due to other reasons +##732E0 +L2 Slice A RC store dispatch attempt failed due to other reasons +#72,v,g,n,s,PM_L2SA_RC_DISP_FAIL_CO_BUSY_ALL,L2 Slice A RC dispatch attempt failed due to all CO busy +##713C0 +L2 Slice A RC dispatch attempt failed due to all CO busy +#73,v,g,n,s,PM_L2SA_SHR_INV,L2 slice A transition from shared to invalid +##710C0 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#74,v,g,n,n,PM_L2SA_ST_HIT,L2 slice A store hits +##733E0 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#75,v,g,n,s,PM_L2SB_MOD_INV,L2 slice B transition from modified to invalid +##730E1 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. 
+#76,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_ADDR,L2 Slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C1 +L2 Slice B RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +#77,v,g,n,s,PM_L2SB_RCLD_DISP_FAIL_OTHER,L2 Slice B RC load dispatch attempt failed due to other reasons +##731E1 +L2 Slice B RC load dispatch attempt failed due to other reasons +#78,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_ADDR,L2 Slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C1 +L2 Slice B RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +#79,v,g,n,s,PM_L2SB_RCST_DISP_FAIL_OTHER,L2 Slice B RC store dispatch attempt failed due to other reasons +##732E1 +L2 Slice B RC store dispatch attempt failed due to other reasons +#80,v,g,n,s,PM_L2SB_RC_DISP_FAIL_CO_BUSY_ALL,L2 Slice B RC dispatch attempt failed due to all CO busy +##713C1 +L2 Slice B RC dispatch attempt failed due to all CO busy +#81,v,g,n,s,PM_L2SB_SHR_INV,L2 slice B transition from shared to invalid +##710C1 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. +#82,v,g,n,n,PM_L2SB_ST_HIT,L2 slice B store hits +##733E1 +A store request made from the core hit in the L2 directory. This event is provided on each of the three L2 slices A,B, and C. +#83,v,g,n,s,PM_L2SC_MOD_INV,L2 slice C transition from modified to invalid +##730E2 +A cache line in the local L2 directory made a state transition from the Modified state to the Invalid state. This transition was caused by any RWITM snoop request that hit against a modified entry in the local L2. The event is provided on each of the three slices A,B,and C. 
+#84,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_ADDR,L2 Slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +##711C2 +L2 Slice C RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ +#85,v,g,n,s,PM_L2SC_RCLD_DISP_FAIL_OTHER,L2 Slice C RC load dispatch attempt failed due to other reasons +##731E2 +L2 Slice C RC load dispatch attempt failed due to other reasons +#86,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_ADDR,L2 Slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +##712C2 +L2 Slice C RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ +#87,v,g,n,s,PM_L2SC_RCST_DISP_FAIL_OTHER,L2 Slice C RC store dispatch attempt failed due to other reasons +##732E2 +L2 Slice C RC store dispatch attempt failed due to other reasons +#88,v,g,n,s,PM_L2SC_RC_DISP_FAIL_CO_BUSY_ALL,L2 Slice C RC dispatch attempt failed due to all CO busy +##713C2 +L2 Slice C RC dispatch attempt failed due to all CO busy +#89,v,g,n,s,PM_L2SC_SHR_INV,L2 slice C transition from shared to invalid +##710C2 +A cache line in the local L2 directory made a state transition from Shared (Shared, Shared L, or Tagged) to the Invalid state. This transition was caused by any external snoop request. The event is provided on each of the three slices A,B,and C. NOTE: For this event to be useful the tablewalk duration event should also be counted. 
+#90,v,g,n,n,PM_L2SC_ST_HIT,L2 slice C store hits +##733E2 +L2 slice C store hits +#91,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##C50C3 +A request to prefetch data into L2 was made +#92,v,g,n,s,PM_L3SA_HIT,L3 slice A hits +##711C3 +L3 slice A hits +#93,v,g,n,s,PM_L3SA_MOD_INV,L3 slice A transition from modified to invalid +##730E3 +L3 slice A transition from modified to invalid +#94,v,g,n,s,PM_L3SA_SHR_INV,L3 slice A transition from shared to invalid +##710C3 +L3 slice A transition from shared to invalid +#95,v,g,n,s,PM_L3SA_SNOOP_RETRY,L3 slice A snoop retries +##731E3 +L3 slice A snoop retries +#96,v,g,n,s,PM_L3SB_HIT,L3 slice B hits +##711C4 +L3 slice B hits +#97,v,g,n,s,PM_L3SB_MOD_INV,L3 slice B transition from modified to invalid +##730E4 +L3 slice B transition from modified to invalid +#98,v,g,n,s,PM_L3SB_SHR_INV,L3 slice B transition from shared to invalid +##710C4 +L3 slice B transition from shared to invalid +#99,v,g,n,s,PM_L3SB_SNOOP_RETRY,L3 slice B snoop retries +##731E4 +L3 slice B snoop retries +#100,v,g,n,s,PM_L3SC_HIT,L3 Slice C hits +##711C5 +L3 Slice C hits +#101,v,g,n,s,PM_L3SC_MOD_INV,L3 slice C transition from modified to invalid +##730E5 +L3 slice C transition from modified to invalid +#102,v,g,n,s,PM_L3SC_SHR_INV,L3 slice C transition from shared to invalid +##710C5 +L3 slice C transition from shared to invalid +#103,v,g,n,s,PM_L3SC_SNOOP_RETRY,L3 slice C snoop retries +##731E5 +L3 slice C snoop retries +#104,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C10C2 +A load, executing on unit 0, missed the dcache +#105,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C10C6 +A load, executing on unit 1, missed the dcache +#106,v,g,n,n,PM_LD_REF_L1,L1 D cache load references +##C1090 +Total DL1 Load references +#107,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C10C0 +A load executed on unit 0 +#108,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##C10C4 +A load executed on unit 1 
+#109,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##C50C0 +A floating point load was executed from LSU unit 0 +#110,v,g,n,n,PM_LSU0_NCLD,LSU0 non-cacheable loads +##C50C1 +LSU0 non-cacheable loads +#111,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##C50C4 +A floating point load was executed from LSU unit 1 +#112,v,g,n,n,PM_LSU1_NCLD,LSU1 non-cacheable loads +##C50C5 +LSU1 non-cacheable loads +#113,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##110C5 +Flush initiated by LSU +#114,v,g,n,s,PM_LSU_FLUSH_SRQ_FULL,Flush caused by SRQ full +##330E0 +Flush caused by SRQ full +#115,v,g,n,n,PM_LSU_LDF,LSU executed Floating Point load instruction +##C5090 +LSU executed Floating Point load instruction +#116,u,g,n,s,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C30E7 +The LMQ was full +#117,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C70E5 +A dcache miss occurred for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#118,v,g,n,s,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C30E6 +The first entry in the LMQ was allocated. +#119,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C30E5 +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#120,v,g,n,s,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##110C2 +The ISU sends this signal when the LRQ is full. +#121,u,g,n,n,PM_DC_PREF_STREAM_ALLOC_BLK,D cache out of prefetch streams +##C50C2 +D cache out of prefetch streams +#122,u,g,n,n,PM_LSU_SRQ_EMPTY_CYC,Cycles SRQ empty +##00015 +The Store Request Queue is empty +#123,v,g,n,s,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##110C3 +The ISU sends this signal when the srq is full. +#124,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##830E5 +This signal is asserted every cycle when a sync is in the SRQ.
+#125,v,g,n,n,PM_LWSYNC_HELD,LWSYNC held at dispatch +##130E0 +LWSYNC held at dispatch +#126,v,g,n,s,PM_MEM_LO_PRIO_PW_CMPL,Low priority partial-write completed +##737E6 +Low priority partial-write completed +#127,v,g,n,s,PM_MEM_LO_PRIO_WR_CMPL,Low priority write completed +##736E6 +Low priority write completed +#128,v,g,n,s,PM_MEM_PW_CMPL,Memory partial-write completed +##734E6 +Memory partial-write completed +#129,v,g,n,s,PM_MEM_PW_GATH,Memory partial-write gathered +##714C6 +Memory partial-write gathered +#130,v,g,n,s,PM_MEM_RQ_DISP_BUSY1to7,Memory read queue dispatched with 1-7 queues busy +##711C6 +Memory read queue dispatched with 1-7 queues busy +#131,v,g,n,s,PM_MEM_SPEC_RD_CANCEL,Speculative memory read canceled +##712C6 +Speculative memory read canceled +#132,v,g,n,s,PM_MEM_WQ_DISP_BUSY8to15,Memory write queue dispatched with 8-15 queues busy +##733E6 +Memory write queue dispatched with 8-15 queues busy +#133,v,g,n,s,PM_MEM_WQ_DISP_DCLAIM,Memory write queue dispatched due to dclaim/flush +##713C6 +Memory write queue dispatched due to dclaim/flush +#134,v,g,n,n,PM_MRK_CRU_FIN,Marked instruction CRU processing finished +##00005 +The Condition Register Unit finished a marked instruction. Instructions that finish may not necessarily complete +#135,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD_CYC,Marked load latency from L2.5 modified +##C70A2 +Marked load latency from L2.5 modified +#136,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD,Marked data loaded from L2.75 modified +##C7097 +DL1 was reloaded with modified (M) data from the L2 of another MCM due to a marked demand load.
+#137,v,g,n,n,PM_MRK_DATA_FROM_L275_MOD_CYC,Marked load latency from L2.75 modified +##C70A3 +Marked load latency from L2.75 modified +#138,v,g,n,n,PM_MRK_DATA_FROM_L35_MOD_CYC,Marked load latency from L3.5 modified +##C70A6 +Marked load latency from L3.5 modified +#139,v,g,n,n,PM_MRK_DATA_FROM_L375_MOD,Marked data loaded from L3.75 modified +##C709E +Marked data loaded from L3.75 modified +#140,v,g,n,n,PM_MRK_DATA_FROM_L375_MOD_CYC,Marked load latency from L3.75 modified +##C70A7 +Marked load latency from L3.75 modified +#141,v,g,n,n,PM_MRK_DATA_FROM_LMEM_CYC,Marked load latency from local memory +##C70A0 +Marked load latency from local memory +#142,v,g,n,n,PM_MRK_DATA_FROM_RMEM,Marked data loaded from remote memory +##C7087 +Marked data loaded from remote memory +#143,v,g,n,n,PM_MRK_DATA_FROM_RMEM_CYC,Marked load latency from remote memory +##C70A1 +Marked load latency from remote memory +#144,v,g,n,n,PM_MRK_DSLB_MISS,Marked Data SLB misses +##C50C7 +Marked Data SLB misses +#145,v,g,n,n,PM_MRK_DTLB_MISS,Marked Data TLB misses +##C50C6 +Marked Data TLB misses +#146,v,g,n,n,PM_MRK_GRP_CMPL,Marked group completed +##00013 +A group containing a sampled instruction completed. Microcoded instructions that span multiple groups will generate this event once per group. 
+#147,v,g,n,n,PM_MRK_GRP_IC_MISS,Group experienced marked I cache miss +##12091 +Group experienced marked I cache miss +#148,v,g,n,n,PM_MRK_GRP_TIMEO,Marked group completion timeout +##0000B +The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor +#149,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C70E4 +The source information is valid and is for a marked load +#150,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##810C2 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#151,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##810C3 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#152,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##810C1 +A marked store was flushed from unit 0 because it was unaligned +#153,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##810C0 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#154,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##810C6 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#155,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##810C7 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#156,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##810C4 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#157,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##810C5 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#158,c,g,n,n,PM_MRK_LSU_FIN,Marked instruction LSU processing finished +##00014 +One of the Load/Store Units finished a marked instruction. Instructions that finish may not necessarily complete +#159,v,g,n,n,PM_MRK_LSU_FLUSH_SRQ,Marked SRQ flushes +##81088 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#160,v,g,n,n,PM_MRK_LSU_FLUSH_ULD,Marked unaligned load flushes +##81090 +A marked load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#161,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C70E6 +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#162,v,g,n,n,PM_PMC3_OVERFLOW,PMC3 Overflow +##0000A +PMC3 Overflow +#163,v,g,n,n,PM_PTEG_FROM_L275_MOD,PTEG loaded from L2.75 modified +##83097 +PTEG loaded from L2.75 modified +#164,v,g,n,n,PM_PTEG_FROM_L375_MOD,PTEG loaded from L3.75 modified +##8309E +PTEG loaded from L3.75 modified +#165,v,g,n,n,PM_PTEG_FROM_RMEM,PTEG loaded from remote memory +##83087 +PTEG loaded from remote memory +#166,v,g,n,s,PM_SNOOP_PARTIAL_RTRY_QFULL,Snoop partial write retry due to partial-write queues full +##730E6 +Snoop partial write retry due to partial-write queues full +#167,v,g,n,s,PM_SNOOP_PW_RETRY_WQ_PWQ,Snoop partial-write retry due to collision with active write or partial-write queue +##717C6 +Snoop partial-write retry due to collision with active write or partial-write queue +#168,v,g,n,s,PM_SNOOP_RD_RETRY_WQ,Snoop read retry due to collision with active write queue +##715C6
+Snoop read retry due to collision with active write queue +#169,v,g,n,s,PM_SNOOP_WR_RETRY_QFULL,Snoop read retry due to read queue full +##710C6 +Snoop read retry due to read queue full +#170,v,g,n,s,PM_SNOOP_WR_RETRY_WQ,Snoop write/dclaim retry due to collision with active write queue +##716C6 +Snoop write/dclaim retry due to collision with active write queue +#171,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C10C3 +A store missed the dcache +#172,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C10C1 +A store executed on unit 0 +#173,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C10C5 +A store executed on unit 1 +#174,v,g,n,n,PM_SUSPENDED,Suspended +##00000 +Suspended +#175,v,g,n,s,PM_CLB_EMPTY_CYC,Cycles CLB empty +##410C6 +Cycles CLB completely empty +#176,v,g,n,s,PM_THRD_L2MISS_BOTH_CYC,Cycles both threads in L2 misses +##41084,410C7 +Cycles both threads in L2 misses +#177,v,g,n,n,PM_THRD_PRIO_DIFF_0_CYC,Cycles no thread priority difference +##430E3 +Cycles no thread priority difference +#178,v,g,n,n,PM_THRD_PRIO_DIFF_1or2_CYC,Cycles thread priority difference is 1 or 2 +##430E4 +Cycles thread priority difference is 1 or 2 +#179,v,g,n,n,PM_THRD_PRIO_DIFF_3or4_CYC,Cycles thread priority difference is 3 or 4 +##430E5 +Cycles thread priority difference is 3 or 4 +#180,v,g,n,n,PM_THRD_PRIO_DIFF_5or6_CYC,Cycles thread priority difference is 5 or 6 +##430E6 +Cycles thread priority difference is 5 or 6 +#181,v,g,n,n,PM_THRD_PRIO_DIFF_minus1or2_CYC,Cycles thread priority difference is -1 or -2 +##430E2 +Cycles thread priority difference is -1 or -2 +#182,v,g,n,n,PM_THRD_PRIO_DIFF_minus3or4_CYC,Cycles thread priority difference is -3 or -4 +##430E1 +Cycles thread priority difference is -3 or -4 +#183,v,g,n,n,PM_THRD_PRIO_DIFF_minus5or6_CYC,Cycles thread priority difference is -5 or -6 +##430E0 +Cycles thread priority difference is -5 or -6 +#184,v,g,n,s,PM_THRD_SEL_OVER_CLB_EMPTY,Thread selection overrides caused by CLB empty +##410C2
+Thread selection overrides caused by CLB empty +#185,v,g,n,s,PM_THRD_SEL_OVER_GCT_IMBAL,Thread selection overrides caused by GCT imbalance +##410C4 +Thread selection overrides caused by GCT imbalance +#186,v,g,n,s,PM_THRD_SEL_OVER_ISU_HOLD,Thread selection overrides caused by ISU holds +##410C5 +Thread selection overrides caused by ISU holds +#187,v,g,n,s,PM_THRD_SEL_OVER_L2MISS,Thread selection overrides caused by L2 misses +##410C3 +Thread selection overrides caused by L2 misses +#188,v,g,n,s,PM_THRD_SEL_T0,Decode selected thread 0 +##410C0 +Decode selected thread 0 +#189,v,g,n,s,PM_THRD_SEL_T1,Decode selected thread 1 +##410C1 +Decode selected thread 1 +#190,v,g,n,s,PM_THRD_SMT_HANG,SMT hang detected +##330E7 +SMT hang detected +#191,v,g,n,n,PM_TLBIE_HELD,TLBIE held at dispatch +##130E4 +TLBIE held at dispatch +#192,v,g,n,n,PM_WORK_HELD,Work held +##0000C +RAS Unit has signaled completion to stop and there are groups waiting to complete + +$$$$$$$$ + +{ counter 5 } +#0,v,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of PPC instructions completed. + +$$$$$$$$ + +{ counter 6 } +#0,v,g,n,n,PM_RUN_CYC,Run cycles +##00005 +Processor Cycles gated by the run latch diff --git a/src/event_data/power5/groups b/src/event_data/power5/groups new file mode 100644 index 0000000..3fcc6a8 --- /dev/null +++ b/src/event_data/power5/groups @@ -0,0 +1,743 @@ +{ **************************** +{ THIS IS OPEN SOURCE CODE +{ **************************** +{ (C) COPYRIGHT International Business Machines Corp. 2005 +{ This file is licensed under the University of Tennessee license. +{ See LICENSE.txt.
+{ +{ File: events/power5/groups +{ Author: Maynard Johnson +{ maynardj@us.ibm.com +{ Mods: +{ + +{ Number of groups + 145 + +{ Group descriptions + +#0,190,71,56,12,0,0,pm_utilization,CPI and utilization data +##00005,00001,00009,0000F,00009,00005 +00000000,00000000,0A02121E,00000000 +CPI and utilization data + +#1,2,195,49,12,0,0,pm_completion,Completion and cycle counts +##00013,00004,00013,0000F,00009,00005 +00000000,00000000,2608261E,00000000 +Completion and cycle counts + +#2,66,65,50,60,0,0,pm_group_dispatch,Group dispatch events +##120E3,120E4,130E1,00009,00009,00005 +00000000,4000000E,C6C8C212,00000000 +Group dispatch events + +#3,0,2,169,138,0,0,pm_clb1,CLB fullness +##400C0,400C2,410C6,C70A6,00009,00005 +00000000,015B0001,80848C4C,00000001 +CLB fullness + +#4,6,6,149,59,0,0,pm_clb2,CLB fullness +##400C5,400C6,C70E6,00001,00009,00005 +00000000,01430002,8A8CCC02,00000001 +CLB fullness + +#5,60,59,46,51,0,0,pm_gct_empty,GCT empty reasons +##00004,1009C,10084,1009C,00009,00005 +00000000,40000000,08380838,00000000 +GCT empty reasons + +#6,62,61,47,52,0,0,pm_gct_usage,GCT Usage +##0001F,0001F,0001F,0001F,00009,00005 +00000000,00000000,3E3E3E3E,00000000 +GCT Usage + +#7,143,143,113,119,0,0,pm_lsu1,LSU LRQ and LMQ events +##C20E6,C20E2,C30E6,C30E5,00009,00005 +00000000,000F000F,CCC4CCCA,00000000 +LSU LRQ and LMQ events + +#8,147,147,119,123,0,0,pm_lsu2,LSU SRQ events +##C20E5,C20E1,830E5,110C3,00009,00005 +00000000,400E000E,CAC2CA86,00000000 +LSU SRQ events + +#9,149,141,112,122,0,0,pm_lsu3,LSU SRQ and LMQ events +##C2088,00015,C70E5,00015,00009,00005 +00000000,010F000A,102ACA2A,00000000 +LSU SRQ and LMQ events + +#10,212,73,117,18,0,0,pm_prefetch1,Prefetch stream allocation +##2209B,220E4,C50C2,830E7,00009,00005 +00000000,8432000D,36C884CE,00000000 +Prefetch stream allocation + +#11,73,9,61,58,0,0,pm_prefetch2,Prefetch events +##00001,220E5,C70E7,210C7,00009,00005 +00000000,81030006,02CACE8E,00000001 +Prefetch events + +#12,139,1,87,59,0,0,pm_prefetch3,L2 
prefetch and misc events +##C2090,400C1,C50C3,00001,00009,00005 +00000000,047C0008,20828602,00000001 +L2 prefetch and misc events + +#13,126,135,13,91,0,0,pm_prefetch4,Misc prefetch and reject events +##C60E0,C60E4,830E6,C50C3,00009,00005 +00000000,063E000E,C0C8CC86,00000000 +Misc prefetch and reject events + +#14,145,144,25,159,0,0,pm_lsu_reject1,LSU reject events +##C6090,C6088,330E3,81088,00009,00005 +00000000,C22C000E,2010C610,00000001 +LSU reject events + +#15,125,134,55,66,0,0,pm_lsu_reject2,LSU rejects due to reload CDF or tag update collision +##C60E2,C60E6,00001,230E7,00009,00005 +00000000,820C000D,C4CC02CE,00000001 +LSU rejects due to reload CDF or tag update collision + +#16,123,132,120,191,0,0,pm_lsu_reject3,LSU rejects due to ERAT, held instructions +##C60E3,C60E7,130E0,130E4,00009,00005 +00000000,420C000F,C6CEC0C8,00000000 +LSU rejects due to ERAT, held instructions + +#17,124,133,55,1,0,0,pm_lsu_reject4,LSU0/1 reject LMQ full +##C60E1,C60E5,00001,230E4,00009,00005 +00000000,820C000D,C2CA02C8,00000001 +LSU0/1 reject LMQ full + +#18,146,145,109,31,0,0,pm_lsu_reject5,LSU misc reject and flush events +##C6088,C6090,110C5,110C7,00009,00005 +00000000,420C000C,10208A8E,00000000 +LSU misc reject and flush events + +#19,73,140,25,16,0,0,pm_flush1,Misc flush events +##00001,C0088,330E3,C10C7,00009,00005 +00000000,C0F00002,0210C68E,00000001 +Misc flush events + +#20,81,71,27,33,0,0,pm_flush2,Flushes due to scoreboard and sync +##800C0,00001,330E2,330E1,00009,00005 +00000000,C0800003,8002C4C2,00000001 +Flushes due to scoreboard and sync + +#21,141,138,55,113,0,0,pm_lsu_flush_srq_lrq,LSU flush by SRQ and LRQ events +##C0090,C0090,00001,110C5,00009,00005 +00000000,40C00000,2020028A,00000001 +LSU flush by SRQ and LRQ events + +#22,119,128,109,59,0,0,pm_lsu_flush_lrq,LSU0/1 flush due to LRQ +##C00C2,C00C6,110C5,00001,00009,00005 +00000000,40C00000,848C8A02,00000001 +LSU0/1 flush due to LRQ + +#23,120,129,55,113,0,0,pm_lsu_flush_srq,LSU0/1 flush due to SRQ
+##C00C3,C00C7,00001,110C5,00009,00005 +00000000,40C00000,868E028A,00000001 +LSU0/1 flush due to SRQ + +#24,142,140,0,59,0,0,pm_lsu_flush_unaligned,LSU flush due to unaligned data +##C0088,C0088,230E4,00001,00009,00005 +00000000,80C00002,1010C802,00000001 +LSU flush due to unaligned data + +#25,121,130,109,59,0,0,pm_lsu_flush_uld,LSU0/1 flush due to unaligned load +##C00C0,C00C4,110C5,00001,00009,00005 +00000000,40C00000,80888A02,00000001 +LSU0/1 flush due to unaligned load + +#26,122,131,55,113,0,0,pm_lsu_flush_ust,LSU0/1 flush due to unaligned store +##C00C1,C00C5,00001,110C5,00009,00005 +00000000,40C00000,828A028A,00000001 +LSU0/1 flush due to unaligned store + +#27,140,71,147,114,0,0,pm_lsu_flush_full,LSU flush due to LRQ/SRQ full +##320E7,00001,81088,330E0,00009,00005 +00000000,C0200009,CE0210C0,00000001 +LSU flush due to LRQ/SRQ full + +#28,70,13,55,10,0,0,pm_lsu_stall1,LSU Stalls +##00014,11098,00001,1109A,00009,00005 +00000000,40000000,28300234,00000001 +LSU Stalls + +#29,73,10,6,8,0,0,pm_lsu_stall2,LSU Stalls +##00001,1109A,0000F,1109B,00009,00005 +00000000,40000000,02341E36,00000001 +LSU Stalls + +#30,68,12,55,7,0,0,pm_fxu_stall,FXU Stalls +##12091,11099,00001,11099,00009,00005 +00000000,40000008,22320232,00000001 +FXU Stalls + +#31,57,11,55,9,0,0,pm_fpu_stall,FPU Stalls +##10090,1109B,00001,11098,00009,00005 +00000000,40000000,20360230,00000001 +FPU Stalls + +#32,115,7,116,116,0,0,pm_queue_full,BRQ LRQ LMQ queue full +##820E7,100C5,110C2,C30E7,00009,00005 +00000000,400B0009,CE8A84CE,00000000 +BRQ LRQ LMQ queue full + +#33,41,49,40,46,0,0,pm_issueq_full,FPU FX full +##100C3,100C7,110C0,110C4,00009,00005 +00000000,40000000,868E8088,00000000 +FPU FX full + +#34,11,114,48,11,0,0,pm_mapper_full1,CR CTR GPR mapper full +##100C4,100C6,130E5,110C1,00009,00005 +00000000,40000002,888CCA82,00000000 +CR CTR GPR mapper full + +#35,35,204,188,59,0,0,pm_mapper_full2,FPR XER mapper full +##100C1,100C2,C709B,00001,00009,00005 +00000000,41030002,82843602,00000001 +FPR XER 
mapper full + +#36,198,193,106,112,0,0,pm_misc_load,Non-cachable loads and stcx events +##820E1,820E5,C50C1,C50C5,00009,00005 +00000000,0438000C,C2CA828A,00000001 +Non-cachable loads and stcx events + +#37,117,126,52,57,0,0,pm_ic_demand,ICache demand from BR redirect +##C20E3,C20E7,230E0,230E1,00009,00005 +00000000,800C000F,C6CEC0C2,00000000 +ICache demand from BR redirect + +#38,72,69,54,0,0,0,pm_ic_pref,ICache prefetch +##220E7,220E6,210C7,2208D,00009,00005 +00000000,8000000C,CECC8E1A,00000000 +ICache prefetch + +#39,69,67,60,59,0,0,pm_ic_miss,ICache misses +##12099,120E7,C30E4,00001,00009,00005 +00000000,4003000E,32CEC802,00000001 +ICache misses + +#40,210,184,1,3,0,0,pm_branch_miss,Branch mispredict, TLB and SLB misses +##80088,80088,230E5,230E6,00009,00005 +00000000,80800003,1010CACC,00000000 +Branch mispredict, TLB and SLB misses + +#41,9,8,3,5,0,0,pm_branch1,Branch operations +##23087,23087,23087,23087,00009,00005 +00000000,80000003,0E0E0E0E,00000000 +Branch operations + +#42,64,62,24,59,0,0,pm_branch2,Branch operations +##120E5,120E6,110C6,00001,00009,00005 +00000000,4000000C,CACC8C02,00000001 +Branch operations + +#43,20,21,100,106,0,0,pm_L1_tlbmiss,L1 load and TLB misses +##800C7,800C4,C1088,C1090,00009,00005 +00000000,00B00000,8E881020,00000000 +L1 load and TLB misses + +#44,13,137,165,171,0,0,pm_L1_DERAT_miss,L1 store and DERAT misses +##C3087,80090,C1090,C10C3,00009,00005 +00000000,00B30000,0E202086,00000000 +L1 store and DERAT misses + +#45,21,78,101,105,0,0,pm_L1_slbmiss,L1 load and SLB misses +##800C5,800C1,C10C2,C10C6,00009,00005 +00000000,00B00000,8A82848C,00000000 +L1 load and SLB misses + +#46,26,23,103,108,0,0,pm_L1_dtlbmiss_4K,L1 load references and 4K Data TLB references and misses +##C40C2,C40C0,C10C0,C10C4,00009,00005 +00000000,08F00000,84808088,00000000 +L1 load references and 4K Data TLB references and misses + +#47,25,22,166,173,0,0,pm_L1_dtlbmiss_16M,L1 store references and 16M Data TLB references and misses 
+##C40C6,C40C4,C10C1,C10C5,00009,00005 +00000000,08F00000,8C88828A,00000000 +L1 store references and 16M Data TLB references and misses + +#48,16,18,26,59,0,0,pm_dsource1,L3 cache and memory data access +##C308E,C3087,110C7,00001,00009,00005 +00000000,40030000,1C0E8E02,00000001 +L3 cache and memory data access + +#49,16,18,187,15,0,0,pm_dsource2,L3 cache and memory data access +##C308E,C3087,C309B,C3087,00009,00005 +00000000,00030003,1C0E360E,00000000 +L3 cache and memory data access + +#50,14,16,8,13,0,0,pm_dsource_L2,L2 cache data access +##C3097,C3097,C3097,C3097,00009,00005 +00000000,00030003,2E2E2E2E,00000000 +L2 cache data access + +#51,17,17,10,14,0,0,pm_dsource_L3,L3 cache data access +##C309E,C309E,C309E,C309E,00009,00005 +00000000,00030003,3C3C3C3C,00000000 +L3 cache data access + +#52,78,74,59,63,0,0,pm_isource1,Instruction source information +##2208D,2208D,2208D,22086,00009,00005 +00000000,8000000C,1A1A1A0C,00000000 +Instruction source information + +#53,76,77,55,0,0,0,pm_isource2,Instruction source information +##22086,22086,00001,2208D,00009,00005 +00000000,8000000C,0C0C021A,00000001 +Instruction source information + +#54,77,75,57,61,0,0,pm_isource_L2,L2 instruction source information +##22096,22096,22096,22096,00009,00005 +00000000,8000000C,2C2C2C2C,00000000 +L2 instruction source information + +#55,79,76,58,62,0,0,pm_isource_L3,L3 instruction source information +##2209D,2209D,2209D,2209D,00009,00005 +00000000,8000000C,3A3A3A3A,00000000 +L3 instruction source information + +#56,184,181,154,163,0,0,pm_pteg_source1,PTEG source information +##83097,83097,83097,83097,00009,00005 +00000000,00020003,2E2E2E2E,00000000 +PTEG source information + +#57,187,182,156,164,0,0,pm_pteg_source2,PTEG source information +##8309E,8309E,8309E,8309E,00009,00005 +00000000,00020003,3C3C3C3C,00000000 +PTEG source information + +#58,183,183,189,165,0,0,pm_pteg_source3,PTEG source information +##83087,83087,8309B,83087,00009,00005 +00000000,00020003,0E0E360E,00000000 +PTEG 
source information + +#59,186,64,51,16,0,0,pm_pteg_source4,L3 PTEG and group dispatch events +##8308E,00002,00002,C10C7,00009,00005 +00000000,00320000,1C04048E,00000000 +L3 PTEG and group dispatch events + +#60,83,82,64,69,0,0,pm_L2SA_ld,L2 slice A load events +##701C0,721E0,711C0,731E0,00009,00005 +00000000,30554005,80C080C0,00000000 +L2 slice A load events + +#61,85,84,66,71,0,0,pm_L2SA_st,L2 slice A store events +##702C0,722E0,712C0,732E0,00009,00005 +00000000,30558005,80C080C0,00000000 +L2 slice A store events + +#62,87,87,68,74,0,0,pm_L2SA_st2,L2 slice A store events +##703C0,723E0,713C0,733E0,00009,00005 +00000000,3055C005,80C080C0,00000000 +L2 slice A store events + +#63,91,90,72,77,0,0,pm_L2SB_ld,L2 slice B load events +##701C1,721E1,711C1,731E1,00009,00005 +00000000,30554005,82C282C2,00000000 +L2 slice B load events + +#64,93,92,74,79,0,0,pm_L2SB_st,L2 slice B store events +##702C1,722E1,712C1,732E1,00009,00005 +00000000,30558005,82C282C2,00000000 +L2 slice B store events + +#65,95,95,76,82,0,0,pm_L2SB_st2,L2 slice B store events +##703C1,723E1,713C1,733E1,00009,00005 +00000000,3055C005,82C282C2,00000000 +L2 slice B store events + +#66,99,98,80,85,0,0,pm_L2SC_ld,L2 slice C load events +##701C2,721E2,711C2,731E2,00009,00005 +00000000,30554005,84C484C4,00000000 +L2 slice C load events + +#67,101,100,82,87,0,0,pm_L2SC_st,L2 slice C store events +##702C2,722E2,712C2,732E2,00009,00005 +00000000,30558005,84C484C4,00000000 +L2 slice C store events + +#68,103,103,84,90,0,0,pm_L2SC_st2,L2 slice C store events +##703C2,723E2,713C2,733E2,00009,00005 +00000000,3055C005,84C484C4,00000000 +L2 slice C store events + +#69,107,71,89,94,0,0,pm_L3SA_trans,L3 slice A state transitions +##720E3,00001,730E3,710C3,00009,00005 +00000000,3015000A,C602C686,00000001 +L3 slice A state transitions + +#70,73,108,93,98,0,0,pm_L3SB_trans,L3 slice B state transitions +##00001,720E4,730E4,710C4,00009,00005 +00000000,30150006,02C8C888,00000001 +L3 slice B state transitions +
+#71,73,111,97,102,0,0,pm_L3SC_trans,L3 slice C state transitions +##00001,720E5,730E5,710C5,00009,00005 +00000000,30150006,02CACA8A,00000001 +L3 slice C state transitions + +#72,82,86,63,73,0,0,pm_L2SA_trans,L2 slice A state transitions +##720E0,700C0,730E0,710C0,00009,00005 +00000000,3055000A,C080C080,00000000 +L2 slice A state transitions + +#73,90,94,71,81,0,0,pm_L2SB_trans,L2 slice B state transitions +##720E1,700C1,730E1,710C1,00009,00005 +00000000,3055000A,C282C282,00000000 +L2 slice B state transitions + +#74,98,102,79,89,0,0,pm_L2SC_trans,L2 slice C state transitions +##720E2,700C2,730E2,710C2,00009,00005 +00000000,3055000A,C484C484,00000000 +L2 slice C state transitions + +#75,106,107,91,99,0,0,pm_L3SAB_retry,L3 slice A/B snoop retry and all CI/CO busy +##721E3,721E4,731E3,731E4,00009,00005 +00000000,3005100F,C6C8C6C8,00000000 +L3 slice A/B snoop retry and all CI/CO busy + +#76,108,109,88,96,0,0,pm_L3SAB_hit,L3 slice A/B hit and reference +##701C3,701C4,711C3,711C4,00009,00005 +00000000,30501000,86888688,00000000 +L3 slice A/B hit and reference + +#77,112,112,99,100,0,0,pm_L3SC_retry_hit,L3 slice C hit & snoop retry +##721E5,701C5,731E5,711C5,00009,00005 +00000000,3055100A,CA8ACA8A,00000000 +L3 slice C hit & snoop retry + +#78,55,54,38,43,0,0,pm_fpu1,Floating Point events +##00088,00088,01088,01090,00009,00005 +00000000,00000000,10101020,00000000 +Floating Point events + +#79,56,53,39,44,0,0,pm_fpu2,Floating Point events +##00090,00090,01090,01088,00009,00005 +00000000,00000000,20202010,00000000 +Floating Point events + +#80,54,55,30,40,0,0,pm_fpu3,Floating point events +##02088,02088,010C3,010C7,00009,00005 +00000000,0000000C,1010868E,00000000 +Floating point events + +#81,58,56,55,115,0,0,pm_fpu4,Floating point events +##02090,02090,00001,C5090,00009,00005 +00000000,0430000C,20200220,00000001 +Floating point events + +#82,40,48,29,39,0,0,pm_fpu5,Floating point events by unit +##000C2,000C6,010C2,010C6,00009,00005
+00000000,00000000,848C848C,00000000 +Floating point events by unit + +#83,37,45,31,41,0,0,pm_fpu6,Floating point events by unit +##020E0,020E4,010C0,010C4,00009,00005 +00000000,0000000C,C0C88088,00000000 +Floating point events by unit + +#84,38,46,33,42,0,0,pm_fpu7,Floating point events by unit +##000C0,000C4,010C1,010C5,00009,00005 +00000000,00000000,8088828A,00000000 +Floating point events by unit + +#85,43,51,55,37,0,0,pm_fpu8,Floating point events by unit +##020E1,020E5,00001,030E0,00009,00005 +00000000,0000000D,C2CA02C0,00000001 +Floating point events by unit + +#86,42,50,105,111,0,0,pm_fpu9,Floating point events by unit +##020E3,020E7,C50C0,C50C4,00009,00005 +00000000,0430000C,C6CE8088,00000000 +Floating point events by unit + +#87,39,47,55,42,0,0,pm_fpu10,Floating point events by unit +##000C1,000C5,00001,010C5,00009,00005 +00000000,00000000,828A028A,00000001 +Floating point events by unit + +#88,36,44,30,59,0,0,pm_fpu11,Floating point events by unit +##000C3,000C7,010C3,00001,00009,00005 +00000000,00000000,868E8602,00000001 +Floating point events by unit + +#89,44,52,105,59,0,0,pm_fpu12,Floating point events by unit +##020E2,020E6,C50C0,00001,00009,00005 +00000000,0430000C,C4CC8002,00000001 +Floating point events by unit + +#90,59,57,42,49,0,0,pm_fxu1,Fixed Point events +##00012,00012,00012,00012,00009,00005 +00000000,00000000,24242424,00000000 +Fixed Point events + +#91,171,172,45,47,0,0,pm_fxu2,Fixed Point events +##00002,12091,13088,11090,00009,00005 +00000000,40000006,04221020,00000001 +Fixed Point events + +#92,4,4,43,50,0,0,pm_fxu3,Fixed Point events +##400C3,400C4,130E2,130E6,00009,00005 +00000000,40400003,8688C4CC,00000000 +Fixed Point events + +#93,206,203,171,178,0,0,pm_smt_priorities1,Thread priority events +##420E3,420E6,430E3,430E4,00009,00005 +00000000,0005000F,C6CCC6C8,00000000 +Thread priority events + +#94,205,202,173,180,0,0,pm_smt_priorities2,Thread priority events +##420E2,420E5,430E5,430E6,00009,00005 
+00000000,0005000F,C4CACACC,00000000 +Thread priority events + +#95,204,201,175,182,0,0,pm_smt_priorities3,Thread priority events +##420E1,420E4,430E2,430E1,00009,00005 +00000000,0005000F,C2C8C4C2,00000000 +Thread priority events + +#96,203,68,177,59,0,0,pm_smt_priorities4,Thread priority events +##420E0,0000B,430E0,00001,00009,00005 +00000000,0005000A,C016C002,00000001 +Thread priority events + +#97,202,196,55,176,0,0,pm_smt_both,Thread common events +##0000B,00013,00001,41084,00009,00005 +00000000,00100000,16260208,00000001 +Thread common events + +#98,196,71,182,189,0,0,pm_smt_selection,Thread selection +##800C3,00001,410C0,410C1,00009,00005 +00000000,00900000,86028082,00000001 +Thread selection + +#99,73,0,178,185,0,0,pm_smt_selectover1,Thread selection override +##00001,400C0,410C2,410C4,00009,00005 +00000000,00500000,02808488,00000001 +Thread selection override + +#100,73,15,180,187,0,0,pm_smt_selectover2,Thread selection override +##00001,0000F,410C5,410C3,00009,00005 +00000000,00100000,021E8A86,00000001 +Thread selection override + +#101,27,27,17,23,0,0,pm_fabric1,Fabric events +##700C7,720E7,710C7,730E7,00009,00005 +00000000,30550005,8ECE8ECE,00000000 +Fabric events + +#102,32,29,20,28,0,0,pm_fabric2,Fabric data movement +##701C7,721E7,711C7,731E7,00009,00005 +00000000,30550085,8ECE8ECE,00000000 +Fabric data movement + +#103,33,33,21,27,0,0,pm_fabric3,Fabric data movement +##703C7,723E7,713C7,733E7,00009,00005 +00000000,30550185,8ECE8ECE,00000000 +Fabric data movement + +#104,31,28,15,24,0,0,pm_fabric4,Fabric data movement +##702C7,722E7,130E3,712C7,00009,00005 +00000000,70540106,8ECEC68E,00000000 +Fabric data movement + +#105,193,185,161,166,0,0,pm_snoop1,Snoop retry +##700C6,720E6,710C6,730E6,00009,00005 +00000000,30550005,8CCC8CCC,00000000 +Snoop retry + +#106,194,189,160,59,0,0,pm_snoop2,Snoop read retry +##705C6,725E6,715C6,00001,00009,00005 +00000000,30540A04,8CCC8C02,00000001 +Snoop read retry + +#107,197,150,162,127,0,0,pm_snoop3,Snoop write retry
+##706C6,726E6,716C6,736E6,00009,00005 +00000000,30550C05,8CCC8CCC,00000000 +Snoop write retry + +#108,192,149,159,126,0,0,pm_snoop4,Snoop partial write retry +##707C6,727E6,717C6,737E6,00009,00005 +00000000,30550E05,8CCC8CCC,00000000 +Snoop partial write retry + +#109,156,155,125,20,0,0,pm_mem_rq,Memory read queue dispatch +##701C6,721E6,711C6,130E7,00009,00005 +00000000,70540205,8CCC8CCE,00000000 +Memory read queue dispatch + +#110,155,148,126,21,0,0,pm_mem_read,Memory read complete and cancel +##702C6,722E6,712C6,00003,00009,00005 +00000000,30540404,8CCC8C06,00000000 +Memory read complete and cancel + +#111,159,156,128,132,0,0,pm_mem_wq,Memory write queue dispatch +##703C6,723E6,713C6,733E6,00009,00005 +00000000,30550605,8CCC8CCC,00000000 +Memory write queue dispatch + +#112,153,152,124,128,0,0,pm_mem_pwq,Memory partial write queue +##704C6,724E6,714C6,734E6,00009,00005 +00000000,30550805,8CCC8CCC,00000000 +Memory partial write queue + +#113,171,173,185,158,0,0,pm_threshold,Thresholding +##00002,820E2,0000B,00014,00009,00005 +00000000,00080004,04C41628,00000001 +Thresholding + +#114,171,179,137,146,0,0,pm_mrk_grp1,Marked group events +##00002,820E3,00005,00013,00009,00005 +00000000,00080004,04C60A26,00000001 +Marked group events + +#115,172,158,138,147,0,0,pm_mrk_grp2,Marked group events +##00015,00005,C70E4,12091,00009,00005 +00000000,41030002,2A0AC822,00000001 +Marked group events + +#116,160,162,129,135,0,0,pm_mrk_dsource1,Marked data from +##C7087,C70A0,C70A2,C70A2,00009,00005 +00000000,010B0003,0E404444,00000001 +Marked data from + +#117,161,160,55,44,0,0,pm_mrk_dsource2,Marked data from +##C7097,C70A2,00001,01088,00009,00005 +00000000,010B0000,2E440210,00000001 +Marked data from + +#118,163,166,131,138,0,0,pm_mrk_dsource3,Marked data from +##C708E,C70A4,C70A6,C70A6,00009,00005 +00000000,010B0003,1C484C4C,00000001 +Marked data from + +#119,166,161,130,143,0,0,pm_mrk_dsource4,Marked data from +##C70A1,C70A3,C7097,C70A1,00009,00005 
+00000000,010B0003,42462E42,00000001 +Marked data from + +#120,164,164,133,141,0,0,pm_mrk_dsource5,Marked data from +##C709E,C70A6,C70A0,C70A0,00009,00005 +00000000,010B0003,3C4C4040,00000001 +Marked data from + +#121,162,161,55,137,0,0,pm_mrk_dsource6,Marked data from +##C70A3,C70A3,00001,C70A3,00009,00005 +00000000,010B0001,46460246,00000001 +Marked data from + +#122,165,165,132,140,0,0,pm_mrk_dsource7,Marked data from +##C70A7,C70A7,C709E,C70A7,00009,00005 +00000000,010B0003,4E4E3C4E,00000001 +Marked data from + +#123,168,168,135,144,0,0,pm_mrk_lbmiss,Marked TLB and SLB misses +##C40C1,C40C5,C50C6,C50C7,00009,00005 +00000000,0CF00000,828A8C8E,00000001 +Marked TLB and SLB misses + +#124,170,170,55,144,0,0,pm_mrk_lbref,Marked TLB and SLB references +##C40C3,C40C7,00001,C50C7,00009,00005 +00000000,0CF00000,868E028E,00000001 +Marked TLB and SLB references + +#125,175,71,150,134,0,0,pm_mrk_lsmiss,Marked load and store miss +##82088,00001,00003,00005,00009,00005 +00000000,00080008,1002060A,00000001 +Marked load and store miss + +#126,179,179,148,160,0,0,pm_mrk_ulsflush,Mark unaligned load and store flushes +##00003,820E3,81090,81090,00009,00005 +00000000,00280004,06C62020,00000001 +Mark unaligned load and store flushes + +#127,178,178,136,148,0,0,pm_mrk_misc,Misc marked instructions +##820E6,00003,00014,0000B,00009,00005 +00000000,00080008,CC062816,00000001 +Misc marked instructions + +#128,13,74,165,106,0,0,pm_lsref_L1,Load/Store operations and L1 activity +##C3087,2208D,C1090,C1090,00009,00005 +00000000,80330004,0E1A2020,00000000 +Load/Store operations and L1 activity + +#129,16,18,165,106,0,0,pm_lsref_L2L3,Load/Store operations and L2,L3 activity +##C308E,C3087,C1090,C1090,00009,00005 +00000000,00330000,1C0E2020,00000000 +Load/Store operations and L2,L3 activity + +#130,81,21,165,106,0,0,pm_lsref_tlbmiss,Load/Store operations and TLB misses +##800C0,800C4,C1090,C1090,00009,00005 +00000000,00B00000,80882020,00000000 +Load/Store operations and TLB misses + 
+#131,16,18,100,171,0,0,pm_Dmiss,Data cache misses +##C308E,C3087,C1088,C10C3,00009,00005 +00000000,00330000,1C0E1086,00000000 +Data cache misses + +#132,12,69,61,91,0,0,pm_prefetchX,Prefetch events +##0000F,220E6,C70E7,C50C3,00009,00005 +00000000,85330006,1ECCCE86,00000000 +Prefetch events + +#133,9,8,3,1,0,0,pm_branchX,Branch operations +##23087,23087,23087,230E4,00009,00005 +00000000,80000003,0E0E0EC8,00000000 +Branch operations + +#134,43,51,30,37,0,0,pm_fpuX1,Floating point events by unit +##020E1,020E5,010C3,030E0,00009,00005 +00000000,0000000D,C2CA86C0,00000000 +Floating point events by unit + +#135,39,47,33,42,0,0,pm_fpuX2,Floating point events by unit +##000C1,000C5,010C1,010C5,00009,00005 +00000000,00000000,828A828A,00000000 +Floating point events by unit + +#136,36,44,30,40,0,0,pm_fpuX3,Floating point events by unit +##000C3,000C7,010C3,010C7,00009,00005 +00000000,00000000,868E868E,00000000 +Floating point events by unit + +#137,56,54,165,106,0,0,pm_fpuX4,Floating point and L1 events +##00090,00088,C1090,C1090,00009,00005 +00000000,00300000,20102020,00000000 +Floating point and L1 events + +#138,58,56,30,40,0,0,pm_fpuX5,Floating point events +##02090,02090,010C3,010C7,00009,00005 +00000000,0000000C,2020868E,00000000 +Floating point events + +#139,55,53,39,44,0,0,pm_fpuX6,Floating point events +##00088,00090,01090,01088,00009,00005 +00000000,00000000,10202010,00000000 +Floating point events + +#140,12,58,6,44,0,0,pm_hpmcount1,HPM group for set 1 +##0000F,00014,0000F,01088,00009,00005 +00000000,00000000,1E281E10,00000000 +HPM group for set 1 + +#141,12,56,56,115,0,0,pm_hpmcount2,HPM group for set 2 +##0000F,02090,00009,C5090,00009,00005 +00000000,04300004,1E201220,00000000 +HPM group for set 2 + +#142,12,72,100,171,0,0,pm_hpmcount3,HPM group for set 3 +##0000F,120E1,C1088,C10C3,00009,00005 +00000000,40300004,1EC21086,00000000 +HPM group for set 3 + +#143,210,15,165,106,0,0,pm_hpmcount4,HPM group for set 7 +##80088,0000F,C1090,C1090,00009,00005 
+00000000,00B00000,101E2020,00000000 +HPM group for set 7 + +#144,56,54,6,59,0,0,pm_1flop_with_fma,One flop instructions plus FMA +##00090,00088,0000F,00001,00009,00005 +00000000,00000000,20101E02,00000000 +One flop instructions plus FMA + diff --git a/src/event_data/ppc970/events b/src/event_data/ppc970/events new file mode 100644 index 0000000..865c008 --- /dev/null +++ b/src/event_data/ppc970/events @@ -0,0 +1,1911 @@ +{ **************************** +{ THIS IS OPEN SOURCE CODE +{ **************************** +{ (C) COPYRIGHT International Business Machines Corp. 2005 +{ This file is licensed under the University of Tennessee license. +{ See LICENSE.txt. +{ +{ File: events/ppc970/events +{ Author: Maynard Johnson +{ maynardj@us.ibm.com +{ Mods: +{ + +{ counter 1 } +#0,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full +##10095,60095 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). +#1,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##10094,60094 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#2,v,g,n,n,PM_CYC,Processor cycles +##0000F +Processor cycles +#3,v,g,n,n,PM_DATA_FROM_L2,Data loaded from L2 +##C3087 +DL1 was reloaded from the local L2 due to a demand load +#4,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##80097 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#5,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##80095 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve +#6,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##80094 +A TLB miss for a data request occurred. 
Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#7,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##10091,60091 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#8,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction +##00093 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#9,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##02098 +This signal is active for one cycle when one of the operands is denormalized. +#10,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##00090 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#11,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##00091 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#12,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##00092 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#13,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##10093,60093 +The issue queue for FPU unit 0 cannot accept any more instructions. 
Issue is stopped +#14,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##0209B +This signal is active for one cycle when fp0 is executing single precision instruction. +#15,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##02099 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#16,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##0209A +This signal is active for one cycle when fp0 is executing a store instruction. +#17,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction +##00097 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#18,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##0209C +This signal is active for one cycle when one of the operands is denormalized. +#19,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##00094 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#20,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##00095 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#21,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##00096 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##10097,60097 +The issue queue for FPU unit 1 cannot accept any more instructions. 
Issue is stopped +#23,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##0209F +This signal is active for one cycle when fp1 is executing single precision instruction. +#24,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##0209D +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#25,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##0209E +This signal is active for one cycle when fp1 is executing a store instruction. +#26,v,g,n,n,PM_FPU_DENORM,FPU received denormalized data +##02080 +This signal is active for one cycle when one of the operands is denormalized. Combined Unit 0 + Unit 1 +#27,v,g,n,n,PM_FPU_FDIV,FPU executed FDIV instruction +##00080 +This signal is active for one cycle at the end of the microcode executed when FPU is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. Combined Unit 0 + Unit 1 +#28,v,g,n,n,PM_GCT_EMPTY_CYC,Cycles GCT empty +##00004 +The Global Completion Table is completely empty +#29,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##10090,60090 +The ISU sends a signal indicating the gct is full. +#30,v,g,n,n,PM_GRP_BR_MPRED,Group experienced a branch mispredict +##1209F,6209F +Group experienced a branch mispredict +#31,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##1209E,6209E +Group experienced branch redirect +#32,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##1209C,6209C +A group that previously attempted dispatch was rejected. +#33,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##1209B,6209B +Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. 
+#34,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch +##2209E +New line coming into the prefetch buffer +#35,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##2209D +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#36,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##2209F +This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. This should be a fairly accurate count of ERAT misses (best available). +#37,c,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of Eligible Instructions that completed. +#38,v,g,n,n,PM_INST_DISP,Instructions dispatched +##12098,12099,1209A,62098,62099,6209A +The ISU sends the number of instructions dispatched. +#39,v,g,n,n,PM_INST_FROM_L1,Instruction fetched from L1 +##2208D +An instruction fetch group was fetched from L1. Fetch Groups can contain up to 8 instructions +#40,v,g,n,n,PM_INST_FROM_L2,Instructions fetched from L2 +##22086 +An instruction fetch group was fetched from L2. Fetch Groups can contain up to 8 instructions +#41,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##80091 +A SLB miss for an instruction fetch has occurred +#42,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##80090 +A TLB miss for an Instruction Fetch has occurred +#43,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##8209F +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#44,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##10096,60096 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.
+#45,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##80092 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#46,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C0092 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#47,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##C0093 +A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. +#48,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C0090 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#49,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C0091 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#50,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C609B +LSU0 reject due to ERAT miss +#51,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C6099 +LSU0 reject due to LMQ full or missed data coming +#52,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C609A +LSU0 reject due to reload CDF or tag update collision +#53,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ rejects +##C6098 +LSU0 SRQ rejects +#54,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C2098 +Data from a store instruction was forwarded to a load on unit 0 +#55,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##80096 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.
+#56,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C0096 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#57,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##C0097 +A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group. +#58,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C0094 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#59,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C0095 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#60,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C609F +LSU1 reject due to ERAT miss +#61,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C609D +LSU1 reject due to LMQ full or missed data coming +#62,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C609E +LSU1 reject due to reload CDF or tag update collision +#63,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ rejects +##C609C +LSU1 SRQ rejects +#64,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C209C +Data from a store instruction was forwarded to a load on unit 1 +#65,v,g,n,n,PM_LSU_FLUSH_ULD,LRQ unaligned load flushes +##C0080 +A load was flushed because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#66,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C209E +LRQ slot zero was allocated +#67,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C209A +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The LRQ is 32 entries long and is allocated round-robin.
+#68,v,g,n,n,PM_LSU_REJECT_SRQ,LSU SRQ rejects +##C6080 +LSU SRQ rejects +#69,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C209D +SRQ Slot zero was allocated +#70,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C2099 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#71,c,g,n,n,PM_LSU_SRQ_STFWD,SRQ store forwarded +##C2080 +Data from a store instruction was forwarded to a load +#72,v,g,n,n,PM_MRK_DATA_FROM_L2,Marked data loaded from L2 +##C7087 +DL1 was reloaded from the local L2 due to a marked demand load +#73,v,g,n,n,PM_MRK_GRP_DISP,Marked group dispatched +##00002 +A group containing a sampled instruction was dispatched +#74,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##8209A +A DL1 reload occurred due to a marked load +#75,v,g,n,n,PM_MRK_LD_MISS_L1,Marked L1 D cache load misses +##82080 +Marked L1 D cache load misses +#76,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##82098 +A marked load, executing on unit 0, missed the dcache +#77,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##8209C +A marked load, executing on unit 1, missed the dcache +#78,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##8209E +A marked stcx (stwcx or stdcx) failed +#79,v,g,n,n,PM_MRK_ST_CMPL,Marked store instruction completed +##00003 +A sampled store has completed (data home) +#80,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##8209B +A marked store missed the dcache +#81,v,g,n,n,PM_PMC8_OVERFLOW,PMC8 Overflow +##0000A +PMC8 Overflow +#82,v,g,n,n,PM_RUN_CYC,Run cycles +##00005 +Processor Cycles gated by the run latch +#83,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE +##80093 +A tlbie was snooped from another processor.
+#84,v,g,n,n,PM_STCX_FAIL,STCX failed +##82099 +A stcx (stwcx or stdcx) failed +#85,v,g,n,n,PM_STCX_PASS,Stcx passes +##8209D +A stcx (stwcx or stdcx) instruction was successful +#86,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C209B +A store missed the dcache +#87,v,g,n,n,PM_SUSPENDED,Suspended +##00008 +Suspended +#88,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##10092,60092 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. + +$$$$$$$$ + +{ counter 2 } +#0,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full +##10095,60095 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). +#1,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##10094,60094 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#2,v,g,n,n,PM_CYC,Processor cycles +##0000F +Processor cycles +#3,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##80097 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#4,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##80095 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve +#5,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##80094 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. 
+#6,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##10091,60091 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#7,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction +##00093 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#8,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##02098 +This signal is active for one cycle when one of the operands is denormalized. +#9,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##00090 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#10,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##00091 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#11,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##00092 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#12,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##10093,60093 +The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped +#13,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##0209B +This signal is active for one cycle when fp0 is executing single precision instruction. 
+#14,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##02099 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#15,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##0209A +This signal is active for one cycle when fp0 is executing a store instruction. +#16,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction +##00097 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#17,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##0209C +This signal is active for one cycle when one of the operands is denormalized. +#18,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##00094 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#19,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##00095 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#20,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##00096 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#21,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##10097,60097 +The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped +#22,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##0209F +This signal is active for one cycle when fp1 is executing single precision instruction. 
+#23,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##0209D +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#24,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##0209E +This signal is active for one cycle when fp1 is executing a store instruction. +#25,v,g,n,n,PM_FPU_FMA,FPU executed multiply-add instruction +##00080 +This signal is active for one cycle when FPU is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 +#26,v,g,n,n,PM_FPU_STALL3,FPU stalled in pipe3 +##02080 +FPU has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. Combined Unit 0 + Unit 1 +#27,v,g,n,n,PM_GCT_EMPTY_SRQ_FULL,GCT empty caused by SRQ full +##0000B +GCT empty caused by SRQ full +#28,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##10090,60090 +The ISU sends a signal indicating the gct is full. +#29,v,g,n,n,PM_GRP_BR_MPRED,Group experienced a branch mispredict +##1209F,6209F +Group experienced a branch mispredict +#30,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##1209E,6209E +Group experienced branch redirect +#31,v,g,n,n,PM_GRP_DISP,Group dispatches +##00004 +A group was dispatched +#32,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##1209C,6209C +A group that previously attempted dispatch was rejected. +#33,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##1209B,6209B +Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. 
+#34,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch +##2209E +New line coming into the prefetch buffer +#35,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##2209D +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#36,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##2209F +This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed. This should be a fairly accurate count of ERAT misses (best available). +#37,c,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of Eligible Instructions that completed. +#38,v,g,n,n,PM_INST_DISP,Instructions dispatched +##12098,12099,1209A,62098,62099,6209A +The ISU sends the number of instructions dispatched. +#39,v,g,n,n,PM_INST_FROM_MEM,Instruction fetched from memory +##22086 +Instruction fetched from memory +#40,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##80091 +A SLB miss for an instruction fetch has occurred +#41,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##80090 +A TLB miss for an Instruction Fetch has occurred +#42,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##8209F +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#43,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##10096,60096 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#44,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##80092 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload.
Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#45,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C0092 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#46,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##C0093 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#47,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C0090 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#48,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C0091 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#49,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C609B +LSU0 reject due to ERAT miss +#50,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C6099 +LSU0 reject due to LMQ full or missed data coming +#51,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C609A +LSU0 reject due to reload CDF or tag update collision +#52,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ rejects +##C6098 +LSU0 SRQ rejects +#53,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C2098 +Data from a store instruction was forwarded to a load on unit 0 +#54,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##80096 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur.
+#55,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C0096 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#56,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##C0097 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#57,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C0094 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#58,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C0095 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#59,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C609F +LSU1 reject due to ERAT miss +#60,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C609D +LSU1 reject due to LMQ full or missed data coming +#61,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C609E +LSU1 reject due to reload CDF or tag update collision +#62,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ rejects +##C609C +LSU1 SRQ rejects +#63,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C209C +Data from a store instruction was forwarded to a load on unit 1 +#64,v,g,n,n,PM_LSU_FLUSH_UST,SRQ unaligned store flushes +##C0080 +A store was flushed because it was unaligned +#65,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty +##00002 +Cycles when both the LMQ and SRQ are empty (LSU is idle) +#66,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C209E +LRQ slot zero was allocated +#67,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C209A +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. 
+#68,v,g,n,n,PM_LSU_REJECT_LMQ_FULL,LSU reject due to LMQ full or missed data coming +##C6080 +LSU reject due to LMQ full or missed data coming +#69,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C209D +SRQ Slot zero was allocated +#70,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C2099 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#71,v,g,n,n,PM_MRK_BRU_FIN,Marked instruction BRU processing finished +##00005 +The branch unit finished a marked instruction. Instructions that finish may not necessarily complete +#72,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##8209A +A DL1 reload occurred due to marked load +#73,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##82098 +A marked load, executing on unit 0, missed the dcache +#74,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##8209C +A marked load, executing on unit 1, missed the dcache +#75,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##8209E +A marked stcx (stwcx or stdcx) failed +#76,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##8209B +A marked store missed the dcache +#77,v,g,n,n,PM_PMC1_OVERFLOW,PMC1 Overflow +##0000A +PMC1 Overflow +#78,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE +##80093 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction.
+#79,v,g,n,n,PM_STCX_FAIL,STCX failed +##82099 +A stcx (stwcx or stdcx) failed +#80,v,g,n,n,PM_STCX_PASS,Stcx passes +##8209D +A stcx (stwcx or stdcx) instruction was successful +#81,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C209B +A store missed the dcache +#82,v,g,n,n,PM_SUSPENDED,Suspended +##00008 +Suspended +#83,v,g,t,n,PM_THRESH_TIMEO,Threshold timeout +##00003 +The threshold timer expired +#84,v,g,n,n,PM_WORK_HELD,Work held +##00001 +RAS Unit has signaled completion to stop and there are groups waiting to complete +#85,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##10092,60092 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. + +$$$$$$$$ + +{ counter 3 } +#0,v,g,n,n,PM_BR_ISSUED,Branches issued +##23098 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#1,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##23099 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#2,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##2309A +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. 
+#3,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##11091,61091 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). +#4,v,g,n,n,PM_CYC,Processor cycles +##0000F +Processor cycles +#5,v,g,n,n,PM_DATA_FROM_MEM,Data loaded from memory +##C3087 +Data loaded from memory +#6,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C1097 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#7,u,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of streams +##8309A +out of streams +#8,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##8309F +A new Prefetch Stream was allocated +#9,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##1309B,6309B +The number of Cycles MSR(EE) bit was off. +#10,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##1309F,6309F +Cycles MSR(EE) bit off and external interrupt pending +#11,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##11096,61096 +Flush caused by branch mispredict +#12,v,g,n,n,PM_FLUSH_LSU_BR_MPRED,Flush caused by LSU or branch mispredict +##11097,61097 +Flush caused by LSU or branch mispredict +#13,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##01092 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#14,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##01093 +fp0 finished, produced a result This only indicates finish, not completion. +#15,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##01090 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#16,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##03098 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#17,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##01091 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#18,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##01096 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#19,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##01097 +fp1 finished, produced a result. This only indicates finish, not completion. +#20,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##01094 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#21,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##01095 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU_FEST,FPU executed FEST instruction +##01080 +This signal is active for one cycle when executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. Combined Unit 0 + Unit 1. +#23,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##11090,61090 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. 
Issue is stopped +#24,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##11094,61094 +The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped +#25,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##1309A,6309A +The Fixed Point unit 0 finished an instruction and produced a result +#26,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##1309E,6309E +The Fixed Point unit 1 finished an instruction and produced a result +#27,v,g,n,n,PM_FXU_FIN,FXU produced a result +##63080 +The fixed point unit (Unit 0 + Unit 1) finished a marked instruction. Instructions that finish may not necessarily complete. +#28,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##1309D,6309D +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#29,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##13099,63099 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. +#30,v,g,n,n,PM_HV_CYC,Hypervisor Cycles +##00004 +Cycles when the processor is executing in Hypervisor (MSR[HV] = 1 and MSR[PR]=0) +#31,c,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of Eligible Instructions that completed. +#32,v,g,n,n,PM_INST_FROM_PREF,Instructions fetched from prefetch +##2208D +An instruction fetch group was fetched from the prefetch buffer. Fetch Groups can contain up to 8 instructions +#33,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C309C +The data source information is valid +#34,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##83099 +A request to prefetch data into the L1 was made +#35,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##2309B +This signal is asserted each cycle a cache write is active.
+#36,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##8309B +A request to prefetch data into L2 was made +#37,v,g,n,n,PM_LD_MISS_L1,L1 D cache load misses +##C1080 +Total DL1 Load references that miss the DL1 +#38,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C1092 +A load, executing on unit 0, missed the dcache +#39,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C1096 +A load, executing on unit 1, missed the dcache +#40,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C1090 +A load executed on unit 0 +#41,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##C1094 +A load executed on unit 1 +#42,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##83098 +A floating point load was executed from LSU unit 0 +#43,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##8309C +A floating point load was executed from LSU unit 1 +#44,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##11095,61095 +Flush initiated by LSU +#45,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C309F +The LMQ was full +#46,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C709D +A dcache miss occured for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#47,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C309E +The first entry in the LMQ was allocated. +#48,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C309D +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#49,u,g,n,n,PM_LSU_LMQ_SRQ_EMPTY_CYC,Cycles LMQ and SRQ empty +##00002 +Cycles when both the LMQ and SRQ are empty (LSU is idle) +#50,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##11092,61092 +The ISU sends this signal when the LRQ is full. +#51,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##11093,61093 +The ISU sends this signal when the srq is full. 
+#52,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##8309D +This signal is asserted every cycle when a sync is in the SRQ. +#53,v,g,n,n,PM_MRK_DATA_FROM_MEM,Marked data loaded from memory +##C7087 +Marked data loaded from memory +#54,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C709C +The source information is valid and is for a marked load +#55,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##81092 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#56,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##81093 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#57,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##81090 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#58,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##81091 +A marked store was flushed from unit 0 because it was unaligned +#59,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##81096 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#60,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##81097 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#61,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##81094 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#62,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##81095 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#63,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C709E +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#64,v,g,n,n,PM_MRK_ST_CMPL_INT,Marked store completed with intervention +##00003 +A marked store previously sent to the memory subsystem completed (data home) after requiring intervention +#65,v,g,n,n,PM_MRK_VMX_FIN,Marked instruction VMX processing finished +##00005 +Marked instruction VMX processing finished +#66,v,g,n,n,PM_PMC2_OVERFLOW,PMC2 Overflow +##0000A +PMC2 Overflow +#67,v,g,n,n,PM_STOP_COMPLETION,Completion stopped +##00001 +RAS Unit has signaled completion to stop +#68,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C1093 +A store missed the dcache +#69,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C1091 +A store executed on unit 0 +#70,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C1095 +A store executed on unit 1 +#71,v,g,n,n,PM_SUSPENDED,Suspended +##00008 +Suspended + +$$$$$$$$ + +{ counter 4 } +#0,v,g,n,n,PM_0INST_FETCH,No instructions fetched +##2208D +No instructions were fetched this cycles (due to IFU hold, redirect, or icache miss) +#1,v,g,n,n,PM_BR_ISSUED,Branches issued +##23098 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#2,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##23099 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. 
This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#3,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##2309A +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. +#4,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##11091,61091 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). +#5,v,g,n,n,PM_CYC,Processor cycles +##0000F +Processor cycles +#6,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C1097 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#7,u,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of streams +##8309A +out of streams +#8,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##8309F +A new Prefetch Stream was allocated +#9,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##1309B,6309B +The number of Cycles MSR(EE) bit was off. +#10,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##1309F,6309F +Cycles MSR(EE) bit off and external interrupt pending +#11,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##11096,61096 +Flush caused by branch mispredict +#12,v,g,n,n,PM_FLUSH_LSU_BR_MPRED,Flush caused by LSU or branch mispredict +##11097,61097 +Flush caused by LSU or branch mispredict +#13,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##01092 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. 
+#14,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##01093 +fp0 finished, produced a result This only indicates finish, not completion. +#15,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##01090 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#16,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##03098 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#17,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##01091 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#18,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##01096 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#19,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##01097 +fp1 finished, produced a result. This only indicates finish, not completion. +#20,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##01094 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#21,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##01095 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU_FIN,FPU produced a result +##01080 +FPU finished, produced a result This only indicates finish, not completion. 
Combined Unit 0 + Unit 1 +#23,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##11090,61090 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped +#24,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##11094,61094 +The issue queue for FXU/LSU unit 1 cannot accept any more instructions. Issue is stopped +#25,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##1309A,6309A +The Fixed Point unit 0 finished an instruction and produced a result +#26,u,g,n,n,PM_FXU1_BUSY_FXU0_IDLE,FXU1 busy FXU0 idle +##00002 +FXU0 was idle while FXU1 was busy +#27,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##1309E,6309E +The Fixed Point unit 1 finished an instruction and produced a result +#28,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##1309D,6309D +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#29,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##13099,63099 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. +#30,c,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of Eligible Instructions that completed. +#31,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C309C +The data source information is valid +#32,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##83099 +A request to prefetch data into the L1 was made +#33,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##2309B +This signal is asserted each cycle a cache write is active.
+#34,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##8309B +A request to prefetch data into L2 was made +#35,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C1092 +A load, executing on unit 0, missed the dcache +#36,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C1096 +A load, executing on unit 1, missed the dcache +#37,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C1090 +A load executed on unit 0 +#38,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##C1094 +A load executed on unit 1 +#39,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##83098 +A floating point load was executed from LSU unit 0 +#40,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##8309C +A floating point load was executed from LSU unit 1 +#41,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##11095,61095 +Flush initiated by LSU +#42,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C309F +The LMQ was full +#43,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C709D +A dcache miss occured for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#44,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C309E +The first entry in the LMQ was allocated. +#45,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C309D +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#46,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##11092,61092 +The ISU sends this signal when the LRQ is full. +#47,u,g,n,n,PM_LSU_SRQ_EMPTY_CYC,Cycles SRQ empty +##00003 +The Store Request Queue is empty +#48,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##11093,61093 +The ISU sends this signal when the srq is full. +#49,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##8309D +This signal is asserted every cycle when a sync is in the SRQ. 
+#50,v,g,n,n,PM_MRK_CRU_FIN,Marked instruction CRU processing finished +##00005 +The Condition Register Unit finished a marked instruction. Instructions that finish may not necessarily complete +#51,v,g,n,n,PM_MRK_GRP_CMPL,Marked group completed +##00004 +A group containing a sampled instruction completed. Microcoded instructions that span multiple groups will generate this event once per group. +#52,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C709C +The source information is valid and is for a marked load +#53,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##81092 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#54,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##81093 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#55,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##81090 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#56,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##81091 +A marked store was flushed from unit 0 because it was unaligned +#57,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##81096 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#58,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##81097 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group.
+#59,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##81094 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#60,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##81095 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#61,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C709E +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#62,v,g,n,n,PM_PMC3_OVERFLOW,PMC3 Overflow +##0000A +PMC3 Overflow +#63,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C1093 +A store missed the dcache +#64,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C1091 +A store executed on unit 0 +#65,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C1095 +A store executed on unit 1 +#66,v,g,n,n,PM_SUSPENDED,Suspended +##00008 +Suspended + +$$$$$$$$ + +{ counter 5 } +#0,v,g,n,n,PM_1PLUS_PPC_CMPL,One or more PPC instruction completed +##00003 +A group containing at least one PPC instruction completed. For microcoded instructions that span multiple groups, this will only occur once. +#1,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full +##10095,60095 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). +#2,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##10094,60094 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. 
+#3,v,g,n,n,PM_CYC,Processor cycles +##0000F +Processor cycles +#4,v,g,n,n,PM_DATA_FROM_L25_SHR,Data loaded from L2.5 shared +##C3087 +DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a demand load +#5,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##80097 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#6,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##80095 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve +#7,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##80094 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#8,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##10091,60091 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#9,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction +##00093 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#10,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##02098 +This signal is active for one cycle when one of the operands is denormalized. +#11,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##00090 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. 
+#12,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##00091 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#13,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##00092 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#14,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##10093,60093 +The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped +#15,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##0209B +This signal is active for one cycle when fp0 is executing single precision instruction. +#16,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##02099 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#17,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##0209A +This signal is active for one cycle when fp0 is executing a store instruction. +#18,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction +##00097 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#19,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##0209C +This signal is active for one cycle when one of the operands is denormalized. +#20,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##00094 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. 
+#21,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##00095 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##00096 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#23,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##10097,60097 +The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped +#24,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##0209F +This signal is active for one cycle when fp1 is executing single precision instruction. +#25,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##0209D +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#26,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##0209E +This signal is active for one cycle when fp1 is executing a store instruction. +#27,v,g,n,n,PM_FPU_ALL,FPU executed add, mult, sub, cmp or sel instruction +##00080 +This signal is active for one cycle when FPU is executing an add, mult, sub, compare, or fsel kind of instruction. This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo. Combined Unit 0 + Unit 1 +#28,v,g,n,n,PM_FPU_SINGLE,FPU executed single precision instruction +##02080 +FPU is executing single precision instruction. Combined Unit 0 + Unit 1 +#29,u,g,n,n,PM_FXU_IDLE,FXU idle +##00002 +FXU0 and FXU1 are both idle +#30,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##10090,60090 +The ISU sends a signal indicating the gct is full. 
+#31,v,g,n,n,PM_GRP_BR_MPRED,Group experienced a branch mispredict +##1209F,6209F +Group experienced a branch mispredict +#32,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##1209E,6209E +Group experienced branch redirect +#33,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##1209C,6209C +A group that previously attempted dispatch was rejected. +#34,v,g,n,n,PM_GRP_DISP_SUCCESS,Group dispatch success +##00001 +Number of groups successfully dispatched (not rejected) +#35,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##1209B,6209B +Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. +#36,v,g,n,n,PM_GRP_MRK,Group marked in IDU +##00004 +A group was sampled (marked) +#37,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch +##2209E +New line coming into the prefetch buffer +#38,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##2209D +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#39,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##2209F +This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available). +#40,c,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of Eligible Instructions that completed. +#41,v,g,n,n,PM_INST_DISP,Instructions dispatched +##12098,12099,1209A,62098,62099,6209A +The ISU sends the number of instructions dispatched.
+#42,v,g,n,n,PM_INST_FROM_L25_SHR,Instruction fetched from L2.5 shared +##22086 +Instruction fetched from L2.5 shared +#43,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##80091 +A SLB miss for an instruction fetch has occurred +#44,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##80090 +A TLB miss for an Instruction Fetch has occurred +#45,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##8209F +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#46,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##10096,60096 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#47,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##80092 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#48,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C0092 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#49,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##C0093 +A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group.
+#50,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C0090 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#51,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C0091 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#52,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C609B +LSU0 reject due to ERAT miss +#53,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C6099 +LSU0 reject due to LMQ full or missed data coming +#54,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C609A +LSU0 reject due to reload CDF or tag update collision +#55,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ rejects +##C6098 +LSU0 SRQ rejects +#56,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C2098 +Data from a store instruction was forwarded to a load on unit 0 +#57,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##80096 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#58,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C0096 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#59,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##C0097 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#60,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C0094 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#61,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C0095 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#62,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C609F +LSU1 reject due to ERAT miss +#63,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C609D +LSU1 reject due to LMQ full or missed data coming +#64,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C609E +LSU1 reject due to reload CDF or tag update collision +#65,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ rejects +##C609C +LSU1 SRQ rejects +#66,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C209C +Data from a store instruction was forwarded to a load on unit 1 +#67,u,g,n,n,PM_LSU_FLUSH_SRQ,SRQ flushes +##C0080 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#68,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C209E +LRQ slot zero was allocated +#69,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C209A +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#70,v,g,n,n,PM_LSU_REJECT_ERAT_MISS,LSU reject due to ERAT miss +##C6080 +LSU reject due to ERAT miss +#71,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C209D +SRQ Slot zero was allocated +#72,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C2099 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. 
+#73,v,g,n,n,PM_MRK_DATA_FROM_L25_SHR,Marked data loaded from L2.5 shared +##C7087 +DL1 was reloaded with shared (T or SL) data from the L2 of a chip on this MCM due to a marked demand load +#74,v,g,n,n,PM_MRK_GRP_TIMEO,Marked group completion timeout +##00005 +The sampling timeout expired indicating that the previously sampled instruction is no longer in the processor +#75,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##8209A +A DL1 reload occurred due to marked load +#76,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##82098 +A marked load, executing on unit 0, missed the dcache +#77,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##8209C +A marked load, executing on unit 1, missed the dcache +#78,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##8209E +A marked stcx (stwcx or stdcx) failed +#79,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##8209B +A marked store missed the dcache +#80,v,g,n,n,PM_PMC4_OVERFLOW,PMC4 Overflow +##0000A +PMC4 Overflow +#81,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE +##80093 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#82,v,g,n,n,PM_STCX_FAIL,STCX failed +##82099 +A stcx (stwcx or stdcx) failed +#83,v,g,n,n,PM_STCX_PASS,Stcx passes +##8209D +A stcx (stwcx or stdcx) instruction was successful +#84,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C209B +A store missed the dcache +#85,v,g,n,n,PM_SUSPENDED,Suspended +##00008 +Suspended +#86,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##10092,60092 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be.
+ +$$$$$$$$ + +{ counter 6 } +#0,u,g,n,n,PM_BRQ_FULL_CYC,Cycles branch queue full +##10095,60095 +The ISU sends a signal indicating that the issue queue that feeds the ifu br unit cannot accept any more group (queue is full of groups). +#1,v,g,n,n,PM_CR_MAP_FULL_CYC,Cycles CR logical operation mapper full +##10094,60094 +The ISU sends a signal indicating that the cr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#2,v,g,n,n,PM_CYC,Processor cycles +##0000F +Processor cycles +#3,v,g,n,n,PM_DATA_FROM_L25_MOD,Data loaded from L2.5 modified +##C3087 +DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a demand load +#4,v,g,n,n,PM_DATA_TABLEWALK_CYC,Cycles doing data tablewalks +##80097 +This signal is asserted every cycle when a tablewalk is active. While a tablewalk is active any request attempting to access the TLB will be rejected and retried. +#5,v,g,n,n,PM_DSLB_MISS,Data SLB misses +##80095 +A SLB miss for a data request occurred. SLB misses trap to the operating system to resolve +#6,v,g,n,n,PM_DTLB_MISS,Data TLB misses +##80094 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. +#7,v,g,n,n,PM_FPR_MAP_FULL_CYC,Cycles FPR mapper full +##10091,60091 +The ISU sends a signal indicating that the FPR mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#8,v,g,n,n,PM_FPU0_ALL,FPU0 executed add, mult, sub, cmp or sel instruction +##00093 +This signal is active for one cycle when fp0 is executing an add, mult, sub, compare, or fsel kind of instruction. 
This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#9,v,g,n,n,PM_FPU0_DENORM,FPU0 received denormalized data +##02098 +This signal is active for one cycle when one of the operands is denormalized. +#10,v,g,n,n,PM_FPU0_FDIV,FPU0 executed FDIV instruction +##00090 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#11,v,g,n,n,PM_FPU0_FMA,FPU0 executed multiply-add instruction +##00091 +This signal is active for one cycle when fp0 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#12,v,g,n,n,PM_FPU0_FSQRT,FPU0 executed FSQRT instruction +##00092 +This signal is active for one cycle at the end of the microcode executed when fp0 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#13,v,g,n,n,PM_FPU0_FULL_CYC,Cycles FPU0 issue queue full +##10093,60093 +The issue queue for FPU unit 0 cannot accept any more instructions. Issue is stopped +#14,v,g,n,n,PM_FPU0_SINGLE,FPU0 executed single precision instruction +##0209B +This signal is active for one cycle when fp0 is executing single precision instruction. +#15,v,g,n,n,PM_FPU0_STALL3,FPU0 stalled in pipe3 +##02099 +This signal indicates that fp0 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#16,v,g,n,n,PM_FPU0_STF,FPU0 executed store instruction +##0209A +This signal is active for one cycle when fp0 is executing a store instruction. +#17,v,g,n,n,PM_FPU1_ALL,FPU1 executed add, mult, sub, cmp or sel instruction +##00097 +This signal is active for one cycle when fp1 is executing an add, mult, sub, compare, or fsel kind of instruction. 
This could be fadd*, fmul*, fsub*, fcmp**, fsel where XYZ* means XYZ, XYZs, XYZ., XYZs. and XYZ** means XYZu, XYZo +#18,v,g,n,n,PM_FPU1_DENORM,FPU1 received denormalized data +##0209C +This signal is active for one cycle when one of the operands is denormalized. +#19,v,g,n,n,PM_FPU1_FDIV,FPU1 executed FDIV instruction +##00094 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a divide instruction. This could be fdiv, fdivs, fdiv. fdivs. +#20,v,g,n,n,PM_FPU1_FMA,FPU1 executed multiply-add instruction +##00095 +This signal is active for one cycle when fp1 is executing multiply-add kind of instruction. This could be fmadd*, fnmadd*, fmsub*, fnmsub* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#21,v,g,n,n,PM_FPU1_FSQRT,FPU1 executed FSQRT instruction +##00096 +This signal is active for one cycle at the end of the microcode executed when fp1 is executing a square root instruction. This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU1_FULL_CYC,Cycles FPU1 issue queue full +##10097,60097 +The issue queue for FPU unit 1 cannot accept any more instructions. Issue is stopped +#23,v,g,n,n,PM_FPU1_SINGLE,FPU1 executed single precision instruction +##0209F +This signal is active for one cycle when fp1 is executing single precision instruction. +#24,v,g,n,n,PM_FPU1_STALL3,FPU1 stalled in pipe3 +##0209D +This signal indicates that fp1 has generated a stall in pipe3 due to overflow, underflow, massive cancel, convert to integer (sometimes), or convert from integer (always). This signal is active during the entire duration of the stall. +#25,v,g,n,n,PM_FPU1_STF,FPU1 executed store instruction +##0209E +This signal is active for one cycle when fp1 is executing a store instruction. +#26,v,g,n,n,PM_FPU_FSQRT,FPU executed FSQRT instruction +##00080 +This signal is active for one cycle at the end of the microcode executed when FPU is executing a square root instruction. 
This could be fsqrt* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 +#27,v,g,n,n,PM_FPU_STF,FPU executed store instruction +##02080 +FPU is executing a store instruction. Combined Unit 0 + Unit 1 +#28,u,g,n,n,PM_FXU_BUSY,FXU busy +##00002 +FXU0 and FXU1 are both busy +#29,v,g,n,n,PM_GCT_FULL_CYC,Cycles GCT full +##10090,60090 +The ISU sends a signal indicating the gct is full. +#30,v,g,n,n,PM_GRP_BR_MPRED,Group experienced a branch mispredict +##1209F,6209F +Group experienced a branch mispredict +#31,v,g,n,n,PM_GRP_BR_REDIR,Group experienced branch redirect +##1209E,6209E +Group experienced branch redirect +#32,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##1209C,6209C +A group that previously attempted dispatch was rejected. +#33,v,g,n,n,PM_GRP_DISP_VALID,Group dispatch valid +##1209B,6209B +Dispatch has been attempted for a valid group. Some groups may be rejected. The total number of successful dispatches is the number of dispatch valid minus dispatch reject. +#34,v,g,n,n,PM_IC_PREF_INSTALL,Instruction prefetched installed in prefetch +##2209E +New line coming into the prefetch buffer +#35,v,g,n,n,PM_IC_PREF_REQ,Instruction prefetch requests +##2209D +Asserted when a non-canceled prefetch is made to the cache interface unit (CIU). +#36,v,g,n,n,PM_IERAT_XLATE_WR,Translation written to ierat +##2209F +This signal will be asserted each time the I-ERAT is written. This indicates that an ERAT miss has been serviced. ERAT misses will initiate a sequence resulting in the ERAT being written. ERAT misses that are later ignored will not be counted unless the ERAT is written before the instruction stream is changed, This should be a fairly accurate count of ERAT missed (best available). +#37,c,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of Eligible Instructions that completed. +#38,v,g,n,n,PM_INST_DISP,Instructions dispatched +##12098,12099,1209A,62098,62099,6209A +The ISU sends the number of instructions dispatched. 
+#39,v,g,n,n,PM_INST_FROM_L25_MOD,Instruction fetched from L2.5 modified +##22086 +Instruction fetched from L2.5 modified +#40,u,g,n,n,PM_ISLB_MISS,Instruction SLB misses +##80091 +A SLB miss for an instruction fetch has occurred +#41,v,g,n,n,PM_ITLB_MISS,Instruction TLB misses +##80090 +A TLB miss for an Instruction Fetch has occurred +#42,v,g,n,n,PM_LARX_LSU0,Larx executed on LSU0 +##8209F +A larx (lwarx or ldarx) was executed on side 0 (there is no corresponding unit 1 event since larx instructions can only execute on unit 0) +#43,u,g,n,n,PM_LR_CTR_MAP_FULL_CYC,Cycles LR/CTR mapper full +##10096,60096 +The ISU sends a signal indicating that the lr/ctr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#44,v,g,n,n,PM_LSU0_DERAT_MISS,LSU0 DERAT misses +##80092 +A data request (load or store) from LSU Unit 0 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#45,v,g,n,n,PM_LSU0_FLUSH_LRQ,LSU0 LRQ flushes +##C0092 +A load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#46,u,g,n,n,PM_LSU0_FLUSH_SRQ,LSU0 SRQ flushes +##C0093 +A store was flushed because a younger load hits an older store that is already in the SRQ or in the same group.
+#47,v,g,n,n,PM_LSU0_FLUSH_ULD,LSU0 unaligned load flushes +##C0090 +A load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#48,v,g,n,n,PM_LSU0_FLUSH_UST,LSU0 unaligned store flushes +##C0091 +A store was flushed from unit 0 because it was unaligned (crossed a 4k boundary) +#49,v,g,n,n,PM_LSU0_REJECT_ERAT_MISS,LSU0 reject due to ERAT miss +##C609B +LSU0 reject due to ERAT miss +#50,v,g,n,n,PM_LSU0_REJECT_LMQ_FULL,LSU0 reject due to LMQ full or missed data coming +##C6099 +LSU0 reject due to LMQ full or missed data coming +#51,v,g,n,n,PM_LSU0_REJECT_RELOAD_CDF,LSU0 reject due to reload CDF or tag update collision +##C609A +LSU0 reject due to reload CDF or tag update collision +#52,v,g,n,n,PM_LSU0_REJECT_SRQ,LSU0 SRQ rejects +##C6098 +LSU0 SRQ rejects +#53,u,g,n,n,PM_LSU0_SRQ_STFWD,LSU0 SRQ store forwarded +##C2098 +Data from a store instruction was forwarded to a load on unit 0 +#54,v,g,n,n,PM_LSU1_DERAT_MISS,LSU1 DERAT misses +##80096 +A data request (load or store) from LSU Unit 1 missed the ERAT and resulted in an ERAT reload. Multiple instructions may miss the ERAT entry for the same 4K page, but only one reload will occur. +#55,v,g,n,n,PM_LSU1_FLUSH_LRQ,LSU1 LRQ flushes +##C0096 +A load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#56,u,g,n,n,PM_LSU1_FLUSH_SRQ,LSU1 SRQ flushes +##C0097 +A store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#57,v,g,n,n,PM_LSU1_FLUSH_ULD,LSU1 unaligned load flushes +##C0094 +A load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#58,u,g,n,n,PM_LSU1_FLUSH_UST,LSU1 unaligned store flushes +##C0095 +A store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#59,v,g,n,n,PM_LSU1_REJECT_ERAT_MISS,LSU1 reject due to ERAT miss +##C609F +LSU1 reject due to ERAT miss +#60,v,g,n,n,PM_LSU1_REJECT_LMQ_FULL,LSU1 reject due to LMQ full or missed data coming +##C609D +LSU1 reject due to LMQ full or missed data coming +#61,v,g,n,n,PM_LSU1_REJECT_RELOAD_CDF,LSU1 reject due to reload CDF or tag update collision +##C609E +LSU1 reject due to reload CDF or tag update collision +#62,v,g,n,n,PM_LSU1_REJECT_SRQ,LSU1 SRQ rejects +##C609C +LSU1 SRQ rejects +#63,u,g,n,n,PM_LSU1_SRQ_STFWD,LSU1 SRQ store forwarded +##C209C +Data from a store instruction was forwarded to a load on unit 1 +#64,v,g,n,n,PM_LSU_DERAT_MISS,DERAT misses +##80080 +Total D-ERAT Misses (Unit 0 + Unit 1). Requests that miss the Derat are rejected and retried until the request hits in the Erat. This may result in multiple erat misses for the same instruction. +#65,v,g,n,n,PM_LSU_FLUSH_LRQ,LRQ flushes +##C0080 +A load was flushed because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#66,v,g,n,n,PM_LSU_LRQ_S0_ALLOC,LRQ slot 0 allocated +##C209E +LRQ slot zero was allocated +#67,v,g,n,n,PM_LSU_LRQ_S0_VALID,LRQ slot 0 valid +##C209A +This signal is asserted every cycle that the Load Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. 
+#68,v,g,n,n,PM_LSU_REJECT_RELOAD_CDF,LSU reject due to reload CDF or tag update collision +##C6080 +LSU reject due to reload CDF or tag update collision +#69,v,g,n,n,PM_LSU_SRQ_S0_ALLOC,SRQ slot 0 allocated +##C209D +SRQ Slot zero was allocated +#70,v,g,n,n,PM_LSU_SRQ_S0_VALID,SRQ slot 0 valid +##C2099 +This signal is asserted every cycle that the Store Request Queue slot zero is valid. The SRQ is 32 entries long and is allocated round-robin. +#71,v,g,n,n,PM_MRK_DATA_FROM_L25_MOD,Marked data loaded from L2.5 modified +##C7087 +DL1 was reloaded with modified (M) data from the L2 of a chip on this MCM due to a marked demand load +#72,v,g,n,n,PM_MRK_FXU_FIN,Marked instruction FXU processing finished +##00004 +Marked instruction FXU processing finished +#73,v,g,n,n,PM_MRK_GRP_ISSUED,Marked group issued +##00005 +A sampled instruction was issued +#74,v,g,n,n,PM_MRK_IMR_RELOAD,Marked IMR reloaded +##8209A +A DL1 reload occured due to marked load +#75,v,g,n,n,PM_MRK_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##82098 +A marked load, executing on unit 0, missed the dcache +#76,v,g,n,n,PM_MRK_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##8209C +A marked load, executing on unit 1, missed the dcache +#77,v,g,n,n,PM_MRK_STCX_FAIL,Marked STCX failed +##8209E +A marked stcx (stwcx or stdcx) failed +#78,v,g,n,n,PM_MRK_ST_GPS,Marked store sent to GPS +##00003 +A sampled store has been sent to the memory subsystem +#79,v,g,n,n,PM_MRK_ST_MISS_L1,Marked L1 D cache store misses +##8209B +A marked store missed the dcache +#80,v,g,n,n,PM_PMC5_OVERFLOW,PMC5 Overflow +##0000A +PMC5 Overflow +#81,u,g,n,n,PM_SNOOP_TLBIE,Snoop TLBIE +##80093 +A TLB miss for a data request occurred. Requests that miss the TLB may be retried until the instruction is in the next to complete group (unless HID4 is set to allow speculative tablewalks). This may result in multiple TLB misses for the same instruction. 
+#82,v,g,n,n,PM_STCX_FAIL,STCX failed +##82099 +A stcx (stwcx or stdcx) failed +#83,v,g,n,n,PM_STCX_PASS,Stcx passes +##8209D +A stcx (stwcx or stdcx) instruction was successful +#84,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C209B +A store missed the dcache +#85,v,g,n,n,PM_SUSPENDED,Suspended +##00008 +Suspended +#86,v,g,n,n,PM_XER_MAP_FULL_CYC,Cycles XER mapper full +##10092,60092 +The ISU sends a signal indicating that the xer mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. + +$$$$$$$$ + +{ counter 7 } +#0,v,g,n,n,PM_BR_ISSUED,Branches issued +##23098 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#1,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##23099 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#2,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##2309A +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. +#3,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##11091,61091 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). 
+#4,v,g,n,n,PM_CYC,Processor cycles +##0000F +Processor cycles +#5,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C1097 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#6,u,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of streams +##8309A +out of streams +#7,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##8309F +A new Prefetch Stream was allocated +#8,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##1309B,6309B +The number of Cycles MSR(EE) bit was off. +#9,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##1309F,6309F +Cycles MSR(EE) bit off and external interrupt pending +#10,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##11096,61096 +Flush caused by branch mispredict +#11,v,g,n,n,PM_FLUSH_LSU_BR_MPRED,Flush caused by LSU or branch mispredict +##11097,61097 +Flush caused by LSU or branch mispredict +#12,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##01092 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#13,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##01093 +fp0 finished, produced a result This only indicates finish, not completion. +#14,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##01090 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#15,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##03098 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. 
mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#16,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##01091 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#17,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##01096 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#18,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##01097 +fp1 finished, produced a result. This only indicates finish, not completion. +#19,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##01094 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#20,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##01095 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#21,v,g,n,n,PM_FPU_FRSP_FCONV,FPU executed FRSP or FCONV instructions +##01080 +This signal is active for one cycle when executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. Combined Unit 0 + Unit 1 +#22,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##11090,61090 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped +#23,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##11094,61094 +The issue queue for FXU/LSU unit 1 cannot accept any more instructions.
Issue is stopped +#24,u,g,n,n,PM_FXU0_BUSY_FXU1_IDLE,FXU0 busy FXU1 idle +##00002 +FXU0 is busy while FXU1 was idle +#25,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##1309A,6309A +The Fixed Point unit 0 finished an instruction and produced a result +#26,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##1309E,6309E +The Fixed Point unit 1 finished an instruction and produced a result +#27,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##1309D,6309D +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#28,v,g,n,n,PM_GRP_CMPL,Group completed +##00003 +A group completed. Microcoded instructions that span multiple groups will generate this event once per group. +#29,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##13099,63099 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. +#30,c,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of Eligible Instructions that completed. +#31,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C309C +The data source information is valid +#32,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##83099 +A request to prefetch data into the L1 was made +#33,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##2309B +This signal is asserted each cycle a cache write is active. 
+#34,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##8309B +A request to prefetch data into L2 was made +#35,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C1092 +A load, executing on unit 0, missed the dcache +#36,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C1096 +A load, executing on unit 1, missed the dcache +#37,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C1090 +A load executed on unit 0 +#38,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##C1094 +A load executed on unit 1 +#39,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##83098 +A floating point load was executed from LSU unit 0 +#40,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##8309C +A floating point load was executed from LSU unit 1 +#41,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##11095,61095 +Flush initiated by LSU +#42,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C309F +The LMQ was full +#43,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C709D +A dcache miss occured for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#44,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C309E +The first entry in the LMQ was allocated. +#45,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C309D +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#46,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##11092,61092 +The ISU sends this signal when the LRQ is full. +#47,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##11093,61093 +The ISU sends this signal when the srq is full. +#48,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##8309D +This signal is asserted every cycle when a sync is in the SRQ. +#49,v,g,n,n,PM_MRK_FPU_FIN,Marked instruction FPU processing finished +##00004 +One of the Floating Point Units finished a marked instruction. 
Instructions that finish may not necessary complete +#50,v,g,n,n,PM_MRK_INST_FIN,Marked instruction finished +##00005 +One of the execution units finished a marked instruction. Instructions that finish may not necessary complete +#51,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C709C +The source information is valid and is for a marked load +#52,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##81092 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#53,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##81093 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#54,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##81090 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#55,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##81091 +A marked store was flushed from unit 0 because it was unaligned +#56,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##81096 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#57,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##81097 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#58,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##81094 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#59,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##81095 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#60,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C709E +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#61,v,g,n,n,PM_PMC6_OVERFLOW,PMC6 Overflow +##0000A +PMC6 Overflow +#62,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C1093 +A store missed the dcache +#63,v,g,n,n,PM_ST_REF_L1,L1 D cache store references +##C1080 +Total DL1 Store references +#64,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C1091 +A store executed on unit 0 +#65,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C1095 +A store executed on unit 1 +#66,v,g,n,n,PM_SUSPENDED,Suspended +##00008 +Suspended + +$$$$$$$$ + +{ counter 8 } +#0,v,g,n,n,PM_BR_ISSUED,Branches issued +##23098 +This signal will be asserted each time the ISU issues a branch instruction. This signal will be asserted each time the ISU selects a branch instruction to issue. +#1,v,g,n,n,PM_BR_MPRED_CR,Branch mispredictions due to CR bit setting +##23099 +This signal is asserted when the branch execution unit detects a branch mispredict because the CR value is opposite of the predicted value. This signal is asserted after a branch issue event and will result in a branch redirect flush if not overridden by a flush of an older instruction. +#2,v,g,n,n,PM_BR_MPRED_TA,Branch mispredictions due to target address +##2309A +branch miss predict due to a target address prediction. This signal will be asserted each time the branch execution unit detects an incorrect target address prediction. 
This signal will be asserted after a valid branch execution unit issue and will cause a branch mispredict flush unless a flush is detected from an older instruction. +#3,u,g,n,n,PM_CRQ_FULL_CYC,Cycles CR issue queue full +##11091,61091 +The ISU sends a signal indicating that the issue queue that feeds the ifu cr unit cannot accept any more group (queue is full of groups). +#4,v,g,n,n,PM_CYC,Processor cycles +##0000F +Processor cycles +#5,u,g,n,n,PM_DC_INV_L2,L1 D cache entries invalidated from L2 +##C1097 +A dcache invalidated was received from the L2 because a line in L2 was castout. +#6,u,g,n,n,PM_DC_PREF_OUT_OF_STREAMS,D cache out of streams +##8309A +out of streams +#7,v,g,n,n,PM_DC_PREF_STREAM_ALLOC,D cache new prefetch stream allocated +##8309F +A new Prefetch Stream was allocated +#8,v,g,n,n,PM_EE_OFF,Cycles MSR(EE) bit off +##1309B,6309B +The number of Cycles MSR(EE) bit was off. +#9,u,g,n,n,PM_EE_OFF_EXT_INT,Cycles MSR(EE) bit off and external interrupt pending +##1309F,6309F +Cycles MSR(EE) bit off and external interrupt pending +#10,v,g,n,n,PM_EXT_INT,External interrupts +##00002 +An external interrupt occurred +#11,v,g,n,n,PM_FLUSH_BR_MPRED,Flush caused by branch mispredict +##11096,61096 +Flush caused by branch mispredict +#12,v,g,n,n,PM_FLUSH_LSU_BR_MPRED,Flush caused by LSU or branch mispredict +##11097,61097 +Flush caused by LSU or branch mispredict +#13,v,g,n,n,PM_FPU0_FEST,FPU0 executed FEST instruction +##01092 +This signal is active for one cycle when fp0 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#14,v,g,n,n,PM_FPU0_FIN,FPU0 produced a result +##01093 +fp0 finished, produced a result This only indicates finish, not completion. +#15,v,g,n,n,PM_FPU0_FMOV_FEST,FPU0 executed FMOV or FEST instructions +##01090 +This signal is active for one cycle when fp0 is executing a move kind of instruction or one of the estimate instructions.. 
This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#16,v,g,n,n,PM_FPU0_FPSCR,FPU0 executed FPSCR instruction +##03098 +This signal is active for one cycle when fp0 is executing fpscr move related instruction. This could be mtfsfi*, mtfsb0*, mtfsb1*. mffs*, mtfsf*, mcrsf* where XYZ* means XYZ, XYZs, XYZ., XYZs +#17,v,g,n,n,PM_FPU0_FRSP_FCONV,FPU0 executed FRSP or FCONV instructions +##01091 +This signal is active for one cycle when fp0 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#18,v,g,n,n,PM_FPU1_FEST,FPU1 executed FEST instruction +##01096 +This signal is active for one cycle when fp1 is executing one of the estimate instructions. This could be fres* or frsqrte* where XYZ* means XYZ or XYZ. +#19,v,g,n,n,PM_FPU1_FIN,FPU1 produced a result +##01097 +fp1 finished, produced a result. This only indicates finish, not completion. +#20,v,g,n,n,PM_FPU1_FMOV_FEST,FPU1 executing FMOV or FEST instructions +##01094 +This signal is active for one cycle when fp1 is executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ +#21,v,g,n,n,PM_FPU1_FRSP_FCONV,FPU1 executed FRSP or FCONV instructions +##01095 +This signal is active for one cycle when fp1 is executing frsp or convert kind of instruction. This could be frsp*, fcfid*, fcti* where XYZ* means XYZ, XYZs, XYZ., XYZs. +#22,v,g,n,n,PM_FPU_FMOV_FEST,FPU executing FMOV or FEST instructions +##01080 +This signal is active for one cycle when executing a move kind of instruction or one of the estimate instructions.. This could be fmr*, fneg*, fabs*, fnabs* , fres* or frsqrte* where XYZ* means XYZ or XYZ . Combined Unit 0 + Unit 1 +#23,v,g,n,n,PM_FXLS0_FULL_CYC,Cycles FXU0/LS0 queue full +##11090,61090 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. 
Issue is stopped +#24,v,g,n,n,PM_FXLS1_FULL_CYC,Cycles FXU1/LS1 queue full +##11094,61094 +The issue queue for FXU/LSU unit 0 cannot accept any more instructions. Issue is stopped +#25,v,g,n,n,PM_FXU0_FIN,FXU0 produced a result +##1309A,6309A +The Fixed Point unit 0 finished an instruction and produced a result +#26,v,g,n,n,PM_FXU1_FIN,FXU1 produced a result +##1309E,6309E +The Fixed Point unit 1 finished an instruction and produced a result +#27,v,g,n,n,PM_GPR_MAP_FULL_CYC,Cycles GPR mapper full +##1309D,6309D +The ISU sends a signal indicating that the gpr mapper cannot accept any more groups. Dispatch is stopped. Note: this condition indicates that a pool of mapper is full but the entire mapper may not be. +#28,v,g,n,n,PM_GRP_DISP_BLK_SB_CYC,Cycles group dispatch blocked by scoreboard +##13099,63099 +The ISU sends a signal indicating that dispatch is blocked by scoreboard. +#29,v,g,n,n,PM_GRP_DISP_REJECT,Group dispatch rejected +##00003 +A group that previously attempted dispatch was rejected. +#30,c,g,n,n,PM_INST_CMPL,Instructions completed +##00009 +Number of Eligible Instructions that completed. +#31,v,g,n,n,PM_L1_DCACHE_RELOAD_VALID,L1 reload data source valid +##C309C +The data source information is valid +#32,v,g,n,n,PM_L1_PREF,L1 cache data prefetches +##83099 +A request to prefetch data into the L1 was made +#33,v,g,n,n,PM_L1_WRITE_CYC,Cycles writing to instruction L1 +##2309B +This signal is asserted each cycle a cache write is active. 
+#34,v,g,n,n,PM_L2_PREF,L2 cache prefetches +##8309B +A request to prefetch data into L2 was made +#35,v,g,n,n,PM_LD_MISS_L1_LSU0,LSU0 L1 D cache load misses +##C1092 +A load, executing on unit 0, missed the dcache +#36,v,g,n,n,PM_LD_MISS_L1_LSU1,LSU1 L1 D cache load misses +##C1096 +A load, executing on unit 1, missed the dcache +#37,v,g,n,n,PM_LD_REF_L1,L1 D cache load references +##C1080 +Total DL1 Load references +#38,v,g,n,n,PM_LD_REF_L1_LSU0,LSU0 L1 D cache load references +##C1090 +A load executed on unit 0 +#39,v,g,n,n,PM_LD_REF_L1_LSU1,LSU1 L1 D cache load references +##C1094 +A load executed on unit 1 +#40,v,g,n,n,PM_LSU0_LDF,LSU0 executed Floating Point load instruction +##83098 +A floating point load was executed from LSU unit 0 +#41,v,g,n,n,PM_LSU1_LDF,LSU1 executed Floating Point load instruction +##8309C +A floating point load was executed from LSU unit 1 +#42,v,g,n,n,PM_LSU_FLUSH,Flush initiated by LSU +##11095,61095 +Flush initiated by LSU +#43,v,g,n,n,PM_LSU_LDF,LSU executed Floating Point load instruction +##83080 +LSU executed Floating Point load instruction +#44,u,g,n,n,PM_LSU_LMQ_FULL_CYC,Cycles LMQ full +##C309F +The LMQ was full +#45,v,g,n,n,PM_LSU_LMQ_LHR_MERGE,LMQ LHR merges +##C709D +A dcache miss occured for the same real cache line address as an earlier request already in the Load Miss Queue and was merged into the LMQ entry. +#46,v,g,n,n,PM_LSU_LMQ_S0_ALLOC,LMQ slot 0 allocated +##C309E +The first entry in the LMQ was allocated. +#47,v,g,n,n,PM_LSU_LMQ_S0_VALID,LMQ slot 0 valid +##C309D +This signal is asserted every cycle when the first entry in the LMQ is valid. The LMQ had eight entries that are allocated FIFO +#48,v,g,n,n,PM_LSU_LRQ_FULL_CYC,Cycles LRQ full +##11092,61092 +The ISU sends this signal when the LRQ is full. +#49,v,g,n,n,PM_LSU_SRQ_FULL_CYC,Cycles SRQ full +##11093,61093 +The ISU sends this signal when the srq is full. 
+#50,u,g,n,n,PM_LSU_SRQ_SYNC_CYC,SRQ sync duration +##8309D +This signal is asserted every cycle when a sync is in the SRQ. +#51,v,g,n,n,PM_MRK_L1_RELOAD_VALID,Marked L1 reload data source valid +##C709C +The source information is valid and is for a marked load +#52,v,g,n,n,PM_MRK_LSU0_FLUSH_LRQ,LSU0 marked LRQ flushes +##81092 +A marked load was flushed by unit 0 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#53,u,g,n,n,PM_MRK_LSU0_FLUSH_SRQ,LSU0 marked SRQ flushes +##81093 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. +#54,v,g,n,n,PM_MRK_LSU0_FLUSH_ULD,LSU0 marked unaligned load flushes +##81090 +A marked load was flushed from unit 0 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#55,v,g,n,n,PM_MRK_LSU0_FLUSH_UST,LSU0 marked unaligned store flushes +##81091 +A marked store was flushed from unit 0 because it was unaligned +#56,v,g,n,n,PM_MRK_LSU1_FLUSH_LRQ,LSU1 marked LRQ flushes +##81096 +A marked load was flushed by unit 1 because a younger load executed before an older store executed and they had overlapping data OR two loads executed out of order and they have byte overlap and there was a snoop in between to an overlapped byte. +#57,u,g,n,n,PM_MRK_LSU1_FLUSH_SRQ,LSU1 marked SRQ flushes +##81097 +A marked store was flushed because younger load hits and older store that is already in the SRQ or in the same group. 
+#58,v,g,n,n,PM_MRK_LSU1_FLUSH_ULD,LSU1 marked unaligned load flushes +##81094 +A marked load was flushed from unit 1 because it was unaligned (crossed a 64byte boundary, or 32 byte if it missed the L1) +#59,u,g,n,n,PM_MRK_LSU1_FLUSH_UST,LSU1 marked unaligned store flushes +##81095 +A marked store was flushed from unit 1 because it was unaligned (crossed a 4k boundary) +#60,c,g,n,n,PM_MRK_LSU_FIN,Marked instruction LSU processing finished +##00004 +One of the Load/Store Units finished a marked instruction. Instructions that finish may not necessary complete +#61,u,g,n,n,PM_MRK_LSU_SRQ_INST_VALID,Marked instruction valid in SRQ +##C709E +This signal is asserted every cycle when a marked request is resident in the Store Request Queue +#62,v,g,n,n,PM_PMC7_OVERFLOW,PMC7 Overflow +##0000A +PMC7 Overflow +#63,v,g,n,n,PM_ST_MISS_L1,L1 D cache store misses +##C1093 +A store missed the dcache +#64,v,g,n,n,PM_ST_REF_L1_LSU0,LSU0 L1 D cache store references +##C1091 +A store executed on unit 0 +#65,v,g,n,n,PM_ST_REF_L1_LSU1,LSU1 L1 D cache store references +##C1095 +A store executed on unit 1 +#66,v,g,n,n,PM_SUSPENDED,Suspended +##00008 +Suspended +#67,u,g,n,n,PM_TB_BIT_TRANS,Time Base bit transition +##00005 +When the selected time base bit (as specified in MMCR0[TBSEL])transitions from 0 to 1 diff --git a/src/event_data/ppc970/groups b/src/event_data/ppc970/groups new file mode 100644 index 0000000..c285c6f --- /dev/null +++ b/src/event_data/ppc970/groups @@ -0,0 +1,227 @@ +{ **************************** +{ THIS IS OPEN SOURCE CODE +{ **************************** +{ (C) COPYRIGHT International Business Machines Corp. 2005 +{ This file is licensed under the University of Tennessee license. +{ See LICENSE.txt. 
+{ +{ File: events/ppc970/groups +{ Author: Maynard Johnson +{ maynardj@us.ibm.com +{ Mods: +{ + +{ Number of groups + 42 + +{ Group descriptions + +#0,82,2,67,30,0,2,28,29,pm_slice0,Time Slice 0 +##00005,0000F,00001,00009,00003,0000F,00003,00003 +0000051E,00000000,0A46F18C,00002000 +Time Slice 0 + +#1,2,2,37,6,41,37,63,37,pm_eprof,Group for use with eprof +##0000F,0000F,C1080,C1097,12098,00009,C1080,C1080 +00000F1E,40030010,05F09000,00002000 +Group for use with eprof + +#2,37,2,37,6,41,37,63,37,pm_basic,Basic performance indicators +##00009,0000F,C1080,C1097,12098,00009,C1080,C1080 +0000091E,40030010,05F09000,00002000 +Basic performance indicators + +#3,65,64,4,30,67,65,63,37,pm_lsu,Information on the Load Store Unit +##C0080,C0080,0000F,00009,C0080,C0080,C1080,C1080 +00000000,000F0000,7A400000,00002000 +Information on the Load Store Unit + +#4,27,25,22,22,3,26,30,22,pm_fpu1,Floating Point events +##00080,00080,01080,01080,0000F,00080,00009,01080 +00000000,00000000,001E0480,00002000 +Floating Point events + +#5,26,26,4,30,27,27,21,43,pm_fpu2,Floating Point events +##02080,02080,0000F,00009,00080,02080,01080,83080 +00000000,000020E8,7A400000,00002000 +Floating Point events + +#6,88,1,3,29,46,38,30,4,pm_isu_rename,ISU Rename Pool Events +##10092,10094,11091,13099,10096,12098,00009,0000F +00001228,40000021,8E6D84BC,00002000 +ISU Rename Pool Events + +#7,13,21,23,24,3,37,46,49,pm_isu_queues1,ISU Rename Pool Events +##10093,10097,11090,11094,0000F,00009,11092,11093 +0000132E,40000000,851E994C,00002000 +ISU Rename Pool Events + +#8,38,2,25,27,35,32,30,4,pm_isu_flow,ISU Instruction Flow Events +##12098,0000F,1309A,1309E,1209B,1209C,00009,0000F +0000181E,400000B3,D7B7C4BC,00002000 +ISU Instruction Flow Events + +#9,28,84,67,10,3,37,8,10,pm_isu_work,ISU Indicators of Work Blockage +##00004,00001,00001,1309F,0000F,00009,1309B,00002 +00000402,40000005,0FDE9D88,00002000 +ISU Indicators of Work Blockage + +#10,10,18,17,21,12,20,30,4,pm_fpu3,Floating Point events by unit 
+##00090,00094,01091,01095,00091,00095,00009,0000F +00001028,00000000,8D6354BC,00002000 +Floating Point events by unit + +#11,12,20,14,19,9,17,30,4,pm_fpu4,Floating Point events by unit +##00092,00096,01093,01097,00093,00097,00009,0000F +0000122C,00000000,9DE774BC,00002000 +Floating Point events by unit + +#12,9,17,15,20,3,37,12,18,pm_fpu5,Floating Point events by unit +##02098,0209C,01090,01094,0000F,00009,01092,01096 +00001838,000000C0,851E9958,00002000 +Floating Point events by unit + +#13,15,23,14,19,3,37,4,16,pm_fpu7,Floating Point events by unit +##02099,0209D,01093,01097,0000F,00009,0000F,03098 +0000193A,000000C8,9DDE97E0,00002000 +Floating Point events by unit + +#14,46,55,4,5,49,56,30,4,pm_lsu_flush,LSU Flush Events +##C0092,C0096,0000F,0000F,C0093,C0097,00009,0000F +0000122C,000C0000,7BE774BC,00002000 +LSU Flush Events + +#15,48,57,40,38,3,37,35,36,pm_lsu_load1,LSU Load Events +##C0090,C0094,C1090,C1094,0000F,00009,C1092,C1096 +00001028,000F0000,851E9958,00002000 +LSU Load Events + +#16,49,58,69,65,3,37,62,5,pm_lsu_store1,LSU Store Events +##C0091,C0095,C1091,C1095,0000F,00009,C1093,C1097 +0000112A,000F0000,8D5E99DC,00002000 +LSU Store Events + +#17,54,63,69,65,84,2,30,4,pm_lsu_store2,LSU Store Events +##C2098,C209C,C1091,C1095,C209B,0000F,00009,0000F +00001838,0003C0D0,8D76F4BC,00002000 +LSU Store Events + +#18,45,54,4,5,40,2,31,4,pm_lsu7,Information on the Load Store Unit +##80092,80096,0000F,0000F,00009,0000F,C309C,0000F +0000122C,00083004,7BD2FE3C,00002000 +Information on the Load Store Unit + +#19,28,65,30,5,0,37,28,67,pm_misc,Misc Events for testing +##00004,00002,00004,0000F,00003,00009,00003,00005 +00000404,00000000,23C69194,00002000 +Misc Events for testing + +#20,27,25,27,22,3,26,30,22,pm_pe_bench1,PE Benchmarker group for FP analysis +##00080,00080,63080,01080,0000F,00080,00009,01080 +00000000,10001002,001E0480,00002000 +PE Benchmarker group for FP analysis + +#21,6,41,37,63,3,37,63,37,pm_pe_bench4,PE Benchmarker group for L1 and TLB 
+##80094,80090,C1080,C1093,0000F,00009,C1080,C1080 +00001420,000B0000,04DE9000,00002000 +PE Benchmarker group for L1 and TLB + +#22,6,65,37,63,3,37,63,37,pm_hpmcount1,Hpmcount group for L1 and TLB behavior +##80094,00002,C1080,C1093,0000F,00009,C1080,C1080 +00001404,000B0000,04DE9000,00002000 +Hpmcount group for L1 and TLB behavior + +#23,27,25,14,19,3,27,30,43,pm_hpmcount2,Hpmcount group for computation +##00080,00080,01093,01097,0000F,02080,00009,83080 +00000000,00002028,9DDE0480,00002000 +Hpmcount group for computation + +#24,37,2,37,1,84,2,1,2,pm_l1andbr,L1 misses and branch misspredict analysis +##00009,0000F,C1080,23098,C209B,0000F,23099,2309A +0000091E,8003C01D,0636FCE8,00002000 +L1 misses and branch misspredict analysis + +#25,37,2,37,1,3,84,63,37,pm_imix,Instruction mix: loads, stores and branches +##00009,0000F,C1080,23098,0000F,C209B,C1080,C1080 +0000091E,8003C021,061FB000,00002000 +Instruction mix: loads, stores and branches + +#26,82,4,0,2,43,2,30,2,pm_branch,SLB and branch misspredict analysis +##00005,80095,23098,23099,80091,0000F,00009,2309A +0000052A,8008000B,C662F4E8,00002000 +SLB and branch misspredict analysis + +#27,3,37,5,5,4,3,44,47,pm_data,data source and LMQ +##C3087,00009,C3087,0000F,C3087,C3087,C309E,C309D +00000712,0000300E,3BCE7F74,00002000 +data source and LMQ + +#28,6,41,31,5,68,67,32,34,pm_tlb,TLB and LRQ plus data prefetch +##80094,80090,00009,0000F,C209E,C209A,83099,8309B +00001420,0008E03C,4BFDACEC,00002000 +TLB and LRQ plus data prefetch + +#29,40,39,30,30,5,2,28,5,pm_isource,inst source and tablewalk +##22086,22086,00004,00009,80097,0000F,00003,C1097 +0000060C,800B00C0,226EF1DC,00002000 +inst source and tablewalk + +#30,69,70,37,49,40,37,4,37,pm_sync,Sync and SRQ +##C209D,C2099,C1080,8309D,00009,00009,0000F,C1080 +00001D32,0003E0C1,07529780,00002000 +Sync and SRQ + +#31,39,36,31,5,40,2,30,4,pm_ierat,IERAT +##2208D,2209F,00009,0000F,00009,0000F,00009,0000F +00000D3E,800000C0,4BD2F4BC,00002000 +IERAT + 
+#32,28,33,33,30,41,64,63,4,pm_derat,DERAT +##00004,6209B,C309C,00009,6209A,80080,C1080,0000F +00000436,100B7052,E274003C,00002000 +DERAT + +#33,75,83,4,51,36,73,50,30,pm_mark1,Information on marked instructions +##82080,00003,0000F,00004,00004,00005,00005,00009 +00000006,00008080,790852A4,00002001 +Information on marked instructions + +#34,73,71,4,50,36,72,49,60,pm_mark2,Marked Instructions Processing Flow +##00002,00005,0000F,00005,00004,00004,00004,00004 +0000020A,00000000,79484210,00002001 +Marked Instructions Processing Flow + +#35,79,2,64,51,74,78,60,30,pm_mark3,Marked Stores Processing Flow +##00003,0000F,00003,00004,00005,00003,C709E,00009 +0000031E,00203004,190A3F24,00002001 +Marked Stores Processing Flow + +#36,80,72,58,60,3,37,54,58,pm_lsu_mark1,Load Store Unit Marked Events +##8209B,8209A,81091,81095,0000F,00009,81090,81094 +00001B34,000280C0,8D5E9850,00002001 +Load Store Unit Marked Events + +#37,76,74,55,57,3,37,53,57,pm_lsu_mark2,Load Store Unit Marked Events +##82098,8209C,81092,81096,0000F,00009,81093,81097 +00001838,000280C0,959E99DC,00002001 +Load Store Unit Marked Events + +#38,37,37,27,26,29,28,24,4,pm_fxu1,Fixed Point events by unit +##00009,00009,63080,00002,00002,00002,00002,0000F +00000912,10001002,0084213C,00002000 +Fixed Point events by unit + +#39,37,2,24,23,29,28,25,26,pm_fxu2,Fixed Point events by unit +##00009,0000F,11094,11090,00002,00002,1309A,1309E +0000091E,4000000C,A4042D78,00002000 +Fixed Point events by unit + +#40,39,39,32,0,40,2,4,30,pm_ifu,Instruction Fetch Unit events +##2208D,22086,2208D,2208D,00009,0000F,0000F,00009 +00000D0C,800000C0,6B52F7A4,00002000 +Instruction Fetch Unit events + +#41,40,39,32,0,42,39,4,30,pm_L1_icm, Level 1 instruction cache misses +##22086,22086,2208D,2208D,22086,22086,0000F,00009 +0000060C,800000F0,6B4C67A4,00002000 +Level 1 instruction cache misses diff --git a/src/examples/Makefile b/src/examples/Makefile new file mode 100644 index 0000000..c1502ae --- /dev/null +++ b/src/examples/Makefile @@ 
-0,0 +1,47 @@ +PAPIINC = .. +PAPILIB = ../libpapi.a +CC = gcc +CFLAGS += -I$(PAPIINC) +OS = $(shell uname) + +TARGETS_NTHD = PAPI_set_domain sprofile multiplex PAPI_state PAPI_reset PAPI_profil PAPI_perror PAPI_get_virt_cyc PAPI_get_real_cyc PAPI_get_opt PAPI_hw_info PAPI_get_executable_info PAPI_ipc PAPI_flops PAPI_flips PAPI_overflow PAPI_add_remove_event high_level PAPI_add_remove_events + +TARGETS_PTHREAD = locks_pthreads overflow_pthreads + +ifeq ($(OS), SunOS) + LDFLAGS = $(PAPILIB) -lcpc + LDFLAGS_PTHREAD = $(PAPILIB) -lpthread -lcpc + TARGETS = $(TARGETS_NTHD) $(TARGETS_PTHREAD) +else + ifeq ($(OS), AIX) + CC = xlc + LDFLAGS = $(PAPILIB) -lpmapi + LDFLAGS_PTHREAD = $(PAPILIB) -lpthread -lpmapi + TARGETS = $(TARGETS_NTHD) $(TARGETS_PTHREAD) + else + ifeq ($(OS), OSF1) + LDFLAGS = $(PAPILIB) -lrt + LDFLAGS_PTHREAD = $(PAPILIB) -lpthread -lrt + TARGETS = $(TARGETS_NTHD) + else + ifeq ($(OS), Linux) + TARGETS = $(TARGETS_NTHD) $(TARGETS_PTHREAD) + else + TARGETS = $(TARGETS_NTHD) + endif + LDFLAGS = $(PAPILIB) + LDFLAGS_PTHREAD = $(PAPILIB) -lpthread + endif + endif +endif + +all: $(TARGETS) + +$(TARGETS_NTHD): %:%.o + $(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) + +$(TARGETS_PTHREAD): %:%.o + $(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS_PTHREAD) +clean: + $(RM) *.o $(TARGETS) + diff --git a/src/examples/Makefile.AIX b/src/examples/Makefile.AIX new file mode 100644 index 0000000..0499f35 --- /dev/null +++ b/src/examples/Makefile.AIX @@ -0,0 +1,21 @@ +PAPIINC = .. 
+PAPILIB = ../libpapi.a +CC = xlc +CFLAGS = -I$(PAPIINC) +LDFLAGS = $(PAPILIB) -lpmapi +LDFLAGS_PTHREAD = $(PAPILIB) -lpthread -lpmapi + +TARGETS = PAPI_set_domain sprofile multiplex PAPI_state PAPI_reset PAPI_profil PAPI_perror PAPI_get_virt_cyc PAPI_get_real_cyc PAPI_get_opt PAPI_hw_info PAPI_get_executable_info PAPI_ipc PAPI_flops PAPI_flips PAPI_overflow PAPI_add_remove_event high_level PAPI_add_remove_events + +TARGETS_PTHREAD = locks_pthreads overflow_pthreads + +all: $(TARGETS) $(TARGETS_PTHREAD) + +$(TARGETS): $$@.c + $(CC) $? -o $@ $(CFLAGS) $(LDFLAGS) + +$(TARGETS_PTHREAD): $$@.c + $(CC) $? -o $@ $(CFLAGS) $(LDFLAGS_PTHREAD) +clean: + rm -f *.o $(TARGETS) $(TARGETS_PTHREAD) + diff --git a/src/examples/Makefile.IRIX64 b/src/examples/Makefile.IRIX64 new file mode 100644 index 0000000..05cbdb7 --- /dev/null +++ b/src/examples/Makefile.IRIX64 @@ -0,0 +1,17 @@ +PAPIINC = .. +PAPILIB = ../libpapi.a +CC = gcc +CFLAGS = -I$(PAPIINC) +LDFLAGS = $(PAPILIB) + +TARGETS = PAPI_set_domain sprofile multiplex PAPI_state PAPI_reset PAPI_profil PAPI_perror PAPI_get_virt_cyc PAPI_get_real_cyc PAPI_get_opt PAPI_hw_info PAPI_get_executable_info PAPI_ipc PAPI_flops PAPI_flips PAPI_overflow PAPI_add_remove_event high_level PAPI_add_remove_events + + +all: $(TARGETS) + +$(TARGETS): $$@.c + $(CC) $? -o $@ $(CFLAGS) $(LDFLAGS) + +clean: + rm -f *.o $(TARGETS) + diff --git a/src/examples/Makefile.OSF1 b/src/examples/Makefile.OSF1 new file mode 100644 index 0000000..323a5c8 --- /dev/null +++ b/src/examples/Makefile.OSF1 @@ -0,0 +1,16 @@ +PAPIINC = .. +PAPILIB = ../libpapi.a +CC = gcc +CFLAGS = -I$(PAPIINC) +LDFLAGS = $(PAPILIB) -lrt + +TARGETS = PAPI_set_domain sprofile multiplex PAPI_state PAPI_reset PAPI_profil PAPI_perror PAPI_get_virt_cyc PAPI_get_real_cyc PAPI_get_opt PAPI_hw_info PAPI_get_executable_info PAPI_ipc PAPI_flops PAPI_flips PAPI_overflow PAPI_add_remove_event high_level PAPI_add_remove_events + +all: $(TARGETS) + +$(TARGETS): $$@.c + $(CC) $? 
-o $@ $(CFLAGS) $(LDFLAGS) + +clean: + rm -f *.o $(TARGETS) + diff --git a/src/examples/PAPI_add_remove_event.c b/src/examples/PAPI_add_remove_event.c new file mode 100644 index 0000000..5d58c98 --- /dev/null +++ b/src/examples/PAPI_add_remove_event.c @@ -0,0 +1,110 @@ +/***************************************************************************** +* This example shows how to use PAPI_add_event, PAPI_start, PAPI_read, * +* PAPI_stop and PAPI_remove_event. * +******************************************************************************/ + +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +#define NUM_EVENTS 2 +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int main() +{ + int EventSet = PAPI_NULL; + int tmp, i; + /*must be initialized to PAPI_NULL before calling PAPI_create_event*/ + + long long values[NUM_EVENTS]; + /*This is where we store the values we read from the eventset */ + + /* We use number to keep track of the number of events in the EventSet */ + int retval, number; + + char errstring[PAPI_MAX_STR_LEN]; + + /*************************************************************************** + * This part initializes the library and compares the version number of the* + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. 
* + ***************************************************************************/ + + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + ERROR_RETURN(retval); + + + /* Creating the eventset */ + if ( (retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Instructions Executed to the EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Cycles event to the EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) != PAPI_OK) + ERROR_RETURN(retval); + + /* get the number of events in the event set */ + number = 0; + if ( (retval = PAPI_list_events(EventSet, NULL, &number)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("There are %d events in the event set\n", number); + + /* Start counting */ + + if ( (retval = PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* you can replace your code here */ + tmp=0; + for (i = 0; i < 2000000; i++) + { + tmp = i + tmp; + } + + + /* read the counter values and store them in the values array */ + if ( (retval=PAPI_read(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("The total instructions executed for the first loop are %lld \n", values[0] ); + printf("The total cycles executed for the first loop are %lld \n",values[1]); + + /* our slow code again */ + tmp=0; + for (i = 0; i < 2000000; i++) + { + tmp = i + tmp; + } + + /* Stop counting and store the values into the array */ + if ( (retval = PAPI_stop(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("Total instructions executed are %lld \n", values[0] ); + printf("Total cycles executed are %lld \n",values[1]); + + /* Remove event: We are going to take the PAPI_TOT_INS from the eventset */ + if( (retval = PAPI_remove_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) + ERROR_RETURN(retval); + printf("Removing PAPI_TOT_INS from the eventset\n"); + + /* Now we list how many events are left on the event 
set */ + number = 0; + if ((retval=PAPI_list_events(EventSet, NULL, &number))!= PAPI_OK) + ERROR_RETURN(retval); + + printf("There is only %d event left in the eventset now\n", number); + + /* free the resources used by PAPI */ + PAPI_shutdown(); + + exit(0); +} + + diff --git a/src/examples/PAPI_add_remove_events.c b/src/examples/PAPI_add_remove_events.c new file mode 100644 index 0000000..e83ff5a --- /dev/null +++ b/src/examples/PAPI_add_remove_events.c @@ -0,0 +1,83 @@ +/****************************************************************************** + * This is a simple low level function demonstration on using PAPI_add_events * + * to add an array of events to a created eventset, we are going to use these * + * events to monitor a set of instructions, start the counters, read the * + * counters and then cleanup the eventset when done. In this example we use * + * the presets PAPI_TOT_INS and PAPI_TOT_CYC. PAPI_add_events,PAPI_start, * + * PAPI_stop, PAPI_clean_eventset, PAPI_destroy_eventset and * + * PAPI_create_eventset all return PAPI_OK(which is 0) when succesful. 
* + ******************************************************************************/ + +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +#define NUM_EVENT 2 +#define THRESHOLD 100000 +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + + +int main(){ + + int i,retval,tmp=0; + int EventSet = PAPI_NULL; + /*must be initialized to PAPI_NULL before calling PAPI_create_event*/ + + int event_codes[NUM_EVENT]={PAPI_TOT_INS,PAPI_TOT_CYC}; + /* tmp starts at 0 so the demo loop below never reads an indeterminate value */ + long long values[NUM_EVENT]; + + /*************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. * + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + fprintf(stderr, "Error: PAPI_library_init returned %d\n", retval); + exit(1); + } + + + /* Creating event set */ + if ((retval=PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + + /* Add the array of events PAPI_TOT_INS and PAPI_TOT_CYC to the eventset*/ + if ((retval=PAPI_add_events(EventSet, event_codes, NUM_EVENT)) != PAPI_OK) + ERROR_RETURN(retval); + + + /* Start counting */ + if ( (retval=PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /*** this is where your computation goes *********/ + for(i=0;i<1000;i++) + { + tmp = tmp+i; + } + + /* Stop counting, this reads from the counter as well as stop it. 
*/ + if ( (retval=PAPI_stop(EventSet,values)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("\nThe total instructions executed are %lld, total cycles %lld\n", + values[0],values[1]); + + + if ( (retval=PAPI_remove_events(EventSet,event_codes, NUM_EVENT))!=PAPI_OK) + ERROR_RETURN(retval); + + /* Free all memory and data structures, EventSet must be empty. */ + if ( (retval=PAPI_destroy_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* free the resources used by PAPI */ + PAPI_shutdown(); + + exit(0); +} diff --git a/src/examples/PAPI_flips.c b/src/examples/PAPI_flips.c new file mode 100644 index 0000000..c6a1342 --- /dev/null +++ b/src/examples/PAPI_flips.c @@ -0,0 +1,74 @@ +/***************************************************************************** + * This example demonstrates the usage of the high level function PAPI_flips * + * which measures the number of floating point instructions executed and the * + * MegaFlop rate(defined as the number of floating point instructions per * + * microsecond). To use PAPI_flips you need to have floating point * + * instructions event supported by the platform. * + *****************************************************************************/ + +/***************************************************************************** + * The first call to PAPI_flips initializes the PAPI library, set up the * + * counters to monitor PAPI_FP_INS and PAPI_TOT_CYC events, and start the * + * counters. Subsequent calls will read the counters and return total real * + * time, total process time, total floating point instructions, and the * + * Mflins/s rate since the last call to PAPI_flips. 
* + *****************************************************************************/ + + +#include +#include +#include "papi.h" + + +main() +{ + float real_time, proc_time,mflips; + long long flpins; + float ireal_time, iproc_time, imflips; + long long iflpins; + int retval; + + /*********************************************************************** + * if PAPI_FP_INS is a derived event in your platform, then your * + * platform must have at least three counters to support PAPI_flips, * + * because PAPI needs one counter to cycles. So in UltraSparcIII, even * + * the platform supports PAPI_FP_INS, but UltraSparcIII only have two * + * available hardware counters and PAPI_FP_INS is a derived event in * + * this platform, so PAPI_flops returns an error. * + ***********************************************************************/ + + if((retval=PAPI_flips(&ireal_time,&iproc_time,&iflpins,&imflips)) < PAPI_OK) + { + printf("Could not initialise PAPI_flips \n"); + printf("Your platform may not support floating point instruction event.\n"); printf("retval: %d\n", retval); + exit(1); + } + + your_slow_code(); + + + if((retval=PAPI_flips( &real_time, &proc_time, &flpins, &mflips)) +#include +#include "papi.h" + + +main() +{ + float real_time, proc_time,mflops; + long long flpops; + float ireal_time, iproc_time, imflops; + long long iflpops; + int retval; + + /*********************************************************************** + * if PAPI_FP_OPS is a derived event in your platform, then your * + * platform must have at least three counters to support PAPI_flops, * + * because PAPI needs one counter to cycles. So in UltraSparcIII, even * + * the platform supports PAPI_FP_OPS, but UltraSparcIII only has two * + * available hardware counters and PAPI_FP_OPS is a derived event in * + * this platform, so PAPI_flops returns an error. 
* + ***********************************************************************/ + if((retval=PAPI_flops(&ireal_time,&iproc_time,&iflpops,&imflops)) < PAPI_OK) + { + printf("Could not initialise PAPI_flops \n"); + printf("Your platform may not support floating point operation event.\n"); + printf("retval: %d\n", retval); + exit(1); + } + + your_slow_code(); + + + if((retval=PAPI_flops( &real_time, &proc_time, &flpops, &mflops)) +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +int main() +{ + int i,tmp=0; + int retval; + const PAPI_exe_info_t *prginfo = NULL; + + /**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. * + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + printf("Library initialization error! \n"); + exit(1); + } + + + for(i=0;i<1000;i++) + tmp=tmp+i; + + /* PAPI_get_executable_info returns a NULL if there is an error */ + if ((prginfo = PAPI_get_executable_info()) == NULL) + { + printf("PAPI_get_executable_info error! 
\n"); + exit(1); + } + + + printf("Start text addess of user program is at %p\n", + prginfo->address_info.text_start); + printf("End text address of user program is at %p\n", + prginfo->address_info.text_end); + + exit(0); +} diff --git a/src/examples/PAPI_get_opt.c b/src/examples/PAPI_get_opt.c new file mode 100644 index 0000000..9592636 --- /dev/null +++ b/src/examples/PAPI_get_opt.c @@ -0,0 +1,94 @@ +/***************************************************************************** + * This is an example using the low level function PAPI_get_opt to query the * + * option settings of the PAPI library or a specific eventset created by the * + * PAPI_create_eventset function. PAPI_set_opt is used on the other hand to * + * set PAPI library or event set options. * + *****************************************************************************/ + +#include +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int poorly_tuned_function() +{ + float tmp=0; /* defined start value: the loop reads tmp before writing it */ + int i; + + for(i=1; i<2000; i++) + { + tmp=(tmp+100)/i; + } + return 0; +} + +int main() +{ + + int num, retval, EventSet = PAPI_NULL; + PAPI_option_t options; + long long values[2]; + + /**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. * + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + printf("Library initialization error! 
\n"); + exit(1); + } + + /*PAPI_get_opt returns a negative number if there is an error */ + + /* This call returns the maximum available hardware counters */ + if((num = PAPI_get_opt(PAPI_MAX_HWCTRS,NULL)) <= 0) + ERROR_RETURN(num); + + + printf("This machine has %d counters.\n",num); + + if ((retval=PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Set the domain of this EventSet to counter user and + kernel modes for this process. */ + + memset(&options,0x0,sizeof(options)); + + options.domain.eventset = EventSet; + /* Default domain is PAPI_DOM_USER */ + options.domain.domain = PAPI_DOM_ALL; + /* this sets the options for the domain */ + if ((retval=PAPI_set_opt(PAPI_DOMAIN, &options)) != PAPI_OK) + ERROR_RETURN(retval); + /* Add Total Instructions Executed event to the EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Cycles Executed event to the EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Start counting */ + if((retval=PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + poorly_tuned_function(); + + /* Stop counting */ + if((retval=PAPI_stop(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval); + + printf(" Total instructions: %lld Total Cycles: %lld \n", values[0], + values[1]); + + /* clean up */ + PAPI_shutdown(); + + exit(0); +} diff --git a/src/examples/PAPI_get_real_cyc.c b/src/examples/PAPI_get_real_cyc.c new file mode 100644 index 0000000..b2bccc6 --- /dev/null +++ b/src/examples/PAPI_get_real_cyc.c @@ -0,0 +1,66 @@ +/****************************************************************************** + * This is an example to show how to use low level function PAPI_get_real_cyc * + * and PAPI_get_real_usec. 
* + ******************************************************************************/ + +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +int your_slow_code() +{ + int i,tmp=0; /* defined start value: the loop reads tmp before writing it */ + + for(i=1; i<20000; i++) + { + tmp=(tmp+100)/i; + } + return 0; +} + +int main() +{ + long long s,s1, e, e1; + int retval; + + + /**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. * + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + printf("Library initialization error! \n"); + exit(1); + } + + /* Here you get initial cycles and time */ + /* No error checking is done here because this function call is always + successful */ + + s = PAPI_get_real_cyc(); + + your_slow_code(); + + /*Here you get final cycles and time */ + e = PAPI_get_real_cyc(); + + s1= PAPI_get_real_usec(); + + your_slow_code(); + + e1= PAPI_get_real_usec(); + + /* e1-s1 comes from PAPI_get_real_usec, so it is reported in microseconds */ + printf("Wallclock cycles : %lld\nWallclock time(us): %lld\n",e-s,e1-s1); + + /* clean up */ + PAPI_shutdown(); + + exit(0); +} + + diff --git a/src/examples/PAPI_get_virt_cyc.c b/src/examples/PAPI_get_virt_cyc.c new file mode 100644 index 0000000..d27442d --- /dev/null +++ b/src/examples/PAPI_get_virt_cyc.c @@ -0,0 +1,66 @@ +/****************************************************************************** + * This is an example to show how to use low level function PAPI_get_virt_cyc * + * and PAPI_get_virt_usec. 
* + ******************************************************************************/ + +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +int i; +double tmp; + +int your_slow_code() +{ + + for(i=1; i<200000; i++) + { + tmp= (tmp+i)/2; + } + return 0; +} + +int main() +{ + long long s,s1, e, e1; + int retval; + + + /**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. * + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + printf("Library initialization error! \n"); + exit(1); + } + + /* Here you get initial cycles and time */ + /* No error checking is done here because this function call is always + successful */ + + s = PAPI_get_virt_cyc(); + + your_slow_code(); + + /*Here you get final cycles and time */ + e = PAPI_get_virt_cyc(); + + s1= PAPI_get_virt_usec(); + + your_slow_code(); + + e1= PAPI_get_virt_usec(); + + /* e1-s1 comes from PAPI_get_virt_usec, so it is reported in microseconds */ + printf("Virtual cycles : %lld\nVirtual time(us): %lld\n",e-s,e1-s1); + + /* clean up */ + PAPI_shutdown(); + + exit(0); +} + diff --git a/src/examples/PAPI_hw_info.c b/src/examples/PAPI_hw_info.c new file mode 100644 index 0000000..83cc3b0 --- /dev/null +++ b/src/examples/PAPI_hw_info.c @@ -0,0 +1,49 @@ +/**************************************************************************** + * This is a simple low level example for getting information on the system * + * hardware. 
This function PAPI_get_hardware_info(), returns a pointer to a * + * structure of type PAPI_hw_info_t, which contains number of CPUs, nodes, * + * vendor number/name for CPU, CPU revision, clock speed. 
* + ****************************************************************************/ + +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +int main() +{ + const PAPI_hw_info_t *hwinfo = NULL; + int retval; + + /*************************************************************************** + * This part initializes the library and compares the version number of the* + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. * + ***************************************************************************/ + + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + printf("Library initialization error! \n"); + exit(1); + } + + /* Get hardware info*/ + if ((hwinfo = PAPI_get_hardware_info()) == NULL) + { + printf("PAPI_get_hardware_info error! \n"); + exit(1); + } + /* when there is an error, PAPI_get_hardware_info returns NULL */ + + + printf("%d CPU at %f Mhz.\n",hwinfo->totalcpus,hwinfo->mhz); + printf(" model string is %s \n", hwinfo->model_string); + + /* clean up */ + PAPI_shutdown(); + + exit(0); + +} + diff --git a/src/examples/PAPI_ipc.c b/src/examples/PAPI_ipc.c new file mode 100644 index 0000000..651165b --- /dev/null +++ b/src/examples/PAPI_ipc.c @@ -0,0 +1,64 @@ +/***************************************************************************** + * This example demonstrates the usage of the high level function PAPI_ipc * + * which measures the number of instructions executed per cpu cycle * + *****************************************************************************/ + +/***************************************************************************** + * The first call to PAPI_ipc initializes the PAPI library, set up the * + * counters to monitor PAPI_TOT_INS and PAPI_TOT_CYC events, and start the * + * counters. 
Subsequent calls will read the counters and return total real * + * time, total process time, total instructions, and the instructions per * + * cycle rate since the last call to PAPI_ipc. * + *****************************************************************************/ + + +#include +#include +#include "papi.h" + + +main() +{ + float real_time, proc_time,ipc; + long long ins; + float real_time_i, proc_time_i, ipc_i; + long long ins_i; + int retval; + + if((retval=PAPI_ipc(&real_time_i,&proc_time_i,&ins_i,&ipc_i)) < PAPI_OK) + { + printf("Could not initialise PAPI_ipc \n"); + printf("retval: %d\n", retval); + exit(1); + } + + your_slow_code(); + + + if((retval=PAPI_ipc( &real_time, &proc_time, &ins, &ipc)) +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ +#include + +#define OVER_FMT "handler(%d ) Overflow at %p! bit=%#llx \n" +#define THRESHOLD 100000 +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int total = 0; /* we use total to track the amount of overflows that occured */ + +/* THis is the handler called by PAPI_overflow*/ +void +handler(int EventSet, void *address, long long overflow_vector, void *context) +{ + fprintf(stderr, OVER_FMT, EventSet, address, overflow_vector); + total++; +} + + +int main () +{ + int EventSet = PAPI_NULL; + /* must be set to null before calling PAPI_create_eventset */ + + char errstring[PAPI_MAX_STR_LEN]; + long long (values[2])[2]; + int retval, i; + double tmp = 0; + int PAPI_event; /* a place holder for an event preset */ + char event_name[PAPI_MAX_STR_LEN]; + + + /**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. 
* + ****************************************************************************/ + + if ((retval = PAPI_library_init (PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) + { + printf("Library initialization error! \n"); + exit(1); + } + + /* Here we create the eventset */ + if ((retval=PAPI_create_eventset (&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + PAPI_event = PAPI_TOT_INS; + + /* Here we are querying for the existence of the PAPI presets */ + if (PAPI_query_event (PAPI_TOT_INS) != PAPI_OK) + { + PAPI_event = PAPI_TOT_CYC; + + /* check the fallback preset itself; give up only if it is missing too */ + if ((retval=PAPI_query_event (PAPI_TOT_CYC)) != PAPI_OK) + ERROR_RETURN(retval); + + printf ("PAPI_TOT_INS not available on this platform."); + printf (" so subst PAPI_event with PAPI_TOT_CYC !\n\n"); + + } + + + /* PAPI_event_code_to_name is used to convert a PAPI preset from + its integer value to its string name. */ + if ((retval = PAPI_event_code_to_name (PAPI_event, event_name)) != PAPI_OK) + ERROR_RETURN(retval); + + /* add event to the event set */ + if ((retval = PAPI_add_event (EventSet, PAPI_event)) != PAPI_OK) + ERROR_RETURN(retval); + + /* register overflow and set up threshold */ + /* The threshold "THRESHOLD" was set to 100000 */ + if ((retval = PAPI_overflow (EventSet, PAPI_event, THRESHOLD, 0, + handler)) != PAPI_OK) + ERROR_RETURN(retval); + + printf ("Here are the addresses at which overflows occured and overflow vectors \n"); + printf ("--------------------------------------------------------------\n"); + + + /* Start counting */ + + if ( (retval=PAPI_start (EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + for (i = 0; i < 2000000; i++) + { + tmp = 1.01 + tmp; + tmp++; + } + + /* Stops the counters and reads the counter values into the values array */ + if ( (retval=PAPI_stop (EventSet, values[0])) != PAPI_OK) + ERROR_RETURN(retval); + + + printf ("The total no of overflows was %d\n", total); + + /* clear the overflow status */ + if ((retval = 
PAPI_overflow (EventSet, PAPI_event, 0, 0, + handler)) != PAPI_OK) + 
ERROR_RETURN(retval); + + /************************************************************************ + * PAPI_cleanup_eventset can only be used after the counter has been * + * stopped then it remove all events in the eventset * + ************************************************************************/ + if ( (retval=PAPI_cleanup_eventset (EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Free all memory and data structures, EventSet must be empty. */ + if ( (retval=PAPI_destroy_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* free the resources used by PAPI */ + PAPI_shutdown(); + + exit(0); +} diff --git a/src/examples/PAPI_perror.c b/src/examples/PAPI_perror.c new file mode 100644 index 0000000..49b2e48 --- /dev/null +++ b/src/examples/PAPI_perror.c @@ -0,0 +1,81 @@ +/***************************************************************************** + * PAPI_perror converts PAPI error codes to strings,it fills the string * + * destination with the error message corresponding to the error code. * + * The function copies length worth of the error description string * + * corresponding to code into destination. The resulting string is always * + * null terminated. If length is 0, then the string is printed on stderr. * + * PAPI_strerror does similar but it just returns the corresponding * + * error string from the code. * + *****************************************************************************/ + +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + + +int main() +{ + + int retval; + int EventSet = PAPI_NULL; + char error_str[PAPI_MAX_STR_LEN]; + + /**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. 
* + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + exit(1); + } + + if ((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) + { + fprintf(stderr, "PAPI error %d: %s\n",retval,PAPI_strerror(retval)); + exit(1); + } + + /* Add Total Instructions Executed to our EventSet */ + + if ((retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) + { + PAPI_perror( "PAPI_add_event" ); + exit(1); + } + + /* Start counting */ + + if ((retval = PAPI_start(EventSet)) != PAPI_OK) + { + PAPI_perror( "PAPI_start" ); + exit(1); + } + + /* We are trying to start the counter which has already been started, + and this will give an error which will be passed to PAPI_perror via + retval and the function will then display the error string on the + screen. + */ + + if ((retval = PAPI_start(EventSet)) != PAPI_OK) + { + PAPI_perror( "PAPI_start" ); + } + + /* The function PAPI_strerror returns the corresponding error string + from the error code */ + if ((retval = PAPI_start(EventSet)) != PAPI_OK) + { + printf("%s\n",PAPI_strerror(retval)); + } + + /* finish using PAPI and free all related resources + (this is optional, you don't have to use it + */ + PAPI_shutdown (); + + exit(0); +} diff --git a/src/examples/PAPI_profil.c b/src/examples/PAPI_profil.c new file mode 100644 index 0000000..ad6e990 --- /dev/null +++ b/src/examples/PAPI_profil.c @@ -0,0 +1,155 @@ +/**************************************************************************** + * PAPI_profil - generate PC histogram data * + ****************************************************************************/ + +#include +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +#define FLOPS 1000000 +#define THRESHOLD 100000 +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int code_to_monitor() +{ + int i; + 
double tmp=1.1; + + for(i=0; i < FLOPS; i++) + { + tmp=i+tmp; + tmp++; + } + i = (int) tmp; + return i; +} + +int main() +{ + + unsigned long length; + caddr_t start, end; + PAPI_sprofil_t * prof; + int EventSet = PAPI_NULL; + /*must be initialized to PAPI_NULL before calling PAPI_create_event*/ + int PAPI_event,i,tmp = 0; + char event_name[PAPI_MAX_STR_LEN]; + /*These are going to be used as buffers */ + unsigned short *profbuf; + long long values[2]; + const PAPI_exe_info_t *prginfo = NULL; + + + int retval; + + + /**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. * + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + printf("Library initialization error! \n"); + exit(1); + } + + + if ((prginfo = PAPI_get_executable_info()) == NULL) + { + fprintf(stderr, "Error in get executable information \n"); + exit(1); + } + + start = prginfo->address_info.text_start; + end = prginfo->address_info.text_end; + length = (end - start); + + /* for PAPI_PROFIL_BUCKET_16 and scale = 65536, + profile buffer length == program address length. + Larger bucket sizes would increase the buffer length. + Smaller scale factors would decrease it. + Handle with care... 
+ */ + profbuf = (unsigned short *)malloc(length); + if (profbuf == NULL) + { + fprintf(stderr, "Not enough memory \n"); + exit(1); + } + memset(profbuf,0x00,length); + + /* Creating the eventset */ + if ( (retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + PAPI_event = PAPI_TOT_INS; + /* Add Total Instructions Executed to our EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_event)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Cycles Executed to our EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) != PAPI_OK) + ERROR_RETURN(retval); + + /* enable the collection of profiling information */ + if ((retval = PAPI_profil(profbuf, length, start, 65536, EventSet, + PAPI_event, THRESHOLD, PAPI_PROFIL_POSIX | PAPI_PROFIL_BUCKET_16)) != PAPI_OK) + ERROR_RETURN(retval); + + /* let's rock and roll */ + if ((retval=PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + code_to_monitor(); + + if ((retval=PAPI_stop(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval); + + /* disable the collection of profiling information by setting threshold + to 0 + */ + if ((retval = PAPI_profil(profbuf, length, start, 65536, EventSet, + PAPI_event, 0, PAPI_PROFIL_POSIX)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("-----------------------------------------------------------\n"); + printf("Text start: %p, Text end: %p, \n", + prginfo->address_info.text_start,prginfo->address_info.text_end); + printf("Data start: %p, Data end: %p\n", + prginfo->address_info.data_start,prginfo->address_info.data_end); + printf("BSS start : %p, BSS end: %p\n", + prginfo->address_info.bss_start,prginfo->address_info.bss_end); + + printf("------------------------------------------\n"); + + printf("Test type : \tPAPI_PROFIL_POSIX\n"); + printf("------------------------------------------\n\n\n"); + printf("PAPI_profil() hash table.\n"); + printf("address\t\tflat \n"); + for (i = 0; i < (int) length/2; i++) + { + if (profbuf[i]) + 
printf("%#lx\t%d \n", + (unsigned long) start + (unsigned long) (2 * i), profbuf[i]); + } + + printf("-----------------------------------------\n"); + + retval = 0; + for (i = 0; i < (int) length/2; i++) + retval = retval || (profbuf[i]); + if (retval) + printf("Test succeeds! \n"); + else + printf( "No information in buffers\n"); + /* clean up */ + PAPI_shutdown(); + + exit(0); +} + + diff --git a/src/examples/PAPI_reset.c b/src/examples/PAPI_reset.c new file mode 100644 index 0000000..c2dcd68 --- /dev/null +++ b/src/examples/PAPI_reset.c @@ -0,0 +1,91 @@ +/***************************************************************************** + * PAPI_reset - resets the hardware event counters used by an EventSet. * + *****************************************************************************/ + +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int poorly_tuned_function() +{ + float tmp=0; /* defined start value: the loop reads tmp before writing it */ + int i; + + for(i=1; i<2000; i++) + { + tmp=(tmp+100)/i; + } + return 0; +} + +int main() +{ + int EventSet = PAPI_NULL; + /*must be initialized to PAPI_NULL before calling PAPI_create_event*/ + + int retval; + unsigned int event_code=PAPI_TOT_INS; + /* By default monitor total instructions */ + + char errstring[PAPI_MAX_STR_LEN]; + long long values[1]; + + /**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. * + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + printf("Library initialization error! 
\n"); + exit(1); + } + + /* Creating the eventset */ + if ( (retval=PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Instructions Executed to our EventSet */ + if ((retval=PAPI_add_event(EventSet, event_code)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Start counting */ + if((retval=PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + poorly_tuned_function(); + + /* Stop counting */ + if((retval=PAPI_stop(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval); + + + printf("The first time read value is %lld\n",values[0]); + + /* This zeroes out the counters on the eventset that was created */ + if((retval=PAPI_reset(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Start counting */ + if((retval=PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + poorly_tuned_function(); + + /* Stop counting */ + if((retval=PAPI_stop(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("The second time read value is %lld\n",values[0]); + + /* free the resources used by PAPI */ + PAPI_shutdown(); + + exit(0); +} + + diff --git a/src/examples/PAPI_set_domain.c b/src/examples/PAPI_set_domain.c new file mode 100644 index 0000000..766626a --- /dev/null +++ b/src/examples/PAPI_set_domain.c @@ -0,0 +1,128 @@ +/***************************************************************************** + * This example shows how to use PAPI_set_domain * + *****************************************************************************/ + +#include +#include +#include +#include +#include + +#include "papi.h" /* This needs to be included every time you use PAPI */ + +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int poorly_tuned_function() +{ + float tmp=0; /* defined start value: the loop reads tmp before writing it */ + int i; + + for(i=1; i<2000; i++) + { + tmp=(tmp+100)/i; + } + return 0; +} + +int main() +{ + + int num, retval, EventSet = PAPI_NULL; + long long 
values[2]; + PAPI_option_t options; + int fd; + + 
/**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number. * + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + printf("Library initialization error! \n"); + exit(1); + } + + /* Set the domain of this EventSet to counter user mode. The domain + will be valid for all the eventset created after this function call + unless you call PAPI_set_domain again */ + if ((retval=PAPI_set_domain(PAPI_DOM_USER)) != PAPI_OK) + ERROR_RETURN(retval); + + if ((retval=PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Instructions Executed event to the EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Cycles Executed event to the EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Start counting */ + if((retval=PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + poorly_tuned_function(); + /* add some system calls */ + fd = open("/dev/zero", O_RDONLY); + if (fd == -1) + { + perror("open(/dev/zero)"); + exit(1); + } + close(fd); + + + /* Stop counting */ + if((retval=PAPI_stop(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval); + + printf(" Total instructions: %lld Total Cycles: %lld \n", values[0], + values[1]); + + /* Set the domain of this EventSet to counter user and kernel modes */ + if ((retval=PAPI_set_domain(PAPI_DOM_ALL)) != PAPI_OK) + ERROR_RETURN(retval); + + EventSet = PAPI_NULL; + if ((retval=PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Instructions Executed to 
our EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Cycles Executed to our EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_CYC)) != PAPI_OK) + ERROR_RETURN(retval); + /* Start counting */ + if((retval=PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + poorly_tuned_function(); + /* add some system calls */ + fd = open("/dev/zero", O_RDONLY); + if (fd == -1) + { + perror("open(/dev/zero)"); + exit(1); + } + close(fd); + + /* Stop counting */ + if((retval=PAPI_stop(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval); + + printf(" Total instructions: %lld Total Cycles: %lld \n", values[0], + values[1]); + + /* clean up */ + PAPI_shutdown(); + + exit(0); +} diff --git a/src/examples/PAPI_state.c b/src/examples/PAPI_state.c new file mode 100644 index 0000000..5cfc441 --- /dev/null +++ b/src/examples/PAPI_state.c @@ -0,0 +1,80 @@ +/***************************************************************************** + * We use PAPI_state to get the counting state of an EventSet.This function * + * returns the state of the entire EventSet. * + *****************************************************************************/ + + + +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +int printstate(int status); /* defined after main(); declared here so the calls below are not implicit declarations */ +int main() +{ + + int retval; + int status = 0; + int EventSet = PAPI_NULL; + + /**************************************************************************** + * This part initializes the library and compares the version number of the * + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number.
* + ****************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + printf("Library initialization error! \n"); + exit(-1); + } + + /*Creating the Eventset */ + if((retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Instructions Executed to our EventSet */ + if ((retval=PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) + ERROR_RETURN(retval); + /* Query and print the state before counting starts */ + if ((retval=PAPI_state(EventSet, &status)) != PAPI_OK) + ERROR_RETURN(retval); + + printstate(status); + + /* Start counting */ + if ((retval=PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + /* Query the state again; keep the result in retval so a failure reports the real error code */ + if ((retval=PAPI_state(EventSet, &status)) != PAPI_OK) + ERROR_RETURN(retval); + + printstate(status); + + /* free the resources used by PAPI */ + PAPI_shutdown(); + + exit(0); +} +/* Print one line for every PAPI state flag that is set in status; always returns 0 */ +int printstate(int status) +{ + if(status & PAPI_STOPPED) + printf("Eventset is currently stopped or inactive \n"); + if(status & PAPI_RUNNING) + printf("Eventset is currently running \n"); + if(status & PAPI_PAUSED) + printf("Eventset is currently Paused \n"); + if(status & PAPI_NOT_INIT) + printf(" Eventset defined but not initialized \n"); + if(status & PAPI_OVERFLOWING) + printf(" Eventset has overflowing enabled \n"); + if(status & PAPI_PROFILING) + printf(" Eventset has profiling enabled \n"); + if(status & PAPI_MULTIPLEXING) + printf(" Eventset has multiplexing enabled \n"); + return 0; +} diff --git a/src/examples/README b/src/examples/README new file mode 100644 index 0000000..315fc53 --- /dev/null +++ b/src/examples/README @@ -0,0 +1,22 @@ +/* +* File: papi/src/examples/README +* Author: Min Zhou +* min@cs.utk.edu +* Mods: +* +*/ + +This directory contains: + +Makefile example Makefile for platforms that support GNU make +Makefile.AIX example Makefile for AIX; +Makefile.IRIX64 example Makefile for IRIX64; +Makefile.OSF1 example Makefile for OSF1; +*.c various example programs +run_examples.sh shell
script to test the example programs + +NOTE: not all of the example programs can be run successfully, because some +events are not available on every platform. For example, PAPI_FP_INS is a derived event +on power3 and UltraSparc III, so overflow_pthreads cannot be run successfully +on these platforms. These programs should still help you understand how to +use the PAPI functions. diff --git a/src/examples/add_event/Papi_add_env_event.c b/src/examples/add_event/Papi_add_env_event.c new file mode 100644 index 0000000..a25e3cc --- /dev/null +++ b/src/examples/add_event/Papi_add_env_event.c @@ -0,0 +1,141 @@ +/* + * This example shows how to use PAPI_library_init, PAPI_create_eventset, + * PAPI_add_event, PAPI_start and PAPI_stop. These 5 functions + * will allow a user to do most of the performance information gathering + * that they would need. PAPI_read could also be used if you don't want + * to stop the EventSet from running but only check the counts. + * + * Also, we will use PAPI_perror for error information. + * + * In addition, a new call was created called PAPI_add_env_event + * that allows a user to set an environment variable naming + * the event to be monitored. This allows different events + * to be monitored at runtime without recompiling. The syntax + * is as follows: + * PAPI_add_env_event(int *EventSet, int *Event, char *env_variable); + * EventSet is the same as in PAPI_add_event + * Event is the default event to monitor if the environment variable + * does not exist and differs from PAPI_add_event as it is + * a pointer.
+ * env_variable is the name of the environment variable to look for + * the event code, this can be a name, number or hex, for example + * PAPI_L1_DCM could be defined in the environment variable as + * all of the following: PAPI_L1_DCM, 0x80000000, or -2147483648 + * + * To use only add_event you would change the calls to + * PAPI_add_env_event(int *EventSet, int *Event, char *env_variable); + * to PAPI_add_event(int EventSet, int Event); + * + * We will also use PAPI_event_code_to_name since the event may have + * changed. + * Author: Kevin London + * email: london@cs.utk.edu + */ +#include +#include +#include "papi.h" /* This needs to be included anytime you use PAPI */ + +int PAPI_add_env_event(int *EventSet, int *Event, char *env_variable); + + +int main(){ + int retval; + int i; + int EventSet=PAPI_NULL; + int event_code=PAPI_TOT_INS; /* By default monitor total instructions */ + char event_name[PAPI_MAX_STR_LEN]; + float a[1000],b[1000]={0},c[1000]={0}; /* b and c are read by the work loop, so they must start initialized */ + long long values; + + + /* This initializes the library and checks the version number of the + * header file, to the version of the library, if these don't match + * then it is likely that PAPI won't work correctly. + */ + if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ){ + /* PAPI_perror prints the given label with the PAPI error text; + * if retval == PAPI_ESYS then it might be beneficial + * to call perror as well to see what system call failed + */ + PAPI_perror("PAPI_library_init"); + exit(-1); + } + /* Create space for the EventSet */ + if ( (retval=PAPI_create_eventset( &EventSet ))!=PAPI_OK){ + PAPI_perror("PAPI_create_eventset"); + exit(-1); + } + + /* After this call if the environment variable PAPI_EVENT is set, + * event_code may contain something different than total instructions.
+ */ + if ( (retval=PAPI_add_env_event(&EventSet, &event_code, "PAPI_EVENT"))!=PAPI_OK){ + PAPI_perror("PAPI_add_env_event"); + exit(-1); + } + /* Now lets start counting */ + if ( (retval = PAPI_start(EventSet)) != PAPI_OK ){ + PAPI_perror("PAPI_start"); + exit(-1); + } + + /* Some work to take up some time, the PAPI_start/PAPI_stop (and/or + * PAPI_read) should surround what you want to monitor. + */ + for ( i=0;i<1000;i++){ + a[i] = b[i]-c[i]; + c[i] = a[i]*1.2; + } + + if ( (retval = PAPI_stop(EventSet, &values) ) != PAPI_OK ){ + PAPI_perror("PAPI_stop"); + exit(-1); + } + + if ( (retval=PAPI_event_code_to_name( event_code, event_name))!=PAPI_OK){ + PAPI_perror("PAPI_event_code_to_name"); + exit(-1); + } + + printf("Ending values for %s: %lld\n", event_name,values); + /* Remove PAPI instrumentation, this is necessary on platforms + * that need to release shared memory segments and is always + * good practice. + */ + PAPI_shutdown(); + exit(0); +} + +/* Add to *EventSet the event named by env_variable (PAPI name, 0x-hex or decimal code), falling back + * to *EventCode. Returns the PAPI_add_event result; on success *EventCode holds the event that was added. */ +int PAPI_add_env_event(int *EventSet, int *EventCode, char *env_variable){ + int real_event=*EventCode; + char *eventname; + int retval; + + if ( env_variable != NULL ){ + if ( (eventname=getenv(env_variable)) ) { + if ( eventname[0] == 'P' ) { /* Use the PAPI name */ + retval=PAPI_event_name_to_code(eventname, &real_event ); + if ( retval != PAPI_OK ) real_event = *EventCode; + } + else{ + if ( eventname[0]!='\0' && eventname[1]=='x') /* "0x..." form; the first test keeps the index in bounds */ + real_event = (int) strtoul(eventname, NULL, 16); /* base 16 accepts the 0x prefix */ + else + real_event = atoi(eventname); + } + } + } + if ( (retval = PAPI_add_event( *EventSet, real_event))!= PAPI_OK ){ + if ( real_event != *EventCode ) { + if ( (retval = PAPI_add_event( *EventSet, *EventCode)) == PAPI_OK +){ + real_event = *EventCode; + } + } + } + *EventCode = real_event; + return retval; +} + diff --git a/src/examples/high_level.c b/src/examples/high_level.c new file mode 100644 index 0000000..810fbdb --- /dev/null +++ b/src/examples/high_level.c @@ -0,0 +1,150 @@
+/***************************************************************************** +* This example code shows how to use most of PAPI's High level functions * +* to start,count,read and stop on an event set. We use two preset events * +* here: * +* PAPI_TOT_INS: Total instructions executed in a period of time * +* PAPI_TOT_CYC: Total cpu cycles in a period of time * +******************************************************************************/ + +#include +#include +#include "papi.h" + +#define NUM_EVENTS 2 +#define THRESHOLD 10000 +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +/* simple multiplication workload to be monitored */ +void computation_mult() +{ + double tmp=1.0; + int i=1; + for( i = 1; i < THRESHOLD; i++ ) + { + tmp = tmp*i; + } +} + +/* simple addition workload to be monitored */ +void computation_add() +{ + int tmp = 0; + int i=0; + + for( i = 0; i < THRESHOLD; i++ ) + { + tmp = tmp + i; + } + +} + + +int main() +{ + /*Declaring and initializing the event set with the presets*/ + int Events[NUM_EVENTS] = {PAPI_TOT_INS, PAPI_TOT_CYC}; + /*The length of the events array should be no longer than the + value returned by PAPI_num_counters.*/ + + /*declaring place holder for no of hardware counters */ + int num_hwcntrs = 0; + int retval; + char errstring[PAPI_MAX_STR_LEN] = "PAPI library initialization failed"; /* printed on init failure, so it must hold a valid string */ + /*This is going to store our list of results*/ + long long values[NUM_EVENTS]; + + + /*************************************************************************** + * This part initializes the library and compares the version number of the* + * header file, to the version of the library, if these don't match then it * + * is likely that PAPI won't work correctly.If there is an error, retval * + * keeps track of the version number.
* + ***************************************************************************/ + + if((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT ) + { + fprintf(stderr, "Error: %d %s\n",retval, errstring); + exit(1); + } + + + /************************************************************************** + * PAPI_num_counters returns the number of hardware counters the platform * + * has or a negative number if there is an error * + **************************************************************************/ + if ((num_hwcntrs = PAPI_num_counters()) < PAPI_OK) + { + printf("There are no counters available. \n"); + exit(1); + } + + printf("There are %d counters in this system\n",num_hwcntrs); + + /************************************************************************** + * PAPI_start_counters initializes the PAPI library (if necessary) and * + * starts counting the events named in the events array. This function * + * implicitly stops and initializes any counters running as a result of * + * a previous call to PAPI_start_counters.
* + **************************************************************************/ + + if ( (retval = PAPI_start_counters(Events, NUM_EVENTS)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("\nCounter Started: \n"); + + /* Your code goes here*/ + computation_add(); + + + + + /********************************************************************** + * PAPI_read_counters reads the counter values into values array * + **********************************************************************/ + + if ( (retval=PAPI_read_counters(values, NUM_EVENTS)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("Read successfully\n"); + + + + printf("The total instructions executed for addition are %lld \n",values[0]); + printf("The total cycles used are %lld \n", values[1] ); + + printf("\nNow we try to use PAPI_accum to accumulate values\n"); + + /* Do some computation here */ + computation_add(); + + + /************************************************************************ + * What PAPI_accum_counters does is it adds the running counter values * + * to what is in the values array. The hardware counters are reset and * + * left running after the call.
* + ************************************************************************/ + + if ( (retval=PAPI_accum_counters(values, NUM_EVENTS)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("We did an additional %d times addition!\n", THRESHOLD); + printf("The total instructions executed for addition are %lld \n", + values[0] ); + printf("The total cycles used are %lld \n", values[1] ); + + /*********************************************************************** + * Stop counting events (this reads the counters as well as stops them) * + ***********************************************************************/ + + printf("\nNow we try to do some multiplications\n"); + computation_mult(); + + /******************* PAPI_stop_counters **********************************/ + if ((retval=PAPI_stop_counters(values, NUM_EVENTS)) != PAPI_OK) + ERROR_RETURN(retval); + + printf("The total instruction executed for multiplication are %lld \n", + values[0] ); + printf("The total cycles used are %lld \n", values[1] ); + exit(0); +} diff --git a/src/examples/locks_pthreads.c b/src/examples/locks_pthreads.c new file mode 100644 index 0000000..0c87477 --- /dev/null +++ b/src/examples/locks_pthreads.c @@ -0,0 +1,130 @@ +/**************************************************************************** + * This program shows how to use PAPI_register_thread, PAPI_lock, * + * PAPI_unlock, PAPI_set_thr_specific, PAPI_get_thr_specific. * + * Warning: Don't use PAPI_lock and PAPI_unlock on platforms on which the * + * locking mechanisms are not implemented.
* + ****************************************************************************/ + +#include +#include +#include +#include "papi.h" /* This needs to be included every time you use PAPI */ + +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +#define LOOPS 100000 +#define SLEEP_VALUE 20000 + +unsigned int count; /* unsigned: the repeated doubling below wraps around, which is undefined for signed int */ +int rank; + +void *Master(void *arg) +{ + int i, retval, tmp; + int *pointer, * pointer2; + + tmp = 20; + pointer = &tmp; + + /* register the thread */ + if ( (retval=PAPI_register_thread())!= PAPI_OK ) + ERROR_RETURN(retval); + + /* save the pointer for later use */ + if ( (retval=PAPI_set_thr_specific(1,pointer))!= PAPI_OK ) + ERROR_RETURN(retval); + /* change the value of tmp */ + tmp = 15; + + usleep(SLEEP_VALUE); + PAPI_lock(PAPI_USR1_LOCK); + /* Make sure Slaves are not sleeping */ + for (i = 0; i < LOOPS; i++) { + count = 2 * count - i; + } + PAPI_unlock(PAPI_USR1_LOCK); + + /* retrieve the pointer saved by PAPI_set_thr_specific */ + if ( (retval=PAPI_get_thr_specific(1, (void *)&pointer2)) != PAPI_OK ) + ERROR_RETURN(retval); + + /* the output value should be 15 */ + printf("Thread specific data is %d \n", *pointer2); + + pthread_exit(NULL); +} + +void *Slave(void *arg) +{ + int i; + + PAPI_lock(PAPI_USR2_LOCK); + PAPI_lock(PAPI_USR1_LOCK); + for (i = 0; i < LOOPS; i++) { + count += i; + } + PAPI_unlock(PAPI_USR1_LOCK); + PAPI_unlock(PAPI_USR2_LOCK); + pthread_exit(NULL); +} + + + +int main(int argc, char **argv) +{ + pthread_t master; + pthread_t slave1; + unsigned int result_m, result_s; + int rc, i, retval; + + /* Setup a random number so compilers can't optimize it out */ + count = (unsigned int) rand(); + result_m = count; + rank = 0; + + for (i = 0; i < LOOPS; i++) { + result_m = 2 * result_m - i; + } + result_s = result_m; + + for (i = 0; i < LOOPS; i++) { + result_s += i; + } + + if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) + { + printf("Library initialization error!
\n"); + exit(-1); + } + + if ((retval = PAPI_thread_init((unsigned long(*)(void))(pthread_self))) != PAPI_OK) + ERROR_RETURN(retval); + + if ((retval = PAPI_set_debug(PAPI_VERB_ECONT)) != PAPI_OK) + ERROR_RETURN(retval); + + PAPI_lock(PAPI_USR2_LOCK); + rc = pthread_create(&master, NULL, Master, NULL); + if (rc) { + retval = PAPI_ESYS; + ERROR_RETURN(retval); + } + rc = pthread_create(&slave1, NULL, Slave, NULL); + if (rc) { + retval = PAPI_ESYS; + ERROR_RETURN(retval); + } + pthread_join(master, NULL); + printf("Master: Expected: %u Recieved: %u\n", result_m, count); + if (result_m != count) + ERROR_RETURN(1); + PAPI_unlock(PAPI_USR2_LOCK); + + pthread_join(slave1, NULL); + printf("Slave: Expected: %u Recieved: %u\n", result_s, count); + + if (result_s != count) + ERROR_RETURN(1); + + exit(0); +} diff --git a/src/examples/multiplex.c b/src/examples/multiplex.c new file mode 100644 index 0000000..8ca8824 --- /dev/null +++ b/src/examples/multiplex.c @@ -0,0 +1,149 @@ +/**************************************************************************** + * Multiplexing allows more counters to be used than what is supported by * + * the platform, thus allowing a larger number of events to be counted * + * simultaneously. When a microprocessor has a very limited number of * + * counters that can be counted simultaneously, a large application with * + * many hours of run time may require days of profiling in order to gather * + * enough information to base a performance analysis. Multiplexing overcomes* + * this limitation by the usage of the counters over timesharing. * + * This is an example demonstrating how to use PAPI_set_multiplex to * + * convert a standard event set to a multiplexed event set.
* + ****************************************************************************/ +#include +#include +#include +#include "papi.h" + +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +#define NUM_ITERS 10000000 +#define MAX_TO_ADD 6 + +double c = 0.11; +void do_flops(int n) +{ + int i; + double a = 0.5; + double b = 6.2; + + for (i=0; i < n; i++) + c += a * b; + return; +} + +/* Tests that we can really multiplex a lot. */ +int multiplex(void) +{ + int retval, i, EventSet = PAPI_NULL, j = 0; + long long *values; + PAPI_event_info_t pset; + int events[MAX_TO_ADD], number; + + /* Initialize the library */ + retval = PAPI_library_init(PAPI_VER_CURRENT); + if (retval != PAPI_VER_CURRENT) + { + printf("Library initialization error! \n"); + exit(1); + } + + /* initialize multiplex support */ + retval = PAPI_multiplex_init(); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + + retval = PAPI_create_eventset(&EventSet); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + + /* convert the event set to a multiplex event set */ + retval = PAPI_set_multiplex(EventSet); + if (retval != PAPI_OK) + ERROR_RETURN(retval); +/* + retval = PAPI_add_event(EventSet, PAPI_TOT_INS); + if ((retval != PAPI_OK) && (retval != PAPI_ECNFLCT)) + ERROR_RETURN(retval); + printf("Adding %s\n", "PAPI_TOT_INS"); +*/ + + for (i = 0; i < PAPI_MAX_PRESET_EVENTS; i++) + { + retval = PAPI_get_event_info(i | PAPI_PRESET_MASK, &pset); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + + if ((pset.count) && (pset.event_code != PAPI_TOT_CYC)) + { + printf("Adding %s\n", pset.symbol); + + retval = PAPI_add_event(EventSet, pset.event_code); + if ((retval != PAPI_OK) && (retval != PAPI_ECNFLCT)) + ERROR_RETURN(retval); + + if (retval == PAPI_OK) + printf("Added %s\n", pset.symbol); + else + printf("Could not add %s due to resource limitation.\n", + pset.symbol); + + if (retval == PAPI_OK) + { + if (++j >= MAX_TO_ADD) + break; + } + } + } + +
values = (long long *) malloc(MAX_TO_ADD * sizeof(long long)); + if (values == NULL) + { + printf("Not enough memory available. \n"); + exit(1); + } + + if ((retval=PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + do_flops(NUM_ITERS); + + retval = PAPI_stop(EventSet, values); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + + /* list the events in the event set; number is passed in as the capacity of events[] and read back as the count */ + number=MAX_TO_ADD; + if ( (retval = PAPI_list_events(EventSet, events, &number)) != PAPI_OK) + ERROR_RETURN(retval); + + /* print the read result; only the first number entries of events[] were filled in */ + for (i = 0; i < number; i++) + { + retval = PAPI_get_event_info(events[i], &pset); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + printf("Event name: %s value: %lld \n", pset.symbol, values[i]); + } + + retval = PAPI_cleanup_eventset(EventSet); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + + retval = PAPI_destroy_eventset(&EventSet); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + + /* free the resources used by PAPI */ + PAPI_shutdown(); + free(values); + return (0); +} + +int main(int argc, char **argv) +{ + + printf("Using %d iterations\n\n", NUM_ITERS); + printf("Does PAPI_multiplex_init() handle lots of events?\n"); + multiplex(); + exit(0); +} diff --git a/src/examples/overflow_pthreads.c b/src/examples/overflow_pthreads.c new file mode 100644 index 0000000..84426e0 --- /dev/null +++ b/src/examples/overflow_pthreads.c @@ -0,0 +1,181 @@ +/* This file performs the following test: overflow dispatch with pthreads + + - This example tests the dispatch of overflow calls from PAPI. The event + set is counted in the default counting domain and default granularity, + depending on the platform. Usually this is the user domain + (PAPI_DOM_USER) and thread context (PAPI_GRN_THR).
+ + The Eventset contains: + + PAPI_TOT_INS + + PAPI_TOT_CYC (overflow monitor) + + Each thread will do the followings : + - enable overflow + - Start eventset 1 + - Do flops + - Stop eventset 1 + - disable overflow +*/ +#include +#include +#include +#include "papi.h" + +#define THRESHOLD 200000 +#define OVER_FMT "handler(%d ) Overflow at %p! bit=%#llx \n" +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + + +int total = 0; /* bumped by handler() with no synchronization, so treat it as approximate */ + +void do_flops(int n) +{ + int i; + double c = 0.11; + double a = 0.5; + double b = 6.2; + + for (i=0; i < n; i++) + c += a * b; +} + +/* overflow handler */ +void +handler(int EventSet, void *address, long long overflow_vector, void *context) +{ + fprintf(stderr, OVER_FMT, EventSet, address, overflow_vector); + total++; +} + +void *Thread(void *arg) +{ + int retval; + int EventSet1=PAPI_NULL; + long long values[2]; + long long elapsed_us, elapsed_cyc; + + fprintf(stderr,"Thread %lx running PAPI\n",PAPI_thread_id()); + + /* create the event set */ + if ( (retval = PAPI_create_eventset(&EventSet1))!=PAPI_OK) + ERROR_RETURN(retval); + + /* query whether the event exists */ + if ((retval=PAPI_query_event(PAPI_TOT_INS)) != PAPI_OK) + ERROR_RETURN(retval); + if ((retval=PAPI_query_event(PAPI_TOT_CYC)) != PAPI_OK) + ERROR_RETURN(retval); + + /* add events to the event set */ + if ( (retval = PAPI_add_event(EventSet1, PAPI_TOT_INS))!= PAPI_OK) + ERROR_RETURN(retval); + + if ( (retval = PAPI_add_event(EventSet1, PAPI_TOT_CYC)) != PAPI_OK) + ERROR_RETURN(retval); + + elapsed_us = PAPI_get_real_usec(); + + elapsed_cyc = PAPI_get_real_cyc(); + + retval = PAPI_overflow(EventSet1, PAPI_TOT_CYC, THRESHOLD, 0, handler); + if(retval !=PAPI_OK) + ERROR_RETURN(retval); + + /* start counting */ + if((retval = PAPI_start(EventSet1))!=PAPI_OK) + ERROR_RETURN(retval); + + do_flops(*(int *)arg); + + if ((retval = PAPI_stop(EventSet1, values))!=PAPI_OK) + ERROR_RETURN(retval); + + elapsed_us =
PAPI_get_real_usec() - elapsed_us; + + elapsed_cyc = PAPI_get_real_cyc() - elapsed_cyc; + + /* disable overflowing */ + retval = PAPI_overflow(EventSet1, PAPI_TOT_CYC, 0, 0, handler); + if(retval !=PAPI_OK) + ERROR_RETURN(retval); + + /* remove the events from the eventset */ + retval = PAPI_remove_event(EventSet1, PAPI_TOT_INS); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + + retval = PAPI_remove_event(EventSet1, PAPI_TOT_CYC); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + + printf("Thread %#lx PAPI_TOT_INS : \t%lld\n",PAPI_thread_id(), + values[0]); + printf(" PAPI_TOT_CYC: \t%lld\n", values[1]); + printf(" Real usec : \t%lld\n", elapsed_us); + printf(" Real cycles : \t%lld\n", elapsed_cyc); + + pthread_exit(NULL); +} + +int main(int argc, char **argv) +{ + pthread_t thread_one; + pthread_t thread_two; + int flops1, flops2; + int rc,retval; + pthread_attr_t attr; + long long elapsed_us, elapsed_cyc; + + + /* papi library initialization */ + if ((retval=PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) + { + printf("Library initialization error!
\n"); + exit(1); + } + + /* thread initialization */ + retval=PAPI_thread_init((unsigned long(*)(void))(pthread_self)); + if (retval != PAPI_OK) + ERROR_RETURN(retval); + + /* return the number of microseconds since some arbitrary starting point */ + elapsed_us = PAPI_get_real_usec(); + + /* return the number of cycles since some arbitrary starting point */ + elapsed_cyc = PAPI_get_real_cyc(); + + /* pthread attribution init */ + pthread_attr_init(&attr); + pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); + + /* create the first thread */ + flops1 = 1000000; + rc = pthread_create(&thread_one, &attr, Thread, (void *)&flops1); + if (rc) + ERROR_RETURN(rc); + + /* create the second thread */ + flops2 = 4000000; + rc = pthread_create(&thread_two, &attr, Thread, (void *)&flops2); + if (rc) + ERROR_RETURN(rc); + + /* wait for the threads to finish */ + pthread_attr_destroy(&attr); + pthread_join(thread_one, NULL); + pthread_join(thread_two, NULL); + + /* compute the elapsed cycles and microseconds */ + elapsed_cyc = PAPI_get_real_cyc() - elapsed_cyc; + + elapsed_us = PAPI_get_real_usec() - elapsed_us; + + printf("Master real usec : \t%lld\n", elapsed_us); + printf("Master real cycles : \t%lld\n", elapsed_cyc); + + /* clean up */ + PAPI_shutdown(); + exit(0); +} + diff --git a/src/examples/run_examples.sh b/src/examples/run_examples.sh new file mode 100755 index 0000000..0dfdc87 --- /dev/null +++ b/src/examples/run_examples.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +# File: run_example.sh +# CVS: $Id$ +# Author: Min Zhou +# min@cs.utk.edu + + +CTESTS=`find .
-perm -u+x -type f`; +ALLTESTS="$CTESTS"; +x=0; +CWD=`pwd` + +echo "Platform:" +uname -a + +echo "" +echo "The following test cases will be run:"; +echo $ALLTESTS; + +echo ""; +echo "Running C Example Programs"; +echo "" + +for i in $CTESTS; +do +if [ -x $i ]; then +if [ "$i" != "./run_examples.sh" ]; then +echo "Running $i: "; +./$i +fi; +fi; +echo ""; +done + diff --git a/src/examples/sprofile.c b/src/examples/sprofile.c new file mode 100644 index 0000000..b3f4a74 --- /dev/null +++ b/src/examples/sprofile.c @@ -0,0 +1,182 @@ +/* This program shows how to use PAPI_sprofil */ + +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" /* This needs to be included every time you use PAPI */ + +#define NUM_FLOPS 20000000 +#define NUM_ITERS 100000 +#define THRESHOLD 100000 +#define ERROR_RETURN(retval) { fprintf(stderr, "Error %d %s:line %d: \n", retval,__FILE__,__LINE__); exit(retval); } + +#if (defined(linux) && defined(__ia64__)) || (defined(_AIX)) +#define DO_FLOPS1 (caddr_t)(*(void **)do_flops1) +#define DO_FLOPS2 (caddr_t)(*(void **)do_flops2) +#else +#define DO_FLOPS1 (caddr_t)(do_flops1) +#define DO_FLOPS2 (caddr_t)(do_flops2) +#endif + +void do_flops2(int); +volatile double t1 = 0.8, t2 = 0.9; +void do_flops1(int n) +{ + int i; + double c = 22222.11; + + for (i = 0; i < n; i++) + c -= t1 * t2; +} + +void do_both(int n) +{ + int i; + const int flops2 = NUM_FLOPS / n; + const int flops1 = NUM_FLOPS / n; + + for (i = 0; i < n; i++) + { + do_flops1(flops1); + do_flops2(flops2); + } +} + +int main(int argc, char **argv) +{ + int i , PAPI_event; + int EventSet = PAPI_NULL; + unsigned short *profbuf; + unsigned short *profbuf2; + unsigned short *profbuf3; + unsigned long length; + caddr_t start, end; + long long values[2]; + const PAPI_exe_info_t *prginfo = NULL; + PAPI_sprofil_t sprof[3]; + int retval; + + /* initialization */ + if ((retval = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) + { + printf("Library
initialization error! \n"); + exit(1); + } + + if ((prginfo = PAPI_get_executable_info()) == NULL) + ERROR_RETURN(1); + + start = prginfo->address_info.text_start; + end = prginfo->address_info.text_end; + length = (end - start)/sizeof(unsigned short) * sizeof(unsigned short); + printf("start= %p end =%p \n", start, end); + + profbuf = (unsigned short *) malloc(length); + if (profbuf == NULL) + ERROR_RETURN(PAPI_ESYS); + + memset(profbuf, 0x00, length ); + + profbuf2 = (unsigned short *) malloc(length); + if (profbuf2 == NULL) + ERROR_RETURN(PAPI_ESYS); + + memset(profbuf2, 0x00, length ); + + profbuf3 = (unsigned short *) malloc(1 * sizeof(unsigned short)); + if (profbuf3 == NULL) + ERROR_RETURN(PAPI_ESYS); + + memset(profbuf3, 0x00, 1 * sizeof(unsigned short)); + + /* First half */ + sprof[0].pr_base = profbuf; + sprof[0].pr_size = length / 2; + sprof[0].pr_off = DO_FLOPS2; + fprintf(stderr, "do_flops is at %p %lx\n", &do_flops2, (unsigned long) sprof[0].pr_off); /* pr_off is an address, not a numeric string */ + + sprof[0].pr_scale = 65536; /* constant needed by PAPI_sprofil */ + /* Second half */ + sprof[1].pr_base = profbuf2; + sprof[1].pr_size = length / 2; + sprof[1].pr_off = DO_FLOPS1; + fprintf(stderr, "do_flops1 is at %p %lx\n", &do_flops1, (unsigned long) sprof[1].pr_off); + sprof[1].pr_scale = 65536; /* constant needed by PAPI_sprofil */ + + /* Overflow bin */ + sprof[2].pr_base = profbuf3; + sprof[2].pr_size = 1; + sprof[2].pr_off = 0; + sprof[2].pr_scale = 0x2; /* constant needed by PAPI_sprofil */ + + /* Creating the eventset */ + if ( (retval = PAPI_create_eventset(&EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + PAPI_event = PAPI_TOT_CYC; + /* Add Total Cycles (the profiled event) to our EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_event)) != PAPI_OK) + ERROR_RETURN(retval); + + /* Add Total Instructions Executed to our EventSet */ + if ( (retval = PAPI_add_event(EventSet, PAPI_TOT_INS)) != PAPI_OK) + ERROR_RETURN(retval); + + /* set profile flag */ + if ((retval = PAPI_sprofil(sprof,
3, EventSet, PAPI_event, THRESHOLD, + PAPI_PROFIL_POSIX)) != PAPI_OK) + ERROR_RETURN(retval); + + if ((retval = PAPI_start(EventSet)) != PAPI_OK) + ERROR_RETURN(retval); + + do_both(NUM_ITERS); + + if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK) + ERROR_RETURN(retval); + + /* to clear the profile flag before removing the events */ + if ((retval = PAPI_sprofil(sprof, 3, EventSet, PAPI_event, 0, + PAPI_PROFIL_POSIX)) != PAPI_OK) + ERROR_RETURN(retval); + + /* free the resources held by PAPI */ + PAPI_shutdown(); + + printf("Test case: PAPI_sprofil()\n"); + printf("---------Buffer 1--------\n"); + for (i = 0; i < length / 2; i++) + { + if (profbuf[i]) + printf("%#lx\t%d\n", (unsigned long) DO_FLOPS2 + 2 * i, profbuf[i]); + } + printf("---------Buffer 2--------\n"); + for (i = 0; i < length / 2; i++) + { + if (profbuf2[i]) + printf("%#lx\t%d\n", (unsigned long) DO_FLOPS1 + 2 * i, profbuf2[i]); + } + printf("-------------------------\n"); + printf("%u samples fell outside the regions.\n", *profbuf3); + + exit(0); +} + +/* Declare a and b to be volatile. + This is to try to keep the + compiler from optimizing the loop */ +volatile double a = 0.5, b = 2.2; +void do_flops2(int n) +{ + int i; + double c = 0.11; + + for (i = 0; i < n; i++) + c += a * b; +} + diff --git a/src/extras.c b/src/extras.c new file mode 100644 index 0000000..815eede --- /dev/null +++ b/src/extras.c @@ -0,0 +1,519 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: extras.c +* Author: Philip Mucci +* mucci@cs.utk.edu +* Mods: dan terpstra +* terpstra@cs.utk.edu +* Mods: Haihang You +* you@cs.utk.edu +* Mods: Kevin London +* london@cs.utk.edu +* Mods: Maynard Johnson +* maynardj@us.ibm.com +*/ + +/* This file contains portable routines to do things that we wish the +vendors did in the kernel extensions or performance libraries.
*/ + +#include "papi.h" +#include "papi_internal.h" +#include "papi_vector.h" +#include "papi_memory.h" +#include "extras.h" +#include "threads.h" + +#if (!defined(HAVE_FFSLL) || defined(__bgp__)) +int ffsll( long long lli ); +#else +#include +#endif + +/****************/ +/* BEGIN LOCALS */ +/****************/ + +static unsigned int _rnum = DEADBEEF; + +/**************/ +/* END LOCALS */ +/**************/ + +inline_static unsigned short +random_ushort( void ) +{ + return ( unsigned short ) ( _rnum = 1664525 * _rnum + 1013904223 ); +} + + +/* compute the amount by which to increment the bucket. + value is the current value of the bucket + this routine is used by all three profiling cases + it is inlined for speed +*/ +inline_static int +profil_increment( long long value, + int flags, long long excess, long long threshold ) +{ + int increment = 1; + + if ( flags == PAPI_PROFIL_POSIX ) { + return ( 1 ); + } + + if ( flags & PAPI_PROFIL_RANDOM ) { + if ( random_ushort( ) <= ( USHRT_MAX / 4 ) ) + return ( 0 ); + } + + if ( flags & PAPI_PROFIL_COMPRESS ) { + /* We're likely to ignore the sample if buf[address] gets big. */ + if ( random_ushort( ) < value ) { + return ( 0 ); + } + } + + if ( flags & PAPI_PROFIL_WEIGHTED ) { /* Increment is between 1 and 255 */ + if ( excess <= ( long long ) 1 ) + increment = 1; + else if ( excess > threshold ) + increment = 255; + else { + threshold = threshold / ( long long ) 255; + increment = ( int ) ( excess / threshold ); + } + } + return ( increment ); +} + + +static void +posix_profil( caddr_t address, PAPI_sprofil_t * prof, + int flags, long long excess, long long threshold ) +{ + unsigned short *buf16; + unsigned int *buf32; + unsigned long long *buf64; + unsigned long indx; + unsigned long long lloffset; + + /* SPECIAL CASE: if starting address is 0 and scale factor is 2 + then all counts go into first bin. 
+ */ + if ( ( prof->pr_off == 0 ) && ( prof->pr_scale == 0x2 ) ) + indx = 0; + else { + /* compute the profile buffer offset by: + - subtracting the profiling base address from the pc address + - multiplying by the scaling factor + - dividing by max scale (65536, or 2^^16) + - dividing by implicit 2 (2^^1 for a total of 2^^17), for even addresses + NOTE: 131072 is a valid scale value. It produces byte resolution of addresses + */ + lloffset = + ( unsigned long long ) ( ( address - prof->pr_off ) * + prof->pr_scale ); + indx = ( unsigned long ) ( lloffset >> 17 ); + } + + /* confirm addresses within specified range */ + if ( address >= prof->pr_off ) { + /* test first for 16-bit buckets; this should be the fast case */ + if ( flags & PAPI_PROFIL_BUCKET_16 ) { + if ( ( indx * sizeof ( short ) ) < prof->pr_size ) { + buf16 = (unsigned short *) prof->pr_base; + buf16[indx] = + ( unsigned short ) ( ( unsigned short ) buf16[indx] + + profil_increment( buf16[indx], flags, + excess, + threshold ) ); + PRFDBG( "posix_profil_16() bucket %lu = %u\n", indx, + buf16[indx] ); + } + } + /* next, look for the 32-bit case */ + else if ( flags & PAPI_PROFIL_BUCKET_32 ) { + if ( ( indx * sizeof ( int ) ) < prof->pr_size ) { + buf32 = (unsigned int *) prof->pr_base; + buf32[indx] = ( unsigned int ) buf32[indx] + + ( unsigned int ) profil_increment( buf32[indx], flags, + excess, threshold ); + PRFDBG( "posix_profil_32() bucket %lu = %u\n", indx, + buf32[indx] ); + } + } + /* finally, fall through to the 64-bit case */ + else { + if ( ( indx * sizeof ( long long ) ) < prof->pr_size ) { + buf64 = (unsigned long long *) prof->pr_base; + buf64[indx] = ( unsigned long long ) buf64[indx] + + ( unsigned long long ) profil_increment( ( long long ) + buf64[indx], flags, + excess, + threshold ); + PRFDBG( "posix_profil_64() bucket %lu = %lld\n", indx, + buf64[indx] ); + } + } + } +} + +void +_papi_hwi_dispatch_profile( EventSetInfo_t * ESI, caddr_t pc, + long long over, int profile_index ) +{ + 
EventSetProfileInfo_t *profile = &ESI->profile; + PAPI_sprofil_t *sprof; + caddr_t offset = 0; + caddr_t best_offset = 0; + int count; + int best_index = -1; + int i; + + PRFDBG( "handled IP %p\n", pc ); + + sprof = profile->prof[profile_index]; + count = profile->count[profile_index]; + + for ( i = 0; i < count; i++ ) { + offset = sprof[i].pr_off; + if ( ( offset < pc ) && ( offset > best_offset ) ) { + best_index = i; + best_offset = offset; + } + } + + if ( best_index == -1 ) + best_index = 0; + + posix_profil( pc, &sprof[best_index], profile->flags, over, + profile->threshold[profile_index] ); +} + +/* if isHardware is true, then the processor is using hardware overflow, + else it is using software overflow. Use this parameter instead of + _papi_hwi_system_info.supports_hw_overflow is in CRAY some processors + may use hardware overflow, some may use software overflow. + + overflow_bit: if the component can get the overflow bit when overflow + occurs, then this should be passed by the component; + + If both genOverflowBit and isHardwareSupport are true, that means + the component doesn't know how to get the overflow bit from the + kernel directly, so we generate the overflow bit in this function + since this function can access the ESI->overflow struct; + (The component can only set genOverflowBit parameter to true if the + hardware doesn't support multiple hardware overflow. If the + component supports multiple hardware overflow and you don't know how + to get the overflow bit, then I don't know how to deal with this + situation). 
+*/ + +int +_papi_hwi_dispatch_overflow_signal( void *papiContext, caddr_t address, + int *isHardware, long long overflow_bit, + int genOverflowBit, ThreadInfo_t ** t, + int cidx ) +{ + int retval, event_counter, i, overflow_flag, pos; + int papi_index, j; + int profile_index = 0; + long long overflow_vector; + + long long temp[_papi_hwd[cidx]->cmp_info.num_cntrs], over; + long long latest = 0; + ThreadInfo_t *thread; + EventSetInfo_t *ESI; + _papi_hwi_context_t *ctx = ( _papi_hwi_context_t * ) papiContext; + + OVFDBG( "enter\n" ); + + if ( *t ) + thread = *t; + else + *t = thread = _papi_hwi_lookup_thread( 0 ); + + if ( thread != NULL ) { + ESI = thread->running_eventset[cidx]; + + if ( ( ESI == NULL ) || ( ( ESI->state & PAPI_OVERFLOWING ) == 0 ) ) { + OVFDBG( "Either no eventset or eventset not set to overflow.\n" ); +#ifdef ANY_THREAD_GETS_SIGNAL + _papi_hwi_broadcast_signal( thread->tid ); +#endif + return ( PAPI_OK ); + } + + if ( ESI->CmpIdx != cidx ) + return ( PAPI_ENOCMP ); + + if ( ESI->master != thread ) { + PAPIERROR + ( "eventset->thread %#lx vs. 
current thread %#lx mismatch", + ESI->master, thread ); + return ( PAPI_EBUG ); + } + + if ( isHardware ) { + if ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) { + ESI->state |= PAPI_PAUSED; + *isHardware = 1; + } else + *isHardware = 0; + } + /* Get the latest counter value */ + event_counter = ESI->overflow.event_counter; + + overflow_flag = 0; + overflow_vector = 0; + + if ( !( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) { + retval = _papi_hwi_read( thread->context[cidx], ESI, ESI->sw_stop ); + if ( retval < PAPI_OK ) + return ( retval ); + for ( i = 0; i < event_counter; i++ ) { + papi_index = ESI->overflow.EventIndex[i]; + latest = ESI->sw_stop[papi_index]; + temp[i] = -1; + + if ( latest >= ( long long ) ESI->overflow.deadline[i] ) { + OVFDBG + ( "dispatch_overflow() latest %lld, deadline %lld, threshold %d\n", + latest, ESI->overflow.deadline[i], + ESI->overflow.threshold[i] ); + pos = ESI->EventInfoArray[papi_index].pos[0]; + overflow_vector ^= ( long long ) 1 << pos; + temp[i] = latest - ESI->overflow.deadline[i]; + overflow_flag = 1; + /* adjust the deadline */ + ESI->overflow.deadline[i] = + latest + ESI->overflow.threshold[i]; + } + } + } else if ( genOverflowBit ) { + /* we had assumed the overflow event can't be derived event */ + papi_index = ESI->overflow.EventIndex[0]; + + /* suppose the pos is the same as the counter number + * (this is not true in Itanium, but itanium doesn't + * need us to generate the overflow bit + */ + pos = ESI->EventInfoArray[papi_index].pos[0]; + overflow_vector = ( long long ) 1 << pos; + } else + overflow_vector = overflow_bit; + + if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) || overflow_flag ) { + if ( ESI->state & PAPI_PROFILING ) { + int k = 0; + while ( overflow_vector ) { + i = ffsll( overflow_vector ) - 1; + for ( j = 0; j < event_counter; j++ ) { + papi_index = ESI->overflow.EventIndex[j]; + /* This loop is here ONLY because Pentium 4 can have tagged * + * events that contain more than one counter 
without being * + * derived. You've gotta scan all terms to make sure you * + * find the one to profile. */ + for ( k = 0, pos = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT && pos >= 0; + k++ ) { + pos = ESI->EventInfoArray[papi_index].pos[k]; + if ( i == pos ) { + profile_index = j; + goto foundit; + } + } + } + if ( j == event_counter ) { + PAPIERROR + ( "BUG! overflow_vector is 0, dropping interrupt" ); + return ( PAPI_EBUG ); + } + + foundit: + if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) + over = 0; + else + over = temp[profile_index]; + _papi_hwi_dispatch_profile( ESI, address, over, + profile_index ); + overflow_vector ^= ( long long ) 1 << i; + } + /* do not use overflow_vector after this place */ + } else { + ESI->overflow.handler( ESI->EventSetIndex, ( void * ) address, + overflow_vector, ctx->ucontext ); + } + } + ESI->state &= ~( PAPI_PAUSED ); + } +#ifdef ANY_THREAD_GETS_SIGNAL + else { + OVFDBG( "I haven't been noticed by PAPI before\n" ); + _papi_hwi_broadcast_signal( ( *_papi_hwi_thread_id_fn ) ( ) ); + } +#endif + return ( PAPI_OK ); +} + +#include +#include +#include + +int _papi_hwi_using_signal[PAPI_NSIG]; + +int +_papi_hwi_start_timer( int timer, int signal, int ns ) +{ + struct itimerval value; + int us = ns / 1000; + + if ( us == 0 ) + us = 1; + +#ifdef ANY_THREAD_GETS_SIGNAL + _papi_hwi_lock( INTERNAL_LOCK ); + if ( ( _papi_hwi_using_signal[signal] - 1 ) ) { + INTDBG( "itimer already installed\n" ); + _papi_hwi_unlock( INTERNAL_LOCK ); + return ( PAPI_OK ); + } + _papi_hwi_unlock( INTERNAL_LOCK ); +#else + ( void ) signal; /*unused */ +#endif + + value.it_interval.tv_sec = 0; + value.it_interval.tv_usec = us; + value.it_value.tv_sec = 0; + value.it_value.tv_usec = us; + + INTDBG( "Installing itimer %d, with %d us interval\n", timer, us ); + if ( setitimer( timer, &value, NULL ) < 0 ) { + PAPIERROR( "setitimer errno %d", errno ); + return ( PAPI_ESYS ); + } + + return ( PAPI_OK ); +} + +int +_papi_hwi_start_signal( int signal, int 
need_context, int cidx ) +{ + struct sigaction action; + + _papi_hwi_lock( INTERNAL_LOCK ); + _papi_hwi_using_signal[signal]++; + if ( _papi_hwi_using_signal[signal] - 1 ) { + INTDBG( "_papi_hwi_using_signal is now %d\n", + _papi_hwi_using_signal[signal] ); + _papi_hwi_unlock( INTERNAL_LOCK ); + return ( PAPI_OK ); + } + + memset( &action, 0x00, sizeof ( struct sigaction ) ); + action.sa_flags = SA_RESTART; + action.sa_sigaction = + ( void ( * )( int, siginfo_t *, void * ) ) _papi_hwd[cidx]-> + dispatch_timer; + if ( need_context ) +#if (defined(_BGL) /*|| defined (__bgp__)*/) + action.sa_flags |= SIGPWR; +#else + action.sa_flags |= SA_SIGINFO; +#endif + + INTDBG( "installing signal handler\n" ); + if ( sigaction( signal, &action, NULL ) < 0 ) { + PAPIERROR( "sigaction errno %d", errno ); + _papi_hwi_unlock( INTERNAL_LOCK ); + return ( PAPI_ESYS ); + } + + INTDBG( "_papi_hwi_using_signal[%d] is now %d.\n", signal, + _papi_hwi_using_signal[signal] ); + _papi_hwi_unlock( INTERNAL_LOCK ); + + return ( PAPI_OK ); +} + +int +_papi_hwi_stop_signal( int signal ) +{ + _papi_hwi_lock( INTERNAL_LOCK ); + if ( --_papi_hwi_using_signal[signal] == 0 ) { + INTDBG( "removing signal handler\n" ); + if ( sigaction( signal, NULL, NULL ) == -1 ) { + PAPIERROR( "sigaction errno %d", errno ); + _papi_hwi_unlock( INTERNAL_LOCK ); + return ( PAPI_ESYS ); + } + } + + INTDBG( "_papi_hwi_using_signal[%d] is now %d\n", signal, + _papi_hwi_using_signal[signal] ); + _papi_hwi_unlock( INTERNAL_LOCK ); + + return ( PAPI_OK ); +} + +int +_papi_hwi_stop_timer( int timer, int signal ) +{ + struct itimerval value; + +#ifdef ANY_THREAD_GETS_SIGNAL + _papi_hwi_lock( INTERNAL_LOCK ); + if ( _papi_hwi_using_signal[signal] > 1 ) { + INTDBG( "itimer in use by another thread\n" ); + _papi_hwi_unlock( INTERNAL_LOCK ); + return ( PAPI_OK ); + } + _papi_hwi_unlock( INTERNAL_LOCK ); +#else + ( void ) signal; /*unused */ +#endif + + value.it_interval.tv_sec = 0; + value.it_interval.tv_usec = 0; + 
/* Find the first (least significant) set bit in a long long.
   Returns the 1-based position of that bit, or 0 when no bit is set.
   The scan is done on the unsigned representation so that negative inputs
   never hit a signed shift. */
int
ffsll( long long lli )
{
	unsigned long long bits = ( unsigned long long ) lli;
	int position = 1;

	if ( bits == 0 )
		return 0;

	while ( ( bits & 1ULL ) == 0 ) {
		bits >>= 1;
		position++;
	}

	return position;
}
{ ; } diff --git a/src/freebsd-memory.c b/src/freebsd-memory.c new file mode 100644 index 0000000..bbd6601 --- /dev/null +++ b/src/freebsd-memory.c @@ -0,0 +1,60 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: freebsd-memory.c +* Author: Harald Servat +* redcrash@gmail.com +* Mod: James Ralph +* ralph@cs.utk.edu +*/ + +#include "papi.h" +#include "papi_internal.h" + +#include "x86_cpuid_info.h" + +#define UNREFERENCED(x) (void)x + + +#if defined(__i386__)||defined(__x86_64__) +static int +x86_get_memory_info( PAPI_hw_info_t *hw_info ) +{ + int retval = PAPI_OK; + + switch ( hw_info->vendor ) { + case PAPI_VENDOR_AMD: + case PAPI_VENDOR_INTEL: + retval = _x86_cache_info( &hw_info->mem_hierarchy ); + break; + default: + PAPIERROR( "Unknown vendor in memory information call for x86." ); + return PAPI_ENOIMPL; + } + return retval; +} +#endif + + +int +_freebsd_get_memory_info( PAPI_hw_info_t *hw_info, int id) +{ + UNREFERENCED(id); + UNREFERENCED(hw_info); + +#if defined(__i386__)||defined(__x86_64__) + x86_get_memory_info( hw_info ); +#endif + + return PAPI_ENOIMPL; +} + +int _papi_freebsd_get_dmem_info(PAPI_dmem_info_t *d) +{ + /* TODO */ + d->pagesize = getpagesize(); + return PAPI_OK; +} + diff --git a/src/freebsd-memory.h b/src/freebsd-memory.h new file mode 100644 index 0000000..1defbc0 --- /dev/null +++ b/src/freebsd-memory.h @@ -0,0 +1,3 @@ +int _freebsd_get_memory_info( PAPI_hw_info_t *hw_info, int id); +int _papi_freebsd_get_dmem_info(PAPI_dmem_info_t *d); + diff --git a/src/freebsd.c b/src/freebsd.c new file mode 100644 index 0000000..08288ca --- /dev/null +++ b/src/freebsd.c @@ -0,0 +1,983 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: freebsd.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include +#include +#include +#include + +#include "papi.h" + +#include "papi_internal.h" + +#include "papi_lock.h" +#include 
/* Debugging aid: in DEBUG builds, print `string`, then read PMC `id` and
   report either its value or the read failure on stderr, tagged with the
   caller-supplied counter name, function, file and line.
   In non-DEBUG builds this is a no-op. */
static void show_counter(char *string, int id, char *name,
			const char *function, char *file, int line)
{
#if !defined(DEBUG)
	(void)string;
	(void)id;
	(void)name;
	(void)function;
	(void)file;
	(void)line;
#else
	pmc_value_t current;
	int status;

	status = pmc_read (id, &current);

	fprintf(stderr,"%s\n",string);

	if (status >= 0) {
		fprintf (stderr, "DEBUG: Read counter %s (ID: %08x) - "
		                 "value %llu on routine %s (file: %s, line: %d)\n",
		                 name, id, (long long unsigned int)current,
		                 function, file, line);
		return;
	}

	fprintf (stderr, "DEBUG: Unable to read counter %s (ID: %08x) "
	                 "on routine %s (file: %s, line: %d)\n",
	                 name, id, function, file, line);
#endif
}
+ */ +int init_mdi(void) +{ + const struct pmc_cpuinfo *info; + + SUBDBG("Entering\n"); + + /* Initialize PMC library */ + if (pmc_init() < 0) + return PAPI_ESYS; + + if (pmc_cpuinfo (&info) != 0) + return PAPI_ESYS; + + if (info != NULL) + { + /* Get CPU clock rate from HW.CLOCKRATE sysctl value, and + MODEL from HW.MODEL */ + int mib[5]; + size_t len; + int hw_clockrate; + char hw_model[PAPI_MAX_STR_LEN]; + +#if !defined(__i386__) && !defined(__amd64__) + Context.use_rdtsc = FALSE; +#else + /* Ok, I386s/AMD64s can use RDTSC. But be careful, if the cpufreq + module is loaded, then CPU frequency can vary and this method + does not work properly! We'll use use_rdtsc to know if this + method is available */ + len = 5; + Context.use_rdtsc = sysctlnametomib ("dev.cpufreq.0.%driver", mib, &len) == -1; +#endif + + len = 3; + if (sysctlnametomib ("hw.clockrate", mib, &len) == -1) + return PAPI_ESYS; + len = sizeof(hw_clockrate); + if (sysctl (mib, 2, &hw_clockrate, &len, NULL, 0) == -1) + return PAPI_ESYS; + + len = 3; + if (sysctlnametomib ("hw.model", mib, &len) == -1) + return PAPI_ESYS; + len = PAPI_MAX_STR_LEN; + if (sysctl (mib, 2, &hw_model, &len, NULL, 0) == -1) + return PAPI_ESYS; + + /*strcpy (_papi_hwi_system_info.hw_info.vendor_string, pmc_name_of_cputype(info->pm_cputype));*/ + sprintf (_papi_hwi_system_info.hw_info.vendor_string, "%s (TSC:%c)", pmc_name_of_cputype(info->pm_cputype), Context.use_rdtsc?'Y':'N'); + strcpy (_papi_hwi_system_info.hw_info.model_string, hw_model); + _papi_hwi_system_info.hw_info.mhz = (float) hw_clockrate; + _papi_hwi_system_info.hw_info.cpu_max_mhz = hw_clockrate; + _papi_hwi_system_info.hw_info.cpu_min_mhz = hw_clockrate; + _papi_hwi_system_info.hw_info.ncpu = info->pm_ncpu; + _papi_hwi_system_info.hw_info.nnodes = 1; + _papi_hwi_system_info.hw_info.totalcpus = info->pm_ncpu; + /* Right now, PMC states that TSC is an additional counter. 
However + it's only available as a system-wide counter and this requires + root access */ + _papi_freebsd_vector.cmp_info.num_cntrs = info->pm_npmc - 1; + + if ( strstr(pmc_name_of_cputype(info->pm_cputype), "INTEL")) + _papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_INTEL; + else if ( strstr(pmc_name_of_cputype(info->pm_cputype), "AMD")) + _papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_AMD; + else + fprintf(stderr,"We didn't actually find a supported vendor...\n\n\n"); + } + else + return PAPI_ESYS; + + return 1; +} + + +int init_presets(int cidx) +{ + const struct pmc_cpuinfo *info; + + SUBDBG("Entering\n"); + + if (pmc_cpuinfo (&info) != 0) + return PAPI_ESYS; + + init_freebsd_libpmc_mappings(); + + if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_P6") == 0) + Context.CPUtype = CPU_P6; + + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_PII") == 0) + Context.CPUtype = CPU_P6_2; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_PIII") == 0) + Context.CPUtype = CPU_P6_3; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_CL") == 0) + Context.CPUtype = CPU_P6_C; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_PM") == 0) + Context.CPUtype = CPU_P6_M; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "AMD_K7") == 0) + Context.CPUtype = CPU_K7; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "AMD_K8") == 0) + Context.CPUtype = CPU_K8; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_PIV") == 0) + Context.CPUtype = CPU_P4; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_ATOM") == 0) + Context.CPUtype = CPU_ATOM; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_CORE") == 0) + Context.CPUtype = CPU_CORE; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_CORE2") == 0) + Context.CPUtype = CPU_CORE2; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_CORE2EXTREME") == 0) + Context.CPUtype = CPU_CORE2EXTREME; + else 
if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_COREI7") == 0) + Context.CPUtype = CPU_COREI7; + else if (strcmp(pmc_name_of_cputype(info->pm_cputype), "INTEL_WESTMERE") == 0) + Context.CPUtype = CPU_COREWESTMERE; + else + /* Unknown processor! */ + Context.CPUtype = CPU_UNKNOWN; + + + _papi_freebsd_vector.cmp_info.num_native_events = freebsd_number_of_events (Context.CPUtype); + _papi_freebsd_vector.cmp_info.attach = 0; + + _papi_load_preset_table((char *)pmc_name_of_cputype(info->pm_cputype), + 0,cidx); + + return 0; +} + +/* + * Component setup and shutdown + */ + +/* Initialize hardware counters, setup the function vector table + * and get hardware information, this routine is called when the + * PAPI process is initialized (IE PAPI_library_init) + */ +int _papi_freebsd_init_component(int cidx) +{ + (void)cidx; + + int retval; + + SUBDBG("Entering\n"); + + /* Internal function, doesn't necessarily need to be a function */ + retval=init_presets(cidx); + + return retval; +} + + + + +/* + * This is called whenever a thread is initialized + */ +int _papi_freebsd_init_thread(hwd_context_t *ctx) +{ + (void)ctx; + SUBDBG("Entering\n"); + return PAPI_OK; +} + +int _papi_freebsd_shutdown_thread(hwd_context_t *ctx) +{ + (void)ctx; + SUBDBG("Entering\n"); + return PAPI_OK; +} + +int _papi_freebsd_shutdown_component(void) +{ + SUBDBG("Entering\n"); + return PAPI_OK; +} + + +/* + * Control of counters (Reading/Writing/Starting/Stopping/Setup) + * functions + */ +int _papi_freebsd_init_control_state(hwd_control_state_t *ptr) +{ + /* We will default to gather counters in USER|KERNEL mode */ + SUBDBG("Entering\n"); + ptr->hwc_domain = PAPI_DOM_USER|PAPI_DOM_KERNEL; + ptr->pmcs = NULL; + ptr->counters = NULL; + ptr->n_counters = 0; + return PAPI_OK; +} + +int _papi_freebsd_update_control_state(hwd_control_state_t *ptr, NativeInfo_t *native, int count, hwd_context_t *ctx) +{ + char name[1024]; + int i; + int res; + (void)ctx; + + SUBDBG("Entering\n"); + + /* We're going 
to store which counters are being used in this EventSet. + As this ptr structure can be reused within many PAPI_add_event calls, + and domain can change we will reconstruct the table of counters + (ptr->counters) everytime where here. + */ + if (ptr->counters != NULL && ptr->n_counters > 0) + { + for (i = 0; i < ptr->n_counters; i++) + if (ptr->counters[i] != NULL) + free (ptr->counters[i]); + free (ptr->counters); + } + if (ptr->pmcs != NULL) + free (ptr->pmcs); + if (ptr->values != NULL) + free (ptr->values); + if (ptr->caps != NULL) + free (ptr->caps); + + ptr->n_counters = count; + ptr->pmcs = (pmc_id_t*) malloc (sizeof(pmc_id_t)*count); + ptr->caps = (uint32_t*) malloc (sizeof(uint32_t)*count); + ptr->values = (pmc_value_t*) malloc (sizeof(pmc_value_t)*count); + ptr->counters = (char **) malloc (sizeof(char*)*count); + for (i = 0; i < count; i++) + ptr->counters[i] = NULL; + + for (i = 0; i < count; i++) + { + res = _papi_freebsd_ntv_code_to_name (native[i].ni_event, name, sizeof(name)); + if (res != PAPI_OK) + return res; + + native[i].ni_position = i; + + /* Domains can be applied to canonical events in libpmc (not "generic") */ + if (Context.CPUtype != CPU_UNKNOWN) + { + if (ptr->hwc_domain == (PAPI_DOM_USER|PAPI_DOM_KERNEL)) + { + /* PMC defaults domain to OS & User. So simply copy the name of the counter */ + ptr->counters[i] = strdup (name); + if (ptr->counters[i] == NULL) + return PAPI_ESYS; + } + else if (ptr->hwc_domain == PAPI_DOM_USER) + { + /* This is user-domain case. Just add unitmask=usr */ + ptr->counters[i] = malloc ((strlen(name)+strlen(",usr")+1)*sizeof(char)); + if (ptr->counters[i] == NULL) + return PAPI_ESYS; + sprintf (ptr->counters[i], "%s,usr", name); + } + else /* if (ptr->hwc_domain == PAPI_DOM_KERNEL) */ + { + /* This is the last case. 
Just add unitmask=os */ + ptr->counters[i] = malloc ((strlen(name)+strlen(",os")+1)*sizeof(char)); + if (ptr->counters[i] == NULL) + return PAPI_ESYS; + sprintf (ptr->counters[i], "%s,os", name); + } + } + else + { + /* PMC defaults domain to OS & User. So simply copy the name of the counter */ + ptr->counters[i] = strdup (name); + if (ptr->counters[i] == NULL) + return PAPI_ESYS; + } + } + + return PAPI_OK; +} + +int _papi_freebsd_start(hwd_context_t *ctx, hwd_control_state_t *ctrl) +{ + int i, ret; + (void)ctx; + + SUBDBG("Entering\n"); + + for (i = 0; i < ctrl->n_counters; i++) + { + if ((ret = pmc_allocate (ctrl->counters[i], PMC_MODE_TC, 0, PMC_CPU_ANY, &(ctrl->pmcs[i]))) < 0) + { +#if defined(DEBUG) + /* This shouldn't happen, it's tested previously on _papi_freebsd_allocate_registers */ + fprintf (stderr, "DEBUG: %s FAILED to allocate '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + if ((ret = pmc_capabilities (ctrl->pmcs[i],&(ctrl->caps[i]))) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to get capabilites for '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + ctrl->caps[i] = 0; + } +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s got counter '%s' is %swrittable! 
[%d of %d]\n", FUNC, ctrl->counters[i], (ctrl->caps[i]&PMC_CAP_WRITE)?"":"NOT", i+1, ctrl->n_counters); +#endif + if ((ret = pmc_start (ctrl->pmcs[i])) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to start '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + } + return PAPI_OK; +} + +int _papi_freebsd_read(hwd_context_t *ctx, hwd_control_state_t *ctrl, long long **events, int flags) +{ + int i, ret; + (void)ctx; + (void)flags; + + SUBDBG("Entering\n"); + + for (i = 0; i < ctrl->n_counters; i++) + if ((ret = pmc_read (ctrl->pmcs[i], &(ctrl->values[i]))) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to read '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + *events = (long long *)ctrl->values; + +#if defined(DEBUG) + for (i = 0; i < ctrl->n_counters; i++) + fprintf (stderr, "DEBUG: %s counter '%s' has value %lld\n", + FUNC, ctrl->counters[i], (long long)ctrl->values[i]); +#endif + return PAPI_OK; +} + +int _papi_freebsd_stop(hwd_context_t *ctx, hwd_control_state_t *ctrl) +{ + int i, ret; + (void)ctx; + + SUBDBG("Entering\n"); + + for (i = 0; i < ctrl->n_counters; i++) + { + if ((ret = pmc_stop (ctrl->pmcs[i])) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to stop '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + if ((ret = pmc_release (ctrl->pmcs[i])) < 0) + { +#if defined(DEBUG) + /* This shouldn't happen, it's tested previously on _papi_freebsd_allocate_registers */ + fprintf (stderr, "DEBUG: %s FAILED to release '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + } + return PAPI_OK; +} + +int _papi_freebsd_reset(hwd_context_t *ctx, hwd_control_state_t *ctrl) +{ + int i, ret; + (void)ctx; + + SUBDBG("Entering\n"); + + for (i = 0; i < 
ctrl->n_counters; i++) + { + /* Can we write on the counters? */ + if (ctrl->caps[i] & PMC_CAP_WRITE) + { + show_counter("DEBUG: _papi_freebsd_reset is about " + "to stop the counter i+1", + ctrl->pmcs[i],ctrl->counters[i], + __FUNCTION__,__FILE__,__LINE__); + + if ((ret = pmc_stop (ctrl->pmcs[i])) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to stop '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + + show_counter( + "DEBUG: _papi_freebsd_reset is about " + "to write the counter i+1\n", + ctrl->pmcs[i],ctrl->counters[i], + __FUNCTION__,__FILE__,__LINE__); + + if ((ret = pmc_write (ctrl->pmcs[i], 0)) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to write '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + + show_counter("DEBUG: _papi_freebsd_reset is about to " + "start the counter %i+1", + ctrl->pmcs[i],ctrl->counters[i], + __FUNCTION__,__FILE__,__LINE__); + + if ((ret = pmc_start (ctrl->pmcs[i])) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to start '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + + show_counter("DEBUG: _papi_freebsd_reset after " + "starting the counter i+1", + ctrl->pmcs[i],ctrl->counters[i], + __FUNCTION__,__FILE__,__LINE__); + + } + else + return PAPI_ECMP; + } + return PAPI_OK; +} + +int _papi_freebsd_write(hwd_context_t *ctx, hwd_control_state_t *ctrl, long long *from) +{ + int i, ret; + (void)ctx; + + SUBDBG("Entering\n"); + + for (i = 0; i < ctrl->n_counters; i++) + { + /* Can we write on the counters? 
*/ + if (ctrl->caps[i] & PMC_CAP_WRITE) + { + if ((ret = pmc_stop (ctrl->pmcs[i])) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to stop '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + if ((ret = pmc_write (ctrl->pmcs[i], from[i])) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to write '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + if ((ret = pmc_start (ctrl->pmcs[i])) < 0) + { +#if defined(DEBUG) + fprintf (stderr, "DEBUG: %s FAILED to stop '%s' [%d of %d] ERROR = %d\n", FUNC, ctrl->counters[i], i+1, ctrl->n_counters, ret); +#endif + return PAPI_ESYS; + } + } + else + return PAPI_ECMP; + } + return PAPI_OK; +} + +/* + * Overflow and profile functions + */ +void _papi_freebsd_dispatch_timer(int signal, hwd_siginfo_t * info, void *context) +{ + (void)signal; + (void)info; + (void)context; + /* Real function would call the function below with the proper args + * _papi_hwi_dispatch_overflow_signal(...); + */ + SUBDBG("Entering\n"); + return; +} + +int _papi_freebsd_stop_profiling(ThreadInfo_t *master, EventSetInfo_t *ESI) +{ + (void)master; + (void)ESI; + SUBDBG("Entering\n"); + return PAPI_OK; +} + +int _papi_freebsd_set_overflow(EventSetInfo_t *ESI, int EventIndex, int threshold) +{ + (void)ESI; + (void)EventIndex; + (void)threshold; + SUBDBG("Entering\n"); + return PAPI_OK; +} + +int _papi_freebsd_set_profile(EventSetInfo_t *ESI, int EventIndex, int threashold) +{ + (void)ESI; + (void)EventIndex; + (void)threashold; + SUBDBG("Entering\n"); + return PAPI_OK; +} + +/* + * Functions for setting up various options + */ + +/* + * This function has to set the bits needed to count different domains + * In particular: PAPI_DOM_USER, PAPI_DOM_KERNEL PAPI_DOM_OTHER + * By default return PAPI_EINVAL if none of those are specified + * and PAPI_OK with success + * PAPI_DOM_USER is only user context 
is counted
+ * PAPI_DOM_KERNEL is only the Kernel/OS context is counted
+ * PAPI_DOM_OTHER is Exception/transient mode (like user TLB misses)
+ * PAPI_DOM_ALL is all of the domains
+ *
+ * Only the PAPI_DOM_USER and PAPI_DOM_KERNEL bits are stored in
+ * cntrl->hwc_domain; any other bits in 'domain' are dropped.
+ * Returns PAPI_ECMP when Context.CPUtype is CPU_UNKNOWN, PAPI_EINVAL when
+ * neither supported bit is requested, and PAPI_OK otherwise.
+ */
+int _papi_freebsd_set_domain(hwd_control_state_t *cntrl, int domain)
+{
+    const int supported = PAPI_DOM_USER | PAPI_DOM_KERNEL;
+
+    SUBDBG("Entering\n");
+
+    /* libpmc supports USER/KERNEL mode only when counters are native */
+    if (Context.CPUtype == CPU_UNKNOWN)
+        return PAPI_ECMP;
+
+    if ((domain & supported) == 0)
+        return PAPI_EINVAL;
+
+    cntrl->hwc_domain = domain & supported;
+    return PAPI_OK;
+}
+
+
+/*
+ * Sets various options in the component.
+ *
+ *   PAPI_DOMAIN, PAPI_DEFDOM : forwarded to _papi_freebsd_set_domain() on
+ *                              the event set's control state
+ *   PAPI_GRANUL, PAPI_DEFGRN : not supported, returns PAPI_ECMP
+ *   anything else            : PAPI_EINVAL
+ *
+ * 'ctx' is unused.
+ */
+int _papi_freebsd_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
+{
+    (void)ctx;
+    SUBDBG("Entering\n");
+
+    switch (code)
+    {
+        case PAPI_DOMAIN:
+        case PAPI_DEFDOM:
+            return _papi_freebsd_set_domain(option->domain.ESI->ctl_state, option->domain.domain);
+        case PAPI_GRANUL:
+        case PAPI_DEFGRN:
+            return PAPI_ECMP;
+        default:
+            return PAPI_EINVAL;
+    }
+}
+
+
+/*
+ * Timing Routines
+ * These functions should return the highest resolution timers available.
+ */
+
+/*
+ * User CPU time of the calling process in microseconds, taken from
+ * getrusage(RUSAGE_SELF).  Returns PAPI_ESYS if getrusage() fails.
+ * The seconds field is widened to long long before scaling so the product
+ * cannot overflow where the field is only 32 bits wide.
+ */
+static long long _papi_freebsd_rusage_user_usec(void)
+{
+    struct rusage res;
+
+    if (getrusage(RUSAGE_SELF, &res) == -1)
+        return PAPI_ESYS;
+    return ((long long) res.ru_utime.tv_sec * 1000000LL) + res.ru_utime.tv_usec;
+}
+
+/*
+ * Elapsed microseconds.
+ *
+ * On i386/amd64 with Context.use_rdtsc set, this is the cycle counter
+ * divided by cpu_max_mhz.  The division is skipped when cpu_max_mhz is not
+ * positive, so an unset clock rate cannot cause a division by zero.
+ * Every other case falls back to getrusage(), which works everywhere but
+ * with low precision and high overhead.
+ *
+ * NOTE(review): the fallback reports ru_utime (user CPU time), not
+ * wall-clock time, and an error is returned in-band as PAPI_ESYS.  Both
+ * behaviours are inherited unchanged.
+ */
+long long _papi_freebsd_get_real_usec(void)
+{
+    SUBDBG("Entering\n");
+
+#if defined(__i386__) || defined(__amd64__)
+    if (Context.use_rdtsc && _papi_hwi_system_info.hw_info.cpu_max_mhz > 0)
+        return _papi_freebsd_get_real_cycles() / _papi_hwi_system_info.hw_info.cpu_max_mhz;
+#endif
+
+    return _papi_freebsd_rusage_user_usec();
+}
+
+
+/*
+ * Elapsed cycles.
+ *
+ * On i386/amd64 with Context.use_rdtsc set, reads the time stamp counter
+ * directly.  Otherwise the value is derived from
+ * _papi_freebsd_get_real_usec() scaled by cpu_max_mhz.
+ */
+long long _papi_freebsd_get_real_cycles(void)
+{
+    SUBDBG("Entering\n");
+
+#if defined(__i386__) || defined(__amd64__)
+    if (Context.use_rdtsc)
+    {
+        unsigned int lo, hi;
+
+        /*
+         * 0x0f 0x31 is RDTSC; it leaves the low half in EAX and the high
+         * half in EDX.  Each half is bound to its own register because the
+         * "A" constraint only names the EDX:EAX pair on 32-bit targets and
+         * would yield a wrong value on amd64.
+         */
+        __asm __volatile(".byte 0x0f, 0x31" : "=a" (lo), "=d" (hi));
+        return (long long) (((unsigned long long) hi << 32) | lo);
+    }
+#endif
+
+    return ((long long) _papi_freebsd_get_real_usec() * _papi_hwi_system_info.hw_info.cpu_max_mhz);
+}
+
+
+
+/*
+ * User CPU time of the calling process in microseconds, or PAPI_ESYS if
+ * getrusage() fails.
+ */
+long long _papi_freebsd_get_virt_usec(void)
+{
+    SUBDBG("Entering\n");
+
+    return _papi_freebsd_rusage_user_usec();
+}
+
+/*
+ * Native Event functions
+ */
+
+/*
+ * Strips the native flag from EventCode and range-checks the result
+ * against cmp_info.num_native_events.  Returns the table index, or -1 when
+ * the code is out of range.  The comparison is done unsigned so a masked
+ * value with the top bit set cannot slip through as a negative index.
+ */
+static int _papi_freebsd_native_index(unsigned int EventCode)
+{
+    unsigned int nidx = EventCode & PAPI_NATIVE_AND_MASK;
+    int count = _papi_freebsd_vector.cmp_info.num_native_events;
+
+    if (count <= 0 || nidx >= (unsigned int) count)
+        return -1;
+    return (int) nidx;
+}
+
+/*
+ * Copies 'src' into the caller's buffer of 'len' bytes.
+ *
+ *   PAPI_OK     : the whole string fitted, terminator included
+ *   PAPI_EBUF   : the string was truncated; the buffer is still
+ *                 NUL-terminated so callers can safely print what they got
+ *   PAPI_EINVAL : 'dst' is NULL or 'len' is not positive
+ */
+static int _papi_freebsd_copy_event_string(char *dst, const char *src, int len)
+{
+    if (dst == NULL || len <= 0)
+        return PAPI_EINVAL;
+
+    strncpy(dst, src, (size_t) len);
+    if (strlen(src) > (size_t) len - 1)
+    {
+        /* strncpy() does not terminate when it runs out of room */
+        dst[len - 1] = '\0';
+        return PAPI_EBUF;
+    }
+    return PAPI_OK;
+}
+
+/*
+ * Enumerates native events.
+ *
+ *   PAPI_ENUM_FIRST  : *EventCode is set to 0
+ *   PAPI_ENUM_EVENTS : *EventCode is advanced by one if the next code has a
+ *                      name; otherwise the name-lookup error is returned and
+ *                      *EventCode is left alone
+ *   other modifiers  : PAPI_ENOEVNT
+ */
+int _papi_freebsd_ntv_enum_events(unsigned int *EventCode, int modifier)
+{
+    char name[1024];
+    unsigned int candidate = 1 + *EventCode;
+    int status;
+
+    SUBDBG("Entering\n");
+
+    switch (modifier)
+    {
+        case PAPI_ENUM_FIRST:
+            *EventCode = 0;
+            return PAPI_OK;
+
+        case PAPI_ENUM_EVENTS:
+            status = _papi_freebsd_ntv_code_to_name(candidate, name, sizeof(name));
+            if (status != PAPI_OK)
+                return status;
+            *EventCode = candidate;
+            return PAPI_OK;
+
+        default:
+            return PAPI_ENOEVNT;
+    }
+}
+
+/*
+ * Looks 'name' up in the native event table of the detected CPU type and
+ * stores its table index in *event_code.  Returns PAPI_ENOEVNT when no
+ * entry has that exact name.
+ */
+int _papi_freebsd_ntv_name_to_code(const char *name, unsigned int *event_code) {
+
+    SUBDBG("Entering\n");
+
+    int i;
+
+    for (i = 0; i < _papi_freebsd_vector.cmp_info.num_native_events; i++) {
+        if (strcmp(name, _papi_hwd_native_info[Context.CPUtype].info[i].name) != 0)
+            continue;
+        *event_code = i;
+        return PAPI_OK;
+    }
+    return PAPI_ENOEVNT;
+}
+
+/*
+ * Writes the name of native event 'EventCode' into ntv_name (len bytes).
+ * Returns PAPI_ENOEVNT for an unknown code; see
+ * _papi_freebsd_copy_event_string() for the remaining return values.
+ */
+int _papi_freebsd_ntv_code_to_name(unsigned int EventCode, char *ntv_name,
+                                   int len)
+{
+    SUBDBG("Entering\n");
+
+    int nidx = _papi_freebsd_native_index(EventCode);
+
+    if (nidx < 0) {
+        return PAPI_ENOEVNT;
+    }
+
+    return _papi_freebsd_copy_event_string(ntv_name,
+        _papi_hwd_native_info[Context.CPUtype].info[nidx].name, len);
+}
+
+/*
+ * Writes the description of native event 'EventCode' into descr (len
+ * bytes).  Same return values as _papi_freebsd_ntv_code_to_name().
+ */
+int _papi_freebsd_ntv_code_to_descr(unsigned int EventCode, char *descr, int len)
+{
+    SUBDBG("Entering\n");
+
+    int nidx = _papi_freebsd_native_index(EventCode);
+
+    if (nidx < 0) {
+        return PAPI_ENOEVNT;
+    }
+
+    return _papi_freebsd_copy_event_string(descr,
+        _papi_hwd_native_info[Context.CPUtype].info[nidx].description, len);
+}
+
+
+/*
+ * Counter Allocation Functions, only need to implement if
+ * the component needs smart counter allocation.
+ */
+
+/*
+ * Checks whether PMC can provide all the counters the event set wants.
+ *
+ * Every native event in ESI is trial-allocated with pmc_allocate().  All
+ * counters obtained during the probe are released again before returning,
+ * on every path; nothing stays allocated.
+ *
+ *   PAPI_OK      : every event could be allocated (or the set is empty)
+ *   PAPI_ECNFLCT : an allocation failed, or the scratch array could not
+ *                  be obtained
+ *   other        : error from _papi_freebsd_ntv_code_to_name() for an
+ *                  event code that has no name
+ */
+int _papi_freebsd_allocate_registers (EventSetInfo_t *ESI)
+{
+    char name[1024];
+    int result = PAPI_OK;
+    int allocated_counters = 0;
+    int i, ret;
+    pmc_id_t *pmcs;
+
+    SUBDBG("Entering\n");
+
+    /* An empty set cannot conflict; also avoids relying on malloc(0). */
+    if (ESI->NativeCount == 0)
+        return PAPI_OK;
+
+    pmcs = malloc(sizeof(*pmcs) * ESI->NativeCount);
+    if (pmcs == NULL)
+        return PAPI_ECNFLCT;
+
+    /* Check if we can allocate all the counters needed */
+    for (i = 0; i < ESI->NativeCount; i++)
+    {
+        ret = _papi_freebsd_ntv_code_to_name (ESI->NativeInfoArray[i].ni_event, name, sizeof(name));
+        if (ret != PAPI_OK)
+        {
+            /* Leave through the common cleanup instead of returning here,
+             * so the counters taken so far and 'pmcs' are not leaked. */
+            result = ret;
+            break;
+        }
+
+        ret = pmc_allocate (name, PMC_MODE_TC, 0, PMC_CPU_ANY, &pmcs[i]);
+        if (ret < 0)
+        {
+#if defined(DEBUG)
+            fprintf (stderr, "DEBUG: %s FAILED to allocate '%s' (%#08x) [%d of %d] ERROR = %d\n", FUNC, name, ESI->NativeInfoArray[i].ni_event, i+1, ESI->NativeCount, ret);
+#endif
+            result = PAPI_ECNFLCT;
+            break;
+        }
+
+#if defined(DEBUG)
+        fprintf (stderr, "DEBUG: %s SUCCEEDED allocating '%s' (%#08x) [%d of %d]\n", FUNC, name, ESI->NativeInfoArray[i].ni_event, i+1, ESI->NativeCount);
+#endif
+        allocated_counters++;
+    }
+
+    /* Free the counters */
+    for (i = 0; i < allocated_counters; i++)
+        pmc_release (pmcs[i]);
+    free (pmcs);
+
+    return result;
+}
+
+/*
+ * Shared Library Information and other Information Functions
+ */
+
+/* Stub: leaves 'mdi' untouched and reports PAPI_OK. */
+int _papi_freebsd_update_shlib_info(papi_mdi_t *mdi){
+    SUBDBG("Entering\n");
+    (void)mdi;
+    return PAPI_OK;
+}
+
+
+
+/*
+ * Returns the result of _x86_detect_hypervisor() on i386/x86_64 builds and
+ * 0 on every other architecture, where virtual_vendor_name is not touched.
+ */
+int
+_papi_freebsd_detect_hypervisor(char *virtual_vendor_name) {
+
+#if defined(__i386__)||defined(__x86_64__)
+    return _x86_detect_hypervisor(virtual_vendor_name);
+#else
+    (void) virtual_vendor_name;
+    return 0;
+#endif
+}
+
+
+
+/*
+ * Fills in the memory and virtualization fields of mdi->hw_info.
+ *
+ * Always returns PAPI_OK.  A failing _freebsd_get_memory_info() is treated
+ * as non-fatal, as before, but is now reported through SUBDBG instead of
+ * being dropped silently.
+ */
+int
+_papi_freebsd_get_system_info( papi_mdi_t *mdi ) {
+
+    int retval;
+
+    retval=_freebsd_get_memory_info(&mdi->hw_info, mdi->hw_info.model );
+    if (retval != PAPI_OK) {
+        SUBDBG("_freebsd_get_memory_info failed\n");
+    }
+
+    /* Get virtualization info */
+    mdi->hw_info.virtualized=_papi_freebsd_detect_hypervisor(mdi->hw_info.virtual_vendor_string);
+
+    return PAPI_OK;
+
+}
+
+/*
+ * OS-level initialisation: records the OS name and release reported by
+ * uname(), sets the interval-timer parameters and collects system info.
+ * Always returns PAPI_OK.
+ */
+int
+_papi_hwi_init_os(void) {
+
+    struct utsname uname_buffer;
+
+    /* Internal function, doesn't necessarily need to be a function */
+    init_mdi();
+
+    /* If uname() fails the buffer is indeterminate: publish empty strings
+     * rather than copying whatever happens to be on the stack. */
+    if (uname(&uname_buffer) < 0) {
+        uname_buffer.sysname[0] = '\0';
+        uname_buffer.release[0] = '\0';
+    }
+
+    /* strncpy() does not terminate on truncation, so do it explicitly.
+     * Both destinations were already written for PAPI_MAX_STR_LEN bytes. */
+    strncpy(_papi_os_info.name,uname_buffer.sysname,PAPI_MAX_STR_LEN - 1);
+    _papi_os_info.name[PAPI_MAX_STR_LEN - 1] = '\0';
+
+    strncpy(_papi_os_info.version,uname_buffer.release,PAPI_MAX_STR_LEN - 1);
+    _papi_os_info.version[PAPI_MAX_STR_LEN - 1] = '\0';
+
+    _papi_os_info.itimer_sig = PAPI_INT_MPX_SIGNAL;
+    _papi_os_info.itimer_num = PAPI_INT_ITIMER;
+    _papi_os_info.itimer_ns = PAPI_INT_MPX_DEF_US * 1000; /* Not actually supported */
+    _papi_os_info.itimer_res_ns = 1;
+
+    _papi_freebsd_get_system_info(&_papi_hwi_system_info);
+
+    return PAPI_OK;
+}
+
+/* Component descriptor: static capabilities plus the entry-point table. */
+papi_vector_t _papi_freebsd_vector = {
+  .cmp_info = {
+    /* default component information (unspecified values are initialized to 0) */
+    .name = "FreeBSD",
+    .description = "FreeBSD CPU counters",
+    .default_domain = PAPI_DOM_USER,
+    .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL,
+    .default_granularity = PAPI_GRN_THR,
+    .available_granularities = PAPI_GRN_THR,
+
+    .hardware_intr = 1,
+    .kernel_multiplex = 1,
+    .kernel_profile = 1,
+    .num_mpx_cntrs = HWPMC_NUM_COUNTERS, /* ?? */
+    .hardware_intr_sig = PAPI_INT_SIGNAL,
+
+    /* component specific cmp_info initializations */
+    .fast_real_timer = 1,
+    .fast_virtual_timer = 0,
+    .attach = 0,
+    .attach_must_ptrace = 0,
+  } ,
+  .size = {
+    .context = sizeof( hwd_context_t ),
+    .control_state = sizeof( hwd_control_state_t ),
+    .reg_value = sizeof( hwd_register_t ),
+    .reg_alloc = sizeof( hwd_reg_alloc_t )
+  },
+
+  .dispatch_timer = _papi_freebsd_dispatch_timer,
+  .start = _papi_freebsd_start,
+  .stop = _papi_freebsd_stop,
+  .read = _papi_freebsd_read,
+  .reset = _papi_freebsd_reset,
+  .write = _papi_freebsd_write,
+  .stop_profiling = _papi_freebsd_stop_profiling,
+  .init_component = _papi_freebsd_init_component,
+  .init_thread = _papi_freebsd_init_thread,
+  .init_control_state = _papi_freebsd_init_control_state,
+  .update_control_state = _papi_freebsd_update_control_state,
+  .ctl = _papi_freebsd_ctl,
+  .set_overflow = _papi_freebsd_set_overflow,
+  .set_profile = _papi_freebsd_set_profile,
+  .set_domain = _papi_freebsd_set_domain,
+
+  .ntv_enum_events = _papi_freebsd_ntv_enum_events,
+  .ntv_name_to_code = _papi_freebsd_ntv_name_to_code,
+  .ntv_code_to_name = _papi_freebsd_ntv_code_to_name,
+  .ntv_code_to_descr = _papi_freebsd_ntv_code_to_descr,
+
+  .allocate_registers = _papi_freebsd_allocate_registers,
+
+  .shutdown_thread = _papi_freebsd_shutdown_thread,
+  .shutdown_component = _papi_freebsd_shutdown_component,
+};
+
+/* OS-level entry points: timers, memory and system information. */
+papi_os_vector_t _papi_os_vector = {
+  .get_dmem_info = _papi_freebsd_get_dmem_info,
+  .get_real_cycles = _papi_freebsd_get_real_cycles,
+  .get_real_usec = _papi_freebsd_get_real_usec,
+  .get_virt_usec = _papi_freebsd_get_virt_usec,
+  .update_shlib_info = _papi_freebsd_update_shlib_info,
+  .get_system_info = _papi_freebsd_get_system_info,
+};
diff --git a/src/freebsd.h b/src/freebsd.h
new file mode 100644
index 0000000..2a8b60d
--- /dev/null
+++ b/src/freebsd.h
@@ -0,0 +1,75 @@
+/****************************/
+/* THIS IS OPEN SOURCE CODE */
+/****************************/
+/*
+*
File: freebsd.h +* Author: Kevin London +* london@cs.utk.edu +* Mods: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef _PAPI_FreeBSD_H +#define _PAPI_FreeBSD_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "papi.h" +#include + +#include "freebsd-config.h" + +#define MAX_COUNTERS HWPMC_NUM_COUNTERS +#define MAX_COUNTER_TERMS MAX_COUNTERS + +#undef hwd_siginfo_t +#undef hwd_register_t +#undef hwd_reg_alloc_t +#undef hwd_control_state_t +#undef hwd_context_t +#undef hwd_libpmc_context_t + +typedef struct hwd_siginfo { + int placeholder; +} hwd_siginfo_t; + +typedef struct hwd_register { + int placeholder; +} hwd_register_t; + +typedef struct hwd_reg_alloc { + int placeholder; +} hwd_reg_alloc_t; + +typedef struct hwd_control_state { + int n_counters; /* Number of counters */ + int hwc_domain; /* HWC domain {user|kernel} */ + unsigned *caps; /* Capabilities for each counter */ + pmc_id_t *pmcs; /* PMC identifiers */ + pmc_value_t *values; /* Stored values for each counter */ + char **counters; /* Name of each counter (with mode) */ +} hwd_control_state_t; + +typedef struct hwd_context { + int placeholder; +} hwd_context_t; + +#include "freebsd-context.h" + +typedef struct hwd_libpmc_context { + int CPUtype; + int use_rdtsc; +} hwd_libpmc_context_t; + +#define _papi_hwd_lock_init() { ; } + +#endif /* _PAPI_FreeBSD_H */ diff --git a/src/freebsd/map-atom.c b/src/freebsd/map-atom.c new file mode 100644 index 0000000..a8f3258 --- /dev/null +++ b/src/freebsd/map-atom.c @@ -0,0 +1,259 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-atom.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + +/**************************************************************************** + ATOM SUBSTRATE + ATOM SUBSTRATE + ATOM SUBSTRATE + ATOM SUBSTRATE + ATOM SUBSTRATE
+****************************************************************************/ + +/* + NativeEvent_Value_AtomProcessor must match AtomProcessor_info +*/ + +Native_Event_LabelDescription_t AtomProcessor_info[] = +{ + {"BACLEARS", "The number of times the front end is resteered."}, + {"BOGUS_BR", "The number of byte sequences mistakenly detected as taken branch instructions."}, + {"BR_BAC_MISSP_EXEC", "The number of branch instructions that were mispredicted when decoded."}, + {"BR_CALL_MISSP_EXEC", "The number of mispredicted CALL instructions that were executed."}, + {"BR_CALL_EXEC", "The number of CALL instructions executed."}, + {"BR_CND_EXEC", "The number of conditional branches executed, but not necessarily retired."}, + {"BR_CND_MISSP_EXEC", "The number of mispredicted conditional branches executed."}, + {"BR_IND_CALL_EXEC", "The number of indirect CALL instructions executed."}, + {"BR_IND_EXEC", "The number of indirect branch instructions executed."}, + {"BR_IND_MISSP_EXEC", "The number of mispredicted indirect branch instructions executed."}, + {"BR_INST_DECODED", "The number of branch instructions decoded."}, + {"BR_INST_EXEC", "The number of branches executed, but not necessarily retired."}, + {"BR_INST_RETIRED.ANY", "The number of branch instructions retired. This is an architectural performance event."}, + {"BR_INST_RETIRED.ANY1", "The number of branch instructions retired that were mispredicted."}, + {"BR_INST_RETIRED.MISPRED", "The number of mispredicted branch instructions retired. 
This is an architectural performance event."}, + {"BR_INST_RETIRED.MISPRED_NOT_TAKEN", "The number of not taken branch instructions retired that were mispredicted."}, + {"BR_INST_RETIRED.MISPRED_TAKEN", "The number taken branch instructions retired that were mispredicted."}, + {"BR_INST_RETIRED.PRED_NOT_TAKEN", "The number of not taken branch instructions retired that were correctly predicted."}, + {"BR_INST_RETIRED.PRED_TAKEN", "The number of taken branch instructions retired that were correctly predicted."}, + {"BR_INST_RETIRED.TAKEN", "The number of taken branch instructions retired."}, + {"BR_MISSP_EXEC", "The number of mispredicted branch instructions that were executed."}, + {"BR_RET_MISSP_EXEC", "The number of mispredicted RET instructions executed."}, + {"BR_RET_BAC_MISSP_EXEC", "The number of RET instructions executed that were mispredicted at decode time."}, + {"BR_RET_EXEC", "The number of RET instructions executed."}, + {"BR_TKN_BUBBLE_1", "The number of branch predicted taken with bubble 1."}, + {"BR_TKN_BUBBLE_2", "The number of branch predicted taken with bubble 2."}, + {"BUSQ_EMPTY", "The number of cycles during which the core did not have any pending transactions in the bus queue."}, + {"BUS_BNR_DRV", "The number of Bus Not Ready signals asserted on the bus. This event is thread-independent."}, + {"BUS_DATA_RCV", "The number of bus cycles during which the processor is receiving data. This event is thread-independent."}, + {"BUS_DRDY_CLOCKS", "The number of bus cycles during which the Data Ready signal is asserted on the bus. This event is thread-independent."}, + {"BUS_HIT_DRV", "The number of bus cycles during which the processor drives the HIT# pin. This event is thread-independent."}, + {"BUS_HITM_DRV", "The number of bus cycles during which the processor drives the HITM# pin. 
This event is thread-independent."}, + {"BUS_IO_WAIT", "The number of core cycles during which I/O requests wait in the bus queue."}, + {"BUS_LOCK_CLOCKS", "The number of bus cycles during which the LOCK signal was asserted on the bus. This event is thread independent."}, + {"BUS_REQUEST_OUTSTANDING", "The number of pending full cache line read transactions on the bus occuring in each cycle. This event is thread independent."}, + {"BUS_TRANS_P", "The number of partial bus transactions."}, + {"BUS_TRANS_IFETCH", "The number of instruction fetch full cache line bus transactions."}, + {"BUS_TRANS_INVAL", "The number of invalidate bus transactions."}, + {"BUS_TRANS_PWR", "The number of partial write bus transactions."}, + {"BUS_TRANS_DEF", "The number of deferred bus transactions."}, + {"BUS_TRANS_BURST", "The number of burst transactions."}, + {"BUS_TRANS_MEM", "The number of memory bus transactions."}, + {"BUS_TRANS_ANY", "The number of bus transactions of any kind."}, + {"BUS_TRANS_BRD", "The number of burst read transactions."}, + {"BUS_TRANS_IO", "The number of completed I/O bus transaactions due to IN and OUT instructions."}, + {"BUS_TRANS_RFO", "The number of Read For Ownership bus transactions."}, + {"BUS_TRANS_WB", "The number explicit writeback bus transactions due to dirty line evictions."}, + {"CMP_SNOOP", "The number of times the L1 data cache is snooped by the other core in the same processor."}, + {"CPU_CLK_UNHALTED.BUS", "The number of bus cycles when the core is not in the halt state. This is an architectural performance event."}, + {"CPU_CLK_UNHALTED.CORE_P", "The number of core cycles while the core is not in a halt state. 
This is an architectural performance event."}, + {"CPU_CLK_UNHALTED.NO_OTHER", "The number of bus cycles during which the core remains unhalted and the other core is halted."}, + {"CYCLES_DIV_BUSY", "The number of cycles the divider is busy."}, + {"CYCLES_INT_MASKED.CYCLES_INT_MASKED", "The number of cycles during which interrupts are disabled."}, + {"CYCLES_INT_MASKED.CYCLES_INT_PENDING_AND_MASKED", "The number of cycles during which there were pending interrupts while interrupts were disabled."}, + {"CYCLES_L1I_MEM_STALLED", "The number of cycles for which an instruction fetch stalls."}, + {"DATA_TLB_MISSES.DTLB_MISS", "The number of memory access that missed the Data TLB"}, + {"DATA_TLB_MISSES.DTLB_MISS_LD", "The number of loads that missed the Data TLB."}, + {"DATA_TLB_MISSES.DTLB_MISS_ST", "The number of stores that missed the Data TLB."}, + {"DATA_TLB_MISSES.UTLB_MISS_LD", "The number of loads that missed the UTLB."}, + {"DELAYED_BYPASS.FP", "The number of floating point operations that used data immediately after the data was generated by a non floating point execution unit."}, + {"DELAYED_BYPASS.LOAD", "The number of delayed bypass penalty cycles that a load operation incurred."}, + {"DELAYED_BYPASS.SIMD", "The number of times SIMD operations use data immediately after data, was generated by a non-SIMD execution unit."}, + {"DIV", "The number of divide operations executed. 
This event is only available on PMC1."}, + {"DIV.AR", "The number of divide operations retired."}, + {"DIV.S", "The number of divide operations executed."}, + {"DTLB_MISSES.ANY", "The number of Data TLB misses, including misses that result from speculative accesses."}, + {"DTLB_MISSES.L0_MISS_LD", "The number of level 0 DTLB misses due to load operations."}, + {"DTLB_MISSES.MISS_LD", "The number of Data TLB misses due to load operations."}, + {"DTLB_MISSES.MISS_ST", "The number of Data TLB misses due to store operations."}, + {"EIST_TRANS", "The number of Enhanced Intel SpeedStep Technology transitions."}, + {"ESP.ADDITIONS", "The number of automatic additions to the esp register."}, + {"ESP.SYNCH", "The number of times the esp register was explicitly used in an address expression after it is implicitly used by a PUSH or POP instruction."}, + {"EXT_SNOOP", "The number of snoop responses to bus transactions."}, + {"FP_ASSIST", "The number of floating point operations executed that needed a microcode assist, including speculatively executed instructions."}, + {"FP_ASSIST.AR", "The number of floating point operations retired that needed a microcode assist."}, + {"FP_COMP_OPS_EXE", "The number of floating point computational micro-ops executed. The event is available only on PMC0."}, + {"FP_MMX_TRANS_TO_FP", "The number of transitions from MMX instructions to floating point instructions."}, + {"FP_MMX_TRANS_TO_MMX", "The number of transitions from floating point instructions to MMX instructions."}, + {"HW_INT_RCV", "The number of hardware interrupts recieved."}, + {"ICACHE.ACCESSES", "The number of instruction fetches."}, + {"ICACHE.MISSES", "The number of instruction fetches that miss the instruction cache."}, + {"IDLE_DURING_DIV", "The number of cycles the divider is busy and no other execution unit or load operation was in progress. 
This event is available only on PMC0."}, + {"ILD_STALL", "The number of cycles the instruction length decoder stalled due to a length changing prefix."}, + {"INST_QUEUE.FULL", "The number of cycles during which the instruction queue is full."}, + {"INST_RETIRED.ANY_P", "The number of instructions retired. This is an architectural performance event."}, + {"INST_RETIRED.LOADS", "The number of instructions retired that contained a load operation."}, + {"INST_RETIRED.OTHER", "The number of instructions retired that did not contain a load or a store operation."}, + {"INST_RETIRED.STORES", "The number of instructions retired that contained a store operation."}, + {"ITLB.FLUSH", "The number of ITLB flushes."}, + {"ITLB.LARGE_MISS", "The number of instruction fetches from large pages that miss the ITLB."}, + {"ITLB.MISSES", "The number of instruction fetches from both large and small pages that miss the ITLB."}, + {"ITLB.SMALL_MISS", "The number of instruction fetches from small pages that miss the ITLB."}, + {"ITLB_MISS_RETIRED", "The number of retired instructions that missed the ITLB when they were fetched."}, + {"L1D_ALL_REF", "The number of references to L1 data cache counting loads and stores of to all memory types."}, + {"L1D_ALL_CACHE_REF", "The number of data reads and writes to cacheable memory."}, + {"L1D_CACHE_LOCK", "The number of locked reads from cacheable memory."}, + {"L1D_CACHE_LOCK_DURATION", "The number of cycles during which any cache line is locked by any locking instruction."}, + {"L1D_CACHE.LD", "The number of data reads from cacheable memory."}, + {"L1D_CACHE.ST", "The number of data writes to cacheable memory."}, + {"L1D_M_EVICT", "The number of modified cache lines evicted from L1 data cache."}, + {"L1D_M_REPL", "The number of modified lines allocated in L1 data cache."}, + {"L1D_PEND_MISS", "The total number of outstanding L1 data cache misses at any clock."}, + {"L1D_PREFETCH.REQUESTS", "The number of times L1 data cache requested to prefetch a 
data cache line."}, + {"L1D_REPL", "The number of lines brought into L1 data cache."}, + {"L1D_SPLIT.LOADS", "The number of load operations that span two cache lines."}, + {"L1D_SPLIT.STORES", "The number of store operations that span two cache lines."}, + {"L1I_MISSES", "The number of instruction fetch unit misses."}, + {"L1I_READS", "The number of instruction fetches."}, + {"L2_ADS", "The number of cycles that the L2 address bus is in use."}, + {"L2_DBUS_BUSY_RD", "The number of core cycles during which the L2 data bus is busy transferring data to the core."}, + {"L2_IFETCH", "The number of instruction cache line requests from the instruction fetch unit."}, + {"L2_LD", "The number of L2 cache read requests from L1 cache and L2 prefetchers."}, + {"L2_LINES_IN", "The number of cache lines allocated in L2 cache."}, + {"L2_LINES_OUT", "The number of L2 cache lines evicted."}, + {"L2_LOCK", "The number of locked accesses to cache lines that miss L1 data cache."}, + {"L2_M_LINES_IN", "The number of L2 cache line modifications."}, + {"L2_M_LINES_OUT", "The number of modified lines evicted from L2 cache."}, + {"L2_NO_REQ", "The number of cycles during which no L2 cache requests were pending from a core."}, + {"L2_REJECT_BUSQ", "The number of L2 cache requests that were rejected."}, + {"L2_RQSTS", "The number of completed L2 cache requests."}, + {"L2_RQSTS.SELF.DEMAND.I_STATE", "The number of completed L2 cache demand requests from this core that missed the L2 cache. 
This is an architectural performance event."}, + {"L2_RQSTS.SELF.DEMAND.MESI", "The number of completed L2 cache demand requests from this core."}, + {"L2_ST", "The number of store operations that miss the L1 cache and request data from the L2 cache."}, + {"LOAD_BLOCK.L1D", "The number of loads blocked by the L1 data cache."}, + {"LOAD_BLOCK.OVERLAP_STORE", "The number of loads that partially overlap an earlier store or are aliased with a previous store."}, + {"LOAD_BLOCK.STA", "The number of loads blocked by preceding stores whose address is yet to be calculated."}, + {"LOAD_BLOCK.STD", "The number of loads blocked by preceding stores to the same address whose data value is not known."}, + {"LOAD_BLOCK.UNTIL_RETIRE", "The numer of load operations that were blocked until retirement."}, + {"LOAD_HIT_PRE", "The number of load operations that conflicted with an prefetch to the same cache line."}, + {"MACHINE_CLEARS.SMC", "The number of times a program writes to a code section."}, + {"MACHINE_NUKES.MEM_ORDER", "The number of times the execution pipeline was restarted due to a memory ordering conflict or memory disambiguation misprediction."}, + {"MACRO_INSTS.ALL_DECODED", "The number of instructions decoded."}, + {"MACRO_INSTS.CISC_DECODED", "The number of complex instructions decoded."}, + {"MEMORY_DISAMBIGUATION.RESET", "The number of cycles during which memory disambiguation misprediction occurs."}, + {"MEMORY_DISAMBIGUATION.SUCCESS", "The number of load operations that were successfully disambiguated."}, + {"MEM_LOAD_RETIRED.DTLB_MISS", "The number of retired load operations that missed the DTLB."}, + {"MEM_LOAD_RETIRED.L2_MISS", "The number of retired load operations that miss L2 cache."}, + {"MEM_LOAD_RETIRED.L2_HIT", "The number of retired load operations that hit L2 cache."}, + {"MEM_LOAD_RETIRED.L2_LINE_MISS", "The number of load operations that missed L2 cache and that caused a bus request."}, + {"MUL", "The number of multiply operations executed. 
This event is only available on PMC1."}, + {"MUL.AR", "The number of multiply operations retired."}, + {"MUL.S", "The number of multiply operations executed."}, + {"PAGE_WALKS.WALKS", "The number of page walks executed due to an ITLB or DTLB miss."}, + {"PAGE_WALKS.CYCLES", "The number of cycles spent in a page walk caused by an ITLB or DTLB miss."}, + {"PREF_RQSTS_DN", "The number of downward prefetches issued from the Data Prefetch Logic unit to L2 cache."}, + {"PREF_RQSTS_UP", "The number of upward prefetches issued from the Data Prefetch Logic unit to L2 cache."}, + {"PREFETCH.PREFETCHNTA", "The number of PREFETCHNTA instructions executed."}, + {"PREFETCH.PREFETCHT0", "The number of PREFETCHT0 instructions executed."}, + {"PREFETCH.SW_L2", "The number of PREFETCHT1 and PREFETCHT2 instructions executed."}, + {"RAT_STALLS.ANY", "The number of stall cycles due to any of RAT_STALLS.FLAGS RAT_STALLS.FPSW, RAT_STALLS.PARTIAL and RAT_STALLS.ROB_READ_PORT."}, + {"RAT_STALLS.FLAGS", "The number of cycles execution stalled due to a flag register induced stall."}, + {"RAT_STALLS.FPSW", "The number of times the floating point status word was written."}, + {"RAT_STALLS.PARTIAL_CYCLES", "The number of cycles of added instruction execution latency due to the use of a register that was partially written by previous instructions."}, + {"RAT_STALLS.ROB_READ_PORT", "The number of cycles when ROB read port stalls occurred."}, + {"RESOURCE_STALLS.ANY", "The number of cycles during which any resource related stall occurred."}, + {"RESOURCE_STALLS.BR_MISS_CLEAR", "The number of cycles stalled due to branch misprediction."}, + {"RESOURCE_STALLS.FPCW", "The number of cycles stalled due to writing the floating point control word."}, + {"RESOURCE_STALLS.LD_ST", "The number of cycles during which the number of loads and stores in the pipeline exceeded their limits."}, + {"RESOURCE_STALLS.ROB_FULL", "The number of cycles when the reorder buffer was full."}, + {"RESOURCE_STALLS.RS_FULL", 
"The number of cycles during which the RS was full."}, + {"RS_UOPS_DISPATCHED", "The number of micro-ops dispatched for execution."}, + {"RS_UOPS_DISPATCHED.PORT0", "The number of cycles micro-ops were dispatched for execution on port 0."}, + {"RS_UOPS_DISPATCHED.PORT1", "The number of cycles micro-ops were dispatched for execution on port 1."}, + {"RS_UOPS_DISPATCHED.PORT2", "The number of cycles micro-ops were dispatched for execution on port 2."}, + {"RS_UOPS_DISPATCHED.PORT3", "The number of cycles micro-ops were dispatched for execution on port 3."}, + {"RS_UOPS_DISPATCHED.PORT4", "The number of cycles micro-ops were dispatched for execution on port 4."}, + {"RS_UOPS_DISPATCHED.PORT5", "The number of cycles micro-ops were dispatched for execution on port 5."}, + {"SB_DRAIN_CYCLES", "The number of cycles while the store buffer is draining."}, + {"SEGMENT_REG_LOADS.ANY", "The number of segment register loads."}, + {"SEG_REG_RENAMES.ANY", "The number of times the any segment register was renamed."}, + {"SEG_REG_RENAMES.DS", "The number of times the ds register is renamed."}, + {"SEG_REG_RENAMES.ES", "The number of times the es register is renamed."}, + {"SEG_REG_RENAMES.FS", "The number of times the fs register is renamed."}, + {"SEG_REG_RENAMES.GS", "The number of times the gs register is renamed."}, + {"SEG_RENAME_STALLS.ANY", "The number of stalls due to lack of resource to rename any segment register."}, + {"SEG_RENAME_STALLS.DS", "The number of stalls due to lack of renaming resources for the ds register."}, + {"SEG_RENAME_STALLS.ES", "The number of stalls due to lack of renaming resources for the es register."}, + {"SEG_RENAME_STALLS.FS", "The number of stalls due to lack of renaming resources for the fs register."}, + {"SEG_RENAME_STALLS.GS", "The number of stalls due to lack of renaming resources for the gs register."}, + {"SIMD_ASSIST", "The number SIMD assists invoked."}, + {"SIMD_COMP_INST_RETIRED.PACKED_DOUBLE", "Then number of computational SSE2 
packed double precision instructions retired."}, + {"SIMD_COMP_INST_RETIRED.PACKED_SINGLE", "Then number of computational SSE2 packed single precision instructions retired."}, + {"SIMD_COMP_INST_RETIRED.SCALAR_DOUBLE", "Then number of computational SSE2 scalar double precision instructions retired."}, + {"SIMD_COMP_INST_RETIRED.SCALAR_SINGLE", "Then number of computational SSE2 scalar single precision instructions retired."}, + {"SIMD_INSTR_RETIRED", "The number of retired SIMD instructions that use MMX registers."}, + {"SIMD_INST_RETIRED.ANY", "The number of streaming SIMD instructions retired."}, + {"SIMD_INST_RETIRED.PACKED_DOUBLE", "The number of SSE2 packed double precision instructions retired."}, + {"SIMD_INST_RETIRED.PACKED_SINGLE", "The number of SSE packed single precision instructions retired."}, + {"SIMD_INST_RETIRED.SCALAR_DOUBLE", "The number of SSE2 scalar double precision instructions retired."}, + {"SIMD_INST_RETIRED.SCALAR_SINGLE", "The number of SSE scalar single precision instructions retired."}, + {"SIMD_INST_RETIRED.VECTOR", "The number of SSE2 vector instructions retired."}, + {"SIMD_SAT_INSTR_RETIRED", "The number of saturated arithmetic SIMD instructions retired."}, + {"SIMD_SAT_UOP_EXEC.AR", "The number of SIMD saturated arithmetic micro-ops retired."}, + {"SIMD_SAT_UOP_EXEC.S", "The number of SIMD saturated arithmetic micro-ops executed."}, + {"SIMD_UOPS_EXEC.AR", "The number of SIMD micro-ops retired."}, + {"SIMD_UOPS_EXEC.S", "The number of SIMD micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.ARITHMETIC.AR", "The number of SIMD packed arithmetic micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.ARITHMETIC.S", "The number of SIMD packed arithmetic micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.LOGICAL.AR", "The number of SIMD packed logical microops executed."}, + {"SIMD_UOP_TYPE_EXEC.LOGICAL.S", "The number of SIMD packed logical microops executed."}, + {"SIMD_UOP_TYPE_EXEC.MUL.AR", "The number of SIMD packed multiply microops retired."}, + 
{"SIMD_UOP_TYPE_EXEC.MUL.S", "The number of SIMD packed multiply microops executed."}, + {"SIMD_UOP_TYPE_EXEC.PACK.AR", "The number of SIMD pack micro-ops retired."}, + {"SIMD_UOP_TYPE_EXEC.PACK.S", "The number of SIMD pack micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.SHIFT.AR", "The number of SIMD packed shift micro-ops retired."}, + {"SIMD_UOP_TYPE_EXEC.SHIFT.S", "The number of SIMD packed shift micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.UNPACK.AR", "The number of SIMD unpack micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.UNPACK.S", "The number of SIMD unpack micro-ops executed."}, + {"SNOOP_STALL_DRV", "The number of times the bus stalled for snoops. This event is thread-independent."}, + {"SSE_PRE_EXEC.L2", "The number of PREFETCHT1 instructions executed."}, + {"SSE_PRE_EXEC.STORES", "The number of times SSE non-temporal store instructions were executed."}, + {"SSE_PRE_MISS.L1", "The number of times the PREFETCHT0 instruction executed and missed all cache levels."}, + {"SSE_PRE_MISS.L2", "The number of times the PREFETCHT1 instruction executed and missed all cache levels."}, + {"SSE_PRE_MISS.NTA", "The number of times the PREFETCHNTA instruction executed and missed all cache levels."}, + {"STORE_BLOCK.ORDER", "The number of cycles while a store was waiting for another store to be globally observed."}, + {"STORE_BLOCK.SNOOP", "The number of cycles while a store was blocked due to a conflict with an internal or external snoop."}, + {"STORE_FORWARDS.GOOD", "The number of times stored data was forwarded directly to a load."}, + {"THERMAL_TRIP", "The number of thermal trips."}, + {"UOPS_RETIRED.LD_IND_BR", "The number of micro-ops retired that fused a load with another operation."}, + {"UOPS_RETIRED.STD_STA", "The number of store address calculations that fused into one micro-op."}, + {"UOPS_RETIRED.MACRO_FUSION", "The number of times retired instruction pairs were fused into one micro-op."}, + {"UOPS_RETIRED.FUSED", "The number of fused micro-ops retired."}, + 
{"UOPS_RETIRED.NON_FUSED", "The number of non-fused micro-ops retired."}, + {"UOPS_RETIRED.ANY", "The number of micro-ops retired."}, + {"X87_COMP_OPS_EXE.ANY.AR", "The number of x87 floating-point computational micro-ops retired."}, + {"X87_COMP_OPS_EXE.ANY.S", "The number of x87 floating-point computational micro-ops executed."}, + {"X87_OPS_RETIRED.ANY", "The number of floating point computational instructions retired."}, + {"X87_OPS_RETIRED.FXCH", "The number of FXCH instructions retired."}, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-atom.h b/src/freebsd/map-atom.h new file mode 100644 index 0000000..e9c01d1 --- /dev/null +++ b/src/freebsd/map-atom.h @@ -0,0 +1,250 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-atom.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_ATOM +#define FreeBSD_MAP_ATOM + +enum NativeEvent_Value_AtomProcessor { + PNE_ATOM_BACLEARS = PAPI_NATIVE_MASK, + PNE_ATOM_BOGUS_BR, + PNE_ATOM_BR_BAC_MISSP_EXEC, + PNE_ATOM_BR_CALL_MISSP_EXEC, + PNE_ATOM_BR_CALL_EXEC, + PNE_ATOM_BR_CND_EXEC, + PNE_ATOM_BR_CND_MISSP_EXEC, + PNE_ATOM_BR_IND_CALL_EXEC, + PNE_ATOM_BR_IND_EXEC, + PNE_ATOM_BR_IND_MISSP_EXEC, + PNE_ATOM_BR_INST_DECODED, + PNE_ATOM_BR_INST_EXEC, + PNE_ATOM_BR_INST_RETIRED_ANY, + PNE_ATOM_BR_INST_RETIRED_ANY1, + PNE_ATOM_BR_INST_RETIRED_MISPRED, + PNE_ATOM_BR_INST_RETIRED_MISPRED_NOT_TAKEN, + PNE_ATOM_BR_INST_RETIRED_MISPRED_TAKEN, + PNE_ATOM_BR_INST_RETIRED_PRED_NOT_TAKEN, + PNE_ATOM_BR_INST_RETIRED_PRED_TAKEN, + PNE_ATOM_BR_INST_RETIRED_TAKEN, + PNE_ATOM_BR_MISSP_EXEC, + PNE_ATOM_BR_RET_MISSP_EXEC, + PNE_ATOM_BR_RET_BAC_MISSP_EXEC, + PNE_ATOM_BR_RET_EXEC, + PNE_ATOM_BR_TKN_BUBBLE_1, + PNE_ATOM_BR_TKN_BUBBLE_2, + PNE_ATOM_BUSQ_EMPTY, + PNE_ATOM_BUS_BNR_DRV, + PNE_ATOM_BUS_DATA_RCV, + PNE_ATOM_BUS_DRDY_CLOCKS, + PNE_ATOM_BUS_HIT_DRV, + PNE_ATOM_BUS_HITM_DRV, + PNE_ATOM_BUS_IO_WAIT, + PNE_ATOM_BUS_LOCK_CLOCKS, + 
PNE_ATOM_BUS_REQUEST_OUTSTANDING, + PNE_ATOM_BUS_TRANS_P, + PNE_ATOM_BUS_TRANS_IFETCH, + PNE_ATOM_BUS_TRANS_INVAL, + PNE_ATOM_BUS_TRANS_PWR, + PNE_ATOM_BUS_TRANS_DEF, + PNE_ATOM_BUS_TRANS_BURST, + PNE_ATOM_BUS_TRANS_MEM, + PNE_ATOM_BUS_TRANS_ANY, + PNE_ATOM_BUS_TRANS_BRD, + PNE_ATOM_BUS_TRANS_IO, + PNE_ATOM_BUS_TRANS_RFO, + PNE_ATOM_BUS_TRANS_WB, + PNE_ATOM_CMP_SNOOP, + PNE_ATOM_CPU_CLK_UNHALTED_BUS, + PNE_ATOM_CPU_CLK_UNHALTED_CORE_P, + PNE_ATOM_CPU_CLK_UNHALTED_NO_OTHER, + PNE_ATOM_CYCLES_DIV_BUSY, + PNE_ATOM_CYCLES_INT_MASKED_CYCLES_INT_MASKED, + PNE_ATOM_CYCLES_INT_MASKED_CYCLES_INT_PENDING_AND_MASKED, + PNE_ATOM_CYCLES_L1I_MEM_STALLED, + PNE_ATOM_DATA_TLB_MISSES_DTLB_MISS, + PNE_ATOM_DATA_TLB_MISSES_DTLB_MISS_LD, + PNE_ATOM_DATA_TLB_MISSES_DTLB_MISS_ST, + PNE_ATOM_DATA_TLB_MISSES_UTLB_MISS_LD, + PNE_ATOM_DELAYED_BYPASS_FP, + PNE_ATOM_DELAYED_BYPASS_LOAD, + PNE_ATOM_DELAYED_BYPASS_SIMD, + PNE_ATOM_DIV, + PNE_ATOM_DIV_AR, + PNE_ATOM_DIV_S, + PNE_ATOM_DTLB_MISSES_ANY, + PNE_ATOM_DTLB_MISSES_L0_MISS_LD, + PNE_ATOM_DTLB_MISSES_MISS_LD, + PNE_ATOM_DTLB_MISSES_MISS_ST, + PNE_ATOM_EIST_TRANS, + PNE_ATOM_ESP_ADDITIONS, + PNE_ATOM_ESP_SYNCH, + PNE_ATOM_EXT_SNOOP, + PNE_ATOM_FP_ASSIST, + PNE_ATOM_FP_ASSIST_AR, + PNE_ATOM_FP_COMP_OPS_EXE, + PNE_ATOM_FP_MMX_TRANS_TO_FP, + PNE_ATOM_FP_MMX_TRANS_TO_MMX, + PNE_ATOM_HW_INT_RCV, + PNE_ATOM_ICACHE_ACCESSES, + PNE_ATOM_ICACHE_MISSES, + PNE_ATOM_IDLE_DURING_DIV, + PNE_ATOM_ILD_STALL, + PNE_ATOM_INST_QUEUE_FULL, + PNE_ATOM_INST_RETIRED_ANY_P, + PNE_ATOM_INST_RETIRED_LOADS, + PNE_ATOM_INST_RETIRED_OTHER, + PNE_ATOM_INST_RETIRED_STORES, + PNE_ATOM_ITLB_FLUSH, + PNE_ATOM_ITLB_LARGE_MISS, + PNE_ATOM_ITLB_MISSES, + PNE_ATOM_ITLB_SMALL_MISS, + PNE_ATOM_ITLB_MISS_RETIRED, + PNE_ATOM_L1D_ALL_REF, + PNE_ATOM_L1D_ALL_CACHE_REF, + PNE_ATOM_L1D_CACHE_LOCK, + PNE_ATOM_L1D_CACHE_LOCK_DURATION, + PNE_ATOM_L1D_CACHE_LD, + PNE_ATOM_L1D_CACHE_ST, + PNE_ATOM_L1D_M_EVICT, + PNE_ATOM_L1D_M_REPL, + PNE_ATOM_L1D_PEND_MISS, + 
PNE_ATOM_L1D_PREFETCH_REQUESTS, + PNE_ATOM_L1D_REPL, + PNE_ATOM_L1D_SPLIT_LOADS, + PNE_ATOM_L1D_SPLIT_STORES, + PNE_ATOM_L1I_MISSES, + PNE_ATOM_L1I_READS, + PNE_ATOM_L2_ADS, + PNE_ATOM_L2_DBUS_BUSY_RD, + PNE_ATOM_L2_IFETCH, + PNE_ATOM_L2_LD, + PNE_ATOM_L2_LINES_IN, + PNE_ATOM_L2_LINES_OUT, + PNE_ATOM_L2_LOCK, + PNE_ATOM_L2_M_LINES_IN, + PNE_ATOM_L2_M_LINES_OUT, + PNE_ATOM_L2_NO_REQ, + PNE_ATOM_L2_REJECT_BUSQ, + PNE_ATOM_L2_RQSTS, + PNE_ATOM_L2_RQSTS_SELF_DEMAND_I_STATE, + PNE_ATOM_L2_RQSTS_SELF_DEMAND_MESI, + PNE_ATOM_L2_ST, + PNE_ATOM_LOAD_BLOCK_L1D, + PNE_ATOM_LOAD_BLOCK_OVERLAP_STORE, + PNE_ATOM_LOAD_BLOCK_STA, + PNE_ATOM_LOAD_BLOCK_STD, + PNE_ATOM_LOAD_BLOCK_UNTIL_RETIRE, + PNE_ATOM_LOAD_HIT_PRE, + PNE_ATOM_MACHINE_CLEARS_SMC, + PNE_ATOM_MACHINE_NUKES_MEM_ORDER, + PNE_ATOM_MACRO_INSTS_ALL_DECODED, + PNE_ATOM_MACRO_INSTS_CISC_DECODED, + PNE_ATOM_MEMORY_DISAMBIGUATION_RESET, + PNE_ATOM_MEMORY_DISAMBIGUATION_SUCCESS, + PNE_ATOM_MEM_LOAD_RETIRED_DTLB_MISS, + PNE_ATOM_MEM_LOAD_RETIRED_L2_MISS, + PNE_ATOM_MEM_LOAD_RETIRED_L2_HIT, + PNE_ATOM_MEM_LOAD_RETIRED_L2_LINE_MISS, + PNE_ATOM_MUL, + PNE_ATOM_MUL_AR, + PNE_ATOM_MUL_S, + PNE_ATOM_PAGE_WALKS_WALKS, + PNE_ATOM_PAGE_WALKS_CYCLES, + PNE_ATOM_PREF_RQSTS_DN, + PNE_ATOM_PREF_RQSTS_UP, + PNE_ATOM_PREFETCH_PREFETCHNTA, + PNE_ATOM_PREFETCH_PREFETCHT0, + PNE_ATOM_PREFETCH_SW_L2, + PNE_ATOM_RAT_STALLS_ANY, + PNE_ATOM_RAT_STALLS_FLAGS, + PNE_ATOM_RAT_STALLS_FPSW, + PNE_ATOM_RAT_STALLS_PARTIAL_CYCLES, + PNE_ATOM_RAT_STALLS_ROB_READ_PORT, + PNE_ATOM_RESOURCE_STALLS_ANY, + PNE_ATOM_RESOURCE_STALLS_BR_MISS_CLEAR, + PNE_ATOM_RESOURCE_STALLS_FPCW, + PNE_ATOM_RESOURCE_STALLS_LD_ST, + PNE_ATOM_RESOURCE_STALLS_ROB_FULL, + PNE_ATOM_RESOURCE_STALLS_RS_FULL, + PNE_ATOM_RS_UOPS_DISPATCHED, + PNE_ATOM_RS_UOPS_DISPATCHED_PORT0, + PNE_ATOM_RS_UOPS_DISPATCHED_PORT1, + PNE_ATOM_RS_UOPS_DISPATCHED_PORT2, + PNE_ATOM_RS_UOPS_DISPATCHED_PORT3, + PNE_ATOM_RS_UOPS_DISPATCHED_PORT4, + PNE_ATOM_RS_UOPS_DISPATCHED_PORT5, + PNE_ATOM_SB_DRAIN_CYCLES, + 
PNE_ATOM_SEGMENT_REG_LOADS_ANY, + PNE_ATOM_SEG_REG_RENAMES_ANY, + PNE_ATOM_SEG_REG_RENAMES_DS, + PNE_ATOM_SEG_REG_RENAMES_ES, + PNE_ATOM_SEG_REG_RENAMES_FS, + PNE_ATOM_SEG_REG_RENAMES_GS, + PNE_ATOM_SEG_RENAME_STALLS_ANY, + PNE_ATOM_SEG_RENAME_STALLS_DS, + PNE_ATOM_SEG_RENAME_STALLS_ES, + PNE_ATOM_SEG_RENAME_STALLS_FS, + PNE_ATOM_SEG_RENAME_STALLS_GS, + PNE_ATOM_SIMD_ASSIST, + PNE_ATOM_SIMD_COMP_INST_RETIRED_PACKED_DOUBLE, + PNE_ATOM_SIMD_COMP_INST_RETIRED_PACKED_SINGLE, + PNE_ATOM_SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE, + PNE_ATOM_SIMD_COMP_INST_RETIRED_SCALAR_SINGLE, + PNE_ATOM_SIMD_INSTR_RETIRED, + PNE_ATOM_SIMD_INST_RETIRED_ANY, + PNE_ATOM_SIMD_INST_RETIRED_PACKED_DOUBLE, + PNE_ATOM_SIMD_INST_RETIRED_PACKED_SINGLE, + PNE_ATOM_SIMD_INST_RETIRED_SCALAR_DOUBLE, + PNE_ATOM_SIMD_INST_RETIRED_SCALAR_SINGLE, + PNE_ATOM_SIMD_INST_RETIRED_VECTOR, + PNE_ATOM_SIMD_SAT_INSTR_RETIRED, + PNE_ATOM_SIMD_SAT_UOP_EXEC_AR, + PNE_ATOM_SIMD_SAT_UOP_EXEC_S, + PNE_ATOM_SIMD_UOPS_EXEC_AR, + PNE_ATOM_SIMD_UOPS_EXEC_S, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_ARITHMETIC_AR, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_ARITHMETIC_S, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_LOGICAL_AR, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_LOGICAL_S, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_MUL_AR, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_MUL_S, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_PACK_AR, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_PACK_S, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_SHIFT_AR, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_SHIFT_S, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_UNPACK_AR, + PNE_ATOM_SIMD_UOP_TYPE_EXEC_UNPACK_S, + PNE_ATOM_SNOOP_STALL_DRV, + PNE_ATOM_SSE_PRE_EXEC_L2, + PNE_ATOM_SSE_PRE_EXEC_STORES, + PNE_ATOM_SSE_PRE_MISS_L1, + PNE_ATOM_SSE_PRE_MISS_L2, + PNE_ATOM_SSE_PRE_MISS_NTA, + PNE_ATOM_STORE_BLOCK_ORDER, + PNE_ATOM_STORE_BLOCK_SNOOP, + PNE_ATOM_STORE_FORWARDS_GOOD, + PNE_ATOM_THERMAL_TRIP, + PNE_ATOM_UOPS_RETIRED_LD_IND_BR, + PNE_ATOM_UOPS_RETIRED_STD_STA, + PNE_ATOM_UOPS_RETIRED_MACRO_FUSION, + PNE_ATOM_UOPS_RETIRED_FUSED, + PNE_ATOM_UOPS_RETIRED_NON_FUSED, + PNE_ATOM_UOPS_RETIRED_ANY, + 
PNE_ATOM_X87_COMP_OPS_EXE_ANY_AR, + PNE_ATOM_X87_COMP_OPS_EXE_ANY_S, + PNE_ATOM_X87_OPS_RETIRED_ANY, + PNE_ATOM_X87_OPS_RETIRED_FXCH, + PNE_ATOM_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t AtomProcessor_info[]; +extern hwi_search_t AtomProcessor_map[]; + +#endif diff --git a/src/freebsd/map-core.c b/src/freebsd/map-core.c new file mode 100644 index 0000000..aaccd25 --- /dev/null +++ b/src/freebsd/map-core.c @@ -0,0 +1,163 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-core.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + +/**************************************************************************** + CORE SUBSTRATE + CORE SUBSTRATE + CORE SUBSTRATE + CORE SUBSTRATE + CORE SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_CoreProcessor must match CoreProcessor_info +*/ + +Native_Event_LabelDescription_t CoreProcessor_info[] = +{ + {"BAClears", "The number of BAClear conditions asserted."}, + {"BTB_Misses", "The number of branches for which the branch table buffer did not produce a prediction."}, + {"Br_BAC_Missp_Exec", "The number of branch instructions executed that were mispredicted at the front end."}, + {"Br_Bogus", "The number of bogus branches."}, + {"Br_Call_Exec", "The number of CALL instructions executed."}, + {"Br_Call_Missp_Exec", "The number of CALL instructions executed that were mispredicted."}, + {"Br_Cnd_Exec", "The number of conditional branch instructions executed."}, + {"Br_Cnd_Missp_Exec", "The number of conditional branch instructions executed that were mispredicted."}, + {"Br_Ind_Call_Exec", "The number of indirect CALL instructions executed."}, + {"Br_Ind_Exec", "The number of indirect branches executed."}, + {"Br_Ind_Missp_Exec", "The number of indirect branch instructions executed that were mispredicted."}, + 
{"Br_Inst_Exec", "The number of branch instructions executed including speculative branches."}, + {"Br_Instr_Decoded", "The number of branch instructions decoded."}, + {"Br_Instr_Ret", "The number of branch instructions retired. This is an architectural performance event."}, + {"Br_MisPred_Ret", "The number of mispredicted branch instructions retired. This is an architectural performance event."}, + {"Br_MisPred_Taken_Ret", "The number of taken and mispredicted branches retired."}, + {"Br_Missp_Exec", "The number of branch instructions executed and mispredicted at execution including branches that were not predicted."}, + {"Br_Ret_BAC_Missp_Exec", "The number of return branch instructions that were mispredicted at the front end."}, + {"Br_Ret_Exec", "The number of return branch instructions executed."}, + {"Br_Ret_Missp_Exec", "The number of return branch instructions executed that were mispredicted."}, + {"Br_Taken_Ret", "The number of taken branches retired."}, + {"Bus_BNR_Clocks", "The number of external bus cycles while BNR (bus not ready) was asserted."}, + {"Bus_DRDY_Clocks", "The number of external bus cycles while DRDY was asserted."}, + {"Bus_Data_Rcv", "The number of cycles during which the processor is busy receiving data."}, + {"Bus_Locks_Clocks", "The number of external bus cycles while the bus lock signal was asserted."}, + {"Bus_Not_In_Use", "The number of cycles when there is no transaction from the core."}, + {"Bus_Req_Outstanding", "The weighted cycles of cacheable bus data read requests from the data cache unit or hardware prefetcher."}, + {"Bus_Snoop_Stall", "The number of bus cycles while a bus snoop is stalled."}, + {"Bus_Snoops", "The number of snoop responses to bus transactions."}, + {"Bus_Trans_Any", "The number of completed bus transactions."}, + {"Bus_Trans_Brd", "The number of read bus transactions."}, + {"Bus_Trans_Burst", "The number of completed burst transactions. 
Retried transactions may be counted more than once."}, + {"Bus_Trans_Def", "The number of completed deferred transactions."}, + {"Bus_Trans_IO", "The number of completed I/O transactions counting both reads and writes."}, + {"Bus_Trans_Ifetch", "Completed instruction fetch transactions."}, + {"Bus_Trans_Inval", "The number of completed invalidate transactions."}, + {"Bus_Trans_Mem", "The number of completed memory transactions."}, + {"Bus_Trans_P", "The number of completed partial transactions."}, + {"Bus_Trans_Pwr", "The number of completed partial write transactions."}, + {"Bus_Trans_RFO", "The number of completed read-for-ownership transactions."}, + {"Bus_Trans_WB", "The number of completed writeback transactions from the data cache unit, excluding L2 writebacks."}, + {"Cycles_Div_Busy", "The number of cycles the divider is busy. The event is only available on PMC0."}, + {"Cycles_Int_Masked", "The number of cycles while interrupts were disabled."}, + {"Cycles_Int_Pending_Masked", "The number of cycles while interrupts were disabled and interrupts were pending."}, + {"DCU_Snoop_To_Share", "The number of data cache unit snoops to L1 cache lines in the shared state."}, + {"DCache_Cache_Lock", "The number of cacheable locked read operations to invalid state."}, + {"DCache_Cache_LD", "The number of cacheable L1 data read operations."}, + {"DCache_Cache_ST", "The number of cacheable L1 data write operations."}, + {"DCache_M_Evict", "The number of M state data cache lines that were evicted."}, + {"DCache_M_Repl", "The number of M state data cache lines that were allocated."}, + {"DCache_Pend_Miss", "The weighted cycles an L1 miss was outstanding."}, + {"DCache_Repl", "The number of data cache line replacements."}, + {"Data_Mem_Cache_Ref", "The number of cacheable read and write operations to L1 data cache."}, + {"Data_Mem_Ref", "The number of L1 data reads and writes, both cacheable and uncacheable."}, + {"Dbus_Busy", "The number of core cycles during which the data bus was 
busy."}, + {"Dbus_Busy_Rd", "The number of cycles during which the data bus was busy transferring data to a core."}, + {"Div", "The number of divide operations including speculative operations for integer and floating point divides. This event can only be counted on PMC1."}, + {"Dtlb_Miss", "The number of data references that missed the TLB."}, + {"ESP_Uops", "The number of ESP folding instructions decoded."}, + {"EST_Trans", "Count the number of Intel Enhanced SpeedStep transitions."}, + {"FP_Assist", "The number of floating point operations that required microcode assists. The event is only available on PMC1."}, + {"FP_Comp_Instr_Ret", "The number of X87 floating point compute instructions retired. The event is only available on PMC0."}, + {"FP_Comps_Op_Exe", "The number of floating point computational instructions executed."}, + {"FP_MMX_Trans", "The number of transitions from X87 to MMX."}, + {"Fused_Ld_Uops_Ret", "The number of fused load uops retired."}, + {"Fused_St_Uops_Ret", "The number of fused store uops retired."}, + {"Fused_Uops_Ret", "The number of fused uops retired."}, + {"HW_Int_Rx", "The number of hardware interrupts received."}, + {"ICache_Misses", "The number of instruction fetch misses in the instruction cache and streaming buffers."}, + {"ICache_Reads", "The number of instruction fetches from the instruction cache and streaming buffers counting both cacheable and uncacheable fetches."}, + {"IFU_Mem_Stall", "The number of cycles the instruction fetch unit was stalled while waiting for data from memory."}, + {"ILD_Stall", "The number of instruction length decoder stalls."}, + {"ITLB_Misses", "The number of instruction TLB misses."}, + {"Instr_Decoded", "The number of instructions decoded."}, + {"Instr_Ret", "The number of instructions retired. 
This is an architectural performance event."}, + {"L1_Pref_Req", "The number of L1 prefetch request due to data cache misses."}, + {"L2_ADS", "The number of L2 address strobes."}, + {"L2_IFetch", "The number of instruction fetches by the instruction fetch unit from L2 cache including speculative fetches."}, + {"L2_LD", "The number of L2 cache reads."}, + {"L2_Lines_In", "The number of L2 cache lines allocated."}, + {"L2_Lines_Out", "The number of L2 cache lines evicted."}, + {"L2_M_Lines_In", "The number of L2 M state cache lines allocated."}, + {"L2_M_Lines_Out", "The number of L2 M state cache lines evicted."}, + {"L2_No_Request_Cycles", "The number of cycles there was no request to access L2 cache."}, + {"L2_Reject_Cycles", "The number of cycles the L2 cache was busy and rejecting new requests."}, + {"L2_Rqsts", "The number of L2 cache requests."}, + {"L2_ST", "The number of L2 cache writes including speculative writes."}, + {"LD_Blocks", "The number of load operations delayed due to store buffer blocks."}, + {"LLC_Misses", "The number of cache misses for references to the last level cache, excluding misses due to hardware prefetches. This is an architectural performance event."}, + {"LLC_Reference", "The number of references to the last level cache, excluding those due to hardware prefetches. This is an architectural performance event."}, + {"MMX_Assist", "The number of EMMX instructions executed."}, + {"MMX_FP_Trans", "The number of transitions from MMX to X87."}, + {"MMX_Instr_Exec", "The number of MMX instructions executed excluding MOVQ and MOVD stores."}, + {"MMX_Instr_Ret", "The number of MMX instructions retired."}, + {"Misalign_Mem_Ref", "The number of misaligned data memory references, counting loads and stores."}, + {"Mul", "The number of multiply operations include speculative floating point and integer multiplies. This event is available on PMC1 only."}, + {"NonHlt_Ref_Cycles", "The number of non-halted bus cycles. 
This is an architectural performance event."}, + {"Pref_Rqsts_Dn", "The number of hardware prefetch requests issued in backward streams."}, + {"Pref_Rqsts_Up", "The number of hardware prefetch requests issued in forward streams."}, + {"Resource_Stall", "The number of cycles where there is a resource related stall."}, + {"SD_Drains", "The number of cycles while draining store buffers."}, + {"SIMD_FP_DP_P_Ret", "The number of SSE/SSE2 packed double precision instructions retired."}, + {"SIMD_FP_DP_P_Comp_Ret", "The number of SSE/SSE2 packed double precision compute instructions retired."}, + {"SIMD_FP_DP_S_Ret", "The number of SSE/SSE2 scalar double precision instructions retired."}, + {"SIMD_FP_DP_S_Comp_Ret", "The number of SSE/SSE2 scalar double precision compute instructions retired."}, + {"SIMD_FP_SP_P_Comp_Ret", "The number of SSE/SSE2 packed single precision compute instructions retired."}, + {"SIMD_FP_SP_Ret", "The number of SSE/SSE2 scalar single precision instructions retired, both packed and scalar."}, + {"SIMD_FP_SP_S_Ret", "The number of SSE/SSE2 scalar single precision instructions retired."}, + {"SIMD_FP_SP_S_Comp_Ret", "The number of SSE/SSE2 single precision compute instructions retired."}, + {"SIMD_Int_128_Ret", "The number of SSE2 128-bit integer instructions retired."}, + {"SIMD_Int_Pari_Exec", "The number of SIMD integer packed arithmetic instructions executed."}, + {"SIMD_Int_Pck_Exec", "The number of SIMD integer pack operations instructions executed."}, + {"SIMD_Int_Plog_Exec", "The number of SIMD integer packed logical instructions executed."}, + {"SIMD_Int_Pmul_Exec", "The number of SIMD integer packed multiply instructions executed."}, + {"SIMD_Int_Psft_Exec", "The number of SIMD integer packed shift instructions executed."}, + {"SIMD_Int_Sat_Exec", "The number of SIMD integer saturating instructions executed."}, + {"SIMD_Int_Upck_Exec", "The number of SIMD integer unpack instructions executed."}, + {"SMC_Detected", "The number of times 
self-modifying code was detected."}, + {"SSE_NTStores_Miss", "The number of times an SSE streaming store instruction missed all caches."}, + {"SSE_NTStores_Ret", "The number of SSE streaming store instructions executed."}, + {"SSE_PrefNta_Miss", "The number of times PREFETCHNTA missed all caches."}, + {"SSE_PrefNta_Ret", "The number of PREFETCHNTA instructions retired."}, + {"SSE_PrefT1_Miss", "The number of times PREFETCHT1 missed all caches."}, + {"SSE_PrefT1_Ret", "The number of PREFETCHT1 instructions retired."}, + {"SSE_PrefT2_Miss", "The number of times PREFETCHT2 missed all caches."}, + {"SSE_PrefT2_Ret", "The number of PREFETCHT2 instructions retired."}, + {"Seg_Reg_Loads", "The number of segment register loads."}, + {"Serial_Execution_Cycles", "The number of non-halted bus cycles of this code while the other core was halted."}, + {"Thermal_Trip", "The duration in a thermal trip based on the current core clock."}, + {"Unfusion", "The number of unfusion events."}, + {"Unhalted_Core_Cycles", "The number of core clock cycles when the clock signal on a specific core is not halted. 
This is an architectural performance event."}, + {"Uops_Ret", "The number of micro-ops retired."}, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-core.h b/src/freebsd/map-core.h new file mode 100644 index 0000000..e74727c --- /dev/null +++ b/src/freebsd/map-core.h @@ -0,0 +1,155 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-core.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_CORE +#define FreeBSD_MAP_CORE + +enum NativeEvent_Value_CoreProcessor { + PNE_CORE_BACLEARS = PAPI_NATIVE_MASK, + PNE_CORE_BTB_MISSES, + PNE_CORE_BR_BAC_MISSP_EXEC, + PNE_CORE_BR_BOGUS, + PNE_CORE_BR_CALL_EXEC, + PNE_CORE_BR_CALL_MISSP_EXEC, + PNE_CORE_BR_CND_EXEC, + PNE_CORE_BR_CND_MISSP_EXEC, + PNE_CORE_BR_IND_CALL_EXEC, + PNE_CORE_BR_IND_EXEC, + PNE_CORE_BR_IND_MISSP_EXEC, + PNE_CORE_BR_INST_EXEC, + PNE_CORE_BR_INSTR_DECODED, + PNE_CORE_BR_INSTR_RET, + PNE_CORE_BR_MISPRED_RET, + PNE_CORE_BR_MISPRED_TAKEN_RET, + PNE_CORE_BR_MISSP_EXEC, + PNE_CORE_BR_RET_BAC_MISSP_EXEC, + PNE_CORE_BR_RET_EXEC, + PNE_CORE_BR_RET_MISSP_EXEC, + PNE_CORE_BR_TAKEN_RET, + PNE_CORE_BUS_BNR_CLOCKS, + PNE_CORE_BUS_DRDY_CLOCKS, + PNE_CORE_BUS_DATA_RCV, + PNE_CORE_BUS_LOCKS_CLOCKS, + PNE_CORE_BUS_NOT_IN_USE, + PNE_CORE_BUS_REQ_OUTSTANDING, + PNE_CORE_BUS_SNOOP_STALL, + PNE_CORE_BUS_SNOOPS, + PNE_CORE_BUS_TRANS_ANY, + PNE_CORE_BUS_TRANS_BRD, + PNE_CORE_BUS_TRANS_BURST, + PNE_CORE_BUS_TRANS_DEF, + PNE_CORE_BUS_TRANS_IO, + PNE_CORE_BUS_TRANS_IFETCH, + PNE_CORE_BUS_TRANS_INVAL, + PNE_CORE_BUS_TRANS_MEM, + PNE_CORE_BUS_TRANS_P, + PNE_CORE_BUS_TRANS_PWR, + PNE_CORE_BUS_TRANS_RFO, + PNE_CORE_BUS_TRANS_WB, + PNE_CORE_CYCLES_DIV_BUSY, + PNE_CORE_CYCLES_INT_MASKED, + PNE_CORE_CYCLES_INT_PENDING_MASKED, + PNE_CORE_DCU_SNOOP_TO_SHARE, + PNE_CORE_DCACHE_CACHE_LOCK, + PNE_CORE_DCACHE_CACHE_LD, + PNE_CORE_DCACHE_CACHE_ST, + PNE_CORE_DCACHE_M_EVICT, + PNE_CORE_DCACHE_M_REPL, + PNE_CORE_DCACHE_PEND_MISS, + 
PNE_CORE_DCACHE_REPL, + PNE_CORE_DATA_MEM_CACHE_REF, + PNE_CORE_DATA_MEM_REF, + PNE_CORE_DBUS_BUSY, + PNE_CORE_DBUS_BUSY_RD, + PNE_CORE_DIV, + PNE_CORE_DTLB_MISS, + PNE_CORE_ESP_UOPS, + PNE_CORE_EST_TRANS, + PNE_CORE_FP_ASSIST, + PNE_CORE_FP_COMP_INSTR_RET, + PNE_CORE_FP_COMPS_OP_EXE, + PNE_CORE_FP_MMX_TRANS, + PNE_CORE_FUSED_LD_UOPS_RET, + PNE_CORE_FUSED_ST_UOPS_RET, + PNE_CORE_FUSED_UOPS_RET, + PNE_CORE_HW_INT_RX, + PNE_CORE_ICACHE_MISSES, + PNE_CORE_ICACHE_READS, + PNE_CORE_IFU_MEM_STALL, + PNE_CORE_ILD_STALL, + PNE_CORE_ITLB_MISSES, + PNE_CORE_INSTR_DECODED, + PNE_CORE_INSTR_RET, + PNE_CORE_L1_PREF_REQ, + PNE_CORE_L2_ADS, + PNE_CORE_L2_IFETCH, + PNE_CORE_L2_LD, + PNE_CORE_L2_LINES_IN, + PNE_CORE_L2_LINES_OUT, + PNE_CORE_L2_M_LINES_IN, + PNE_CORE_L2_M_LINES_OUT, + PNE_CORE_L2_NO_REQUEST_CYCLES, + PNE_CORE_L2_REJECT_CYCLES, + PNE_CORE_L2_RQSTS, + PNE_CORE_L2_ST, + PNE_CORE_LD_BLOCKS, + PNE_CORE_LLC_MISSES, + PNE_CORE_LLC_REFERENCE, + PNE_CORE_MMX_ASSIST, + PNE_CORE_MMX_FP_TRANS, + PNE_CORE_MMX_INSTR_EXEC, + PNE_CORE_MMX_INSTR_RET, + PNE_CORE_MISALIGN_MEM_REF, + PNE_CORE_MUL, + PNE_CORE_NONHLT_REF_CYCLES, + PNE_CORE_PREF_RQSTS_DN, + PNE_CORE_PREF_RQSTS_UP, + PNE_CORE_RESOURCE_STALL, + PNE_CORE_SD_DRAINS, + PNE_CORE_SIMD_FP_DP_P_RET, + PNE_CORE_SIMD_FP_DP_P_COMP_RET, + PNE_CORE_SIMD_FP_DP_S_RET, + PNE_CORE_SIMD_FP_DP_S_COMP_RET, + PNE_CORE_SIMD_FP_SP_P_COMP_RET, + PNE_CORE_SIMD_FP_SP_RET, + PNE_CORE_SIMD_FP_SP_S_RET, + PNE_CORE_SIMD_FP_SP_S_COMP_RET, + PNE_CORE_SIMD_INT_128_RET, + PNE_CORE_SIMD_INT_PARI_EXEC, + PNE_CORE_SIMD_INT_PCK_EXEC, + PNE_CORE_SIMD_INT_PLOG_EXEC, + PNE_CORE_SIMD_INT_PMUL_EXEC, + PNE_CORE_SIMD_INT_PSFT_EXEC, + PNE_CORE_SIMD_INT_SAT_EXEC, + PNE_CORE_SIMD_INT_UPCK_EXEC, + PNE_CORE_SMC_DETECTED, + PNE_CORE_SSE_NTSTORES_MISS, + PNE_CORE_SSE_NTSTORES_RET, + PNE_CORE_SSE_PREFNTA_MISS, + PNE_CORE_SSE_PREFNTA_RET, + PNE_CORE_SSE_PREFT1_MISS, + PNE_CORE_SSE_PREFT1_RET, + PNE_CORE_SSE_PREFT2_MISS, + PNE_CORE_SSE_PREFT2_RET, + PNE_CORE_SEG_REG_LOADS, + 
PNE_CORE_SERIAL_EXECUTION_CYCLES, + PNE_CORE_THERMAL_TRIP, + PNE_CORE_UNFUSION, + PNE_CORE_UNHALTED_CORE_CYCLES, + PNE_CORE_UOPS_RET, + PNE_CORE_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t CoreProcessor_info[]; +extern hwi_search_t CoreProcessor_map[]; + +#endif + diff --git a/src/freebsd/map-core2-extreme.c b/src/freebsd/map-core2-extreme.c new file mode 100644 index 0000000..0a90cc6 --- /dev/null +++ b/src/freebsd/map-core2-extreme.c @@ -0,0 +1,241 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-core2-extreme.c +* Author: George Neville-Neil +* gnn@freebsd.org +* Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + + +/**************************************************************************** + CORE2_EXTREME SUBSTRATE + CORE2_EXTREME SUBSTRATE + CORE2_EXTREME SUBSTRATE + CORE2_EXTREME SUBSTRATE + CORE2_EXTREME SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_Core2ExtremeProcessor must match Core2ExtremeProcessor_info +*/ + +Native_Event_LabelDescription_t Core2ExtremeProcessor_info[] = +{ + {"BACLEARS", "The number of times the front end is resteered."}, + {"BOGUS_BR", "The number of byte sequences mistakenly detected as taken branch instructions."}, + {"BR_BAC_MISSP_EXEC", "The number of branch instructions that were mispredicted when decoded."}, + {"BR_CALL_MISSP_EXEC", "The number of mispredicted CALL instructions that were executed."}, + {"BR_CALL_EXEC", "The number of CALL instructions executed."}, + {"BR_CND_EXEC", "The number of conditional branches executed, but not necessarily retired."}, + {"BR_CND_MISSP_EXEC", "The number of mispredicted conditional branches executed."}, + {"BR_IND_CALL_EXEC", "The number of indirect CALL instructions executed."}, + {"BR_IND_EXEC", "The number of indirect branch instructions executed."}, + 
{"BR_IND_MISSP_EXEC", "The number of mispredicted indirect branch instructions executed."}, + {"BR_INST_DECODED", "The number of branch instructions decoded."}, + {"BR_INST_EXEC", "The number of branches executed, but not necessarily retired."}, + {"BR_INST_RETIRED.ANY", "The number of branch instructions retired. This is an architectural performance event."}, + {"BR_INST_RETIRED.MISPRED", "The number of mispredicted branch instructions retired. This is an architectural performance event."}, + {"BR_INST_RETIRED.MISPRED_NOT_TAKEN", "The number of not taken branch instructions retired that were mispredicted."}, + {"BR_INST_RETIRED.MISPRED_TAKEN", "The number taken branch instructions retired that were mispredicted."}, + {"BR_INST_RETIRED.PRED_NOT_TAKEN", "The number of not taken branch instructions retired that were correctly predicted."}, + {"BR_INST_RETIRED.PRED_TAKEN", "The number of taken branch instructions retired that were correctly predicted."}, + {"BR_INST_RETIRED.TAKEN", "The number of taken branch instructions retired."}, + {"BR_MISSP_EXEC", "The number of mispredicted branch instructions that were executed."}, + {"BR_RET_MISSP_EXEC", "The number of mispredicted RET instructions executed."}, + {"BR_RET_BAC_MISSP_EXEC", "The number of RET instructions executed that were mispredicted at decode time."}, + {"BR_RET_EXEC", "The number of RET instructions executed."}, + {"BR_TKN_BUBBLE_1", "The number of branch predicted taken with bubble 1."}, + {"BR_TKN_BUBBLE_2", "The number of branch predicted taken with bubble 2."}, + {"BUSQ_EMPTY", "The number of cycles during which the core did not have any pending transactions in the bus queue."}, + {"BUS_BNR_DRV", "Number of Bus Not Ready signals asserted on the bus."}, + {"BUS_DATA_RCV", "Number of bus cycles during which the processor is receiving data."}, + {"BUS_DRDY_CLOCKS", "The number of bus cycles during which the Data Ready signal is asserted on the bus."}, + {"BUS_HIT_DRV", "The number of bus cycles during 
which the processor drives the HIT# pin."}, + {"BUS_HITM_DRV", "The number of bus cycles during which the processor drives the HITM# pin."}, + {"BUS_IO_WAIT", "The number of core cycles during which I/O requests wait in the bus queue."}, + {"BUS_LOCK_CLOCKS", "The number of bus cycles during which the LOCK signal was asserted on the bus."}, + {"BUS_REQUEST_OUTSTANDING", "The number of pending full cache line read transactions on the bus occuring in each cycle."}, + {"BUS_TRANS_ANY", "The number of bus transactions of any kind."}, + {"BUS_TRANS_BRD", "The number of burst read transactions."}, + {"BUS_TRANS_BURST", "The number of burst transactions."}, + {"BUS_TRANS_DEF", "The number of deferred bus transactions."}, + {"BUS_TRANS_IFETCH", "The number of instruction fetch full cache line bus transactions."}, + {"BUS_TRANS_INVAL", "The number of invalidate bus transactions."}, + {"BUS_TRANS_IO", "The number of completed I/O bus transaactions due to IN and OUT instructions."}, + {"BUS_TRANS_MEM", "The number of memory bus transactions."}, + {"BUS_TRANS_P", "The number of partial bus transactions."}, + {"BUS_TRANS_PWR", "The number of partial write bus transactions."}, + {"BUS_TRANS_RFO", "The number of Read For Ownership bus transactions."}, + {"BUS_TRANS_WB", "The number of explicit writeback bus transactions due to dirty line evictions."}, + {"CMP_SNOOP", "The number of times the L1 data cache is snooped by the other core in the same processor."}, + {"CPU_CLK_UNHALTED.BUS", "The number of bus cycles when the core is not in the halt state. This is an architectural performance event."}, + {"CPU_CLK_UNHALTED.CORE_P", "The number of core cycles while the core is not in a halt state. This is an architectural performance event."}, + {"CPU_CLK_UNHALTED.NO_OTHER", "The number of bus cycles during which the core remains unhalted and the other core is halted."}, + {"CYCLES_DIV_BUSY", "The number of cycles the divider is busy. 
This event is only available on PMC0."}, + {"CYCLES_INT_MASKED", "The number of cycles during which interrupts are disabled."}, + {"CYCLES_INT_PENDING_AND_MASKED", "The number of cycles during which there were pending interrupts while interrupts were disabled."}, + {"CYCLES_L1I_MEM_STALLED", "The number of cycles for which an instruction fetch stalls."}, + {"DELAYED_BYPASS.FP", "The number of floating point operations that used data immediately after the data was generated by a non floating point execution unit."}, + {"DELAYED_BYPASS.LOAD", "The number of delayed bypass penalty cycles that a load operation incurred."}, + {"DELAYED_BYPASS.SIMD", "The number of times SIMD operations use data immediately after data, was generated by a non-SIMD execution unit."}, + {"DIV", "The number of divide operations executed."}, + {"DTLB_MISSES.ANY", "The number of Data TLB misses, including misses that result from speculative accesses."}, + {"DTLB_MISSES.L0_MISS_LD", "The number of level 0 DTLB misses due to load operations."}, + {"DTLB_MISSES.MISS_LD", "The number of Data TLB misses due to load operations."}, + {"DTLB_MISSES.MISS_ST", "The number of Data TLB misses due to store operations."}, + {"EIST_TRANS", "The number of Enhanced Intel SpeedStep Technology transitions."}, + {"ESP.ADDITIONS", "The number of automatic additions to the esp register."}, + {"ESP.SYNCH", "The number of times the esp register was explicitly used in an address expression after it is implicitly used by a PUSH or POP instruction."}, + {"EXT_SNOOP", "The number of snoop responses to bus transactions."}, + {"FP_ASSIST", "The number of floating point operations executed that needed a microcode assist."}, + {"FP_COMP_OPS_EXE", "The number of floating point computational micro-ops executed. 
The event is available only on PMC0."}, + {"FP_MMX_TRANS_TO_FP", "The number of transitions from MMX instructions to floating point instructions."}, + {"FP_MMX_TRANS_TO_MMX", "The number of transitions from floating point instructions to MMX instructions."}, + {"HW_INT_RCV", "The number of hardware interrupts recieved."}, + {"IDLE_DURING_DIV", "The number of cycles the divider is busy and no other execution unit or load operation was in progress. This event is available only on PMC0."}, + {"ILD_STALL", "The number of cycles the instruction length decoder stalled due to a length changing prefix."}, + {"INST_QUEUE.FULL", "The number of cycles during which the instruction queue is full."}, + {"INST_RETIRED.ANY_P", "The number of instructions retired. This is an architectural performance event."}, + {"INST_RETIRED.LOADS", "The number of instructions retired that contained a load operation."}, + {"INST_RETIRED.OTHER", "The number of instructions retired that did not contain a load or a store operation."}, + {"INST_RETIRED.STORES", "The number of instructions retired that contained a store operation."}, + {"INST_RETIRED.VM_H", "The number of instructions retired while in VMX root operation."}, + {"ITLB.FLUSH", "The number of ITLB flushes."}, + {"ITLB.LARGE_MISS", "The number of instruction fetches from large pages that miss the ITLB."}, + {"ITLB.MISSES", "The number of instruction fetches from both large and small pages that miss the ITLB."}, + {"ITLB.SMALL_MISS", "The number of instruction fetches from small pages that miss the ITLB."}, + {"ITLB_MISS_RETIRED", "The number of retired instructions that missed the ITLB when they were fetched."}, + {"L1D_ALL_CACHE_REF", "The number of data reads and writes to cacheable memory."}, + {"L1D_ALL_REF", "The number of references to L1 data cache counting loads and stores of to all memory types."}, + {"L1D_CACHE_LD", "Number of data reads from cacheable memory excluding locked reads."}, + {"L1D_CACHE_LOCK", "Number of locked reads 
from cacheable memory."}, + {"L1D_CACHE_LOCK_DURATION", "The number of cycles during which any cache line is locked by any locking instruction."}, + {"L1D_CACHE_ST", "The number of data writes to cacheable memory excluding locked writes."}, + {"L1D_M_EVICT", "The number of modified cache lines evicted from L1 data cache."}, + {"L1D_M_REPL", "The number of modified lines allocated in L1 data cache."}, + {"L1D_PEND_MISS", "The total number of outstanding L1 data cache misses at any clock."}, + {"L1D_PREFETCH.REQUESTS", "The number of times L1 data cache requested to prefetch a data cache line."}, + {"L1D_REPL", "The number of lines brought into L1 data cache."}, + {"L1D_SPLIT.LOADS", "The number of load operations that span two cache lines."}, + {"L1D_SPLIT.STORES", "The number of store operations that span two cache lines."}, + {"L1I_MISSES", "The number of instruction fetch unit misses."}, + {"L1I_READS", "The number of instruction fetches."}, + {"L2_ADS", "The number of cycles that the L2 address bus is in use."}, + {"L2_DBUS_BUSY_RD", "The number of cycles during which the L2 data bus is busy transferring data to the core."}, + {"L2_IFETCH", "The number of instruction cache line requests from the instruction fetch unit."}, + {"L2_LD", "The number of L2 cache read requests from L1 cache and L2 prefetchers."}, + {"L2_LINES_IN", "The number of cache lines allocated in L2 cache."}, + {"L2_LINES_OUT", "The number of L2 cache lines evicted."}, + {"L2_LOCK", "The number of locked accesses to cache lines that miss L1 data cache."}, + {"L2_M_LINES_IN", "The number of L2 cache line modifications."}, + {"L2_M_LINES_OUT", "The number of modified lines evicted from L2 cache."}, + {"L2_NO_REQ", "Number of cycles during which no L2 cache requests were pending from a core."}, + {"L2_REJECT_BUSQ", "Number of L2 cache requests that were rejected."}, + {"L2_RQSTS", "The number of completed L2 cache requests."}, + {"L2_RQSTS.SELF.DEMAND.I_STATE", "The number of completed L2 cache 
demand requests from this core that missed the L2 cache. This is an architectural performance event."}, + {"L2_RQSTS.SELF.DEMAND.MESI", "The number of completed L2 cache demand requests from this core. This is an architectural performance event."}, + {"L2_ST", "The number of store operations that miss the L1 cache and request data from the L2 cache."}, + {"LOAD_BLOCK.L1D", "The number of loads blocked by the L1 data cache."}, + {"LOAD_BLOCK.OVERLAP_STORE", "The number of loads that partially overlap an earlier store or are aliased with a previous store."}, + {"LOAD_BLOCK.STA", "The number of loads blocked by preceding stores whose address is yet to be calculated."}, + {"LOAD_BLOCK.STD", "The number of loads blocked by preceding stores to the same address whose data value is not known."}, + {"LOAD_BLOCK.UNTIL_RETIRE", "The numer of load operations that were blocked until retirement."}, + {"LOAD_HIT_PRE", "The number of load operations that conflicted with an prefetch to the same cache line."}, + {"MACHINE_NUKES.MEM_ORDER", "The number of times the execution pipeline was restarted due to a memory ordering conflict or memory disambiguation misprediction."}, + {"MACHINE_NUKES.SMC", "The number of times a program writes to a code section."}, + {"MACRO_INSTS.CISC_DECODED", "The number of complex instructions decoded."}, + {"MACRO_INSTS.DECODED", "The number of instructions decoded."}, + {"MEMORY_DISAMBIGUATION.RESET", "The number of cycles during which memory disambiguation misprediction occurs."}, + {"MEMORY_DISAMBIGUATION.SUCCESS", "The number of load operations that were successfully disambiguated."}, + {"MEM_LOAD_RETIRED.DTLB_MISS", "The number of retired loads that missed the DTLB."}, + {"MEM_LOAD_RETIRED.L1D_LINE_MISS", "The number of retired load operations that missed L1 data cache and that sent a request to L2 cache. This event is only available on PMC0."}, + {"MEM_LOAD_RETIRED.L1D_MISS", "The number of retired load operations that missed L1 data cache. 
This event is only available on PMC0."}, + {"MEM_LOAD_RETIRED.L2_LINE_MISS", "The number of load operations that missed L2 cache and that caused a bus request."}, + {"MEM_LOAD_RETIRED.L2_MISS", "The number of load operations that missed L2 cache."}, + {"MUL","The number of multiply operations executed (only available on PMC1.)"}, + {"PAGE_WALKS.COUNT", "The number of page walks executed due to an ITLB or DTLB miss."}, + {"PAGE_WALKS.CYCLES", "The number of cycles spent in a page walk caused by an ITLB or DTLB miss."}, + {"PREF_RQSTS_DN", "The number of downward prefetches issued from the Data Prefetch Logic unit to L2 cache."}, + {"PREF_RQSTS_UP", "The number of upward prefetches issued from the Data Prefetch Logic unit to L2 cache."}, + {"RAT_STALLS.ANY", "The number of stall cycles due to any of RAT_STALLS.FLAGS RAT_STALLS.FPSW, RAT_STALLS.PARTIAL and RAT_STALLS.ROB_READ_PORT."}, + {"RAT_STALLS.FLAGS", "The number of cycles execution stalled due to a flag register induced stall."}, + {"RAT_STALLS.FPSW", "The number of times the floating point status word was written."}, + {"RAT_STALLS.OTHER_SERIALIZATION_STALLS", "The number of stalls due to other RAT resource serialization not counted by umask 0FH."}, + {"RAT_STALLS.PARTIAL_CYCLES", "The number of cycles of added instruction execution latency due to the use of a register that was partially written by previous instructions."}, + {"RAT_STALLS.ROB_READ_PORT", "The number of cycles when ROB read port stalls occurred."}, + {"RESOURCE_STALLS.ANY", "The number of cycles during which any resource related stall occurred."}, + {"RESOURCE_STALLS.BR_MISS_CLEAR", "The number of cycles stalled due to branch misprediction."}, + {"RESOURCE_STALLS.FPCW", "The number of cycles stalled due to writing the floating point control word."}, + {"RESOURCE_STALLS.LD_ST", "The number of cycles during which the number of loads and stores in the pipeline exceeded their limits."}, + {"RESOURCE_STALLS.ROB_FULL", "The number of cycles when the 
reorder buffer was full."}, + {"RESOURCE_STALLS.RS_FULL", "The number of cycles during which the RS was full."}, + {"RS_UOPS_DISPATCHED", "The number of micro-ops dispatched for execution."}, + {"RS_UOPS_DISPATCHED.PORT0", "The number of cycles micro-ops were dispatched for execution on port 0."}, + {"RS_UOPS_DISPATCHED.PORT1", "The number of cycles micro-ops were dispatched for execution on port 1."}, + {"RS_UOPS_DISPATCHED.PORT2", "The number of cycles micro-ops were dispatched for execution on port 2."}, + {"RS_UOPS_DISPATCHED.PORT3", "The number of cycles micro-ops were dispatched for execution on port 3."}, + {"RS_UOPS_DISPATCHED.PORT4", "The number of cycles micro-ops were dispatched for execution on port 4."}, + {"RS_UOPS_DISPATCHED.PORT5", "The number of cycles micro-ops were dispatched for execution on port 5."}, + {"SB_DRAIN_CYCLES", "The number of cycles while the store buffer is draining."}, + {"SEGMENT_REG_LOADS", "The number of segment register loads."}, + {"SEG_REG_RENAMES.ANY", "The number of times the any segment register was renamed."}, + {"SEG_REG_RENAMES.DS", "The number of times the ds register is renamed."}, + {"SEG_REG_RENAMES.ES", "The number of times the es register is renamed."}, + {"SEG_REG_RENAMES.FS", "The number of times the fs register is renamed."}, + {"SEG_REG_RENAMES.GS", "The number of times the gs register is renamed."}, + {"SEG_RENAME_STALLS.ANY", "The number of stalls due to lack of resource to rename any segment register."}, + {"SEG_RENAME_STALLS.DS", "The number of stalls due to lack of renaming resources for the ds register."}, + {"SEG_RENAME_STALLS.ES", "The number of stalls due to lack of renaming resources for the es register."}, + {"SEG_RENAME_STALLS.FS", "The number of stalls due to lack of renaming resources for the fs register."}, + {"SEG_RENAME_STALLS.GS", "The number of stalls due to lack of renaming resources for the gs register."}, + {"SIMD_ASSIST", "The number SIMD assists invoked."}, + 
{"SIMD_COMP_INST_RETIRED.PACKED_DOUBLE", "Then number of computational SSE2 packed double precision instructions retired."}, + {"SIMD_COMP_INST_RETIRED.PACKED_SINGLE", "Then number of computational SSE2 packed single precision instructions retired."}, + {"SIMD_COMP_INST_RETIRED.SCALAR_DOUBLE", "Then number of computational SSE2 scalar double precision instructions retired."}, + {"SIMD_COMP_INST_RETIRED.SCALAR_SINGLE", "Then number of computational SSE2 scalar single precision instructions retired."}, + {"SIMD_INSTR_RETIRED", "The number of retired SIMD instructions that use MMX registers."}, + {"SIMD_INST_RETIRED.ANY", "The number of streaming SIMD instructions retired."}, + {"SIMD_INST_RETIRED.PACKED_DOUBLE", "The number of SSE2 packed double precision instructions retired."}, + {"SIMD_INST_RETIRED.PACKED_SINGLE", "The number of SSE packed single precision instructions retired."}, + {"SIMD_INST_RETIRED.SCALAR_DOUBLE", "The number of SSE2 scalar double precision instructions retired."}, + {"SIMD_INST_RETIRED.SCALAR_SINGLE", "The number of SSE scalar single precision instructions retired."}, + {"SIMD_INST_RETIRED.VECTOR", "The number of SSE2 vector instructions retired."}, + {"SIMD_SAT_INSTR_RETIRED", "The number of saturated arithmetic SIMD instructions retired."}, + {"SIMD_SAT_UOP_EXEC", "The number of SIMD saturated arithmetic micro-ops executed."}, + {"SIMD_UOPS_EXEC", "The number of SIMD micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.ARITHMETIC", "The number of SIMD packed arithmetic micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.LOGICAL", "The number of SIMD packed logical micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.MUL", "The number of SIMD packed multiply micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.PACK", "The number of SIMD pack micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.SHIFT", "The number of SIMD packed shift micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.UNPACK", "The number of SIMD unpack micro-ops executed."}, + {"SNOOP_STALL_DRV", "The number of 
times the bus stalled for snoops."}, + {"SSE_PRE_EXEC.L1", "The number of PREFETCHT0 instructions executed."}, + {"SSE_PRE_EXEC.L2", "The number of PREFETCHT1 instructions executed."}, + {"SSE_PRE_EXEC.NTA", "The number of PREFETCHNTA instructions executed."}, + {"SSE_PRE_EXEC.STORES", "The number of times SSE non-temporal store instructions were executed."}, + {"SSE_PRE_MISS.L1", "The number of times the PREFETCHT0 instruction executed and missed all cache levels."}, + {"SSE_PRE_MISS.L2", "The number of times the PREFETCHT1 instruction executed and missed all cache levels."}, + {"SSE_PRE_MISS.NTA", "The number of times the PREFETCHNTA instruction executed and missed all cache levels."}, + {"STORE_BLOCK.ORDER", "The number of cycles while a store was waiting for another store to be globally observed."}, + {"STORE_BLOCK.SNOOP", "The number of cycles while a store was blocked due to a conflict with an internal or external snoop."}, + {"THERMAL_TRIP", "The number of thermal trips."}, + {"UOPS_RETIRED.ANY", "The number of micro-ops retired."}, + {"UOPS_RETIRED.FUSED", "The number of fused micro-ops retired."}, + {"UOPS_RETIRED.LD_IND_BR", "The number of micro-ops retired that fused a load with another operation."}, + {"UOPS_RETIRED.MACRO_FUSION", "The number of times retired instruction pairs were fused into one micro-op."}, + {"UOPS_RETIRED.NON_FUSED", "he number of non-fused micro-ops retired."}, + {"UOPS_RETIRED.STD_STA", "The number of store address calculations that fused into one micro-op."}, + {"X87_OPS_RETIRED.ANY", "The number of floating point computational instructions retired."}, + {"X87_OPS_RETIRED.FXCH", "The number of FXCH instructions retired."}, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-core2-extreme.h b/src/freebsd/map-core2-extreme.h new file mode 100644 index 0000000..a56f18c --- /dev/null +++ b/src/freebsd/map-core2-extreme.h @@ -0,0 +1,229 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ 
+ +/* +* File: map-core2-extreme.h +* CVS: $Id$ +* Author: George Neville-Neil +* gnn@freebsd.org +*/ + +#ifndef FreeBSD_MAP_CORE2EXTREME_EXTREME +#define FreeBSD_MAP_CORE2EXTREME_EXTREME + +enum NativeEvent_Value_Core2ExtremeProcessor { /* Native event codes for the Core2 Extreme map. NOTE(review): enumerator order appears to mirror, row for row, the label/description table Core2ExtremeProcessor_info; keep both in sync when adding or removing events - confirm against the table definition. */ + PNE_CORE2EXTREME_BACLEARS = PAPI_NATIVE_MASK , /* first code; every following enumerator is the previous value plus one */ + PNE_CORE2EXTREME_BOGUS_BR, + PNE_CORE2EXTREME_BR_BAC_MISSP_EXEC, + PNE_CORE2EXTREME_BR_CALL_MISSP_EXEC, + PNE_CORE2EXTREME_BR_CALL_EXEC, + PNE_CORE2EXTREME_BR_CND_EXEC, + PNE_CORE2EXTREME_BR_CND_MISSP_EXEC, + PNE_CORE2EXTREME_BR_IND_CALL_EXEC, + PNE_CORE2EXTREME_BR_IND_EXEC, + PNE_CORE2EXTREME_BR_IND_MISSP_EXEC, + PNE_CORE2EXTREME_BR_INST_DECODED, + PNE_CORE2EXTREME_BR_INST_EXEC, + PNE_CORE2EXTREME_BR_INST_RETIRED_ANY, + PNE_CORE2EXTREME_BR_INST_RETIRED_MISPRED, + PNE_CORE2EXTREME_BR_INST_RETIRED_MISPRED_NOT_TAKEN, + PNE_CORE2EXTREME_BR_INST_RETIRED_MISPRED_TAKEN, + PNE_CORE2EXTREME_BR_INST_RETIRED_PRED_NOT_TAKEN, + PNE_CORE2EXTREME_BR_INST_RETIRED_PRED_TAKEN, + PNE_CORE2EXTREME_BR_INST_RETIRED_TAKEN, + PNE_CORE2EXTREME_BR_MISSP_EXEC, + PNE_CORE2EXTREME_BR_RET_MISSP_EXEC, + PNE_CORE2EXTREME_BR_RET_BAC_MISSP_EXEC, + PNE_CORE2EXTREME_BR_RET_EXEC, + PNE_CORE2EXTREME_BR_TKN_BUBBLE_1, + PNE_CORE2EXTREME_BR_TKN_BUBBLE_2, + PNE_CORE2EXTREME_BUSQ_EMPTY, + PNE_CORE2EXTREME_BUS_BNR_DRV, + PNE_CORE2EXTREME_BUS_DATA_RCV, + PNE_CORE2EXTREME_BUS_DRDY_CLOCKS, + PNE_CORE2EXTREME_BUS_HIT_DRV, + PNE_CORE2EXTREME_BUS_HITM_DRV, + PNE_CORE2EXTREME_BUS_IO_WAIT, + PNE_CORE2EXTREME_BUS_LOCK_CLOCKS, + PNE_CORE2EXTREME_BUS_REQUEST_OUTSTANDING, + PNE_CORE2EXTREME_BUS_TRANS_ANY, + PNE_CORE2EXTREME_BUS_TRANS_BRD, + PNE_CORE2EXTREME_BUS_TRANS_BURST, + PNE_CORE2EXTREME_BUS_TRANS_DEF, + PNE_CORE2EXTREME_BUS_TRANS_IFETCH, + PNE_CORE2EXTREME_BUS_TRANS_INVAL, + PNE_CORE2EXTREME_BUS_TRANS_IO, + PNE_CORE2EXTREME_BUS_TRANS_MEM, + PNE_CORE2EXTREME_BUS_TRANS_P, + PNE_CORE2EXTREME_BUS_TRANS_PWR, + PNE_CORE2EXTREME_BUS_TRANS_RFO, + PNE_CORE2EXTREME_BUS_TRANS_WB, + PNE_CORE2EXTREME_CMP_SNOOP, + 
PNE_CORE2EXTREME_CPU_CLK_UNHALTED_BUS, + PNE_CORE2EXTREME_CPU_CLK_UNHALTED_CORE_P, + PNE_CORE2EXTREME_CPU_CLK_UNHALTED_NO_OTHER, + PNE_CORE2EXTREME_CYCLES_DIV_BUSY, + PNE_CORE2EXTREME_CYCLES_INT_MASKED, + PNE_CORE2EXTREME_CYCLES_INT_PENDING_AND_MASKED, + PNE_CORE2EXTREME_CYCLES_L1I_MEM_STALLED, + PNE_CORE2EXTREME_DELAYED_BYPASS_FP, + PNE_CORE2EXTREME_DELAYED_BYPASS_LOAD, + PNE_CORE2EXTREME_DELAYED_BYPASS_SIMD, + PNE_CORE2EXTREME_DIV, + PNE_CORE2EXTREME_DTLB_MISSES_ANY, + PNE_CORE2EXTREME_DTLB_MISSES_L0_MISS_LD, + PNE_CORE2EXTREME_DTLB_MISSES_MISS_LD, + PNE_CORE2EXTREME_DTLB_MISSES_MISS_ST, + PNE_CORE2EXTREME_EIST_TRANS, + PNE_CORE2EXTREME_ESP_ADDITIONS, + PNE_CORE2EXTREME_ESP_SYNCH, + PNE_CORE2EXTREME_EXT_SNOOP, + PNE_CORE2EXTREME_FP_ASSIST, + PNE_CORE2EXTREME_FP_COMP_OPS_EXE, + PNE_CORE2EXTREME_FP_MMX_TRANS_TO_FP, + PNE_CORE2EXTREME_FP_MMX_TRANS_TO_MMX, + PNE_CORE2EXTREME_HW_INT_RCV, + PNE_CORE2EXTREME_IDLE_DURING_DIV, + PNE_CORE2EXTREME_ILD_STALL, + PNE_CORE2EXTREME_INST_QUEUE_FULL, + PNE_CORE2EXTREME_INST_RETIRED_ANY_P, + PNE_CORE2EXTREME_INST_RETIRED_LOADS, + PNE_CORE2EXTREME_INST_RETIRED_OTHER, + PNE_CORE2EXTREME_INST_RETIRED_STORES, + PNE_CORE2EXTREME_INST_RETIRED_VM_H, + PNE_CORE2EXTREME_ITLB_FLUSH, + PNE_CORE2EXTREME_ITLB_LARGE_MISS, + PNE_CORE2EXTREME_ITLB_MISSES, + PNE_CORE2EXTREME_ITLB_SMALL_MISS, + PNE_CORE2EXTREME_ITLB_MISS_RETIRED, + PNE_CORE2EXTREME_L1D_ALL_CACHE_REF, + PNE_CORE2EXTREME_L1D_ALL_REF, + PNE_CORE2EXTREME_L1D_CACHE_LD, + PNE_CORE2EXTREME_L1D_CACHE_LOCK, + PNE_CORE2EXTREME_L1D_CACHE_LOCK_DURATION, + PNE_CORE2EXTREME_L1D_CACHE_ST, + PNE_CORE2EXTREME_L1D_M_EVICT, + PNE_CORE2EXTREME_L1D_M_REPL, + PNE_CORE2EXTREME_L1D_PEND_MISS, + PNE_CORE2EXTREME_L1D_PREFETCH_REQUESTS, + PNE_CORE2EXTREME_L1D_REPL, + PNE_CORE2EXTREME_L1D_SPLIT_LOADS, + PNE_CORE2EXTREME_L1D_SPLIT_STORES, + PNE_CORE2EXTREME_L1I_MISSES, + PNE_CORE2EXTREME_L1I_READS, + PNE_CORE2EXTREME_L2_ADS, + PNE_CORE2EXTREME_L2_DBUS_BUSY_RD, + PNE_CORE2EXTREME_L2_IFETCH, + 
PNE_CORE2EXTREME_L2_LD, + PNE_CORE2EXTREME_L2_LINES_IN, + PNE_CORE2EXTREME_L2_LINES_OUT, + PNE_CORE2EXTREME_L2_LOCK, + PNE_CORE2EXTREME_L2_M_LINES_IN, + PNE_CORE2EXTREME_L2_M_LINES_OUT, + PNE_CORE2EXTREME_L2_NO_REQ, + PNE_CORE2EXTREME_L2_REJECT_BUSQ, + PNE_CORE2EXTREME_L2_RQSTS, + PNE_CORE2EXTREME_L2_RQSTS_SELF_DEMAND_I_STATE, + PNE_CORE2EXTREME_L2_RQSTS_SELF_DEMAND_MESI, + PNE_CORE2EXTREME_L2_ST, + PNE_CORE2EXTREME_LOAD_BLOCK_L1D, + PNE_CORE2EXTREME_LOAD_BLOCK_OVERLAP_STORE, + PNE_CORE2EXTREME_LOAD_BLOCK_STA, + PNE_CORE2EXTREME_LOAD_BLOCK_STD, + PNE_CORE2EXTREME_LOAD_BLOCK_UNTIL_RETIRE, + PNE_CORE2EXTREME_LOAD_HIT_PRE, + PNE_CORE2EXTREME_MACHINE_NUKES_MEM_ORDER, + PNE_CORE2EXTREME_MACHINE_NUKES_SMC, + PNE_CORE2EXTREME_MACRO_INSTS_CISC_DECODED, + PNE_CORE2EXTREME_MACRO_INSTS_DECODED, + PNE_CORE2EXTREME_MEMORY_DISAMBIGUATION_RESET, + PNE_CORE2EXTREME_MEMORY_DISAMBIGUATION_SUCCESS, + PNE_CORE2EXTREME_MEM_LOAD_RETIRED_DTLB_MISS, + PNE_CORE2EXTREME_MEM_LOAD_RETIRED_L1D_LINE_MISS, + PNE_CORE2EXTREME_MEM_LOAD_RETIRED_L1D_MISS, + PNE_CORE2EXTREME_MEM_LOAD_RETIRED_L2_LINE_MISS, + PNE_CORE2EXTREME_MEM_LOAD_RETIRED_L2_MISS, + PNE_CORE2EXTREME_MUL, + PNE_CORE2EXTREME_PAGE_WALKS_COUNT, + PNE_CORE2EXTREME_PAGE_WALKS_CYCLES, + PNE_CORE2EXTREME_PREF_RQSTS_DN, + PNE_CORE2EXTREME_PREF_RQSTS_UP, + PNE_CORE2EXTREME_RAT_STALLS_ANY, + PNE_CORE2EXTREME_RAT_STALLS_FLAGS, + PNE_CORE2EXTREME_RAT_STALLS_FPSW, + PNE_CORE2EXTREME_RAT_STALLS_OTHER_SERIALIZATION_STALLS, + PNE_CORE2EXTREME_RAT_STALLS_PARTIAL_CYCLES, + PNE_CORE2EXTREME_RAT_STALLS_ROB_READ_PORT, + PNE_CORE2EXTREME_RESOURCE_STALLS_ANY, + PNE_CORE2EXTREME_RESOURCE_STALLS_BR_MISS_CLEAR, + PNE_CORE2EXTREME_RESOURCE_STALLS_FPCW, + PNE_CORE2EXTREME_RESOURCE_STALLS_LD_ST, + PNE_CORE2EXTREME_RESOURCE_STALLS_ROB_FULL, + PNE_CORE2EXTREME_RESOURCE_STALLS_RS_FULL, + PNE_CORE2EXTREME_RS_UOPS_DISPATCHED, + PNE_CORE2EXTREME_RS_UOPS_DISPATCHED_PORT0, + PNE_CORE2EXTREME_RS_UOPS_DISPATCHED_PORT1, + PNE_CORE2EXTREME_RS_UOPS_DISPATCHED_PORT2, + 
PNE_CORE2EXTREME_RS_UOPS_DISPATCHED_PORT3, + PNE_CORE2EXTREME_RS_UOPS_DISPATCHED_PORT4, + PNE_CORE2EXTREME_RS_UOPS_DISPATCHED_PORT5, + PNE_CORE2EXTREME_SB_DRAIN_CYCLES, + PNE_CORE2EXTREME_SEGMENT_REG_LOADS, + PNE_CORE2EXTREME_SEG_REG_RENAMES_ANY, + PNE_CORE2EXTREME_SEG_REG_RENAMES_DS, + PNE_CORE2EXTREME_SEG_REG_RENAMES_ES, + PNE_CORE2EXTREME_SEG_REG_RENAMES_FS, + PNE_CORE2EXTREME_SEG_REG_RENAMES_GS, + PNE_CORE2EXTREME_SEG_RENAME_STALLS_ANY, + PNE_CORE2EXTREME_SEG_RENAME_STALLS_DS, + PNE_CORE2EXTREME_SEG_RENAME_STALLS_ES, + PNE_CORE2EXTREME_SEG_RENAME_STALLS_FS, + PNE_CORE2EXTREME_SEG_RENAME_STALLS_GS, + PNE_CORE2EXTREME_SIMD_ASSIST, + PNE_CORE2EXTREME_SIMD_COMP_INST_RETIRED_PACKED_DOUBLE, + PNE_CORE2EXTREME_SIMD_COMP_INST_RETIRED_PACKED_SINGLE, + PNE_CORE2EXTREME_SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE, + PNE_CORE2EXTREME_SIMD_COMP_INST_RETIRED_SCALAR_SINGLE, + PNE_CORE2EXTREME_SIMD_INSTR_RETIRED, + PNE_CORE2EXTREME_SIMD_INST_RETIRED_ANY, + PNE_CORE2EXTREME_SIMD_INST_RETIRED_PACKED_DOUBLE, + PNE_CORE2EXTREME_SIMD_INST_RETIRED_PACKED_SINGLE, + PNE_CORE2EXTREME_SIMD_INST_RETIRED_SCALAR_DOUBLE, + PNE_CORE2EXTREME_SIMD_INST_RETIRED_SCALAR_SINGLE, + PNE_CORE2EXTREME_SIMD_INST_RETIRED_VECTOR, + PNE_CORE2EXTREME_SIMD_SAT_INSTR_RETIRED, + PNE_CORE2EXTREME_SIMD_SAT_UOP_EXEC, + PNE_CORE2EXTREME_SIMD_UOPS_EXEC, + PNE_CORE2EXTREME_SIMD_UOP_TYPE_EXEC_ARITHMETIC, + PNE_CORE2EXTREME_SIMD_UOP_TYPE_EXEC_LOGICAL, + PNE_CORE2EXTREME_SIMD_UOP_TYPE_EXEC_MUL, + PNE_CORE2EXTREME_SIMD_UOP_TYPE_EXEC_PACK, + PNE_CORE2EXTREME_SIMD_UOP_TYPE_EXEC_SHIFT, + PNE_CORE2EXTREME_SIMD_UOP_TYPE_EXEC_UNPACK, + PNE_CORE2EXTREME_SNOOP_STALL_DRV, + PNE_CORE2EXTREME_SSE_PRE_EXEC_L1, + PNE_CORE2EXTREME_SSE_PRE_EXEC_L2, + PNE_CORE2EXTREME_SSE_PRE_EXEC_NTA, + PNE_CORE2EXTREME_SSE_PRE_EXEC_STORES, + PNE_CORE2EXTREME_SSE_PRE_MISS_L1, + PNE_CORE2EXTREME_SSE_PRE_MISS_L2, + PNE_CORE2EXTREME_SSE_PRE_MISS_NTA, + PNE_CORE2EXTREME_STORE_BLOCK_ORDER, + PNE_CORE2EXTREME_STORE_BLOCK_SNOOP, + PNE_CORE2EXTREME_THERMAL_TRIP, + 
PNE_CORE2EXTREME_UOPS_RETIRED_ANY, + PNE_CORE2EXTREME_UOPS_RETIRED_FUSED, + PNE_CORE2EXTREME_UOPS_RETIRED_LD_IND_BR, + PNE_CORE2EXTREME_UOPS_RETIRED_MACRO_FUSION, + PNE_CORE2EXTREME_UOPS_RETIRED_NON_FUSED, + PNE_CORE2EXTREME_UOPS_RETIRED_STD_STA, + PNE_CORE2EXTREME_X87_OPS_RETIRED_ANY, + PNE_CORE2EXTREME_X87_OPS_RETIRED_FXCH, + PNE_CORE2EXTREME_NATNAME_GUARD /* last enumerator, one past the final real event; presumably an end-of-list guard - confirm against its users, and keep it last */ +}; + +extern Native_Event_LabelDescription_t Core2ExtremeProcessor_info[]; +extern hwi_search_t Core2ExtremeProcessor_map[]; + +#endif diff --git a/src/freebsd/map-core2.c b/src/freebsd/map-core2.c new file mode 100644 index 0000000..b918db8 --- /dev/null +++ b/src/freebsd/map-core2.c @@ -0,0 +1,239 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-core2.c +* Author: George Neville-Neil +* gnn@freebsd.org +* Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + + +/**************************************************************************** + CORE2 SUBSTRATE + CORE2 SUBSTRATE + CORE2 SUBSTRATE + CORE2 SUBSTRATE + CORE2 SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_Core2Processor must match Core2Processor_info +*/ + +Native_Event_LabelDescription_t Core2Processor_info[] = +{ + {"BACLEARS", "The number of times the front end is resteered."}, + {"BOGUS_BR", "The number of byte sequences mistakenly detected as taken branch instructions."}, + {"BR_BAC_MISSP_EXEC", "The number of branch instructions that were mispredicted when decoded."}, + {"BR_CALL_MISSP_EXEC", "The number of mispredicted CALL instructions that were executed."}, + {"BR_CALL_EXEC", "The number of CALL instructions executed."}, + {"BR_CND_EXEC", "The number of conditional branches executed, but not necessarily retired."}, + {"BR_CND_MISSP_EXEC", "The number of mispredicted conditional branches executed."}, + {"BR_IND_CALL_EXEC", "The number of indirect 
CALL instructions executed."}, + {"BR_IND_EXEC", "The number of indirect branch instructions executed."}, + {"BR_IND_MISSP_EXEC", "The number of mispredicted indirect branch instructions executed."}, + {"BR_INST_DECODED", "The number of branch instructions decoded."}, + {"BR_INST_EXEC", "The number of branches executed, but not necessarily retired."}, + {"BR_INST_RETIRED.ANY", "The number of branch instructions retired. This is an architectural performance event."}, + {"BR_INST_RETIRED.MISPRED", "The number of mispredicted branch instructions retired. This is an architectural performance event."}, + {"BR_INST_RETIRED.MISPRED_NOT_TAKEN", "The number of not taken branch instructions retired that were mispredicted."}, + {"BR_INST_RETIRED.MISPRED_TAKEN", "The number taken branch instructions retired that were mispredicted."}, + {"BR_INST_RETIRED.PRED_NOT_TAKEN", "The number of not taken branch instructions retired that were correctly predicted."}, + {"BR_INST_RETIRED.PRED_TAKEN", "The number of taken branch instructions retired that were correctly predicted."}, + {"BR_INST_RETIRED.TAKEN", "The number of taken branch instructions retired."}, + {"BR_MISSP_EXEC", "The number of mispredicted branch instructions that were executed."}, + {"BR_RET_MISSP_EXEC", "The number of mispredicted RET instructions executed."}, + {"BR_RET_BAC_MISSP_EXEC", "The number of RET instructions executed that were mispredicted at decode time."}, + {"BR_RET_EXEC", "The number of RET instructions executed."}, + {"BR_TKN_BUBBLE_1", "The number of branch predicted taken with bubble 1."}, + {"BR_TKN_BUBBLE_2", "The number of branch predicted taken with bubble 2."}, + {"BUSQ_EMPTY", "The number of cycles during which the core did not have any pending transactions in the bus queue."}, + {"BUS_BNR_DRV", "Number of Bus Not Ready signals asserted on the bus."}, + {"BUS_DATA_RCV", "Number of bus cycles during which the processor is receiving data."}, + {"BUS_DRDY_CLOCKS", "The number of bus cycles during 
which the Data Ready signal is asserted on the bus."}, + {"BUS_HIT_DRV", "The number of bus cycles during which the processor drives the HIT# pin."}, + {"BUS_HITM_DRV", "The number of bus cycles during which the processor drives the HITM# pin."}, + {"BUS_IO_WAIT", "The number of core cycles during which I/O requests wait in the bus queue."}, + {"BUS_LOCK_CLOCKS", "The number of bus cycles during which the LOCK signal was asserted on the bus."}, + {"BUS_REQUEST_OUTSTANDING", "The number of pending full cache line read transactions on the bus occuring in each cycle."}, + {"BUS_TRANS_ANY", "The number of bus transactions of any kind."}, + {"BUS_TRANS_BRD", "The number of burst read transactions."}, + {"BUS_TRANS_BURST", "The number of burst transactions."}, + {"BUS_TRANS_DEF", "The number of deferred bus transactions."}, + {"BUS_TRANS_IFETCH", "The number of instruction fetch full cache line bus transactions."}, + {"BUS_TRANS_INVAL", "The number of invalidate bus transactions."}, + {"BUS_TRANS_IO", "The number of completed I/O bus transaactions due to IN and OUT instructions."}, + {"BUS_TRANS_MEM", "The number of memory bus transactions."}, + {"BUS_TRANS_P", "The number of partial bus transactions."}, + {"BUS_TRANS_PWR", "The number of partial write bus transactions."}, + {"BUS_TRANS_RFO", "The number of Read For Ownership bus transactions."}, + {"BUS_TRANS_WB", "The number of explicit writeback bus transactions due to dirty line evictions."}, + {"CMP_SNOOP", "The number of times the L1 data cache is snooped by the other core in the same processor."}, + {"CPU_CLK_UNHALTED.BUS", "The number of bus cycles when the core is not in the halt state. This is an architectural performance event."}, + {"CPU_CLK_UNHALTED.CORE_P", "The number of core cycles while the core is not in a halt state. 
This is an architectural performance event."}, + {"CPU_CLK_UNHALTED.NO_OTHER", "The number of bus cycles during which the core remains unhalted and the other core is halted."}, + {"CYCLES_DIV_BUSY", "The number of cycles the divider is busy. This event is only available on PMC0."}, + {"CYCLES_INT_MASKED", "The number of cycles during which interrupts are disabled."}, + {"CYCLES_INT_PENDING_AND_MASKED", "The number of cycles during which there were pending interrupts while interrupts were disabled."}, + {"CYCLES_L1I_MEM_STALLED", "The number of cycles for which an instruction fetch stalls."}, + {"DELAYED_BYPASS.FP", "The number of floating point operations that used data immediately after the data was generated by a non floating point execution unit."}, + {"DELAYED_BYPASS.LOAD", "The number of delayed bypass penalty cycles that a load operation incurred."}, + {"DELAYED_BYPASS.SIMD", "The number of times SIMD operations use data immediately after data, was generated by a non-SIMD execution unit."}, + {"DIV", "The number of divide operations executed."}, + {"DTLB_MISSES.ANY", "The number of Data TLB misses, including misses that result from speculative accesses."}, + {"DTLB_MISSES.L0_MISS_LD", "The number of level 0 DTLB misses due to load operations."}, + {"DTLB_MISSES.MISS_LD", "The number of Data TLB misses due to load operations."}, + {"DTLB_MISSES.MISS_ST", "The number of Data TLB misses due to store operations."}, + {"EIST_TRANS", "The number of Enhanced Intel SpeedStep Technology transitions."}, + {"ESP.ADDITIONS", "The number of automatic additions to the esp register."}, + {"ESP.SYNCH", "The number of times the esp register was explicitly used in an address expression after it is implicitly used by a PUSH or POP instruction."}, + {"EXT_SNOOP", "The number of snoop responses to bus transactions."}, + {"FP_ASSIST", "The number of floating point operations executed that needed a microcode assist."}, + {"FP_COMP_OPS_EXE", "The number of floating point 
computational micro-ops executed. The event is available only on PMC0."}, + {"FP_MMX_TRANS_TO_FP", "The number of transitions from MMX instructions to floating point instructions."}, + {"FP_MMX_TRANS_TO_MMX", "The number of transitions from floating point instructions to MMX instructions."}, + {"HW_INT_RCV", "The number of hardware interrupts recieved."}, + {"IDLE_DURING_DIV", "The number of cycles the divider is busy and no other execution unit or load operation was in progress. This event is available only on PMC0."}, + {"ILD_STALL", "The number of cycles the instruction length decoder stalled due to a length changing prefix."}, + {"INST_QUEUE.FULL", "The number of cycles during which the instruction queue is full."}, + {"INST_RETIRED.ANY_P", "The number of instructions retired. This is an architectural performance event."}, + {"INST_RETIRED.LOADS", "The number of instructions retired that contained a load operation."}, + {"INST_RETIRED.OTHER", "The number of instructions retired that did not contain a load or a store operation."}, + {"INST_RETIRED.STORES", "The number of instructions retired that contained a store operation."}, + {"ITLB.FLUSH", "The number of ITLB flushes."}, + {"ITLB.LARGE_MISS", "The number of instruction fetches from large pages that miss the ITLB."}, + {"ITLB.MISSES", "The number of instruction fetches from both large and small pages that miss the ITLB."}, + {"ITLB.SMALL_MISS", "The number of instruction fetches from small pages that miss the ITLB."}, + {"ITLB_MISS_RETIRED", "The number of retired instructions that missed the ITLB when they were fetched."}, + {"L1D_ALL_CACHE_REF", "The number of data reads and writes to cacheable memory."}, + {"L1D_ALL_REF", "The number of references to L1 data cache counting loads and stores of to all memory types."}, + {"L1D_CACHE_LD", "Number of data reads from cacheable memory excluding locked reads."}, + {"L1D_CACHE_LOCK", "Number of locked reads from cacheable memory."}, + {"L1D_CACHE_LOCK_DURATION", 
"The number of cycles during which any cache line is locked by any locking instruction."}, + {"L1D_CACHE_ST", "The number of data writes to cacheable memory excluding locked writes."}, + {"L1D_M_EVICT", "The number of modified cache lines evicted from L1 data cache."}, + {"L1D_M_REPL", "The number of modified lines allocated in L1 data cache."}, + {"L1D_PEND_MISS", "The total number of outstanding L1 data cache misses at any clock."}, + {"L1D_PREFETCH.REQUESTS", "The number of times L1 data cache requested to prefetch a data cache line."}, + {"L1D_REPL", "The number of lines brought into L1 data cache."}, + {"L1D_SPLIT.LOADS", "The number of load operations that span two cache lines."}, + {"L1D_SPLIT.STORES", "The number of store operations that span two cache lines."}, + {"L1I_MISSES", "The number of instruction fetch unit misses."}, + {"L1I_READS", "The number of instruction fetches."}, + {"L2_ADS", "The number of cycles that the L2 address bus is in use."}, + {"L2_DBUS_BUSY_RD", "The number of cycles during which the L2 data bus is busy transferring data to the core."}, + {"L2_IFETCH", "The number of instruction cache line requests from the instruction fetch unit."}, + {"L2_LD", "The number of L2 cache read requests from L1 cache and L2 prefetchers."}, + {"L2_LINES_IN", "The number of cache lines allocated in L2 cache."}, + {"L2_LINES_OUT", "The number of L2 cache lines evicted."}, + {"L2_LOCK", "The number of locked accesses to cache lines that miss L1 data cache."}, + {"L2_M_LINES_IN", "The number of L2 cache line modifications."}, + {"L2_M_LINES_OUT", "The number of modified lines evicted from L2 cache."}, + {"L2_NO_REQ", "Number of cycles during which no L2 cache requests were pending from a core."}, + {"L2_REJECT_BUSQ", "Number of L2 cache requests that were rejected."}, + {"L2_RQSTS", "The number of completed L2 cache requests."}, + {"L2_RQSTS.SELF.DEMAND.I_STATE", "The number of completed L2 cache demand requests from this core that missed the L2 cache. 
This is an architectural performance event."}, + {"L2_RQSTS.SELF.DEMAND.MESI", "The number of completed L2 cache demand requests from this core. This is an architectural performance event."}, + {"L2_ST", "The number of store operations that miss the L1 cache and request data from the L2 cache."}, + {"LOAD_BLOCK.L1D", "The number of loads blocked by the L1 data cache."}, + {"LOAD_BLOCK.OVERLAP_STORE", "The number of loads that partially overlap an earlier store or are aliased with a previous store."}, + {"LOAD_BLOCK.STA", "The number of loads blocked by preceding stores whose address is yet to be calculated."}, + {"LOAD_BLOCK.STD", "The number of loads blocked by preceding stores to the same address whose data value is not known."}, + {"LOAD_BLOCK.UNTIL_RETIRE", "The numer of load operations that were blocked until retirement."}, + {"LOAD_HIT_PRE", "The number of load operations that conflicted with an prefetch to the same cache line."}, + {"MACHINE_NUKES.MEM_ORDER", "The number of times the execution pipeline was restarted due to a memory ordering conflict or memory disambiguation misprediction."}, + {"MACHINE_NUKES.SMC", "The number of times a program writes to a code section."}, + {"MACRO_INSTS.CISC_DECODED", "The number of complex instructions decoded."}, + {"MACRO_INSTS.DECODED", "The number of instructions decoded."}, + {"MEMORY_DISAMBIGUATION.RESET", "The number of cycles during which memory disambiguation misprediction occurs."}, + {"MEMORY_DISAMBIGUATION.SUCCESS", "The number of load operations that were successfully disambiguated."}, + {"MEM_LOAD_RETIRED.DTLB_MISS", "The number of retired loads that missed the DTLB."}, + {"MEM_LOAD_RETIRED.L1D_LINE_MISS", "The number of retired load operations that missed L1 data cache and that sent a request to L2 cache. This event is only available on PMC0."}, + {"MEM_LOAD_RETIRED.L1D_MISS", "The number of retired load operations that missed L1 data cache. 
This event is only available on PMC0."}, + {"MEM_LOAD_RETIRED.L2_LINE_MISS", "The number of load operations that missed L2 cache and that caused a bus request."}, + {"MEM_LOAD_RETIRED.L2_MISS", "The number of load operations that missed L2 cache."}, + {"MUL","The number of multiply operations executed (only available on PMC1.)"}, + {"PAGE_WALKS.COUNT", "The number of page walks executed due to an ITLB or DTLB miss."}, + {"PAGE_WALKS.CYCLES", "The number of cycles spent in a page walk caused by an ITLB or DTLB miss."}, + {"PREF_RQSTS_DN", "The number of downward prefetches issued from the Data Prefetch Logic unit to L2 cache."}, + {"PREF_RQSTS_UP", "The number of upward prefetches issued from the Data Prefetch Logic unit to L2 cache."}, + {"RAT_STALLS.ANY", "The number of stall cycles due to any of RAT_STALLS.FLAGS RAT_STALLS.FPSW, RAT_STALLS.PARTIAL and RAT_STALLS.ROB_READ_PORT."}, + {"RAT_STALLS.FLAGS", "The number of cycles execution stalled due to a flag register induced stall."}, + {"RAT_STALLS.FPSW", "The number of times the floating point status word was written."}, + {"RAT_STALLS.PARTIAL_CYCLES", "The number of cycles of added instruction execution latency due to the use of a register that was partially written by previous instructions."}, + {"RAT_STALLS.ROB_READ_PORT", "The number of cycles when ROB read port stalls occurred."}, + {"RESOURCE_STALLS.ANY", "The number of cycles during which any resource related stall occurred."}, + {"RESOURCE_STALLS.BR_MISS_CLEAR", "The number of cycles stalled due to branch misprediction."}, + {"RESOURCE_STALLS.FPCW", "The number of cycles stalled due to writing the floating point control word."}, + {"RESOURCE_STALLS.LD_ST", "The number of cycles during which the number of loads and stores in the pipeline exceeded their limits."}, + {"RESOURCE_STALLS.ROB_FULL", "The number of cycles when the reorder buffer was full."}, + {"RESOURCE_STALLS.RS_FULL", "The number of cycles during which the RS was full."}, + 
{"RS_UOPS_DISPATCHED", "The number of micro-ops dispatched for execution."}, + {"RS_UOPS_DISPATCHED.PORT0", "The number of cycles micro-ops were dispatched for execution on port 0."}, + {"RS_UOPS_DISPATCHED.PORT1", "The number of cycles micro-ops were dispatched for execution on port 1."}, + {"RS_UOPS_DISPATCHED.PORT2", "The number of cycles micro-ops were dispatched for execution on port 2."}, + {"RS_UOPS_DISPATCHED.PORT3", "The number of cycles micro-ops were dispatched for execution on port 3."}, + {"RS_UOPS_DISPATCHED.PORT4", "The number of cycles micro-ops were dispatched for execution on port 4."}, + {"RS_UOPS_DISPATCHED.PORT5", "The number of cycles micro-ops were dispatched for execution on port 5."}, + {"SB_DRAIN_CYCLES", "The number of cycles while the store buffer is draining."}, + {"SEGMENT_REG_LOADS", "The number of segment register loads."}, + {"SEG_REG_RENAMES.ANY", "The number of times the any segment register was renamed."}, + {"SEG_REG_RENAMES.DS", "The number of times the ds register is renamed."}, + {"SEG_REG_RENAMES.ES", "The number of times the es register is renamed."}, + {"SEG_REG_RENAMES.FS", "The number of times the fs register is renamed."}, + {"SEG_REG_RENAMES.GS", "The number of times the gs register is renamed."}, + {"SEG_RENAME_STALLS.ANY", "The number of stalls due to lack of resource to rename any segment register."}, + {"SEG_RENAME_STALLS.DS", "The number of stalls due to lack of renaming resources for the ds register."}, + {"SEG_RENAME_STALLS.ES", "The number of stalls due to lack of renaming resources for the es register."}, + {"SEG_RENAME_STALLS.FS", "The number of stalls due to lack of renaming resources for the fs register."}, + {"SEG_RENAME_STALLS.GS", "The number of stalls due to lack of renaming resources for the gs register."}, + {"SIMD_ASSIST", "The number SIMD assists invoked."}, + {"SIMD_COMP_INST_RETIRED.PACKED_DOUBLE", "Then number of computational SSE2 packed double precision instructions retired."}, + 
{"SIMD_COMP_INST_RETIRED.PACKED_SINGLE", "Then number of computational SSE2 packed single precision instructions retired."}, + {"SIMD_COMP_INST_RETIRED.SCALAR_DOUBLE", "Then number of computational SSE2 scalar double precision instructions retired."}, + {"SIMD_COMP_INST_RETIRED.SCALAR_SINGLE", "Then number of computational SSE2 scalar single precision instructions retired."}, + {"SIMD_INSTR_RETIRED", "The number of retired SIMD instructions that use MMX registers."}, + {"SIMD_INST_RETIRED.ANY", "The number of streaming SIMD instructions retired."}, + {"SIMD_INST_RETIRED.PACKED_DOUBLE", "The number of SSE2 packed double precision instructions retired."}, + {"SIMD_INST_RETIRED.PACKED_SINGLE", "The number of SSE packed single precision instructions retired."}, + {"SIMD_INST_RETIRED.SCALAR_DOUBLE", "The number of SSE2 scalar double precision instructions retired."}, + {"SIMD_INST_RETIRED.SCALAR_SINGLE", "The number of SSE scalar single precision instructions retired."}, + {"SIMD_INST_RETIRED.VECTOR", "The number of SSE2 vector instructions retired."}, + {"SIMD_SAT_INSTR_RETIRED", "The number of saturated arithmetic SIMD instructions retired."}, + {"SIMD_SAT_UOP_EXEC", "The number of SIMD saturated arithmetic micro-ops executed."}, + {"SIMD_UOPS_EXEC", "The number of SIMD micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.ARITHMETIC", "The number of SIMD packed arithmetic micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.LOGICAL", "The number of SIMD packed logical micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.MUL", "The number of SIMD packed multiply micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.PACK", "The number of SIMD pack micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.SHIFT", "The number of SIMD packed shift micro-ops executed."}, + {"SIMD_UOP_TYPE_EXEC.UNPACK", "The number of SIMD unpack micro-ops executed."}, + {"SNOOP_STALL_DRV", "The number of times the bus stalled for snoops."}, + {"SSE_PRE_EXEC.L1", "The number of PREFETCHT0 instructions executed."}, + 
{"SSE_PRE_EXEC.L2", "The number of PREFETCHT1 instructions executed."}, + {"SSE_PRE_EXEC.NTA", "The number of PREFETCHNTA instructions executed."}, + {"SSE_PRE_EXEC.STORES", "The number of times SSE non-temporal store instructions were executed."}, + {"SSE_PRE_MISS.L1", "The number of times the PREFETCHT0 instruction executed and missed all cache levels."}, + {"SSE_PRE_MISS.L2", "The number of times the PREFETCHT1 instruction executed and missed all cache levels."}, + {"SSE_PRE_MISS.NTA", "The number of times the PREFETCHNTA instruction executed and missed all cache levels."}, + {"STORE_BLOCK.ORDER", "The number of cycles while a store was waiting for another store to be globally observed."}, + {"STORE_BLOCK.SNOOP", "The number of cycles while a store was blocked due to a conflict with an internal or external snoop."}, + {"THERMAL_TRIP", "The number of thermal trips."}, + {"UOPS_RETIRED.ANY", "The number of micro-ops retired."}, + {"UOPS_RETIRED.FUSED", "The number of fused micro-ops retired."}, + {"UOPS_RETIRED.LD_IND_BR", "The number of micro-ops retired that fused a load with another operation."}, + {"UOPS_RETIRED.MACRO_FUSION", "The number of times retired instruction pairs were fused into one micro-op."}, + {"UOPS_RETIRED.NON_FUSED", "he number of non-fused micro-ops retired."}, + {"UOPS_RETIRED.STD_STA", "The number of store address calculations that fused into one micro-op."}, + {"X87_OPS_RETIRED.ANY", "The number of floating point computational instructions retired."}, + {"X87_OPS_RETIRED.FXCH", "The number of FXCH instructions retired."}, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-core2.h b/src/freebsd/map-core2.h new file mode 100644 index 0000000..95c2f3e --- /dev/null +++ b/src/freebsd/map-core2.h @@ -0,0 +1,227 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-core2.h +* CVS: $Id$ +* Author: George Neville-Neil +* gnn@freebsd.org +*/ + +#ifndef FreeBSD_MAP_CORE2 +#define 
FreeBSD_MAP_CORE2 + +enum NativeEvent_Value_Core2Processor { /* Native event codes for the Core 2 map. Numbering starts at PAPI_NATIVE_MASK and each later enumerator is the previous value plus one, so the declaration order is the numbering. The visible tail of the Core 2 description table (presumably Core2Processor_info[]) lists its events in this same order; presumably the two must be kept in step, as the i7 map notes for its own table - confirm before inserting or reordering entries. */ + PNE_CORE2_BACLEARS = PAPI_NATIVE_MASK , + PNE_CORE2_BOGUS_BR, + PNE_CORE2_BR_BAC_MISSP_EXEC, + PNE_CORE2_BR_CALL_MISSP_EXEC, + PNE_CORE2_BR_CALL_EXEC, + PNE_CORE2_BR_CND_EXEC, + PNE_CORE2_BR_CND_MISSP_EXEC, + PNE_CORE2_BR_IND_CALL_EXEC, + PNE_CORE2_BR_IND_EXEC, + PNE_CORE2_BR_IND_MISSP_EXEC, + PNE_CORE2_BR_INST_DECODED, + PNE_CORE2_BR_INST_EXEC, + PNE_CORE2_BR_INST_RETIRED_ANY, + PNE_CORE2_BR_INST_RETIRED_MISPRED, + PNE_CORE2_BR_INST_RETIRED_MISPRED_NOT_TAKEN, + PNE_CORE2_BR_INST_RETIRED_MISPRED_TAKEN, + PNE_CORE2_BR_INST_RETIRED_PRED_NOT_TAKEN, + PNE_CORE2_BR_INST_RETIRED_PRED_TAKEN, + PNE_CORE2_BR_INST_RETIRED_TAKEN, + PNE_CORE2_BR_MISSP_EXEC, + PNE_CORE2_BR_RET_MISSP_EXEC, + PNE_CORE2_BR_RET_BAC_MISSP_EXEC, + PNE_CORE2_BR_RET_EXEC, + PNE_CORE2_BR_TKN_BUBBLE_1, + PNE_CORE2_BR_TKN_BUBBLE_2, + PNE_CORE2_BUSQ_EMPTY, + PNE_CORE2_BUS_BNR_DRV, + PNE_CORE2_BUS_DATA_RCV, + PNE_CORE2_BUS_DRDY_CLOCKS, + PNE_CORE2_BUS_HIT_DRV, + PNE_CORE2_BUS_HITM_DRV, + PNE_CORE2_BUS_IO_WAIT, + PNE_CORE2_BUS_LOCK_CLOCKS, + PNE_CORE2_BUS_REQUEST_OUTSTANDING, + PNE_CORE2_BUS_TRANS_ANY, + PNE_CORE2_BUS_TRANS_BRD, + PNE_CORE2_BUS_TRANS_BURST, + PNE_CORE2_BUS_TRANS_DEF, + PNE_CORE2_BUS_TRANS_IFETCH, + PNE_CORE2_BUS_TRANS_INVAL, + PNE_CORE2_BUS_TRANS_IO, + PNE_CORE2_BUS_TRANS_MEM, + PNE_CORE2_BUS_TRANS_P, + PNE_CORE2_BUS_TRANS_PWR, + PNE_CORE2_BUS_TRANS_RFO, + PNE_CORE2_BUS_TRANS_WB, + PNE_CORE2_CMP_SNOOP, + PNE_CORE2_CPU_CLK_UNHALTED_BUS, + PNE_CORE2_CPU_CLK_UNHALTED_CORE_P, + PNE_CORE2_CPU_CLK_UNHALTED_NO_OTHER, + PNE_CORE2_CYCLES_DIV_BUSY, + PNE_CORE2_CYCLES_INT_MASKED, + PNE_CORE2_CYCLES_INT_PENDING_AND_MASKED, + PNE_CORE2_CYCLES_L1I_MEM_STALLED, + PNE_CORE2_DELAYED_BYPASS_FP, + PNE_CORE2_DELAYED_BYPASS_LOAD, + PNE_CORE2_DELAYED_BYPASS_SIMD, + PNE_CORE2_DIV, + PNE_CORE2_DTLB_MISSES_ANY, + PNE_CORE2_DTLB_MISSES_L0_MISS_LD, + PNE_CORE2_DTLB_MISSES_MISS_LD, + PNE_CORE2_DTLB_MISSES_MISS_ST, + PNE_CORE2_EIST_TRANS, + 
PNE_CORE2_ESP_ADDITIONS, + PNE_CORE2_ESP_SYNCH, + PNE_CORE2_EXT_SNOOP, + PNE_CORE2_FP_ASSIST, + PNE_CORE2_FP_COMP_OPS_EXE, + PNE_CORE2_FP_MMX_TRANS_TO_FP, + PNE_CORE2_FP_MMX_TRANS_TO_MMX, + PNE_CORE2_HW_INT_RCV, + PNE_CORE2_IDLE_DURING_DIV, + PNE_CORE2_ILD_STALL, + PNE_CORE2_INST_QUEUE_FULL, + PNE_CORE2_INST_RETIRED_ANY_P, + PNE_CORE2_INST_RETIRED_LOADS, + PNE_CORE2_INST_RETIRED_OTHER, + PNE_CORE2_INST_RETIRED_STORES, + PNE_CORE2_ITLB_FLUSH, + PNE_CORE2_ITLB_LARGE_MISS, + PNE_CORE2_ITLB_MISSES, + PNE_CORE2_ITLB_SMALL_MISS, + PNE_CORE2_ITLB_MISS_RETIRED, + PNE_CORE2_L1D_ALL_CACHE_REF, + PNE_CORE2_L1D_ALL_REF, + PNE_CORE2_L1D_CACHE_LD, + PNE_CORE2_L1D_CACHE_LOCK, + PNE_CORE2_L1D_CACHE_LOCK_DURATION, + PNE_CORE2_L1D_CACHE_ST, + PNE_CORE2_L1D_M_EVICT, + PNE_CORE2_L1D_M_REPL, + PNE_CORE2_L1D_PEND_MISS, + PNE_CORE2_L1D_PREFETCH_REQUESTS, + PNE_CORE2_L1D_REPL, + PNE_CORE2_L1D_SPLIT_LOADS, + PNE_CORE2_L1D_SPLIT_STORES, + PNE_CORE2_L1I_MISSES, + PNE_CORE2_L1I_READS, + PNE_CORE2_L2_ADS, + PNE_CORE2_L2_DBUS_BUSY_RD, + PNE_CORE2_L2_IFETCH, + PNE_CORE2_L2_LD, + PNE_CORE2_L2_LINES_IN, + PNE_CORE2_L2_LINES_OUT, + PNE_CORE2_L2_LOCK, + PNE_CORE2_L2_M_LINES_IN, + PNE_CORE2_L2_M_LINES_OUT, + PNE_CORE2_L2_NO_REQ, + PNE_CORE2_L2_REJECT_BUSQ, + PNE_CORE2_L2_RQSTS, + PNE_CORE2_L2_RQSTS_SELF_DEMAND_I_STATE, + PNE_CORE2_L2_RQSTS_SELF_DEMAND_MESI, + PNE_CORE2_L2_ST, + PNE_CORE2_LOAD_BLOCK_L1D, + PNE_CORE2_LOAD_BLOCK_OVERLAP_STORE, + PNE_CORE2_LOAD_BLOCK_STA, + PNE_CORE2_LOAD_BLOCK_STD, + PNE_CORE2_LOAD_BLOCK_UNTIL_RETIRE, + PNE_CORE2_LOAD_HIT_PRE, + PNE_CORE2_MACHINE_NUKES_MEM_ORDER, + PNE_CORE2_MACHINE_NUKES_SMC, + PNE_CORE2_MACRO_INSTS_CISC_DECODED, + PNE_CORE2_MACRO_INSTS_DECODED, + PNE_CORE2_MEMORY_DISAMBIGUATION_RESET, + PNE_CORE2_MEMORY_DISAMBIGUATION_SUCCESS, + PNE_CORE2_MEM_LOAD_RETIRED_DTLB_MISS, + PNE_CORE2_MEM_LOAD_RETIRED_L1D_LINE_MISS, + PNE_CORE2_MEM_LOAD_RETIRED_L1D_MISS, + PNE_CORE2_MEM_LOAD_RETIRED_L2_LINE_MISS, + PNE_CORE2_MEM_LOAD_RETIRED_L2_MISS, + PNE_CORE2_MUL, + 
PNE_CORE2_PAGE_WALKS_COUNT, + PNE_CORE2_PAGE_WALKS_CYCLES, + PNE_CORE2_PREF_RQSTS_DN, + PNE_CORE2_PREF_RQSTS_UP, + PNE_CORE2_RAT_STALLS_ANY, + PNE_CORE2_RAT_STALLS_FLAGS, + PNE_CORE2_RAT_STALLS_FPSW, + PNE_CORE2_RAT_STALLS_PARTIAL_CYCLES, + PNE_CORE2_RAT_STALLS_ROB_READ_PORT, + PNE_CORE2_RESOURCE_STALLS_ANY, + PNE_CORE2_RESOURCE_STALLS_BR_MISS_CLEAR, + PNE_CORE2_RESOURCE_STALLS_FPCW, + PNE_CORE2_RESOURCE_STALLS_LD_ST, + PNE_CORE2_RESOURCE_STALLS_ROB_FULL, + PNE_CORE2_RESOURCE_STALLS_RS_FULL, + PNE_CORE2_RS_UOPS_DISPATCHED, + PNE_CORE2_RS_UOPS_DISPATCHED_PORT0, + PNE_CORE2_RS_UOPS_DISPATCHED_PORT1, + PNE_CORE2_RS_UOPS_DISPATCHED_PORT2, + PNE_CORE2_RS_UOPS_DISPATCHED_PORT3, + PNE_CORE2_RS_UOPS_DISPATCHED_PORT4, + PNE_CORE2_RS_UOPS_DISPATCHED_PORT5, + PNE_CORE2_SB_DRAIN_CYCLES, + PNE_CORE2_SEGMENT_REG_LOADS, + PNE_CORE2_SEG_REG_RENAMES_ANY, + PNE_CORE2_SEG_REG_RENAMES_DS, + PNE_CORE2_SEG_REG_RENAMES_ES, + PNE_CORE2_SEG_REG_RENAMES_FS, + PNE_CORE2_SEG_REG_RENAMES_GS, + PNE_CORE2_SEG_RENAME_STALLS_ANY, + PNE_CORE2_SEG_RENAME_STALLS_DS, + PNE_CORE2_SEG_RENAME_STALLS_ES, + PNE_CORE2_SEG_RENAME_STALLS_FS, + PNE_CORE2_SEG_RENAME_STALLS_GS, + PNE_CORE2_SIMD_ASSIST, + PNE_CORE2_SIMD_COMP_INST_RETIRED_PACKED_DOUBLE, + PNE_CORE2_SIMD_COMP_INST_RETIRED_PACKED_SINGLE, + PNE_CORE2_SIMD_COMP_INST_RETIRED_SCALAR_DOUBLE, + PNE_CORE2_SIMD_COMP_INST_RETIRED_SCALAR_SINGLE, + PNE_CORE2_SIMD_INSTR_RETIRED, + PNE_CORE2_SIMD_INST_RETIRED_ANY, + PNE_CORE2_SIMD_INST_RETIRED_PACKED_DOUBLE, + PNE_CORE2_SIMD_INST_RETIRED_PACKED_SINGLE, + PNE_CORE2_SIMD_INST_RETIRED_SCALAR_DOUBLE, + PNE_CORE2_SIMD_INST_RETIRED_SCALAR_SINGLE, + PNE_CORE2_SIMD_INST_RETIRED_VECTOR, + PNE_CORE2_SIMD_SAT_INSTR_RETIRED, + PNE_CORE2_SIMD_SAT_UOP_EXEC, + PNE_CORE2_SIMD_UOPS_EXEC, + PNE_CORE2_SIMD_UOP_TYPE_EXEC_ARITHMETIC, + PNE_CORE2_SIMD_UOP_TYPE_EXEC_LOGICAL, + PNE_CORE2_SIMD_UOP_TYPE_EXEC_MUL, + PNE_CORE2_SIMD_UOP_TYPE_EXEC_PACK, + PNE_CORE2_SIMD_UOP_TYPE_EXEC_SHIFT, + PNE_CORE2_SIMD_UOP_TYPE_EXEC_UNPACK, + 
PNE_CORE2_SNOOP_STALL_DRV, + PNE_CORE2_SSE_PRE_EXEC_L1, + PNE_CORE2_SSE_PRE_EXEC_L2, + PNE_CORE2_SSE_PRE_EXEC_NTA, + PNE_CORE2_SSE_PRE_EXEC_STORES, + PNE_CORE2_SSE_PRE_MISS_L1, + PNE_CORE2_SSE_PRE_MISS_L2, + PNE_CORE2_SSE_PRE_MISS_NTA, + PNE_CORE2_STORE_BLOCK_ORDER, + PNE_CORE2_STORE_BLOCK_SNOOP, + PNE_CORE2_THERMAL_TRIP, + PNE_CORE2_UOPS_RETIRED_ANY, + PNE_CORE2_UOPS_RETIRED_FUSED, + PNE_CORE2_UOPS_RETIRED_LD_IND_BR, + PNE_CORE2_UOPS_RETIRED_MACRO_FUSION, + PNE_CORE2_UOPS_RETIRED_NON_FUSED, + PNE_CORE2_UOPS_RETIRED_STD_STA, + PNE_CORE2_X87_OPS_RETIRED_ANY, + PNE_CORE2_X87_OPS_RETIRED_FXCH, + PNE_CORE2_NATNAME_GUARD /* trailing sentinel, one past PNE_CORE2_X87_OPS_RETIRED_FXCH; it names no event of its own and presumably lines up with the { NULL, NULL } terminator of the description table - confirm */ +}; + +extern Native_Event_LabelDescription_t Core2Processor_info[]; /* declaration only, defined elsewhere; the i7 array of this same element type is initialised with name and description string pairs */ +extern hwi_search_t Core2Processor_map[]; /* declaration only, defined elsewhere; presumably maps PAPI presets onto the native events above - confirm */ + +#endif /* FreeBSD_MAP_CORE2 */ diff --git a/src/freebsd/map-i7.c b/src/freebsd/map-i7.c new file mode 100644 index 0000000..0f01a69 --- /dev/null +++ b/src/freebsd/map-i7.c @@ -0,0 +1,509 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-i7.c +* Author: George Neville-Neil +* gnn@freebsd.org +* Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + + + /**************************************************************************** + i7 SUBSTRATE + i7 SUBSTRATE + i7 SUBSTRATE + i7 SUBSTRATE + i7 SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_i7 must match i7_info +*/ + +Native_Event_LabelDescription_t i7Processor_info[] = +{ + {"SB_FORWARD.ANY", "Counts the number of store forwards. 
"}, + {"LOAD_BLOCK.STD", "Counts the number of loads blocked by a preceding store with unknown data."}, + {"LOAD_BLOCK.ADDRESS_OFFSET", "Counts the number of loads blocked by a preceding store address."}, + {"SB_DRAIN.CYCLES", "Counts the cycles of store buffer drains."}, + {"MISALIGN_MEM_REF.LOAD", "Counts the number of misaligned load references."}, + {"MISALIGN_MEM_REF.STORE", "Counts the number of misaligned store references."}, + {"MISALIGN_MEM_REF.ANY", "Counts the number of misaligned memory references."}, + {"STORE_BLOCKS.NOT_STA", "This event counts the number of load operations delayed caused by preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflict with the load but which incompletely overlap the load."}, + {"STORE_BLOCKS.STA", "This event counts load operations delayed caused by preceding stores whose addresses are unknown (STA block)."}, + {"STORE_BLOCKS.AT_RET", "Counts number of loads delayed with at-Retirement block code. 
The following loads need to be executed at retirement and wait for all senior stores on the same thread to be drained: load splitting across 4K boundary (page split), load accessing uncacheable (UC or USWC) memory, load lock, and load with page table in UC or USWC memory region."}, + {"STORE_BLOCKS.L1D_BLOCK", "Cacheable loads delayed with L1D block code."}, + {"STORE_BLOCKS.ANY", "All loads delayed due to store blocks."}, + {"PARTIAL_ADDRESS_ALIAS", "Counts false dependency due to partial address aliasing."}, + {"DTLB_LOAD_MISSES.ANY", "Counts all load misses that cause a page walk."}, + {"DTLB_LOAD_MISSES.WALK_COMPLETED", "Counts number of completed page walks due to load miss in the STLB."}, + {"DTLB_LOAD_MISSES.STLB_HIT", "Number of cache load STLB hits."}, + {"DTLB_LOAD_MISSES.PDE_MISS", "Number of DTLB cache load misses where the low part of the linear to physical address translation was missed."}, + {"DTLB_LOAD_MISSES.PDP_MISS", "Number of DTLB cache load misses where the high part of the linear to physical address translation was missed."}, + {"DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to load miss in the STLB."}, + {"MEMORY_DISAMBIGURATION.RESET", "Counts memory disambiguration reset cycles."}, + {"MEMORY_DISAMBIGURATION.SUCCESS", "Counts the number of loads that memory disambiguration succeeded."}, + {"MEMORY_DISAMBIGURATION.WATCHDOG", "Counts the number of times the memory disambiguration watchdog kicked in."}, + {"MEMORY_DISAMBIGURATION.WATCH_CYCLES", "Counts the cycles that the memory disambiguration watchdog is active."}, + {"MEM_INST_RETIRED.LOADS", "Counts the number of instructions with an architecturally-visible store retired on the architected path."}, + {"MEM_INST_RETIRED.STORES", "Counts the number of instructions with an architecturally-visible store retired on the architected path."}, + {"MEM_STORE_RETIRED.DTLB_MISS", "The event counts the number of retired stores that missed the DTLB. 
The DTLB miss is not counted if the store operation causes a fault. Does not counter prefetches."}, + {"UOPS_ISSUED.ANY", "Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i.e. the UOPs issued from the front end to the back end."}, + {"UOPS_ISSUED.FUSED", "Counts the number of fused Uops that were issued from the Register Allocation Table to the Reservation Station."}, + {"MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM", "Counts number of memory load instructions retired where the memory reference hit modified data in a sibling core residing on the same socket."}, + {"MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT", "Counts number of memory load instructions retired where the memory reference missed the L1, L2 and L3 caches and HIT in a remote socket's cache. Only counts locally homed lines."}, + {"MEM_UNCORE_RETIRED.REMOTE_DRAM", "Counts number of memory load instructions retired where the memory reference missed the L1, L2 and L3 caches and was remotely homed. This includes both DRAM access and HITM in a remote socket's cache for remotely homed lines."}, + {"MEM_UNCORE_RETIRED.LOCAL_DRAM", "Counts number of memory load instructions retired where the memory reference missed the L1, L2 and L3 caches and required a local socket memory reference. This includes locally homed cachelines that were in a modified state in another socket."}, + {"FP_COMP_OPS_EXE.X87", "Counts the number of FP Computational Uops Executed. The number of FADD, FSUB, FCOM, FMULs, integer MULsand IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. 
This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction."}, + {"FP_COMP_OPS_EXE.MMX", "Counts number of MMX Uops executed."}, + {"FP_COMP_OPS_EXE.SSE_FP", "Counts number of SSE and SSE2 FP uops executed."}, + {"FP_COMP_OPS_EXE.SSE2_INTEGER", "Counts number of SSE2 integer uops executed."}, + {"FP_COMP_OPS_EXE.SSE_FP_PACKED", "Counts number of SSE FP packed uops executed."}, + {"FP_COMP_OPS_EXE.SSE_FP_SCALAR", "Counts number of SSE FP scalar uops executed."}, + {"FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION", "Counts number of SSE* FP single precision uops executed."}, + {"FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION", "Counts number of SSE* FP double precision uops executed."}, + {"SIMD_INT_128.PACKED_MPY", "Counts number of 128 bit ED_MPY integer multiply operations."}, + {"SIMD_INT_128.PACKED_SHIFT", "Counts number of 128 bit SIMD integer shift operations."}, + {"SIMD_INT_128.PACK", " Counts number of 128 bit SIMD integer pack operations."}, + {"SIMD_INT_128.UNPACK", "Counts number of 128 bit SIMD integer unpack operations."}, + {"SIMD_INT_128.PACKED_LOGICAL", "Counts number of 128 bit SIMD integer logical operations."}, + {"SIMD_INT_128.PACKED_ARITH", "Counts number of 128 bit SIMD integer arithmetic operations."}, + {"SIMD_INT_128.SHUFFLE_MOVE", "Counts number of 128 bit SIMD integer shuffle and move operations."}, + {"LOAD_DISPATCH.RS", "Counts number of loads dispatched from the Reservation Station that bypass the Memory Order Buffer."}, + {"LOAD_DISPATCH.RS_DELAYED", "Counts the number of delayed RS dispatches at the stage latch. 
If an RS dispatch can not bypass to LB, it has another chance to dispatch from the one-cycle delayed staging latch before it is written into the LB."}, + {"LOAD_DISPATCH.MOB", "Counts the number of loads dispatched from the Reservation Station to the Memory Order Buffer."}, + {"LOAD_DISPATCH.ANY", "Counts all loads dispatched from the Reservation Station."}, + {"ARITH.CYCLES_DIV_BUSY", "Counts the number of cycles the divider is busy executing divide or square root operations. The divide can be integer, X87 or Streaming SIMD Extensions (SSE). The square root operation can be either X87 or SSE."}, + {"ARITH.MUL", "Counts the number of multiply operations executed. This includes integer as well as floating point multiply operations but excludes DPPS mul and MPSAD."}, + {"INST_QUEUE_WRITES", "Counts the number of instructions written into the instruction queue every cycle."}, + {"INST_DECODED.DEC0", "Counts number of instructions that require decoder 0 to be decoded. Usually, this means that the instruction maps to more than 1 uop"}, + {"TWO_UOP_INSTS_DECODED", "An instruction that generates two uops was decoded."}, + {"HW_INT.RCV", "Number of interrupts received."}, + {"HW_INT.CYCLES_MASKED", "Number of cycles interrupts are masked."}, + {"HW_INT.CYCLES_PENDING_AND_MASKED", "Number of cycles interrupts are pending and masked."}, + {"INST_QUEUE_WRITE_CYCLES", "This event counts the number of cycles during which instructions are written to the instruction queue. Dividing this counter by the number of instructions written to the instruction queue (INST_QUEUE_WRITES) yields the average number of instructions decoded each cycle. If this number is less than four and the pipe stalls, this indicates that the decoder is failing to decode enough instructions per cycle to sustain the 4-wide pipeline. If SSE* instructions that are 6 bytes or longer arrive one after another, then front end throughput may limit execution speed. 
"}, + {"L2_RQSTS.LD_HIT", "Counts number of loads that hit the L2 cache. L2 loads include both L1D demand misses as well as L1D prefetches. L2 loads can be rejected for various reasons. Only non rejected loads are counted."}, + {"L2_RQSTS.LD_MISS", "Counts the number of loads that miss the L2 cache. L2 loads include both L1D demand misses as well as L1D prefetches."}, + {"L2_RQSTS.LOADS", "Counts all L2 load requests. L2 loads include both L1D demand misses as well as L1D prefetches."}, + {"L2_RQSTS.RFO_HIT", "Counts the number of store RFO requests that hit the L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Count includes WC memory requests, where the data is not fetched but the permission to write the line is required."}, + {"L2_RQSTS.RFO_MISS", "Counts the number of store RFO requests that miss the L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches."}, + {"L2_RQSTS.RFOS", "Counts all L2 store RFO requests. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches."}, + {"L2_RQSTS.IFETCH_HIT", "Counts number of instruction fetches that hit the L2 cache. L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, + {"L2_RQSTS.IFETCH_MISS", "Counts number of instruction fetches that miss the L2 cache. L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, + {"L2_RQSTS.IFETCHES", "Counts all instruction fetches. 
L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, + {"L2_RQSTS.PREFETCH_HIT", "Counts L2 prefetch hits for both code and data."}, + {"L2_RQSTS.PREFETCH_MISS", "Counts L2 prefetch misses for both code and data."}, + {"L2_RQSTS.PREFETCHES", "Counts all L2 prefetches for both code and data."}, + {"L2_RQSTS.MISS", "Counts all L2 misses for both code and data."}, + {"L2_RQSTS.REFERENCES", "Counts all L2 requests for both code and data."}, + {"L2_DATA_RQSTS.DEMAND.I_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.DEMAND.S_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the S (shared) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.DEMAND.E_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the E (exclusive) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.DEMAND.M_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the M (modified) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.DEMAND.MESI", "Counts all L2 data demand requests. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.PREFETCH.I_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss."}, + {"L2_DATA_RQSTS.PREFETCH.S_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the S (shared) state. 
A prefetch RFO will miss on an S state line, while a prefetch read will hit on an S state line."}, + {"L2_DATA_RQSTS.PREFETCH.E_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the E (exclusive) state."}, + {"L2_DATA_RQSTS.PREFETCH.M_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the M (modified) state."}, + {"L2_DATA_RQSTS.PREFETCH.MESI", "Counts all L2 prefetch requests."}, + {"L2_DATA_RQSTS.ANY", "Counts all L2 data requests."}, + {"L2_WRITE.RFO.I_STATE", "Counts number of L2 demand store RFO requests where the cache line to be loaded is in the I (invalid) state, i.e, a cache miss. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, + {"L2_WRITE.RFO.S_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch,. This is a demand RFO request."}, + {"L2_WRITE.RFO.E_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the E (exclusive) state. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, + {"L2_WRITE.RFO.M_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, + {"L2_WRITE.RFO.HIT", "Counts number of L2 store RFO requests where the cache line to be loaded is in either the S, E or M states. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, + {"L2_WRITE.RFO.MESI", "Counts all L2 store RFO requests.The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, + {"L2_WRITE.LOCK.I_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the I (invalid) state, i.e. 
a cache miss."}, + {"L2_WRITE.LOCK.S_STATE", "Counts number of L2 lock RFO requests where the cache line to be loaded is in the S (shared) state."}, + {"L2_WRITE.LOCK.E_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the E (exclusive) state."}, + {"L2_WRITE.LOCK.M_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the M (modified) state."}, + {"L2_WRITE.LOCK.HIT", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in either the S, E, or M state."}, + {"L2_WRITE.LOCK.MESI", "Counts all L2 demand lock RFO requests."}, + {"L1D_WB_L2.I_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the I (invalid) state, i.e. a cache miss."}, + {"L1D_WB_L2.S_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the S state."}, + {"L1D_WB_L2.E_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the E (exclusive) state."}, + {"L1D_WB_L2.M_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the M (modified) state."}, + {"L1D_WB_L2.MESI", "Counts all L1 writebacks to the L2."}, + {"L3_LAT_CACHE.REFERENCE", "This event counts requests originating from the core that reference a cache line in the last level cache. The event count includes speculative traffic but excludes cache line fills due to a L2 hardware-prefetch. Because cache hierarchy, cache sizes and other implementation-specific characteristics; value comparison to estimate performance differences is not recommended."}, + {"L3_LAT_CACHE.MISS", "This event counts each cache miss condition for references to the last level cache. The event count may include speculative traffic but excludes cache line fills due to L2 hardware-prefetches. 
Because cache hierarchy, cache sizes and other implementation-specific characteristics; value comparison to estimate performance differences is not recommended."}, + {"CPU_CLK_UNHALTED.THREAD_P", "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling."}, + {"CPU_CLK_UNHALTED.REF_P", "Increments at the frequency of TSC when not halted."}, + {"UOPS_DECODED.DEC0", "Counts micro-ops decoded by decoder 0."}, + {"L1D_CACHE_LD.I_STATE", "Counts L1 data cache read requests where the cache line to be loaded is in the I (invalid) state, i.e. the read request missed the cache. Counter 0, 1 only."}, + {"L1D_CACHE_LD.S_STATE", "Counts L1 data cache read requests where the cache line to be loaded is in the S (shared) state. Counter 0, 1 only."}, + {"L1D_CACHE_LD.E_STATE", "Counts L1 data cache read requests where the cache line to be loaded is in the E (exclusive) state. Counter 0, 1 only."}, + {"L1D_CACHE_LD.M_STATE", "Counts L1 data cache read requests where the cache line to be loaded is in the M (modified) state. Counter 0, 1 only."}, + {"L1D_CACHE_LD.MESI", "Counts L1 data cache read requests. Counter 0, 1 only."}, + {"L1D_CACHE_ST.I_STATE", "Counts L1 data cache store RFO requests where the cache line to be loaded is in the I state. Counter 0, 1 only."}, + {"L1D_CACHE_ST.S_STATE", "Counts L1 data cache store RFO requests where the cache line to be loaded is in the S (shared) state. Counter 0, 1 only."}, + {"L1D_CACHE_ST.E_STATE", "Counts L1 data cache store RFO requests where the cache line to be loaded is in the E (exclusive) state. Counter 0, 1 only."}, + {"L1D_CACHE_ST.M_STATE", "Counts L1 data cache store RFO requests where cache line to be loaded is in the M (modified) state. Counter 0, 1 only."}, + {"L1D_CACHE_ST.MESI", "Counts L1 data cache store RFO requests. 
Counter 0, 1 only."}, + {"L1D_CACHE_LOCK.HIT", "Counts retired load locks that hit in the L1 data cache or hit in an already allocated fill buffer. The lock portion of the load lock transaction must hit in the L1D. The initial load will pull the lock into the L1 data cache. Counter 0, 1 only."}, + {"L1D_CACHE_LOCK.S_STATE", "Counts L1 data cache retired load locks that hit the target cache line in the shared state. Counter 0, 1 only."}, + {"L1D_CACHE_LOCK.E_STATE", "Counts L1 data cache retired load locks that hit the target cache line in the exclusive state. Counter 0, 1 only."}, + {"L1D_CACHE_LOCK.M_STATE", "Counts L1 data cache retired load locks that hit the target cache line in the modified state. Counter 0, 1 only."}, + {"L1D_ALL_REF.ANY", "Counts all references (uncached, speculated and retired) to the L1 data cache, including all loads and stores with any memory types. The event counts memory accesses only when they are actually performed. For example, a load blocked by unknown store address and later performed is only counted once. The event does not include non- memory accesses, such as I/O accesses. Counter 0, 1 only."}, + {"L1D_ALL_REF.CACHEABLE", "Counts all data reads and writes (speculated and retired) from cacheable memory, including locked operations. Counter 0, 1 only."}, + {"L1D_PEND_MISS.LOAD_BUFFERS_FULL", "Counts cycles of L1 data cache load fill buffers full. Counter 0, 1 only."}, + {"DTLB_MISSES.ANY", "Counts the number of misses in the STLB which causes a page walk."}, + {"DTLB_MISSES.WALK_COMPLETED", "Counts number of misses in the STLB which resulted in a completed page walk."}, + {"DTLB_MISSES.STLB_HIT", "Counts the number of DTLB first level misses that hit in the second level TLB. 
This event is only relevant if the core contains multiple DTLB levels."}, + {"DTLB_MISSES.PDE_MISS", "Number of DTLB cache misses where the low part of the linear to physical address translation was missed."}, + {"DTLB_MISSES.PDP_MISS", "Number of DTLB misses where the high part of the linear to physical address translation was missed."}, + {"DTLB_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to misses in the STLB."}, + {"SSE_MEM_EXEC.NTA", "Counts number of SSE NTA prefetch/weakly-ordered instructions which missed the L1 data cache."}, + {"SSE_MEM_EXEC.STREAMING_STORES", "Counts number of SSE non- temporal stores."}, + {"LOAD_HIT_PRE", "Counts load operations sent to the L1 data cache while a previous SSE prefetch instruction to the same cache line has started prefetching but has not yet finished."}, + {"SFENCE_CYCLES", "Counts store fence cycles."}, + {"L1D_PREFETCH.REQUESTS", "Counts number of hardware prefetch requests dispatched out of the prefetch FIFO."}, + {"L1D_PREFETCH.MISS", "Counts number of hardware prefetch requests that miss the L1D. There are two prefetchers in the L1D. A streamer, which predicts lines sequentially after this one should be fetched, and the IP prefetcher that remembers access patterns for the current instruction. The streamer prefetcher stops on an L1D hit, while the IP prefetcher does not."}, + {"L1D_PREFETCH.TRIGGERS", "Counts number of prefetch requests triggered by the Finite State Machine and pushed into the prefetch FIFO. Some of the prefetch requests are dropped due to overwrites or competition between the IP index prefetcher and streamer prefetcher. The prefetch FIFO contains 4 entries."}, + {"EPT.EPDE_MISS", "Counts Extended Page Directory Entry misses. 
The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches."}, + {"EPT.EPDPE_HIT", "Counts Extended Page Directory Pointer Entry hits."}, + {"EPT.EPDPE_MISS", "Counts Extended Page Directory Pointer Entry misses."}, + {"L1D.REPL", "Counts the number of lines brought into the L1 data cache. Counter 0, 1 only."}, + {"L1D.M_REPL", "Counts the number of modified lines brought into the L1 data cache. Counter 0, 1 only."}, + {"L1D.M_EVICT", "Counts the number of modified lines evicted from the L1 data cache due to replacement. Counter 0, 1 only."}, + {"L1D.M_SNOOP_EVICT", "Counts the number of modified lines evicted from the L1 data cache due to snoop HITM intervention. Counter 0, 1 only."}, + {"L1D_CACHE_PREFETCH_LOCK_FB_HIT", "Counts the number of cacheable load lock speculated instructions accepted into the fill buffer."}, + {"L1D_CACHE_LOCK_FB_HIT", "Counts the number of cacheable load lock speculated or retired instructions accepted into the fill buffer."}, + {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA", "Counts weighted cycles of offcore demand data read requests. Does not include L2 prefetch requests."}, + {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE", "Counts weighted cycles of offcore demand code read requests. Does not include L2 prefetch requests."}, + {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO", "Counts weighted cycles of offcore demand RFO requests. Does not include L2 prefetch requests."}, + {"OFFCORE_REQUESTS_OUTSTANDING.ANY.READ", "Counts weighted cycles of offcore read requests of any kind. Include L2 prefetch requests."}, + {"CACHE_LOCK_CYCLES.L1D_L2", "Cycle count during which the L1D and L2 are locked. A lock is asserted when there is a locked memory access, due to uncacheable memory, a locked operation that spans two cache lines, or a page walk from an uncacheable page table. 
Counter 0, 1 only.L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such accesses."}, + {"CACHE_LOCK_CYCLES.L1D", "Counts the number of cycles that cacheline in the L1 data cache unit is locked. Counter 0, 1 only."}, + {"IO_TRANSACTIONS", "Counts the number of completed I/O transactions."}, + {"L1I.HITS", "Counts all instruction fetches that hit the L1 instruction cache."}, + {"L1I.MISSES", "Counts all instruction fetches that miss the L1I cache. This includes instruction cache misses, streaming buffer misses, victim cache misses and uncacheable fetches. An instruction fetch miss is counted only once and not once for every cycle it is outstanding."}, + {"L1I.READS", "Counts all instruction fetches, including uncacheable fetches that bypass the L1I."}, + {"L1I.CYCLES_STALLED", "Cycle counts for which an instruction fetch stalls due to a L1I cache miss, ITLB miss or ITLB fault."}, + {"IFU_IVC.FULL", "Instruction Fetch unit victim cache full."}, + {"IFU_IVC.L1I_EVICTION", "L1 Instruction cache evictions."}, + {"LARGE_ITLB.HIT", "Counts number of large ITLB hits."}, + {"L1I_OPPORTUNISTIC_HITS", "Opportunistic hits in streaming."}, + {"ITLB_MISSES.ANY", "Counts the number of misses in all levels of the ITLB which causes a page walk."}, + {"ITLB_MISSES.WALK_COMPLETED", "Counts number of misses in all levels of the ITLB which resulted in a completed page walk."}, + {"ITLB_MISSES.WALK_CYCLES", "Counts ITLB miss page walk cycles."}, + {"ITLB_MISSES.STLB_HIT", "Counts the number of ITLB misses that hit in the second level TLB."}, + {"ITLB_MISSES.PDE_MISS", "Number of ITLB misses where the low part of the linear to physical address translation was missed."}, + {"ITLB_MISSES.PDP_MISS", "Number of ITLB misses where the high part of the linear to physical address translation was missed."}, + {"ITLB_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to misses in the STLB."}, + {"ILD_STALL.ANY", ""}, + 
{"ILD_STALL.IQ_FULL", ""}, + {"ILD_STALL.LCP", "Cycles Instruction Length Decoder stalls due to length changing prefixes: 66, 67 or REX.W (for EM64T) instructions which change the length of the decoded instruction."}, + {"ILD_STALL.MRU", ""}, + {"ILD_STALL.REGEN", ""}, + {"BR_INST_EXEC.ANY", "Counts all near executed branches (not necessarily retired). This includes only instructions and not micro-op branches. Frequent branching is not necessarily a major performance issue. However frequent branch mispredictions may be a problem."}, + {"BR_INST_EXEC.COND", ""}, + {"BR_INST_EXEC.DIRECT", ""}, + {"BR_INST_EXEC.DIRECT_NEAR_CALL", ""}, + {"BR_INST_EXEC.INDIRECT_NEAR_CALL", ""}, + {"BR_INST_EXEC.INDIRECT_NON_CALL", ""}, + {"BR_INST_EXEC.NEAR_CALLS", ""}, + {"BR_INST_EXEC.NON_CALLS", ""}, + {"BR_INST_EXEC.RETURN_NEAR", ""}, + {"BR_INST_EXEC.TAKEN", ""}, + {"BR_MISP_EXEC.COND", "Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired."}, + {"BR_MISP_EXEC.DIRECT", "Counts mispredicted macro unconditional near branch instructions, excluding calls and indirect branches (should always be 0)."}, + {"BR_MISP_EXEC.INDIRECT_NON_CALL", "Counts the number of executed mispredicted indirect near branch instructions that are not calls."}, + {"BR_MISP_EXEC.NON_CALLS", "Counts mispredicted non call near branches executed, but not necessarily retired."}, + {"BR_MISP_EXEC.RETURN_NEAR", "Counts mispredicted indirect branches that have a rear return mnemonic."}, + {"BR_MISP_EXEC.DIRECT_NEAR_CALL", "Counts mispredicted non-indirect near calls executed, (should always be 0)."}, + {"BR_MISP_EXEC.INDIRECT_NEAR_CALL", "Counts mispredicted indirect near calls exeucted, including both register and memory indirect."}, + {"BR_MISP_EXEC.NEAR_CALLS", "Counts all mispredicted near call branches executed, but not necessarily retired."}, + {"BR_MISP_EXEC.TAKEN", "Counts executed mispredicted near branches that are taken, but not necessarily retired."}, 
+ {"BR_MISP_EXEC.ANY", "Counts the number of mispredicted near branch instructions that were executed, but not necessarily retired."}, + {"RESOURCE_STALLS.ANY", "Counts the number of Allocator resource related stalls. Includes register renaming buffer entries, memory buffer entries. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations. Does not include stalls due to SuperQ (off core) queue full, too many cache misses, etc."}, + {"RESOURCE_STALLS.LOAD", "Counts the cycles of stall due to lack of load buffer for load operation."}, + {"RESOURCE_STALLS.RS_FULL", "This event counts the number of cycles when the number of instructions in the pipeline waiting for execution reaches the limit the processor can handle. A high count of this event indicates that there are long latency operations in the pipe (possibly load and store operations that miss the L2 cache, or instructions dependent upon instructions further down the pipeline that have yet to retire. When RS is full, new instructions can not enter the reservation station and start execution."}, + {"RESOURCE_STALLS.STORE", "This event counts the number of cycles that a resource related stall will occur due to the number of store instructions reaching the limit of the pipeline, (i.e. all store buffers are used). The stall ends when a store instruction commits its data to the cache or memory."}, + {"RESOURCE_STALLS.ROB_FULL", "Counts the cycles of stall due to re- order buffer full."}, + {"RESOURCE_STALLS.FPCW", "Counts the number of cycles while execution was stalled due to writing the floating-point unit (FPU) control word."}, + {"RESOURCE_STALLS.MXCSR", "Stalls due to the MXCSR register rename occurring to close to a previous MXCSR rename. 
The MXCSR provides control and status for the MMX registers."}, + {"RESOURCE_STALLS.OTHER", "Counts the number of cycles while execution was stalled due to other resource issues."}, + {"MACRO_INSTS.FUSIONS_DECODED", "Counts the number of instructions decoded that are macro-fused but not necessarily executed or retired."}, + {"BACLEAR_FORCE_IQ", "Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ is also responsible for providing conditional branch prediciton direction based on a static scheme and dynamic data provided by the L2 Branch Prediction Unit. If the conditional branch target is not found in the Target Array and the IQ predicts that the branch is taken, then the IQ will force the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline."}, + {"LSD.UOPS", "Counts the number of micro-ops delivered by loop stream detector Use cmask=1 and invert to count cycles."}, + {"ITLB.FLUSH", "Counts the number of ITLB flushes."}, + {"OFFCORE_REQUESTS.DEMAND.READ_DATA", "Counts number of offcore demand data read requests. Does not count L2 prefetch requests."}, + {"OFFCORE_REQUESTS.DEMAND.READ_CODE", "Counts number of offcore demand code read requests. Does not count L2 prefetch requests."}, + {"OFFCORE_REQUESTS.DEMAND.RFO", "Counts number of offcore demand RFO requests. Does not count L2 prefetch requests."}, + {"OFFCORE_REQUESTS.ANY.READ", "Counts number of offcore read requests. Includes L2 prefetch requests."}, + {"OFFCORE_REQUESTS.ANY.RFO", "Counts number of offcore RFO requests. Includes L2 prefetch requests."}, + {"OFFCORE_REQUESTS.UNCACHED_MEM", "Counts number of offcore uncached memory requests."}, + {"OFFCORE_REQUESTS.L1D_WRITEBACK", "Counts number of L1D writebacks to the uncore."}, + {"OFFCORE_REQUESTS.ANY", "Counts all offcore requests."}, + {"UOPS_EXECUTED.PORT0", "Counts number of Uops executed that were issued on port 0. 
Port 0 handles integer arithmetic, SIMD and FP add Uops."}, + {"UOPS_EXECUTED.PORT1", "Counts number of Uops executed that were issued on port 1. Port 1 handles integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops."}, + {"UOPS_EXECUTED.PORT2_CORE", "Counts number of Uops executed that were issued on port 2. Port 2 handles the load Uops. This is a core count only and can not be collected per thread."}, + {"UOPS_EXECUTED.PORT3_CORE", "Counts number of Uops executed that were issued on port 3. Port 3 handles store Uops. This is a core count only and can not be collected per thread."}, + {"UOPS_EXECUTED.PORT4_CORE", "Counts number of Uops executed that where issued on port 4. Port 4 handles the value to be stored for the store Uops issued on port 3. This is a core count only and can not be collected per thread."}, + {"UOPS_EXECUTED.PORT5", "Counts number of Uops executed that where issued on port 5."}, + {"UOPS_EXECUTED.CORE_ACTIVE_CYCLES", "Counts cycles when the Uops are executing."}, + {"UOPS_EXECUTED.PORT015", "Counts number of Uops executed that where issued on port 0, 1, or 5. use cmask=1, invert=1 to count stall cycles."}, + {"UOPS_EXECUTED.PORT234", "Counts number of Uops executed that where issued on port 2, 3, or 4."}, + {"OFFCORE_REQUESTS_SQ_FULL", "Counts number of cycles the SQ is full to handle off-core requests."}, + {"SNOOPQ_REQUESTS_OUTSTANDING.DATA", "Counts weighted cycles of snoopq requests for data. Counter 0 only Use cmask=1 to count cycles not empty."}, + {"SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE", "Counts weighted cycles of snoopq invalidate requests. Counter 0 only Use cmask=1 to count cycles not empty."}, + {"SNOOPQ_REQUESTS_OUTSTANDING.CODE", "Counts weighted cycles of snoopq requests for code. 
Counter 0 only Use cmask=1 to count cycles not empty."}, + {"OFF_CORE_RESPONSE_0", "see Section 19.17.1.3, ?Off-core Response Performance Monitoring in the Processor Core?"}, + {"SNOOP_RESPONSE.HIT", "Counts HIT snoop response sent by this thread in response to a snoop request."}, + {"SNOOP_RESPONSE.HITE", "Counts HIT E snoop response sent by this thread in response to a snoop request."}, + {"SNOOP_RESPONSE.HITM", "Counts HIT M snoop response sent by this thread in response to a snoop request."}, + {"PIC_ACCESSES.TPR_READS", "Counts number of TPR reads."}, + {"PIC_ACCESSES.TPR_WRITES", "Counts number of TPR writes."}, + {"INST_RETIRED.ANY_P", "See Table A-1 Notes: INST_RETIRED.ANY is counted by a designated fixed counter. INST_RETIRED.ANY_P is counted by a programmable counter and is an architectural performance event. Event is supported if CPUID.A.EBX[1] = 0. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions."}, + {"INST_RETIRED.X87", "Counts the number of floating point computational operations retired: floating point computational operations executed by the assist handler and sub-operations of complex floating point instructions like transcendental instructions."}, + {"UOPS_RETIRED.ANY", "Counts the number of micro-ops retired, (macro-fused=1, micro- fused=2, others=1; maximum count of 8 per cycle). Most instructions are composed of one or two micro- ops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. 
Use cmask=1 and invert to count active cycles or stalled cycles."}, + {"UOPS_RETIRED.RETIRE_SLOTS", "Counts the number of retirement slots used each cycle."}, + {"UOPS_RETIRED.MACRO_FUSED", "Counts number of macro-fused uops retired."}, + {"MACHINE_CLEARS.CYCLES", "Counts the cycles machine clear is asserted."}, + {"MACHINE_CLEARS.MEM_ORDER", "Counts the number of machine clears due to memory order conflicts."}, + {"MACHINE_CLEARS.SMC", "Counts the number of times that a program writes to a code section. Self-modifying code causes a sever penalty in all Intel 64 and IA-32 processors. The modified cache line is written back to the L2 and L3caches."}, + {"MACHINE_CLEARS.FUSION_ASSIST", "Counts the number of macro-fusion assists."}, + {"BR_INST_RETIRED.ALL_BRANCHES", "See Table A-1."}, + {"BR_INST_RETIRED.CONDITIONAL", "Counts the number of conditional branch instructions retired."}, + {"BR_INST_RETIRED.NEAR_CALL", "Counts the number of direct & indirect near unconditional calls retired."}, + {"BR_MISP_RETIRED.ALL_BRANCHES", "See Table A-1."}, + {"BR_MISP_RETIRED.NEAR_CALL", "Counts mispredicted direct & indirect near unconditional retired calls."}, + {"SSEX_UOPS_RETIRED.PACKED_SINGLE", "Counts SIMD packed single- precision floating point Uops retired."}, + {"SSEX_UOPS_RETIRED.SCALAR_SINGLE", "Counts SIMD calar single-precision floating point Uops retired."}, + {"SSEX_UOPS_RETIRED.PACKED_DOUBLE", "Counts SIMD packed double- precision floating point Uops retired."}, + {"SSEX_UOPS_RETIRED.SCALAR_DOUBLE", "Counts SIMD scalar double-precision floating point Uops retired."}, + {"SSEX_UOPS_RETIRED.VECTOR_INTEGER", "Counts 128-bit SIMD vector integer Uops retired."}, + {"ITLB_MISS_RETIRED", "Counts the number of retired instructions that missed the ITLB when the instruction was fetched."}, + {"MEM_LOAD_RETIRED.L1D_HIT", "Counts number of retired loads that hit the L1 data cache."}, + {"MEM_LOAD_RETIRED.L2_HIT", "Counts number of retired loads that hit the L2 data cache."}, + 
{"MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM", "Counts number of retired loads that hit in a sibling core's L2 (on die core). Since the L3 is inclusive of all cores on the package, this is an L3 hit. This counts both clean or modified hits."}, + {"MEM_LOAD_RETIRED.HIT_LFB", "Counts number of retired loads that miss the L1D and the address is located in an allocated line fill buffer and will soon be committed to cache. This is counting secondary L1D misses."}, + {"MEM_LOAD_RETIRED.DTLB_MISS", "Counts the number of retired loads that missed the DTLB. The DTLB miss is not counted if the load operation causes a fault. This event counts loads from cacheable memory only. The event does not count loads by software prefetches. Counts both primary and secondary misses to the TLB."}, + {"MEM_LOAD_RETIRED.L3_MISS", "Counts number of retired loads that miss the L3 cache."}, + {"MEM_LOAD_RETIRED.L3_UNSHARED_HIT", "Couns number of retired loads that hit their own, unshared lines in the L3 cache."}, + {"FP_MMX_TRANS.TO_FP", "Counts the first floating-point instruction following any MMX instruction. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, + {"FP_MMX_TRANS.TO_MMX", "Counts the first MMX instruction following a floating-point instruction. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, + {"FP_MMX_TRANS.ANY", "Counts all transitions from floating point to MMX instructions and from MMX instructions to floating point instructions. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, + {"MACRO_INSTS.DECODED", "Counts the number of instructions decoded, (but not necessarily executed or retired)."}, + {"UOPS_DECODED.MS", "Counts the number of Uops decoded by the Microcode Sequencer, MS. 
The MS delivers uops when the instruction is more than 4 uops long or a microcode assist is occurring."}, + {"UOPS_DECODED.ESP_FOLDING", "Counts number of stack pointer (ESP) instructions decoded: push , pop , call , ret, etc. ESP instructions do not generate a Uop to increment or decrement ESP. Instead, they update an ESP_Offset register that keeps track of the delta to the current value of the ESP register."}, + {"UOPS_DECODED.ESP_SYNC", "Counts number of stack pointer (ESP) sync operations where an ESP instruction is corrected by adding the ESP offset register to the current value of the ESP register."}, + {"RAT_STALLS.FLAGS", "Counts the number of cycles during which execution stalled due to several reasons, one of which is a partial flag register stall. A partial register stall may occur when two conditions are met: 1) an instruction modifies some, but not all, of the flags in the flag register and 2) the next instruction, which depends on flags, depends on flags that were not modified by this instruction."}, + {"RAT_STALLS.REGISTERS", "This event counts the number of cycles instruction execution latency became longer than the defined latency because the instruction used a register that was partially written by previous instruction."}, + {"RAT_STALLS.ROB_READ_PORT", "Counts the number of cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the out-of-order pipeline. Note that, at this stage in the pipeline, additional stalls may occur at the same cycle and prevent the stalled micro-ops from entering the pipe. In such a case, micro-ops retry entering the execution pipe in the next cycle and the ROB-read port stall is counted again."}, + {"RAT_STALLS.SCOREBOARD", "Counts the cycles where we stall due to microarchitecturally required serialization. 
Microcode scoreboarding stalls."}, + {"RAT_STALLS.ANY", "Counts all Register Allocation Table stall cycles due to: Cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the execution pipe. Cycles when partial register stalls occurred Cycles when flag stalls occurred Cycles floating-point unit (FPU) status word stalls occurred. To count each of these conditions separately use the events: RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and RAT_STALLS.FPSW."}, + {"SEG_RENAME_STALLS", "Counts the number of stall cycles due to the lack of renaming resources for the ES, DS, FS, and GS segment registers. If a segment is renamed but not retired and a second update to the same segment occurs, a stall occurs in the front-end of the pipeline until the renamed segment retires."}, + {"ES_REG_RENAMES", "Counts the number of times the ES segment register is renamed."}, + {"UOP_UNFUSION", "Counts unfusion events due to floating point exception to a fused uop."}, + {"BR_INST_DECODED", "Counts the number of branch instructions decoded."}, + {"BOGUS_BR", "Counts the number of bogus branches."}, + {"BPU_MISSED_CALL_RET", "Counts number of times the Branch Prediciton Unit missed predicting a call or return branch."}, + {"L2_HW_PREFETCH.DATA_TRIGGER", "Count L2 HW data prefetcher triggered."}, + {"L2_HW_PREFETCH.CODE_TRIGGER", "Count L2 HW code prefetcher triggered."}, + {"L2_HW_PREFETCH.DCA_TRIGGER", "Count L2 HW DCA prefetcher triggered."}, + {"L2_HW_PREFETCH.KICK_START", "Count L2 HW prefetcher kick started."}, + {"SQ_MISC.PROMOTION", "Counts the number of L2 secondary misses that hit the Super Queue."}, + {"SQ_MISC.PROMOTION_POST_GO", "Counts the number of L2 secondary misses during the Super Queue filling L2."}, + {"SQ_MISC.LRU_HINTS", "Counts number of Super Queue LRU hints sent to L3."}, + {"SQ_MISC.FILL_DROPPED", "Counts the number of SQ L2 fills dropped due to L2 busy."}, + {"SQ_MISC.SPLIT_LOCK", "Counts the number of SQ lock 
splits across a cache line."}, + {"SQ_FULL_STALL_CYCLES", "Counts cycles the Super Queue is full. Neither of the threads on this core will be able to access the uncore."}, + {"FP_ASSIST.ALL", "Counts the number of floating point operations executed that required micro-code assist intervention. Assists are required in the following cases: SSE instructions, (Denormal input when the DAZ flag is off or Underflow result when the FTZ flag is off): x87 instructions, (NaN or denormal are loaded to a register or used as input from memory, Division by 0 or Underflow output)."}, + {"FP_ASSIST.OUTPUT", "Counts number of floating point micro-code assist when the output value (destination register) is invalid."}, + {"FP_ASSIST.INPUT", "Counts number of floating point micro-code assist when the input value (one of the source operands to an FP instruction) is invalid."}, + {"SEGMENT_REG_LOADS", "Counts number of segment register loads."}, + {"SIMD_INT_64.PACKED_MPY", "Counts number of SID integer 64 bit packed multiply operations."}, + {"SIMD_INT_64.PACKED_SHIFT", "Counts number of SID integer 64 bit packed shift operations."}, + {"SIMD_INT_64.PACK", "Counts number of SID integer 64 bit pack operations."}, + {"SIMD_INT_64.UNPACK", "Counts number of SID integer 64 bit unpack operations."}, + {"SIMD_INT_64.PACKED_LOGICAL", "Counts number of SID integer 64 bit logical operations."}, + {"SIMD_INT_64.PACKED_ARITH", "Counts number of SID integer 64 bit arithmetic operations."}, + {"SIMD_INT_64.SHUFFLE_MOVE", "Counts number of SID integer 64 bit shift or move operations."}, + {"INSTR_RETIRED_ANY", "Instructions retired (IAF)"}, + {"CPU_CLK_UNHALTED_CORE", "Unhalted core cycles (IAF)"}, + {"CPU_CLK_UNHALTED_REF", "Unhalted reference cycles (IAF)"}, + {"GQ_CYCLES_FULL.READ_TRACKER", "Uncore cycles Global Queue read tracker is full."}, + {"GQ_CYCLES_FULL.WRITE_TRACKER", "Uncore cycles Global Queue write tracker is full."}, + {"GQ_CYCLES_FULL.PEER_PROBE_TRACKER", "Uncore cycles Global Queue 
peer probe tracker is full. The peer probe tracker queue tracks snoops from the IOH and remote sockets."}, + {"GQ_CYCLES_NOT_EMPTY.READ_TRACKER", "Uncore cycles were Global Queue read tracker has at least one valid entry."}, + {"GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER", "Uncore cycles were Global Queue write tracker has at least one valid entry."}, + {"GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER", "Uncore cycles were Global Queue peer probe tracker has at least one valid entry. The peer probe tracker queue tracks IOH and remote socket snoops."}, + {"GQ_ALLOC.READ_TRACKER", "Counts the number of tread tracker allo- cate to deallocate entries. The GQ read tracker allocate to deal- locate occupancy count is divided by the count to obtain the average read tracker latency."}, + {"GQ_ALLOC.RT_L3_MISS", "Counts the number GQ read tracker entries for which a full cache line read has missed the L3. The GQ read tracker L3 miss to fill occupancy count is divided by this count to obtain the average cache line read L3 miss latency. The latency represents the time after which the L3 has determined that the cache line has missed. The time between a GQ read tracker allocation and the L3 determining that the cache line has missed is the average L3 hit latency. The total L3 cache line read miss latency is the hit latency + L3 miss latency."}, + {"GQ_ALLOC.RT_TO_L3_RESP", "Counts the number of GQ read tracker entries that are allocated in the read tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy count is divided by this count to obtain the average L3 hit latency."}, + {"GQ_ALLOC.RT_TO_RTID_ACQUIRED", "Counts the number of GQ read tracker entries that are allocated in the read tracker, have missed in the L3 and have not acquired a Request Transaction ID. 
The GQ read tracker L3 miss to RTID acquired occupancy count is divided by this count to obtain the average latency for a read L3 miss to acquire an RTID."}, + {"GQ_ALLOC.WT_TO_RTID_ACQUIRED", "Counts the number of GQ write tracker entries that are allocated in the write tracker, have missed in the L3 and have not acquired a Request Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is divided by this count to obtain the average latency for a write L3 miss to acquire an RTID."}, + {"GQ_ALLOC.WRITE_TRACKER", "Counts the number of GQ write tracker entries that are allocated in the write tracker queue that miss the L3. The GQ write tracker occupancy count is divided by the this count to obtain the average L3 write miss latency."}, + {"GQ_ALLOC.PEER_PROBE_TRACKER", "Counts the number of GQ peer probe tracker (snoop) entries that are allocated in the peer probe tracker queue that miss the L3. The GQ peer probe occupancy count is divided by this count to obtain the average L3 peer probe miss latency."}, + {"GQ_DATA.FROM_QPI", "Cycles Global Queue Quickpath Interface input data port is busy importing data from the Quickpath Inter- face. Each cycle the input port can transfer 8 or 16 bytes of data."}, + {"GQ_DATA.FROM_QMC", "Cycles Global Queue Quickpath Memory Interface input data port is busy importing data from the Quick- path Memory Interface. Each cycle the input port can transfer 8 or 16 bytes of data."}, + {"GQ_DATA.FROM_L3", "Cycles GQ L3 input data port is busy importing data from the Last Level Cache. Each cycle the input port can transfer 32 bytes of data."}, + {"GQ_DATA.FROM_CORES_02", "Cycles GQ Core 0 and 2 input data port is busy importing data from processor cores 0 and 2. Each cycle the input port can transfer 32 bytes of data."}, + {"GQ_DATA.FROM_CORES_13", "Cycles GQ Core 1 and 3 input data port is busy importing data from processor cores 1 and 3. 
Each cycle the input port can transfer 32 bytes of data."}, + {"GQ_DATA.TO_QPI_QMC", "Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath Interface or Quickpath Memory Interface. Each cycle the output port can transfer 32 bytes of data."}, + {"GQ_DATA.TO_L3", "Cycles GQ L3 output data port is busy sending data to the Last Level Cache. Each cycle the output port can transfer 32 bytes of data."}, + {"GQ_DATA.TO_CORES", "Cycles GQ Core output data port is busy sending data to the Cores. Each cycle the output port can trans- fer 32 bytes of data."}, + {"SNP_RESP_TO_LOCAL_HOME.I_STATE", "Number of snoop responses to the local home that L3 does not have the referenced cache line."}, + {"SNP_RESP_TO_LOCAL_HOME.S_STATE", "Number of snoop responses to the local home that L3 has the referenced line cached in the S state."}, + {"SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE", "Number of responses to code or data read snoops to the local home that the L3 has the referenced cache line in the E state. The L3 cache line state is changed to the S state and the line is forwarded to the local home in the S state."}, + {"SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE", "Number of responses to read invalidate snoops to the local home that the L3 has the referenced cache line in the M state. 
The L3 cache line state is invalidated and the line is forwarded to the local home in the M state."}, + {"SNP_RESP_TO_LOCAL_HOME.CONFLICT", "Number of conflict snoop responses sent to the local home."}, + {"SNP_RESP_TO_LOCAL_HOME.WB", "Number of responses to code or data read snoops to the local home that the L3 has the referenced line cached in the M state."}, + {"SNP_RESP_TO_REMOTE_HOME.I_STATE", "Number of snoop responses to a remote home that L3 does not have the referenced cache line."}, + {"SNP_RESP_TO_REMOTE_HOME.S_STATE", "Number of snoop responses to a remote home that L3 has the referenced line cached in the S state."}, + {"SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE", "Number of responses to code or data read snoops to a remote home that the L3 has the referenced cache line in the E state. The L3 cache line state is changed to the S state and the line is forwarded to the remote home in the S state."}, + {"SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE", "Number of responses to read invalidate snoops to a remote home that the L3 has the referenced cache line in the M state. The L3 cache line state is invalidated and the line is forwarded to the remote home in the M state."}, + {"SNP_RESP_TO_REMOTE_HOME.CONFLICT", "Number of conflict snoop responses sent to the local home."}, + {"SNP_RESP_TO_REMOTE_HOME.WB", "Number of responses to code or data read snoops to a remote home that the L3 has the referenced line cached in the M state."}, + {"SNP_RESP_TO_REMOTE_HOME.HITM", "Number of HITM snoop responses to a remote home."}, + {"L3_HITS.READ", "Number of code read, data read and RFO requests that hit in the L3."}, + {"L3_HITS.WRITE", "Number of writeback requests that hit in the L3. 
Writebacks from the cores will always result in L3 hits due to the inclusive property of the L3."}, + {"L3_HITS.PROBE", "Number of snoops from IOH or remote sockets that hit in the L3."}, + {"L3_HITS.ANY", "Number of reads and writes that hit the L3."}, + {"L3_MISS.READ", "Number of code read, data read and RFO requests that miss the L3."}, + {"L3_MISS.WRITE", "Number of writeback requests that miss the L3. Should always be zero as writebacks from the cores will always result in L3 hits due to the inclusive property of the L3."}, + {"L3_MISS.PROBE", "Number of snoops from IOH or remote sockets that miss the L3."}, + {"L3_MISS.ANY", "Number of reads and writes that miss the L3."}, + {"L3_LINES_IN.M_STATE", "Counts the number of L3 lines allocated in M state. The only time a cache line is allocated in the M state is when the line was forwarded in M state due to a Snoop Read Invalidate Own request."}, + {"L3_LINES_IN.E_STATE", "Counts the number of L3 lines allocated in E state."}, + {"L3_LINES_IN.S_STATE", "Counts the number of L3 lines allocated in S state."}, + {"L3_LINES_IN.F_STATE", "Counts the number of L3 lines allocated in F state."}, + {"L3_LINES_IN.ANY", "Counts the number of L3 lines allocated in any state."}, + {"L3_LINES_OUT.M_STATE", "Counts the number of L3 lines victimized that were in the M state. 
When the victim cache line is in M state, the line is written to its home cache agent which can be either local or remote."}, + {"L3_LINES_OUT.E_STATE", "Counts the number of L3 lines victimized that were in the E state."}, + {"L3_LINES_OUT.S_STATE", "Counts the number of L3 lines victimized that were in the S state."}, + {"L3_LINES_OUT.I_STATE", "Counts the number of L3 lines victimized that were in the I state."}, + {"L3_LINES_OUT.F_STATE", "Counts the number of L3 lines victimized that were in the F state."}, + {"L3_LINES_OUT.ANY", "Counts the number of L3 lines victimized in any state."}, + {"QHL_REQUESTS.IOH_READS", "Counts number of Quickpath Home Logic read requests from the IOH."}, + {"QHL_REQUESTS.IOH_WRITES", "Counts number of Quickpath Home Logic write requests from the IOH."}, + {"QHL_REQUESTS.REMOTE_READS", "Counts number of Quickpath Home Logic read requests from a remote socket."}, + {"QHL_REQUESTS.REMOTE_WRITES", "Counts number of Quickpath Home Logic write requests from a remote socket."}, + {"QHL_REQUESTS.LOCAL_READS", "Counts number of Quickpath Home Logic read requests from the local socket."}, + {"QHL_REQUESTS.LOCAL_WRITES", "Counts number of Quickpath Home Logic write requests from the local socket."}, + {"QHL_CYCLES_FULL.IOH", "Counts uclk cycles all entries in the Quickpath Home Logic IOH are full."}, + {"QHL_CYCLES_FULL.REMOTE", "Counts uclk cycles all entries in the Quickpath Home Logic remote tracker are full."}, + {"QHL_CYCLES_FULL.LOCAL", "Counts uclk cycles all entries in the Quickpath Home Logic local tracker are full."}, + {"QHL_CYCLES_NOT_EMPTY.IOH", "Counts uclk cycles all entries in the Quickpath Home Logic IOH is busy."}, + {"QHL_CYCLES_NOT_EMPTY.REMOTE", "Counts uclk cycles all entries in the Quickpath Home Logic remote tracker is busy."}, + {"QHL_CYCLES_NOT_EMPTY.LOCAL", "Counts uclk cycles all entries in the Quickpath Home Logic local tracker is busy."}, + {"QHL_OCCUPANCY.IOH", "QHL IOH tracker allocate to deallocate read 
occupancy."}, + {"QHL_OCCUPANCY.REMOTE", "QHL remote tracker allocate to deallocate read occupancy."}, + {"QHL_OCCUPANCY.LOCAL", "QHL local tracker allocate to deallocate read occupancy."}, + {"QHL_ADDRESS_CONFLICTS.2WAY", "Counts number of QHL Active Address Table (AAT) entries that saw a max of 2 conflicts. The AAT is a structure that tracks requests that are in conflict. The requests themselves are in the home tracker entries. The count is reported when an AAT entry deallocates."}, + {"QHL_ADDRESS_CONFLICTS.3WAY", "Counts number of QHL Active Address Table (AAT) entries that saw a max of 3 conflicts. The AAT is a structure that tracks requests that are in conflict. The requests themselves are in the home tracker entries. The count is reported when an AAT entry deallocates."}, + {"QHL_CONFLICT_CYCLES.IOH", "Counts cycles the Quickpath Home Logic IOH Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, + {"QHL_CONFLICT_CYCLES.REMOTE", "Counts cycles the Quickpath Home Logic Remote Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, + {"QHL_CONFLICT_CYCLES.LOCAL", "Counts cycles the Quickpath Home Logic Local Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, + {"QHL_TO_QMC_BYPASS", "Counts number of requests to the Quickpath Memory Controller that bypass the Quickpath Home Logic. All local accesses can be bypassed. 
For remote requests, only read requests can be bypassed."}, + {"QMC_NORMAL_FULL.READ.CH0", "Uncore cycles all the entries in the DRAM channel 0 medium or low priority queue are occupied with read requests."}, + {"QMC_NORMAL_FULL.READ.CH1", "Uncore cycles all the entries in the DRAM channel 1 medium or low priority queue are occupied with read requests."}, + {"QMC_NORMAL_FULL.READ.CH2", "Uncore cycles all the entries in the DRAM channel 2 medium or low priority queue are occupied with read requests."}, + {"QMC_NORMAL_FULL.WRITE.CH0", "Uncore cycles all the entries in the DRAM channel 0 medium or low priority queue are occupied with write requests."}, + {"QMC_NORMAL_FULL.WRITE.CH1", "Counts cycles all the entries in the DRAM channel 1 medium or low priority queue are occupied with write requests."}, + {"QMC_NORMAL_FULL.WRITE.CH2", "Uncore cycles all the entries in the DRAM channel 2 medium or low priority queue are occupied with write requests."}, + {"QMC_ISOC_FULL.READ.CH0", "Counts cycles all the entries in the DRAM channel 0 high priority queue are occupied with isochronous read requests."}, + {"QMC_ISOC_FULL.READ.CH1", "Counts cycles all the entries in the DRAM channel 1 high priority queue are occupied with isochronous read requests."}, + {"QMC_ISOC_FULL.READ.CH2", "Counts cycles all the entries in the DRAM channel 2 high priority queue are occupied with isochronous read requests."}, + {"QMC_ISOC_FULL.WRITE.CH0", "Counts cycles all the entries in the DRAM channel 0 high priority queue are occupied with isochronous write requests."}, + {"QMC_ISOC_FULL.WRITE.CH1", "Counts cycles all the entries in the DRAM channel 1 high priority queue are occupied with isochronous write requests."}, + {"QMC_ISOC_FULL.WRITE.CH2", "Counts cycles all the entries in the DRAM channel 2 high priority queue are occupied with isochronous write requests."}, + {"QMC_BUSY.READ.CH0", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding read request to DRAM channel 
0."}, + {"QMC_BUSY.READ.CH1", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding read request to DRAM channel 1."}, + {"QMC_BUSY.READ.CH2", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding read request to DRAM channel 2."}, + {"QMC_BUSY.WRITE.CH0", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding write request to DRAM channel 0."}, + {"QMC_BUSY.WRITE.CH1", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding write request to DRAM channel 1."}, + {"QMC_BUSY.WRITE.CH2", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding write request to DRAM channel 2."}, + {"QMC_OCCUPANCY.CH0", "IMC channel 0 normal read request occupancy."}, + {"QMC_OCCUPANCY.CH1", "IMC channel 1 normal read request occupancy."}, + {"QMC_OCCUPANCY.CH2", "IMC channel 2 normal read request occupancy."}, + {"QMC_ISSOC_OCCUPANCY.CH0", "IMC channel 0 issoc read request occupancy."}, + {"QMC_ISSOC_OCCUPANCY.CH1", "IMC channel 1 issoc read request occupancy."}, + {"QMC_ISSOC_OCCUPANCY.CH2", "IMC channel 2 issoc read request occupancy."}, + {"QMC_ISSOC_READS.ANY", "IMC issoc read request occupancy."}, + {"QMC_NORMAL_READS.CH0", "Counts the number of Quickpath Memory Controller channel 0 medium and low priority read requests. The QMC channel 0 normal read occupancy divided by this count provides the average QMC channel 0 read latency."}, + {"QMC_NORMAL_READS.CH1", "Counts the number of Quickpath Memory Controller channel 1 medium and low priority read requests. The QMC channel 1 normal read occupancy divided by this count provides the average QMC channel 1 read latency."}, + {"QMC_NORMAL_READS.CH2", "Counts the number of Quickpath Memory Controller channel 2 medium and low priority read requests. 
The QMC channel 2 normal read occupancy divided by this count provides the average QMC channel 2 read latency."}, + {"QMC_NORMAL_READS.ANY", "Counts the number of Quickpath Memory Controller medium and low priority read requests. The QMC normal read occupancy divided by this count provides the average QMC read latency."}, + {"QMC_HIGH_PRIORITY_READS.CH0", "Counts the number of Quickpath Memory Controller channel 0 high priority isochronous read requests."}, + {"QMC_HIGH_PRIORITY_READS.CH1", "Counts the number of Quickpath Memory Controller channel 1 high priority isochronous read requests."}, + {"QMC_HIGH_PRIORITY_READS.CH2", "Counts the number of Quickpath Memory Controller channel 2 high priority isochronous read requests."}, + {"QMC_HIGH_PRIORITY_READS.ANY", "Counts the number of Quickpath Memory Controller high priority isochronous read requests."}, + {"QMC_CRITICAL_PRIORITY_READS.CH0", "Counts the number of Quickpath Memory Controller channel 0 critical priority isochronous read requests."}, + {"QMC_CRITICAL_PRIORITY_READS.CH1", "Counts the number of Quickpath Memory Controller channel 1 critical priority isochronous read requests."}, + {"QMC_CRITICAL_PRIORITY_READS.CH2", "Counts the number of Quickpath Memory Controller channel 2 critical priority isochronous read requests."}, + {"QMC_CRITICAL_PRIORITY_READS.ANY", "Counts the number of Quickpath Memory Controller critical priority isochronous read requests."}, + {"QMC_WRITES.FULL.CH0", "Counts number of full cache line writes to DRAM channel 0."}, + {"QMC_WRITES.FULL.CH1", "Counts number of full cache line writes to DRAM channel 1."}, + {"QMC_WRITES.FULL.CH2", "Counts number of full cache line writes to DRAM channel 2."}, + {"QMC_WRITES.FULL.ANY", "Counts number of full cache line writes to DRAM."}, + {"QMC_WRITES.PARTIAL.CH0", "Counts number of partial cache line writes to DRAM channel 0."}, + {"QMC_WRITES.PARTIAL.CH1", "Counts number of partial cache line writes to DRAM channel 1."}, + 
{"QMC_WRITES.PARTIAL.CH2", "Counts number of partial cache line writes to DRAM channel 2."}, + {"QMC_WRITES.PARTIAL.ANY", "Counts number of partial cache line writes to DRAM."}, + {"QMC_CANCEL.CH0", "Counts number of DRAM channel 0 cancel requests."}, + {"QMC_CANCEL.CH1", "Counts number of DRAM channel 1 cancel requests."}, + {"QMC_CANCEL.CH2", "Counts number of DRAM channel 2 cancel requests."}, + {"QMC_CANCEL.ANY", "Counts number of DRAM cancel requests."}, + {"QMC_PRIORITY_UPDATES.CH0", "Counts number of DRAM channel 0 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, + {"QMC_PRIORITY_UPDATES.CH1", "Counts number of DRAM channel 1 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, + {"QMC_PRIORITY_UPDATES.CH2", "Counts number of DRAM channel 2 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, + {"QMC_PRIORITY_UPDATES.ANY", "Counts number of DRAM priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. 
In this instance, the QHL will send a priority update to QMC to expedite the request."}, + {"QHL_FRC_ACK_CNFLTS.LOCAL", "Counts number of Force Acknowledge Con- flict messages sent by the Quickpath Home Logic to the local home."}, + {"QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0", "Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0", "Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0", "Counts cycles the Quickpath outbound link 0 non-data response virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1", "Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1", "Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1", "Counts cycles the Quickpath outbound link 1 non-data response virtual channel is stalled due to lack of a VNA and VN0 credit. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.LINK_0", "Counts cycles the Quickpath outbound link 0 virtual channels are stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.LINK_1", "Counts cycles the Quickpath outbound link 1 virtual channels are stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0", "Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0", "Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0", "Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1", "Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is stalled due to lack of VNA and VN0 credits. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1", "Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1", "Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.LINK_0", "Counts cycles the Quickpath outbound link 0 virtual channels are stalled due to lack of VNA and VN0 cred- its. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.LINK_1", "Counts cycles the Quickpath outbound link 1 virtual channels are stalled due to lack of VNA and VN0 cred- its. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_HEADER.BUSY.LINK_0", "Number of cycles that the header buffer in the Quickpath Interface outbound link 0 is busy."}, + {"QPI_TX_HEADER.BUSY.LINK_1", "Number of cycles that the header buffer in the Quickpath Interface outbound link 1 is busy."}, + {"QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0", "Number of cycles that snoop packets incoming to the Quickpath Interface link 0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) does not have any available entries."}, + {"QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1", "Number of cycles that snoop packets incoming to the Quickpath Interface link 1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) does not have any available entries."}, + {"DRAM_OPEN.CH0", "Counts number of DRAM Channel 0 open commands issued either for read or write. To read or write data, the referenced DRAM page must first be opened."}, + {"DRAM_OPEN.CH1", "Counts number of DRAM Channel 1 open commands issued either for read or write. To read or write data, the referenced DRAM page must first be opened."}, + {"DRAM_OPEN.CH2", "Counts number of DRAM Channel 2 open commands issued either for read or write. To read or write data, the referenced DRAM page must first be opened."}, + {"DRAM_PAGE_CLOSE.CH0", "DRAM channel 0 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, + {"DRAM_PAGE_CLOSE.CH1", "DRAM channel 1 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, + {"DRAM_PAGE_CLOSE.CH2", "DRAM channel 2 command issued to CLOSE a page due to page idle timer expiration. 
Closing a page is done by issuing a precharge."}, + {"DRAM_PAGE_MISS.CH0", "Counts the number of precharges (PRE) that were issued to DRAM channel 0 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. Closing of the old page is done by issuing a precharge."}, + {"DRAM_PAGE_MISS.CH1", "Counts the number of precharges (PRE) that were issued to DRAM channel 1 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. Closing of the old page is done by issuing a precharge."}, + {"DRAM_PAGE_MISS.CH2", "Counts the number of precharges (PRE) that were issued to DRAM channel 2 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. 
Closing of the old page is done by issuing a precharge."}, + {"DRAM_READ_CAS.CH0", "Counts the number of times a read CAS command was issued on DRAM channel 0."}, + {"DRAM_READ_CAS.AUTOPRE_CH0", "Counts the number of times a read CAS command was issued on DRAM channel 0 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_READ_CAS.CH1", "Counts the number of times a read CAS command was issued on DRAM channel 1."}, + {"DRAM_READ_CAS.AUTOPRE_CH1", "Counts the number of times a read CAS command was issued on DRAM channel 1 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_READ_CAS.CH2", "Counts the number of times a read CAS command was issued on DRAM channel 2."}, + {"DRAM_READ_CAS.AUTOPRE_CH2", "Counts the number of times a read CAS command was issued on DRAM channel 2 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_WRITE_CAS.CH0", "Counts the number of times a write CAS command was issued on DRAM channel 0."}, + {"DRAM_WRITE_CAS.AUTOPRE_CH0", "Counts the number of times a write CAS command was issued on DRAM channel 0 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_WRITE_CAS.CH1", "Counts the number of times a write CAS command was issued on DRAM channel 1."}, + {"DRAM_WRITE_CAS.AUTOPRE_CH1", "Counts the number of times a write CAS command was issued on DRAM channel 1 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_WRITE_CAS.CH2", "Counts the number of times a write CAS command was issued on DRAM channel 2."}, + {"DRAM_WRITE_CAS.AUTOPRE_CH2", "Counts the number of times a write CAS command was issued on DRAM channel 2 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_REFRESH.CH0", "Counts number of DRAM channel 0 refresh commands. DRAM loses data content over time. 
In order to keep correct data content, the data values have to be refreshed periodically."}, + {"DRAM_REFRESH.CH1", "Counts number of DRAM channel 1 refresh commands. DRAM loses data content over time. In order to keep correct data content, the data values have to be refreshed periodically."}, + {"DRAM_REFRESH.CH2", "Counts number of DRAM channel 2 refresh commands. DRAM loses data content over time. In order to keep correct data content, the data values have to be refreshed periodically."}, + {"DRAM_PRE_ALL.CH0", "Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close all open pages in a rank. PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, + {"DRAM_PRE_ALL.CH1", "Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close all open pages in a rank. PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, + {"DRAM_PRE_ALL.CH2", "Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close all open pages in a rank. 
PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, + { NULL, NULL } +}; diff --git a/src/freebsd/map-i7.h b/src/freebsd/map-i7.h new file mode 100644 index 0000000..6c7ec39 --- /dev/null +++ b/src/freebsd/map-i7.h @@ -0,0 +1,498 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-i7.h +* CVS: $Id: map-i7.h,v 1.1.2.2 2010/03/06 16:12:08 servat Exp $ +* Author: George Neville-Neil +* gnn@freebsd.org +*/ + +#ifndef FreeBSD_MAP_I7 +#define FreeBSD_MAP_I7 + +enum NativeEvent_Value_i7Processor { + PNE_I7_SB_FORWARD_ANY= PAPI_NATIVE_MASK , + PNE_I7_LOAD_BLOCK_STD, + PNE_I7_LOAD_BLOCK_ADDRESS_OFFSET, + PNE_I7_SB_DRAIN_CYCLES, + PNE_I7_MISALIGN_MEM_REF_LOAD, + PNE_I7_MISALIGN_MEM_REF_STORE, + PNE_I7_MISALIGN_MEM_REF_ANY, + PNE_I7_STORE_BLOCKS_NOT_STA, + PNE_I7_STORE_BLOCKS_STA, + PNE_I7_STORE_BLOCKS_AT_RET, + PNE_I7_STORE_BLOCKS_L1D_BLOCK, + PNE_I7_STORE_BLOCKS_ANY, + PNE_I7_PARTIAL_ADDRESS_ALIAS, + PNE_I7_DTLB_LOAD_MISSES_ANY, + PNE_I7_DTLB_LOAD_MISSES_WALK_COMPLETED, + PNE_I7_DTLB_LOAD_MISSES_STLB_HIT, + PNE_I7_DTLB_LOAD_MISSES_PDE_MISS, + PNE_I7_DTLB_LOAD_MISSES_PDP_MISS, + PNE_I7_DTLB_LOAD_MISSES_LARGE_WALK_COMPLETED, + PNE_I7_MEMORY_DISAMBIGURATION_RESET, + PNE_I7_MEMORY_DISAMBIGURATION_SUCCESS, + PNE_I7_MEMORY_DISAMBIGURATION_WATCHDOG, + PNE_I7_MEMORY_DISAMBIGURATION_WATCH_CYCLES, + PNE_I7_MEM_INST_RETIRED_LOADS, + PNE_I7_MEM_INST_RETIRED_STORES, + PNE_I7_MEM_STORE_RETIRED_DTLB_MISS, + PNE_I7_UOPS_ISSUED_ANY, + PNE_I7_UOPS_ISSUED_FUSED, + PNE_I7_MEM_UNCORE_RETIRED_OTHER_CORE_L2_HITM, + PNE_I7_MEM_UNCORE_RETIRED_REMOTE_CACHE_LOCAL_HOME_HIT, + PNE_I7_MEM_UNCORE_RETIRED_REMOTE_DRAM, + PNE_I7_MEM_UNCORE_RETIRED_LOCAL_DRAM, + PNE_I7_FP_COMP_OPS_EXE_X87, + PNE_I7_FP_COMP_OPS_EXE_MMX, + PNE_I7_FP_COMP_OPS_EXE_SSE_FP, + PNE_I7_FP_COMP_OPS_EXE_SSE2_INTEGER, + PNE_I7_FP_COMP_OPS_EXE_SSE_FP_PACKED, + PNE_I7_FP_COMP_OPS_EXE_SSE_FP_SCALAR, + 
PNE_I7_FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION, + PNE_I7_FP_COMP_OPS_EXE_SSE_DOUBLE_PRECISION, + PNE_I7_SIMD_INT_128_PACKED_MPY, + PNE_I7_SIMD_INT_128_PACKED_SHIFT, + PNE_I7_SIMD_INT_128_PACK, + PNE_I7_SIMD_INT_128_UNPACK, + PNE_I7_SIMD_INT_128_PACKED_LOGICAL, + PNE_I7_SIMD_INT_128_PACKED_ARITH, + PNE_I7_SIMD_INT_128_SHUFFLE_MOVE, + PNE_I7_LOAD_DISPATCH_RS, + PNE_I7_LOAD_DISPATCH_RS_DELAYED, + PNE_I7_LOAD_DISPATCH_MOB, + PNE_I7_LOAD_DISPATCH_ANY, + PNE_I7_ARITH_CYCLES_DIV_BUSY, + PNE_I7_ARITH_MUL, + PNE_I7_INST_QUEUE_WRITES, + PNE_I7_INST_DECODED_DEC0, + PNE_I7_TWO_UOP_INSTS_DECODED, + PNE_I7_HW_INT_RCV, + PNE_I7_HW_INT_CYCLES_MASKED, + PNE_I7_HW_INT_CYCLES_PENDING_AND_MASKED, + PNE_I7_INST_QUEUE_WRITE_CYCLES, + PNE_I7_L2_RQSTS_LD_HIT, + PNE_I7_L2_RQSTS_LD_MISS, + PNE_I7_L2_RQSTS_LOADS, + PNE_I7_L2_RQSTS_RFO_HIT, + PNE_I7_L2_RQSTS_RFO_MISS, + PNE_I7_L2_RQSTS_RFOS, + PNE_I7_L2_RQSTS_IFETCH_HIT, + PNE_I7_L2_RQSTS_IFETCH_MISS, + PNE_I7_L2_RQSTS_IFETCHES, + PNE_I7_L2_RQSTS_PREFETCH_HIT, + PNE_I7_L2_RQSTS_PREFETCH_MISS, + PNE_I7_L2_RQSTS_PREFETCHES, + PNE_I7_L2_RQSTS_MISS, + PNE_I7_L2_RQSTS_REFERENCES, + PNE_I7_L2_DATA_RQSTS_DEMAND_I_STATE, + PNE_I7_L2_DATA_RQSTS_DEMAND_S_STATE, + PNE_I7_L2_DATA_RQSTS_DEMAND_E_STATE, + PNE_I7_L2_DATA_RQSTS_DEMAND_M_STATE, + PNE_I7_L2_DATA_RQSTS_DEMAND_MESI, + PNE_I7_L2_DATA_RQSTS_PREFETCH_I_STATE, + PNE_I7_L2_DATA_RQSTS_PREFETCH_S_STATE, + PNE_I7_L2_DATA_RQSTS_PREFETCH_E_STATE, + PNE_I7_L2_DATA_RQSTS_PREFETCH_M_STATE, + PNE_I7_L2_DATA_RQSTS_PREFETCH_MESI, + PNE_I7_L2_DATA_RQSTS_ANY, + PNE_I7_L2_WRITE_RFO_I_STATE, + PNE_I7_L2_WRITE_RFO_S_STATE, + PNE_I7_L2_WRITE_RFO_E_STATE, + PNE_I7_L2_WRITE_RFO_M_STATE, + PNE_I7_L2_WRITE_RFO_HIT, + PNE_I7_L2_WRITE_RFO_MESI, + PNE_I7_L2_WRITE_LOCK_I_STATE, + PNE_I7_L2_WRITE_LOCK_S_STATE, + PNE_I7_L2_WRITE_LOCK_E_STATE, + PNE_I7_L2_WRITE_LOCK_M_STATE, + PNE_I7_L2_WRITE_LOCK_HIT, + PNE_I7_L2_WRITE_LOCK_MESI, + PNE_I7_L1D_WB_L2_I_STATE, + PNE_I7_L1D_WB_L2_S_STATE, + PNE_I7_L1D_WB_L2_E_STATE, + 
PNE_I7_L1D_WB_L2_M_STATE, + PNE_I7_L1D_WB_L2_MESI, + PNE_I7_L3_LAT_CACHE_REFERENCE, + PNE_I7_L3_LAT_CACHE_MISS, + PNE_I7_CPU_CLK_UNHALTED_THREAD_P, + PNE_I7_CPU_CLK_UNHALTED_REF_P, + PNE_I7_UOPS_DECODED_DEC0, + PNE_I7_L1D_CACHE_LD_I_STATE, + PNE_I7_L1D_CACHE_LD_S_STATE, + PNE_I7_L1D_CACHE_LD_E_STATE, + PNE_I7_L1D_CACHE_LD_M_STATE, + PNE_I7_L1D_CACHE_LD_MESI, + PNE_I7_L1D_CACHE_ST_I_STATE, + PNE_I7_L1D_CACHE_ST_S_STATE, + PNE_I7_L1D_CACHE_ST_E_STATE, + PNE_I7_L1D_CACHE_ST_M_STATE, + PNE_I7_L1D_CACHE_ST_MESI, + PNE_I7_L1D_CACHE_LOCK_HIT, + PNE_I7_L1D_CACHE_LOCK_S_STATE, + PNE_I7_L1D_CACHE_LOCK_E_STATE, + PNE_I7_L1D_CACHE_LOCK_M_STATE, + PNE_I7_L1D_ALL_REF_ANY, + PNE_I7_L1D_ALL_REF_CACHEABLE, + PNE_I7_L1D_PEND_MISS_LOAD_BUFFERS_FULL, + PNE_I7_DTLB_MISSES_ANY, + PNE_I7_DTLB_MISSES_WALK_COMPLETED, + PNE_I7_DTLB_MISSES_STLB_HIT, + PNE_I7_DTLB_MISSES_PDE_MISS, + PNE_I7_DTLB_MISSES_PDP_MISS, + PNE_I7_DTLB_MISSES_LARGE_WALK_COMPLETED, + PNE_I7_SSE_MEM_EXEC_NTA, + PNE_I7_SSE_MEM_EXEC_STREAMING_STORES, + PNE_I7_LOAD_HIT_PRE, + PNE_I7_SFENCE_CYCLES, + PNE_I7_L1D_PREFETCH_REQUESTS, + PNE_I7_L1D_PREFETCH_MISS, + PNE_I7_L1D_PREFETCH_TRIGGERS, + PNE_I7_EPT_EPDE_MISS, + PNE_I7_EPT_EPDPE_HIT, + PNE_I7_EPT_EPDPE_MISS, + PNE_I7_L1D_REPL, + PNE_I7_L1D_M_REPL, + PNE_I7_L1D_M_EVICT, + PNE_I7_L1D_M_SNOOP_EVICT, + PNE_I7_L1D_CACHE_PREFETCH_LOCK_FB_HIT, + PNE_I7_L1D_CACHE_LOCK_FB_HIT, + PNE_I7_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_READ_DATA, + PNE_I7_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_READ_CODE, + PNE_I7_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_RFO, + PNE_I7_OFFCORE_REQUESTS_OUTSTANDING_ANY_READ, + PNE_I7_CACHE_LOCK_CYCLES_L1D_L2, + PNE_I7_CACHE_LOCK_CYCLES_L1D, + PNE_I7_IO_TRANSACTIONS, + PNE_I7_L1I_HITS, + PNE_I7_L1I_MISSES, + PNE_I7_L1I_READS, + PNE_I7_L1I_CYCLES_STALLED, + PNE_I7_IFU_IVC_FULL, + PNE_I7_IFU_IVC_L1I_EVICTION, + PNE_I7_LARGE_ITLB_HIT, + PNE_I7_L1I_OPPORTUNISTIC_HITS, + PNE_I7_ITLB_MISSES_ANY, + PNE_I7_ITLB_MISSES_WALK_COMPLETED, + PNE_I7_ITLB_MISSES_WALK_CYCLES, + 
PNE_I7_ITLB_MISSES_STLB_HIT, + PNE_I7_ITLB_MISSES_PDE_MISS, + PNE_I7_ITLB_MISSES_PDP_MISS, + PNE_I7_ITLB_MISSES_LARGE_WALK_COMPLETED, + PNE_I7_ILD_STALL_ANY, + PNE_I7_ILD_STALL_IQ_FULL, + PNE_I7_ILD_STALL_LCP, + PNE_I7_ILD_STALL_MRU, + PNE_I7_ILD_STALL_REGEN, + PNE_I7_BR_INST_EXEC_ANY, + PNE_I7_BR_INST_EXEC_COND, + PNE_I7_BR_INST_EXEC_DIRECT, + PNE_I7_BR_INST_EXEC_DIRECT_NEAR_CALL, + PNE_I7_BR_INST_EXEC_INDIRECT_NEAR_CALL, + PNE_I7_BR_INST_EXEC_INDIRECT_NON_CALL, + PNE_I7_BR_INST_EXEC_NEAR_CALLS, + PNE_I7_BR_INST_EXEC_NON_CALLS, + PNE_I7_BR_INST_EXEC_RETURN_NEAR, + PNE_I7_BR_INST_EXEC_TAKEN, + PNE_I7_BR_MISP_EXEC_COND, + PNE_I7_BR_MISP_EXEC_DIRECT, + PNE_I7_BR_MISP_EXEC_INDIRECT_NON_CALL, + PNE_I7_BR_MISP_EXEC_NON_CALLS, + PNE_I7_BR_MISP_EXEC_RETURN_NEAR, + PNE_I7_BR_MISP_EXEC_DIRECT_NEAR_CALL, + PNE_I7_BR_MISP_EXEC_INDIRECT_NEAR_CALL, + PNE_I7_BR_MISP_EXEC_NEAR_CALLS, + PNE_I7_BR_MISP_EXEC_TAKEN, + PNE_I7_BR_MISP_EXEC_ANY, + PNE_I7_RESOURCE_STALLS_ANY, + PNE_I7_RESOURCE_STALLS_LOAD, + PNE_I7_RESOURCE_STALLS_RS_FULL, + PNE_I7_RESOURCE_STALLS_STORE, + PNE_I7_RESOURCE_STALLS_ROB_FULL, + PNE_I7_RESOURCE_STALLS_FPCW, + PNE_I7_RESOURCE_STALLS_MXCSR, + PNE_I7_RESOURCE_STALLS_OTHER, + PNE_I7_MACRO_INSTS_FUSIONS_DECODED, + PNE_I7_BACLEAR_FORCE_IQ, + PNE_I7_LSD_UOPS, + PNE_I7_ITLB_FLUSH, + PNE_I7_OFFCORE_REQUESTS_DEMAND_READ_DATA, + PNE_I7_OFFCORE_REQUESTS_DEMAND_READ_CODE, + PNE_I7_OFFCORE_REQUESTS_DEMAND_RFO, + PNE_I7_OFFCORE_REQUESTS_ANY_READ, + PNE_I7_OFFCORE_REQUESTS_ANY_RFO, + PNE_I7_OFFCORE_REQUESTS_UNCACHED_MEM, + PNE_I7_OFFCORE_REQUESTS_L1D_WRITEBACK, + PNE_I7_OFFCORE_REQUESTS_ANY, + PNE_I7_UOPS_EXECUTED_PORT0, + PNE_I7_UOPS_EXECUTED_PORT1, + PNE_I7_UOPS_EXECUTED_PORT2_CORE, + PNE_I7_UOPS_EXECUTED_PORT3_CORE, + PNE_I7_UOPS_EXECUTED_PORT4_CORE, + PNE_I7_UOPS_EXECUTED_PORT5, + PNE_I7_UOPS_EXECUTED_CORE_ACTIVE_CYCLES, + PNE_I7_UOPS_EXECUTED_PORT015, + PNE_I7_UOPS_EXECUTED_PORT234, + PNE_I7_OFFCORE_REQUESTS_SQ_FULL, + PNE_I7_SNOOPQ_REQUESTS_OUTSTANDING_DATA, + 
PNE_I7_SNOOPQ_REQUESTS_OUTSTANDING_INVALIDATE, + PNE_I7_SNOOPQ_REQUESTS_OUTSTANDING_CODE, + PNE_I7_OFF_CORE_RESPONSE_0, + PNE_I7_SNOOP_RESPONSE_HIT, + PNE_I7_SNOOP_RESPONSE_HITE, + PNE_I7_SNOOP_RESPONSE_HITM, + PNE_I7_PIC_ACCESSES_TPR_READS, + PNE_I7_PIC_ACCESSES_TPR_WRITES, + PNE_I7_INST_RETIRED_ANY_P, + PNE_I7_INST_RETIRED_X87, + PNE_I7_UOPS_RETIRED_ANY, + PNE_I7_UOPS_RETIRED_RETIRE_SLOTS, + PNE_I7_UOPS_RETIRED_MACRO_FUSED, + PNE_I7_MACHINE_CLEARS_CYCLES, + PNE_I7_MACHINE_CLEARS_MEM_ORDER, + PNE_I7_MACHINE_CLEARS_SMC, + PNE_I7_MACHINE_CLEARS_FUSION_ASSIST, + PNE_I7_BR_INST_RETIRED_ALL_BRANCHES, + PNE_I7_BR_INST_RETIRED_CONDITIONAL, + PNE_I7_BR_INST_RETIRED_NEAR_CALL, + PNE_I7_BR_MISP_RETIRED_ALL_BRANCHES, + PNE_I7_BR_MISP_RETIRED_NEAR_CALL, + PNE_I7_SSEX_UOPS_RETIRED_PACKED_SINGLE, + PNE_I7_SSEX_UOPS_RETIRED_SCALAR_SINGLE, + PNE_I7_SSEX_UOPS_RETIRED_PACKED_DOUBLE, + PNE_I7_SSEX_UOPS_RETIRED_SCALAR_DOUBLE, + PNE_I7_SSEX_UOPS_RETIRED_VECTOR_INTEGER, + PNE_I7_ITLB_MISS_RETIRED, + PNE_I7_MEM_LOAD_RETIRED_L1D_HIT, + PNE_I7_MEM_LOAD_RETIRED_L2_HIT, + PNE_I7_MEM_LOAD_RETIRED_OTHER_CORE_L2_HIT_HITM, + PNE_I7_MEM_LOAD_RETIRED_HIT_LFB, + PNE_I7_MEM_LOAD_RETIRED_DTLB_MISS, + PNE_I7_MEM_LOAD_RETIRED_L3_MISS, + PNE_I7_MEM_LOAD_RETIRED_L3_UNSHARED_HIT, + PNE_I7_FP_MMX_TRANS_TO_FP, + PNE_I7_FP_MMX_TRANS_TO_MMX, + PNE_I7_FP_MMX_TRANS_ANY, + PNE_I7_MACRO_INSTS_DECODED, + PNE_I7_UOPS_DECODED_MS, + PNE_I7_UOPS_DECODED_ESP_FOLDING, + PNE_I7_UOPS_DECODED_ESP_SYNC, + PNE_I7_RAT_STALLS_FLAGS, + PNE_I7_RAT_STALLS_REGISTERS, + PNE_I7_RAT_STALLS_ROB_READ_PORT, + PNE_I7_RAT_STALLS_SCOREBOARD, + PNE_I7_RAT_STALLS_ANY, + PNE_I7_SEG_RENAME_STALLS, + PNE_I7_ES_REG_RENAMES, + PNE_I7_UOP_UNFUSION, + PNE_I7_BR_INST_DECODED, + PNE_I7_BOGUS_BR, + PNE_I7_BPU_MISSED_CALL_RET, + PNE_I7_L2_HW_PREFETCH_DATA_TRIGGER, + PNE_I7_L2_HW_PREFETCH_CODE_TRIGGER, + PNE_I7_L2_HW_PREFETCH_DCA_TRIGGER, + PNE_I7_L2_HW_PREFETCH_KICK_START, + PNE_I7_SQ_MISC_PROMOTION, + PNE_I7_SQ_MISC_PROMOTION_POST_GO, + 
PNE_I7_SQ_MISC_LRU_HINTS, + PNE_I7_SQ_MISC_FILL_DROPPED, + PNE_I7_SQ_MISC_SPLIT_LOCK, + PNE_I7_SQ_FULL_STALL_CYCLES, + PNE_I7_FP_ASSIST_ALL, + PNE_I7_FP_ASSIST_OUTPUT, + PNE_I7_FP_ASSIST_INPUT, + PNE_I7_SEGMENT_REG_LOADS, + PNE_I7_SIMD_INT_64_PACKED_MPY, + PNE_I7_SIMD_INT_64_PACKED_SHIFT, + PNE_I7_SIMD_INT_64_PACK, + PNE_I7_SIMD_INT_64_UNPACK, + PNE_I7_SIMD_INT_64_PACKED_LOGICAL, + PNE_I7_SIMD_INT_64_PACKED_ARITH, + PNE_I7_SIMD_INT_64_SHUFFLE_MOVE, + PNE_I7_INSTR_RETIRED_ANY, + PNE_I7_CPU_CLK_UNHALTED_CORE, + PNE_I7_CPU_CLK_UNHALTED_REF, + PNE_I7_GQ_CYCLES_FULL_READ_TRACKER, + PNE_I7_GQ_CYCLES_FULL_WRITE_TRACKER, + PNE_I7_GQ_CYCLES_FULL_PEER_PROBE_TRACKER, + PNE_I7_GQ_CYCLES_NOT_EMPTY_READ_TRACKER, + PNE_I7_GQ_CYCLES_NOT_EMPTY_WRITE_TRACKER, + PNE_I7_GQ_CYCLES_NOT_EMPTY_PEER_PROBE_TRACKER, + PNE_I7_GQ_ALLOC_READ_TRACKER, + PNE_I7_GQ_ALLOC_RT_L3_MISS, + PNE_I7_GQ_ALLOC_RT_TO_L3_RESP, + PNE_I7_GQ_ALLOC_RT_TO_RTID_ACQUIRED, + PNE_I7_GQ_ALLOC_WT_TO_RTID_ACQUIRED, + PNE_I7_GQ_ALLOC_WRITE_TRACKER, + PNE_I7_GQ_ALLOC_PEER_PROBE_TRACKER, + PNE_I7_GQ_DATA_FROM_QPI, + PNE_I7_GQ_DATA_FROM_QMC, + PNE_I7_GQ_DATA_FROM_L3, + PNE_I7_GQ_DATA_FROM_CORES_02, + PNE_I7_GQ_DATA_FROM_CORES_13, + PNE_I7_GQ_DATA_TO_QPI_QMC, + PNE_I7_GQ_DATA_TO_L3, + PNE_I7_GQ_DATA_TO_CORES, + PNE_I7_SNP_RESP_TO_LOCAL_HOME_I_STATE, + PNE_I7_SNP_RESP_TO_LOCAL_HOME_S_STATE, + PNE_I7_SNP_RESP_TO_LOCAL_HOME_FWD_S_STATE, + PNE_I7_SNP_RESP_TO_LOCAL_HOME_FWD_I_STATE, + PNE_I7_SNP_RESP_TO_LOCAL_HOME_CONFLICT, + PNE_I7_SNP_RESP_TO_LOCAL_HOME_WB, + PNE_I7_SNP_RESP_TO_REMOTE_HOME_I_STATE, + PNE_I7_SNP_RESP_TO_REMOTE_HOME_S_STATE, + PNE_I7_SNP_RESP_TO_REMOTE_HOME_FWD_S_STATE, + PNE_I7_SNP_RESP_TO_REMOTE_HOME_FWD_I_STATE, + PNE_I7_SNP_RESP_TO_REMOTE_HOME_CONFLICT, + PNE_I7_SNP_RESP_TO_REMOTE_HOME_WB, + PNE_I7_SNP_RESP_TO_REMOTE_HOME_HITM, + PNE_I7_L3_HITS_READ, + PNE_I7_L3_HITS_WRITE, + PNE_I7_L3_HITS_PROBE, + PNE_I7_L3_HITS_ANY, + PNE_I7_L3_MISS_READ, + PNE_I7_L3_MISS_WRITE, + PNE_I7_L3_MISS_PROBE, + PNE_I7_L3_MISS_ANY, 
+ PNE_I7_L3_LINES_IN_M_STATE, + PNE_I7_L3_LINES_IN_E_STATE, + PNE_I7_L3_LINES_IN_S_STATE, + PNE_I7_L3_LINES_IN_F_STATE, + PNE_I7_L3_LINES_IN_ANY, + PNE_I7_L3_LINES_OUT_M_STATE, + PNE_I7_L3_LINES_OUT_E_STATE, + PNE_I7_L3_LINES_OUT_S_STATE, + PNE_I7_L3_LINES_OUT_I_STATE, + PNE_I7_L3_LINES_OUT_F_STATE, + PNE_I7_L3_LINES_OUT_ANY, + PNE_I7_QHL_REQUESTS_IOH_READS, + PNE_I7_QHL_REQUESTS_IOH_WRITES, + PNE_I7_QHL_REQUESTS_REMOTE_READS, + PNE_I7_QHL_REQUESTS_REMOTE_WRITES, + PNE_I7_QHL_REQUESTS_LOCAL_READS, + PNE_I7_QHL_REQUESTS_LOCAL_WRITES, + PNE_I7_QHL_CYCLES_FULL_IOH, + PNE_I7_QHL_CYCLES_FULL_REMOTE, + PNE_I7_QHL_CYCLES_FULL_LOCAL, + PNE_I7_QHL_CYCLES_NOT_EMPTY_IOH, + PNE_I7_QHL_CYCLES_NOT_EMPTY_REMOTE, + PNE_I7_QHL_CYCLES_NOT_EMPTY_LOCAL, + PNE_I7_QHL_OCCUPANCY_IOH, + PNE_I7_QHL_OCCUPANCY_REMOTE, + PNE_I7_QHL_OCCUPANCY_LOCAL, + PNE_I7_QHL_ADDRESS_CONFLICTS_2WAY, + PNE_I7_QHL_ADDRESS_CONFLICTS_3WAY, + PNE_I7_QHL_CONFLICT_CYCLES_IOH, + PNE_I7_QHL_CONFLICT_CYCLES_REMOTE, + PNE_I7_QHL_CONFLICT_CYCLES_LOCAL, + PNE_I7_QHL_TO_QMC_BYPASS, + PNE_I7_QMC_NORMAL_FULL_READ_CH0, + PNE_I7_QMC_NORMAL_FULL_READ_CH1, + PNE_I7_QMC_NORMAL_FULL_READ_CH2, + PNE_I7_QMC_NORMAL_FULL_WRITE_CH0, + PNE_I7_QMC_NORMAL_FULL_WRITE_CH1, + PNE_I7_QMC_NORMAL_FULL_WRITE_CH2, + PNE_I7_QMC_ISOC_FULL_READ_CH0, + PNE_I7_QMC_ISOC_FULL_READ_CH1, + PNE_I7_QMC_ISOC_FULL_READ_CH2, + PNE_I7_QMC_ISOC_FULL_WRITE_CH0, + PNE_I7_QMC_ISOC_FULL_WRITE_CH1, + PNE_I7_QMC_ISOC_FULL_WRITE_CH2, + PNE_I7_QMC_BUSY_READ_CH0, + PNE_I7_QMC_BUSY_READ_CH1, + PNE_I7_QMC_BUSY_READ_CH2, + PNE_I7_QMC_BUSY_WRITE_CH0, + PNE_I7_QMC_BUSY_WRITE_CH1, + PNE_I7_QMC_BUSY_WRITE_CH2, + PNE_I7_QMC_OCCUPANCY_CH0, + PNE_I7_QMC_OCCUPANCY_CH1, + PNE_I7_QMC_OCCUPANCY_CH2, + PNE_I7_QMC_ISSOC_OCCUPANCY_CH0, + PNE_I7_QMC_ISSOC_OCCUPANCY_CH1, + PNE_I7_QMC_ISSOC_OCCUPANCY_CH2, + PNE_I7_QMC_ISSOC_READS_ANY, + PNE_I7_QMC_NORMAL_READS_CH0, + PNE_I7_QMC_NORMAL_READS_CH1, + PNE_I7_QMC_NORMAL_READS_CH2, + PNE_I7_QMC_NORMAL_READS_ANY, + 
PNE_I7_QMC_HIGH_PRIORITY_READS_CH0, + PNE_I7_QMC_HIGH_PRIORITY_READS_CH1, + PNE_I7_QMC_HIGH_PRIORITY_READS_CH2, + PNE_I7_QMC_HIGH_PRIORITY_READS_ANY, + PNE_I7_QMC_CRITICAL_PRIORITY_READS_CH0, + PNE_I7_QMC_CRITICAL_PRIORITY_READS_CH1, + PNE_I7_QMC_CRITICAL_PRIORITY_READS_CH2, + PNE_I7_QMC_CRITICAL_PRIORITY_READS_ANY, + PNE_I7_QMC_WRITES_FULL_CH0, + PNE_I7_QMC_WRITES_FULL_CH1, + PNE_I7_QMC_WRITES_FULL_CH2, + PNE_I7_QMC_WRITES_FULL_ANY, + PNE_I7_QMC_WRITES_PARTIAL_CH0, + PNE_I7_QMC_WRITES_PARTIAL_CH1, + PNE_I7_QMC_WRITES_PARTIAL_CH2, + PNE_I7_QMC_WRITES_PARTIAL_ANY, + PNE_I7_QMC_CANCEL_CH0, + PNE_I7_QMC_CANCEL_CH1, + PNE_I7_QMC_CANCEL_CH2, + PNE_I7_QMC_CANCEL_ANY, + PNE_I7_QMC_PRIORITY_UPDATES_CH0, + PNE_I7_QMC_PRIORITY_UPDATES_CH1, + PNE_I7_QMC_PRIORITY_UPDATES_CH2, + PNE_I7_QMC_PRIORITY_UPDATES_ANY, + PNE_I7_QHL_FRC_ACK_CNFLTS_LOCAL, + PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_HOME_LINK_0, + PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_SNOOP_LINK_0, + PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_NDR_LINK_0, + PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_HOME_LINK_1, + PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_SNOOP_LINK_1, + PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_NDR_LINK_1, + PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_LINK_0, + PNE_I7_QPI_TX_STALLED_SINGLE_FLIT_LINK_1, + PNE_I7_QPI_TX_STALLED_MULTI_FLIT_DRS_LINK_0, + PNE_I7_QPI_TX_STALLED_MULTI_FLIT_NCB_LINK_0, + PNE_I7_QPI_TX_STALLED_MULTI_FLIT_NCS_LINK_0, + PNE_I7_QPI_TX_STALLED_MULTI_FLIT_DRS_LINK_1, + PNE_I7_QPI_TX_STALLED_MULTI_FLIT_NCB_LINK_1, + PNE_I7_QPI_TX_STALLED_MULTI_FLIT_NCS_LINK_1, + PNE_I7_QPI_TX_STALLED_MULTI_FLIT_LINK_0, + PNE_I7_QPI_TX_STALLED_MULTI_FLIT_LINK_1, + PNE_I7_QPI_TX_HEADER_BUSY_LINK_0, + PNE_I7_QPI_TX_HEADER_BUSY_LINK_1, + PNE_I7_QPI_RX_NO_PPT_CREDIT_STALLS_LINK_0, + PNE_I7_QPI_RX_NO_PPT_CREDIT_STALLS_LINK_1, + PNE_I7_DRAM_OPEN_CH0, + PNE_I7_DRAM_OPEN_CH1, + PNE_I7_DRAM_OPEN_CH2, + PNE_I7_DRAM_PAGE_CLOSE_CH0, + PNE_I7_DRAM_PAGE_CLOSE_CH1, + PNE_I7_DRAM_PAGE_CLOSE_CH2, + PNE_I7_DRAM_PAGE_MISS_CH0, + PNE_I7_DRAM_PAGE_MISS_CH1, + 
PNE_I7_DRAM_PAGE_MISS_CH2, + PNE_I7_DRAM_READ_CAS_CH0, + PNE_I7_DRAM_READ_CAS_AUTOPRE_CH0, + PNE_I7_DRAM_READ_CAS_CH1, + PNE_I7_DRAM_READ_CAS_AUTOPRE_CH1, + PNE_I7_DRAM_READ_CAS_CH2, + PNE_I7_DRAM_READ_CAS_AUTOPRE_CH2, + PNE_I7_DRAM_WRITE_CAS_CH0, + PNE_I7_DRAM_WRITE_CAS_AUTOPRE_CH0, + PNE_I7_DRAM_WRITE_CAS_CH1, + PNE_I7_DRAM_WRITE_CAS_AUTOPRE_CH1, + PNE_I7_DRAM_WRITE_CAS_CH2, + PNE_I7_DRAM_WRITE_CAS_AUTOPRE_CH2, + PNE_I7_DRAM_REFRESH_CH0, + PNE_I7_DRAM_REFRESH_CH1, + PNE_I7_DRAM_REFRESH_CH2, + PNE_I7_DRAM_PRE_ALL_CH0, + PNE_I7_DRAM_PRE_ALL_CH1, + PNE_I7_DRAM_PRE_ALL_CH2, + PNE_I7_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t i7Processor_info[]; +extern hwi_search_t i7Processor_map[]; + +#endif diff --git a/src/freebsd/map-k7.c b/src/freebsd/map-k7.c new file mode 100644 index 0000000..34b2ea7 --- /dev/null +++ b/src/freebsd/map-k7.c @@ -0,0 +1,61 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-k7.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + + +/**************************************************************************** + K7 SUBSTRATE + K7 SUBSTRATE + K7 SUBSTRATE (aka Athlon) + K7 SUBSTRATE + K7 SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_K7Processor must match K7Processor_info +*/ + +Native_Event_LabelDescription_t K7Processor_info[] = +{ + { "k7-dc-accesses", "Count data cache accesses." }, + { "k7-dc-misses", "Count data cache misses." }, + { "k7-dc-refills-from-l2", "Count data cache refills from L2 cache." }, + { "k7-dc-refills-from-system", "Count data cache refills from system memory." }, + { "k7-dc-writebacks", "Count data cache writebacks." }, + { "k7-l1-dtlb-miss-and-l2-dtlb-hits", "Count L1 DTLB misses and L2 DTLB hits." }, + { "k7-l1-and-l2-dtlb-misses", "Count L1 and L2 DTLB misses." 
}, + { "k7-misaligned-references", "Count misaligned data references." }, + { "k7-ic-fetches", "Count instruction cache fetches." }, + { "k7-ic-misses", "Count instruction cache misses." }, + { "k7-l1-itlb-misses", "Count L1 ITLB misses that are L2 ITLB hits." }, + { "k7-l1-l2-itlb-misses", "Count L1 (and L2) ITLB misses." }, + { "k7-retired-instructions", "Count all retired instructions." }, + { "k7-retired-ops", "Count retired ops." }, + { "k7-retired-branches", "Count all retired branches (conditional, unconditional, exceptions and interrupts)."}, + { "k7-retired-branches-mispredicted", "Count all misprediced retired branches." }, + { "k7-retired-taken-branches", "Count retired taken branches." }, + { "k7-retired-taken-branches-mispredicted", "Count mispredicted taken branches that were retired." }, + { "k7-retired-far-control-transfers", "Count retired far control transfers." }, + { "k7-retired-resync-branches", "Count retired resync branches (non control transfer branches)." }, + { "k7-interrupts-masked-cycles", "Count the number of cycles when the processor's IF flag was zero." }, + { "k7-interrupts-masked-while-pending-cycles", "Count the number of cycles interrupts were masked while pending due to the processor's IF flag being zero." }, + { "k7-hardware-interrupts", "Count the number of taken hardware interrupts." }, + /* Nearly special counters */ + { "k7-dc-refills-from-l2,unitmask=+m", "Count data cache refills from L2 cache (in M state)." }, + { "k7-dc-refills-from-l2,unitmask=+oes", "Count data cache refills from L2 cache (in OES state)." }, + { "k7-dc-refills-from-system,unitmask=+m", "Count data cache refills from system memory (in M state)." }, + { "k7-dc-refills-from-system,unitmask=+oes", "Count data cache refills from system memory (in OES state)." 
}, + { NULL, NULL } +}; + + diff --git a/src/freebsd/map-k7.h b/src/freebsd/map-k7.h new file mode 100644 index 0000000..cae707b --- /dev/null +++ b/src/freebsd/map-k7.h @@ -0,0 +1,51 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-k7.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_K7 +#define FreeBSD_MAP_K7 + + +enum NativeEvent_Value_K7Processor { + PNE_K7_DC_ACCESSES = PAPI_NATIVE_MASK, + PNE_K7_DC_MISSES, + PNE_K7_DC_REFILLS_FROM_L2, + PNE_K7_DC_REFILLS_FROM_SYSTEM, + PNE_K7_DC_WRITEBACKS, + PNE_K7_L1_DTLB_MISS_AND_L2_DTLB_HITS, + PNE_K7_L1_AND_L2_DTLB_MISSES, + PNE_K7_MISALIGNED_REFERENCES, + PNE_K7_IC_FETCHES, + PNE_K7_IC_MISSES, + PNE_K7_L1_ITLB_MISSES, + PNE_K7_L1_AND_L2_ITLB_MISSES, + PNE_K7_RETIRED_INSTRUCTIONS, + PNE_K7_RETIRED_OPS, + PNE_K7_RETIRED_BRANCHES, + PNE_K7_RETIRED_BRANCHES_MISPREDICTED, + PNE_K7_RETIRED_TAKEN_BRANCHES, + PNE_K7_RETIRED_TAKEN_BRANCHES_MISPREDICTED, + PNE_K7_RETIRED_FAR_CONTROL_TRANSFERS, + PNE_K7_RETIRED_RESYNC_BRANCHES, + PNE_K7_INTERRUPTS_MASKED_CYCLES, + PNE_K7_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, + PNE_K7_HARDWARE_INTERRUPTS, + /* Nearly special counters */ + PNE_K7_DC_REFILLS_FROM_L2_M, + PNE_K7_DC_REFILLS_FROM_L2_OES, + PNE_K7_DC_REFILLS_FROM_SYSTEM_M, + PNE_K7_DC_REFILLS_FROM_SYSTEM_OES, + PNE_K7_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t K7Processor_info[]; +extern hwi_search_t K7Processor_map[]; + +#endif diff --git a/src/freebsd/map-k8.c b/src/freebsd/map-k8.c new file mode 100644 index 0000000..46f305a --- /dev/null +++ b/src/freebsd/map-k8.c @@ -0,0 +1,121 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-k8.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + + +/**************************************************************************** + K8 
SUBSTRATE + K8 SUBSTRATE + K8 SUBSTRATE (aka Athlon64) + K8 SUBSTRATE + K8 SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_K8Processor must match K8Processor_info +*/ + +Native_Event_LabelDescription_t K8Processor_info[] = +{ + { "k8-bu-cpu-clk-unhalted", "Count the number of clock cycles when the CPU is not in the HLT or STPCLCK states" }, + { "k8-bu-fill-request-l2-miss", "Count fill requests that missed in the L2 cache."}, + { "k8-bu-internal-l2-request", "Count internally generated requests to the L2 cache." }, + { "k8-dc-access", "Count data cache accesses including microcode scratchpad accesses."}, + { "k8-dc-copyback", "Count data cache copyback operations."}, + { "k8-dc-dcache-accesses-by-locks", "Count data cache accesses by lock instructions." }, + { "k8-dc-dispatched-prefetch-instructions", "Count the number of dispatched prefetch instructions." }, + { "k8-dc-l1-dtlb-miss-and-l2-dtlb-hit", "Count L1 DTLB misses that are L2 DTLB hits." }, + { "k8-dc-l1-dtlb-miss-and-l2-dtlb-miss", "Count L1 DTLB misses that are also misses in the L2 DTLB." }, + { "k8-dc-microarchitectural-early-cancel-of-an-access", "Count microarchitectural early cancels of data cache accesses." }, + { "k8-dc-microarchitectural-late-cancel-of-an-access", "Count microarchitectural late cancels of data cache accesses." }, + { "k8-dc-misaligned-data-reference", "Count misaligned data references." }, + { "k8-dc-miss", "Count data cache misses."}, + { "k8-dc-one-bit-ecc-error", "Count one bit ECC errors found by the scrubber." }, + { "k8-dc-refill-from-l2", "Count data cache refills from L2 cache." }, + { "k8-dc-refill-from-system", "Count data cache refills from system memory." }, + { "k8-fp-dispatched-fpu-ops", "Count the number of dispatched FPU ops." }, + { "k8-fp-cycles-with-no-fpu-ops-retired", "Count cycles when no FPU ops were retired." 
}, + { "k8-fp-dispatched-fpu-fast-flag-ops", "Count dispatched FPU ops that use the fast flag interface." }, + { "k8-fr-decoder-empty", "Count cycles when there was nothing to dispatch." }, + { "k8-fr-dispatch-stalls", "Count all dispatch stalls." }, + { "k8-fr-dispatch-stall-for-segment-load", "Count dispatch stalls for segment loads." }, + { "k8-fr-dispatch-stall-for-serialization", "Count dispatch stalls for serialization." }, + { "k8-fr-dispatch-stall-from-branch-abort-to-retire", "Count dispatch stalls from branch abort to retiral." }, + { "k8-fr-dispatch-stall-when-fpu-is-full", "Count dispatch stalls when the FPU is full." }, + { "k8-fr-dispatch-stall-when-ls-is-full", "Count dispatch stalls when the load/store unit is full." }, + { "k8-fr-dispatch-stall-when-reorder-buffer-is-full", "Count dispatch stalls when the reorder buffer is full." }, + { "k8-fr-dispatch-stall-when-reservation-stations-are-full", "Count dispatch stalls when reservation stations are full." }, + { "k8-fr-dispatch-stall-when-waiting-for-all-to-be-quiet", "Count dispatch stalls when waiting for all to be quiet." }, + { "k8-fr-dispatch-stall-when-waiting-far-xfer-or-resync-branch-pending", "Count dispatch stalls when a far control transfer or a resync branch is pending." }, + { "k8-fr-fpu-exceptions", "Count FPU exceptions." }, + { "k8-fr-interrupts-masked-cycles", "Count cycles when interrupts were masked." }, + { "k8-fr-interrupts-masked-while-pending-cycles", "Count cycles while interrupts were masked while pending" }, + { "k8-fr-number-of-breakpoints-for-dr0", "Count the number of breakpoints for DR0." }, + { "k8-fr-number-of-breakpoints-for-dr1", "Count the number of breakpoints for DR1." }, + { "k8-fr-number-of-breakpoints-for-dr2", "Count the number of breakpoints for DR2." }, + { "k8-fr-number-of-breakpoints-for-dr3", "Count the number of breakpoints for DR3." }, + { "k8-fr-retired-branches", "Count retired branches including exceptions and interrupts." 
}, + { "k8-fr-retired-branches-mispredicted", "Count mispredicted retired branches." }, + { "k8-fr-retired-far-control-transfers", "Count retired far control transfers" }, + { "k8-fr-retired-fastpath-double-op-instructions", "Count retired fastpath double op instructions." }, + { "k8-fr-retired-fpu-instructions", "Count retired FPU instructions." }, + { "k8-fr-retired-near-returns", "Count retired near returns." }, + { "k8-fr-retired-near-returns-mispredicted", "Count mispredicted near returns." }, + { "k8-fr-retired-resyncs", "Count retired resyncs" }, + { "k8-fr-retired-taken-hardware-interrupts", "Count retired taken hardware interrupts."}, + { "k8-fr-retired-taken-branches", "Count retired taken branches." }, + { "k8-fr-retired-taken-branches-mispredicted", "Count retired taken branches that were mispredicted." }, + { "k8-fr-retired-taken-branches-mispredicted-by-addr-miscompare", "Count retired taken branches that were mispredicted only due to an address miscompare." }, + { "k8-fr-retired-uops", "Count retired uops." }, + { "k8-fr-retired-x86-instructions", "Count retired x86 instructions including exceptions and interrupts"}, + { "k8-ic-fetch", "Count instruction cache fetches." }, + { "k8-ic-instruction-fetch-stall", "Count cycles in stalls due to instruction fetch." }, + { "k8-ic-l1-itlb-miss-and-l2-itlb-hit", "Count L1 ITLB misses that are L2 ITLB hits." }, + { "k8-ic-l1-itlb-miss-and-l2-itlb-miss", "Count ITLB misses that miss in both L1 and L2 ITLBs." }, + { "k8-ic-microarchitectural-resync-by-snoop", "Count microarchitectural resyncs caused by snoops." }, + { "k8-ic-miss", "Count instruction cache misses." }, + { "k8-ic-refill-from-l2", "Count instruction cache refills from L2 cache." }, + { "k8-ic-refill-from-system", "Count instruction cache refills from system memory." }, + { "k8-ic-return-stack-hits", "Count hits to the return stack." }, + { "k8-ic-return-stack-overflow", "Count overflows of the return stack." 
}, + { "k8-ls-buffer2-full", "Count load/store buffer2 full events." }, + { "k8-ls-locked-operation", "Count locked operations." }, + { "k8-ls-microarchitectural-late-cancel", "Count microarchitectural late cancels of operations in the load/store unit" }, + { "k8-ls-microarchitectural-resync-by-self-modifying-code", "Count microarchitectural resyncs caused by self-modifying code." }, + { "k8-ls-microarchitectural-resync-by-snoop", "Count microarchitectural resyncs caused by snoops." }, + { "k8-ls-retired-cflush-instructions", "Count retired CFLUSH instructions." }, + { "k8-ls-retired-cpuid-instructions", "Count retired CPUID instructions." }, + { "k8-ls-segment-register-load", "Count segment register loads." }, + { "k8-nb-memory-controller-bypass-saturation", "Count memory controller bypass counter saturation events." }, + { "k8-nb-memory-controller-dram-slots-missed", "Count memory controller DRAM command slots missed (in MemClks)." }, + { "k8-nb-memory-controller-page-access-event", "Count memory controller page access events." }, + { "k8-nb-memory-controller-page-table-overflow", "Count memory control page table overflow events." }, + { "k8-nb-probe-result", "Count probe events." }, + { "k8-nb-sized-commands", "Count sized commands issued." }, + { "k8-nb-memory-controller-turnaround", "Count memory control turnaround events." }, + { "k8-nb-ht-bus0-bandwidth", "Count events on the HyperTransport(tm) bus #0" }, + { "k8-nb-ht-bus1-bandwidth", "Count events on the HyperTransport(tm) bus #1" }, + { "k8-nb-ht-bus2-bandwidth", "Count events on the HyperTransport(tm) bus #2" }, + /* Special counters with some masks activated */ + { "k8-dc-refill-from-l2,mask=+modified,+owner,+exclusive,+shared", "Count data cache refills from L2 cache (in MOES state)." }, + { "k8-dc-refill-from-l2,mask=+owner,+exclusive,+shared", "Count data cache refills from L2 cache (in OES state)." }, + { "k8-dc-refill-from-l2,mask=+modified", "Count data cache refills from L2 cache (in M state)." 
}, + { "k8-dc-refill-from-system,mask=+modified,+owner,+exclusive,+shared", "Count data cache refills from system memory (in MOES state)." }, + { "k8-dc-refill-from-system,mask=+owner,+exclusive,+shared", "Count data cache refills from system memory (in OES state)." }, + { "k8-dc-refill-from-system,mask=+modified", "Count data cache refills from system memory (in M state)." }, + { "k8-fp-dispatched-fpu-ops,mask=+multiply-pipe-junk-ops", "Count the number of dispatched FPU multiplies." }, + { "k8-fp-dispatched-fpu-ops,mask=+add-pipe-junk-ops", "Count the number of dispatched FPU adds." }, + { "k8-fp-dispatched-fpu-ops,mask=+multiply-pipe-junk-ops,+add-pipe-junk-ops", "Count the number of dispatched FPU adds and multiplies." }, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-k8.h b/src/freebsd/map-k8.h new file mode 100644 index 0000000..2a49928 --- /dev/null +++ b/src/freebsd/map-k8.h @@ -0,0 +1,111 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-k8.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_K8 +#define FreeBSD_MAP_K8 + +enum NativeEvent_Value_K8Processor { + PNE_K8_BU_CPU_CLK_UNHALTED = PAPI_NATIVE_MASK, + PNE_K8_BU_FILL_REQUEST_L2_MISS, + PNE_K8_BU_INTERNAL_L2_REQUEST, + PNE_K8_DC_ACCESS, + PNE_K8_DC_COPYBACK, + PNE_K8_DC_DCACHE_ACCESSES_BY_LOCKS, + PNE_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS, + PNE_K8_DC_L1_DTLB_MISS_AND_L2_DTLB_HIT, + PNE_K8_DC_L1_DTLB_MISS_AND_L2_DTLB_MISS, + PNE_K8_DC_MICROARCHITECTURAL_EARLY_CANCEL_OF_AN_ACCESS, + PNE_K8_DC_MICROARCHITECTURAL_LATE_CANCEL_OF_AN_ACCESS, + PNE_K8_DC_MISALIGNED_DATA_REFERENCE, + PNE_K8_DC_MISS, + PNE_K8_DC_ONE_BIT_ECC_ERROR, + PNE_K8_DC_REFILL_FROM_L2, + PNE_K8_DC_REFILL_FROM_SYSTEM, + PNE_K8_FP_DISPATCHED_FPU_OPS, + PNE_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED, + PNE_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS, + PNE_K8_FR_DECODER_EMPTY, + PNE_K8_FR_DISPATCH_STALLS, + 
PNE_K8_FR_DISPATCH_STALL_FOR_SEGMENT_LOAD, + PNE_K8_FR_DISPATCH_STALL_FOR_SERIALIZATION, + PNE_K8_FR_DISPATCH_STALL_FOR_BRANCH_ABORT_TO_RETIRE, + PNE_K8_FR_DISPATCH_STALL_WHEN_FPU_IS_FULL, + PNE_K8_FR_DISPATCH_STALL_WHEN_LS_IS_FULL, + PNE_K8_FR_DISPATCH_STALL_WHEN_REORDER_BUFFER_IS_FULL, + PNE_K8_FR_DISPATCH_STALL_WHEN_RESERVATION_STATIONS_ARE_FULL, + PNE_K8_FR_DISPATCH_STALL_WHEN_WAITING_FOR_ALL_TO_BE_QUIET, + PNE_K8_FR_DISPATCH_STALL_WHEN_WAITING_FAR_XFER_OR_RESYNC_BRANCH_PENDING, + PNE_K8_FR_FPU_EXCEPTIONS, + PNE_K8_FR_INTERRUPTS_MASKED_CYCLES, + PNE_K8_FR_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, + PNE_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR0, + PNE_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR1, + PNE_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR2, + PNE_K8_FR_NUMBER_OF_BREAKPOINTS_FOR_DR3, + PNE_K8_FR_RETIRED_BRANCHES, + PNE_K8_FR_RETIRED_BRANCHES_MISPREDICTED, + PNE_K8_FR_RETIRED_FAR_CONTROL_TRANSFERS, + PNE_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS, + PNE_K8_FR_RETIRED_FPU_INSTRUCTIONS, + PNE_K8_FR_RETIRED_NEAR_RETURNS, + PNE_K8_FR_RETIRED_NEAR_RETURNS_MISPREDICTED, + PNE_K8_FR_RETIRED_RESYNCS, + PNE_K8_FR_RETIRED_TAKEN_HARDWARE_INTERRUPTS, + PNE_K8_FR_RETIRED_TAKEN_BRANCHES, + PNE_K8_FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED, + PNE_K8_FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED_BY_ADDR_MISCOMPARE, + PNE_K8_FR_RETIRED_UOPS, + PNE_K8_FR_RETIRED_X86_INSTRUCTIONS, + PNE_K8_IC_FETCH, + PNE_K8_IC_INSTRUCTION_FETCH_STALL, + PNE_K8_IC_L1_ITLB_MISS_AND_L2_ITLB_HIT, + PNE_K8_IC_L1_ITLB_MISS_AND_L2_ITLB_MISS, + PNE_K8_IC_MICROARCHITECTURAL_RESYNC_BY_SNOOP, + PNE_K8_IC_MISS, + PNE_K8_IC_REFILL_FROM_L2, + PNE_K8_IC_REFILL_FROM_SYSTEM, + PNE_K8_RETURN_STACK_HITS, + PNE_K8_RETURN_STACK_OVERFLOW, + PNE_K8_LS_BUFFER2_FULL, + PNE_K8_LS_LOCKED_OPERATION, + PNE_K8_LS_MICROARCHITECTURAL_LATE_CANCEL, + PNE_K8_LS_MICROARCHITECTURAL_RESYNC_BY_SELF_MODIFYING_CODE, + PNE_K8_LS_MICROARCHITECTURAL_RESYNc_BY_SNOOP, + PNE_K8_LS_RETIRED_CFLUSH_INSTRUCTIONS, + PNE_K8_LS_RETIRED_CPUID_INSTRUCTIONS, + 
PNE_K8_LS_SEGMENT_REGISTER_LOAD, + PNE_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION, + PNE_K8_NB_MEMORY_CONTROLLER_DRAM_SLOTS_MISSED, + PNE_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT, + PNE_K8_NB_MEMORY_CONTROLLER_PAGE_TABLE_OVERFLOW, + PNE_K8_NB_PROBE_RESULT, + PNE_K8_NB_SIZED_COMMANDS, + PNE_K8_NB_MEMORY_CONTROLLER_TURNAROUND, + PNE_K8_NB_HT_BUS0_BANDWIDTH, + PNE_K8_NB_HT_BUS1_BANDWIDTH, + PNE_K8_NB_HT_BUS2_BANDWIDTH, + /* Special counters */ + PNE_K8_DC_REFILL_FROM_L2_MOES, + PNE_K8_DC_REFILL_FROM_L2_OES, + PNE_K8_DC_REFILL_FROM_L2_M, + PNE_K8_DC_REFILL_FROM_SYSTEM_MOES, + PNE_K8_DC_REFILL_FROM_SYSTEM_OES, + PNE_K8_DC_REFILL_FROM_SYSTEM_M, + PNE_K8_FP_DISPATCHED_FPU_MULS, + PNE_K8_FP_DISPATCHED_FPU_ADDS, + PNE_K8_FP_DISPATCHED_FPU_ADDS_AND_MULS, + PNE_K8_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t K8Processor_info[]; +extern hwi_search_t K8Processor_map[]; + +#endif diff --git a/src/freebsd/map-p4.c b/src/freebsd/map-p4.c new file mode 100644 index 0000000..1ad0fe3 --- /dev/null +++ b/src/freebsd/map-p4.c @@ -0,0 +1,92 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p4.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + + +/**************************************************************************** + P4 SUBSTRATE + P4 SUBSTRATE + P4 SUBSTRATE (aka Pentium IV) + P4 SUBSTRATE + P4 SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_P4Processor must match P4Processor_info +*/ + +Native_Event_LabelDescription_t P4Processor_info[] = +{ + { "p4-128bit-mmx-uop", "Count integer SIMD SSE2 instructions that operate on 128 bit SIMD operands." }, + { "p4-64bit-mmx-uop", "Count MMX instructions that operate on 64 bit SIMD operands." }, + { "p4-b2b-cycles", "Count back-to-back bys cycles." }, + { "p4-bnr", "Count bus-not-ready conditions." 
}, + { "p4-bpu-fetch-request", "Count instruction fetch requests." }, + { "p4-branch-retired", "Counts retired branches." }, + { "p4-bsq-active-entries", "Count the number of entries (clipped at 15) currently active in the BSQ." }, + { "p4-bsq-allocation", "Count allocations in the bus sequence unit." }, + { "p4-bsq-cache-reference", "Count cache references as seen by the bus unit." }, + { "p4-execution-event", "Count the retirement uops through the execution mechanism." }, + { "p4-front-end-event", "Count the retirement uops through the frontend mechanism." }, + { "p4-fsb-data-activity", "Count each DBSY or DRDY event." }, + { "p4-global-power-events", "Count cycles during which the processor is not stopped." }, + { "p4-instr-retired", "Count all kind of instructions retired during a clock cycle." }, + { "p4-ioq-active-entries", "Count the number of entries (clipped at 15) in the IOQ that are active." }, + { "p4-ioq-allocation", "Count various types of transactions on the bus." }, + { "p4-itlb-reference", "Count translations using the intruction translation look-aside buffer." }, + { "p4-load-port-replay", "Count replayed events at the load port." }, + { "p4-mispred-branch-retired", "Count mispredicted IA-32 branch instructions." }, + { "p4-machine-clear", "Count the number of pipeline clears seen by the processor." }, + { "p4-memory-cancel", " Count the cancelling of various kinds of requests in the data cache address control unit of the CPU." }, + { "p4-memory-complete", "Count the completion of load split, store split, uncacheable split and uncacheable load operations." }, + { "p4-mob-load-replay", "Count load replays triggered by the memory order buffer." }, + { "p4-packed-dp-uop", "Count packed double-precision uops." }, + { "p4-packed-sp-uop", "Count packed single-precision uops." }, + { "p4-page-walk-type", "Count page walks performed by the page miss handler." 
}, + { "p4-replay-event", "Count the retirement of tagged uops" }, + { "p4-resource-stall", "Count the occurrence or latency of stalls in the allocator." }, + { "p4-response", "Count different types of responses." }, + { "p4-retired-branch-type", "Count branches retired." }, + { "p4-retired-mispred-branch-type", "Count mispredicted branches retired." }, + { "p4-scalar-dp-uop", "Count the number of scalar double-precision uops." }, + { "p4-scalar-sp-uop", "Count the number of scalar single-precision uops." }, + { "p4-snoop", "Count snoop traffic." }, + { "p4-sse-input-assist", "Count the number of times an assist is required to handle problems with the operands for SSE and SSE2 operations." }, + { "p4-store-port-replay", "Count events replayed at the store port." }, + { "p4-tc-deliver-mode", "Count the duration in cycles of operating modes of the trace cache and decode engine." }, + { "p4-tc-ms-xfer", "Count the number of times uop delivery changed from the trace cache to MS ROM." }, + { "p4-uop-queue-writes", "Count the number of valid uops written to the uop queue." }, + { "p4-uop-type", "This event is used in conjunction with the front-end at-retirement mechanism to tag load and store uops." }, + { "p4-uops-retired", "Count uops retired during a clock cycle." }, + { "p4-wc-buffer", "Count write-combining buffer operations." }, + { "p4-x87-assist", "Count the retirement of x87 instructions that required special handling." }, + { "p4-x87-fp-uop", "Count x87 floating-point uops." }, + { "p4-x87-simd-moves-uop", "Count each x87 FPU, MMX, SSE, or SSE2 uops that load data or store data or perform register-to-register moves." }, + /* counters with some modifiers */ + { "p4-uop-queue-writes,mask=+from-tc-build,+from-tc-deliver", "Count the number of valid uops written to the uop queue." }, + { "p4-page-walk-type,mask=+dtmiss", "Count data page walks performed by the page miss handler." 
}, + { "p4-page-walk-type,mask=+itmiss", "Count instruction page walks performed by the page miss handler." }, + { "p4-instr-retired,mask=+nbogusntag,+nbogustag", "Count all non-bogus instructions retired during a clock cycle." }, + { "p4-branch-retired,mask=+mmnp,+mmnm", "Count branches not-taken." }, + { "p4-branch-retired,mask=+mmtm,+mmtp", "Count branches taken." }, + { "p4-branch-retired,mask=+mmnp,+mmtp", "Count branches predicted." }, + { "p4-branch-retired,mask=+mmnm,+mmtm", "Count branches mis-predicted." }, + { "p4-bsq-cache-reference,mask=+rd-2ndl-miss", "Count 2nd level cache misses." }, + { "p4-bsq-cache-reference,mask=+rd-2ndl-miss,+rd-2ndl-hits,+rd-2ndl-hite,+rd-2ndl-hitm", "Count 2nd level cache accesses." }, + { "p4-bsq-cache-reference,mask=+rd-2ndl-hits,+rd-2ndl-hite,+rd-2ndl-hitm", "Count 2nd level cache hits." }, + { "p4-bsq-cache-reference,mask=+rd-3rdl-miss", "Count 3rd level cache misses." }, + { "p4-bsq-cache-reference,mask=+rd-3rdl-miss,+rd-3rdl-hits,+rd-3rdl-hite,+rd-3rdl-hitm", "Count 3rd level cache accesses." }, + { "p4-bsq-cache-reference,mask=+rd-3rdl-hits,+rd-3rdl-hite,+rd-3rdl-hitm", "Count 3rd level cache hits." 
}, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-p4.h b/src/freebsd/map-p4.h new file mode 100644 index 0000000..1125735 --- /dev/null +++ b/src/freebsd/map-p4.h @@ -0,0 +1,82 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p4.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_P4 +#define FreeBSD_MAP_P4 + +enum NativeEvent_Value_P4Processor { + PNE_P4_128BIT_MMX_UOP = PAPI_NATIVE_MASK, + PNE_P4_64BIT_MMX_UOP, + PNE_P4_B2B_CYCLES, + PNE_P4_BNR, + PNE_P4_BPU_FETCH_REQUEST, + PNE_P4_BRANH_RETIRED, + PNE_P4_BSQ_ACTIVE_ENTRIES, + PNE_P4_BSQ_ALLOCATION, + PNE_P4_BSQ_CACHE_REFERENCE, + PNE_P4_EXECUTION_EVENT, + PNE_P4_FRONT_END_EVENT, + PNE_P4_FSB_DATA_ACTIVITY, + PNE_P4_GLOBAL_POWER_EVENTS, + PNE_P4_INSTR_RETIRED, + PNE_P4_IOQ_ACTIVE_ENTRIES, + PNE_P4_IOQ_ALLOCATION, + PNE_P4_ITLB_REFERENCE, + PNE_P4_LOAD_PORT_REPLAY, + PNE_P4_MISPRED_BRANCH_RETIRED, + PNE_P4_MACHINE_CLEAR, + PNE_P4_MEMORY_CANCEL, + PNE_P4_MEMORY_COMPLETE, + PNE_P4_MOB_LOAD_REPLAY, + PNE_P4_PACKED_DP_UOP, + PNE_P4_PACKED_SP_UOP, + PNE_P4_PAGE_WALK_TYPE, + PNE_P4_REPLAY_EVENT, + PNE_P4_RESOURCE_STALL, + PNE_P4_RESPONSE, + PNE_P4_RETIRED_BRANCH_TYPE, + PNE_P4_RETIRED_MISPRED_BRANCH_TYPE, + PNE_P4_SCALAR_DP_UOP, + PNE_P4_SCALAR_SP_UOP, + PNE_P4_SNOOP, + PNE_P4_SSE_INPUT_ASSIST, + PNE_P4_STORE_PORT_REPLAY, + PNE_P4_TC_DELIVER_MODE, + PNE_P4_TC_MS_XFER, + PNE_P4_UOP_QUEUE_WRITES, + PNE_P4_UOP_TYPE, + PNE_P4_UOPS_RETIRED, + PNE_P4_WC_BUFFER, + PNE_P4_X87_ASSIST, + PNE_P4_X87_FP_UOP, + PNE_P4_X87_SIMD_MOVES_UOP, + /* Special counters */ + PNE_P4_UOP_QUEUE_WRITES_TC_BUILD_DELIVER, + PNE_P4_PAGE_WALK_TYPE_D, + PNE_P4_PAGE_WALK_TYPE_I, + PNE_P4_INSTR_RETIRED_NON_BOGUS, + PNE_P4_BRANCH_RETIRED_NOT_TAKEN, + PNE_P4_BRANCH_RETIRED_TAKEN, + PNE_P4_BRANCH_RETIRED_PREDICTED, + PNE_P4_BRANCH_RETIRED_MISPREDICTED, + PNE_P4_BSQ_CACHE_REFERENCE_2L_MISSES, + PNE_P4_BSQ_CACHE_REFERENCE_2L_ACCESSES, + 
PNE_P4_BSQ_CACHE_REFERENCE_2L_HITS, + PNE_P4_BSQ_CACHE_REFERENCE_3L_MISSES, + PNE_P4_BSQ_CACHE_REFERENCE_3L_ACCESSES, + PNE_P4_BSQ_CACHE_REFERENCE_3L_HITS, + PNE_P4_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t P4Processor_info[]; +extern hwi_search_t P4Processor_map[]; + +#endif diff --git a/src/freebsd/map-p6-2.c b/src/freebsd/map-p6-2.c new file mode 100644 index 0000000..13492fd --- /dev/null +++ b/src/freebsd/map-p6-2.c @@ -0,0 +1,109 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6-2.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + +/**************************************************************************** + P6_2 SUBSTRATE + P6_2 SUBSTRATE + P6_2 SUBSTRATE (aka Pentium II) + P6_2 SUBSTRATE + P6_2 SUBSTRATE +****************************************************************************/ + +/* + Each entry of P6_2_Processor_info is the label/description pair of one native event; the entries must stay in the same order as the enumerators of NativeEvent_Value_P6_2_Processor (map-p6-2.h). A { NULL, NULL } entry closes the table. +*/ + +Native_Event_LabelDescription_t P6_2_Processor_info[] = +{ + /* Common P6 counters */ + { "p6-baclears", "Count the number of times a static branch prediction was made by the branch decoder because the BTB did not have a prediction." }, + { "p6-br-bogus", "Count the number of bogus branches." }, + { "p6-br-inst-decoded", "Count the number of branch instructions decoded." }, + { "p6-br-inst-retired", "Count the number of branch instructions retired." }, + { "p6-br-miss-pred-retired", "Count the number of mispredicted branch instructions retired." }, + { "p6-br-miss-pred-taken-ret", "Count the number of taken mispredicted branches retired." }, + { "p6-br-taken-retired", "Count the number of taken branches retired." }, + { "p6-btb-misses", "Count the number of branches for which the BTB did not produce a prediction. "}, + { "p6-bus-bnr-drv", "Count the number of bus clock cycles during which this processor is driving the BNR# pin." 
}, + { "p6-bus-data-rcv", "Count the number of bus clock cycles during which this processor is receiving data." }, + { "p6-bus-drdy-clocks", "Count the number of clocks during which DRDY# is asserted." }, + { "p6-bus-hit-drv", "Count the number of bus clock cycles during which this processor is driving the HIT# pin." }, + { "p6-bus-hitm-drv", "Count the number of bus clock cycles during which this processor is driving the HITM# pin." }, + { "p6-bus-lock-clocks", "Count the number of clocks during which LOCK# is asserted on the external system bus." }, + { "p6-bus-req-outstanding", "Count the number of bus requests outstanding in any given cycle." }, + { "p6-bus-snoop-stall", "Count the number of clock cycles during which the bus is snoop stalled." }, + { "p6-bus-tran-any", "Count the number of completed bus transactions of any kind." }, + { "p6-bus-tran-brd", "Count the number of burst read transactions." }, + { "p6-bus-tran-burst", "Count the number of completed burst transactions." }, + { "p6-bus-tran-def", "Count the number of completed deferred transactions." }, + { "p6-bus-tran-ifetch", "Count the number of completed instruction fetch transactions." }, + { "p6-bus-tran-inval", "Count the number of completed invalidate transactions." }, + { "p6-bus-tran-mem", "Count the number of completed memory transactions." }, + { "p6-bus-tran-pwr", "Count the number of completed partial write transactions." }, + { "p6-bus-tran-rfo", "Count the number of completed read-for-ownership transactions." }, + { "p6-bus-trans-io", "Count the number of completed I/O transactions." }, + { "p6-bus-trans-p", "Count the number of completed partial transactions." }, + { "p6-bus-trans-wb", "Count the number of completed write-back transactions." }, + { "p6-cpu-clk-unhalted", "Count the number of cycles during which the processor was not halted." }, + { "p6-cycles-div-busy", "Count the number of cycles during which the divider is busy and cannot accept new divides." 
}, + { "p6-cycles-in-pending-and-masked", "Count the number of processor cycles for which interrupts were disabled and interrupts were pending." }, + { "p6-cycles-int-masked", "Count the number of processor cycles for which interrupts were disabled." }, + { "p6-data-mem-refs", "Count all loads and all stores using any memory type, including internal retries." }, + { "p6-dcu-lines-in", "Count the total lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-in", "Count the number of M state lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-out", "Count the number of M state lines evicted from the data cache unit." }, + { "p6-dcu-miss-outstanding", "Count the weighted number of cycles while a data cache unit miss is outstanding, incremented by the number of outstanding cache misses at any time."}, + { "p6-div", "Count the number of integer and floating-point divides including speculative divides." }, + { "p6-flops", "Count the number of computational floating point operations retired." }, + { "p6-fp-assist", "Count the number of floating point exceptions handled by microcode." }, + { "p6-fp-comps-ops-exe", "Count the number of computation floating point operations executed." }, + { "p6-hw-int-rx", "Count the number of hardware interrupts received." }, + { "p6-ifu-fetch", "Count the number of instruction fetches, both cacheable and non-cacheable." }, + { "p6-ifu-fetch-miss", "Count the number of instruction fetch misses" }, + { "p6-ifu-mem-stall", "Count the number of cycles instruction fetch is stalled for any reason." }, + { "p6-ild-stall", "Count the number of cycles the instruction length decoder is stalled." }, + { "p6-inst-decoded", "Count the number of instructions decoded." }, + { "p6-inst-retired", "Count the number of instructions retired." }, + { "p6-itlb-miss", "Count the number of instruction TLB misses." }, + { "p6-l2-ads", "Count the number of L2 address strobes." 
}, + { "p6-l2-dbus-busy", "Count the number of cycles during which the L2 cache data bus was busy." }, + { "p6-l2-dbus-busy-rd", "Count the number of cycles during which the L2 cache data bus was busy transferring read data from L2 to the processor." }, + { "p6-l2-ifetch", "Count the number of L2 instruction fetches." }, + { "p6-l2-ld", "Count the number of L2 data loads." }, + { "p6-l2-lines-in", "Count the number of L2 lines allocated." }, + { "p6-l2-lines-out", "Count the number of L2 lines evicted." }, + { "p6-l2-m-lines-inm", "Count the number of modified lines allocated in L2 cache." }, + { "p6-l2-m-lines-outm", "Count the number of L2 M-state lines evicted." }, + { "p6-l2-rqsts", "Count the total number of L2 requests." }, + { "p6-l2-st", "Count the number of L2 data stores." }, + { "p6-ld-blocks", "Count the number of load operations delayed due to store buffer blocks." }, + { "p6-misalign-mem-ref", "Count the number of misaligned data memory references (crossing a 64 bit boundary)." }, + { "p6-mul", "Count the number of floating point multiplies." }, + { "p6-partial-rat-stalls", "Count the number of cycles or events for partial stalls." }, + { "p6-resource-stalls", "Count the number of cycles there was a resource related stall of any kind." }, + { "p6-sb-drains", "Count the number of cycles the store buffer is draining." }, + { "p6-segment-reg-loads", "Count the number of segment register loads." }, + { "p6-uops-retired", "Count the number of micro-ops retired."}, + /* Specific Pentium 2 counters */ + { "p6-fp-mmx-trans", "Count the number of transitions between MMX and floating-point instructions." }, + { "p6-mmx-assist", "Count the number of MMX assists executed" }, + { "p6-mmx-instr-exec", "Count the number of MMX instructions executed" }, + { "p6-mmx-instr-ret", "Count the number of MMX instructions retired." 
}, + { "p6-mmx-sat-instr-exec", "Count the number of MMX saturating instructions executed" }, + { "p6-mmx-uops-exec", "Count the number of MMX micro-ops executed" }, + { "p6-ret-seg-renames", "Count the number of segment register rename events retired." }, + { "p6-seg-rename-stalls", "Count the number of segment register renaming stalls" }, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-p6-2.h b/src/freebsd/map-p6-2.h new file mode 100644 index 0000000..0ac4b8e --- /dev/null +++ b/src/freebsd/map-p6-2.h @@ -0,0 +1,100 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6-2.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_P6_2 +#define FreeBSD_MAP_P6_2 +/* Native event codes for the P6_2 tables, numbered consecutively from PAPI_NATIVE_MASK; the enumerator order follows the entry order of P6_2_Processor_info. */ +enum NativeEvent_Value_P6_2_Processor { + /* P6 common events */ + PNE_P6_2_BACLEARS = PAPI_NATIVE_MASK, + PNE_P6_2_BR_BOGUS, + PNE_P6_2_BR_INST_DECODED, + PNE_P6_2_BR_INST_RETIRED, + PNE_P6_2_BR_MISS_PRED_RETIRED, + PNE_P6_2_BR_MISS_PRED_TAKEN_RET, + PNE_P6_2_BR_TAKEN_RETIRED, + PNE_P6_2_BTB_MISSES, + PNE_P6_2_BUS_BNR_DRV, + PNE_P6_2_BUS_DATA_RCV, + PNE_P6_2_BUS_DRDY_CLOCKS, + PNE_P6_2_BUS_HIT_DRV, + PNE_P6_2_BUS_HITM_DRV, + PNE_P6_2_BUS_LOCK_CLOCKS, + PNE_P6_2_BUS_REQ_OUTSTANDING, + PNE_P6_2_BUS_SNOOP_STALL, + PNE_P6_2_BUS_TRAN_ANY, + PNE_P6_2_BUS_TRAN_BRD, + PNE_P6_2_BUS_TRAN_BURST, + PNE_P6_2_BUS_TRAN_DEF, + PNE_P6_2_BUS_TRAN_IFETCH, + PNE_P6_2_BUS_TRAN_INVAL, + PNE_P6_2_BUS_TRAN_MEM, + PNE_P6_2_BUS_TRAN_POWER, + PNE_P6_2_BUS_TRAN_RFO, + PNE_P6_2_BUS_TRANS_IO, + PNE_P6_2_BUS_TRANS_P, + PNE_P6_2_BUS_TRANS_WB, + PNE_P6_2_CPU_CLK_UNHALTED, + PNE_P6_2_CYCLES_DIV_BUSY, + PNE_P6_2_CYCLES_IN_PENDING_AND_MASKED, + PNE_P6_2_CYCLES_INT_MASKED, + PNE_P6_2_DATA_MEM_REFS, + PNE_P6_2_DCU_LINES_IN, + PNE_P6_2_DCU_M_LINES_IN, + PNE_P6_2_DCU_M_LINES_OUT, + PNE_P6_2_DCU_MISS_OUTSTANDING, + PNE_P6_2_DIV, + PNE_P6_2_FLOPS, + PNE_P6_2_FP_ASSIST, + PNE_P6_2_FTP_COMPS_OPS_EXE, + PNE_P6_2_HW_INT_RX, + PNE_P6_2_IFU_FETCH, + 
PNE_P6_2_IFU_FETCH_MISS, + PNE_P6_2_IFU_MEM_STALL, + PNE_P6_2_ILD_STALL, + PNE_P6_2_INST_DECODED, + PNE_P6_2_INST_RETIRED, + PNE_P6_2_ITLB_MISS, + PNE_P6_2_L2_ADS, + PNE_P6_2_L2_DBUS_BUSY, + PNE_P6_2_L2_DBUS_BUSY_RD, + PNE_P6_2_L2_IFETCH, + PNE_P6_2_L2_LD, + PNE_P6_2_L2_LINES_IN, + PNE_P6_2_L2_LINES_OUT, + PNE_P6_2_L2M_LINES_INM, + PNE_P6_2_L2M_LINES_OUTM, + PNE_P6_2_L2_RQSTS, + PNE_P6_2_L2_ST, + PNE_P6_2_LD_BLOCKS, + PNE_P6_2_MISALIGN_MEM_REF, + PNE_P6_2_MUL, + PNE_P6_2_PARTIAL_RAT_STALLS, + PNE_P6_2_RESOURCE_STALL, + PNE_P6_2_SB_DRAINS, + PNE_P6_2_SEGMENT_REG_LOADS, + PNE_P6_2_UOPS_RETIRED, + /* Pentium 2 specific events */ + PNE_P6_2_FP_MMX_TRANS, + PNE_P6_2_MMX_ASSIST, + PNE_P6_2_MMX_INSTR_EXEC, + PNE_P6_2_MMX_INSTR_RET, + PNE_P6_2_MMX_SAT_INSTR_EXEC, + PNE_P6_2_MMX_UOPS_EXEC, + PNE_P6_2_RET_SEG_RENAMES, + PNE_P6_2_SEG_RENAME_STALLS, + PNE_P6_2_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t P6_2_Processor_info[]; +extern hwi_search_t P6_2_Processor_map[]; + +#endif diff --git a/src/freebsd/map-p6-3.c b/src/freebsd/map-p6-3.c new file mode 100644 index 0000000..ccc35e7 --- /dev/null +++ b/src/freebsd/map-p6-3.c @@ -0,0 +1,113 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6-3.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + +/**************************************************************************** + P6_3 SUBSTRATE + P6_3 SUBSTRATE + P6_3 SUBSTRATE (aka Pentium III) + P6_3 SUBSTRATE + P6_3 SUBSTRATE +****************************************************************************/ + +/* + Each entry of P6_3_Processor_info is the label/description pair of one native event; the entries must stay in the same order as the enumerators of NativeEvent_Value_P6_3_Processor (map-p6-3.h). A { NULL, NULL } entry closes the table. +*/ + +Native_Event_LabelDescription_t P6_3_Processor_info[] = +{ + /* Common P6 counters */ + { "p6-baclears", "Count the number of times a static branch prediction was made by the branch decoder because the BTB did not have a prediction." 
}, + { "p6-br-bogus", "Count the number of bogus branches." }, + { "p6-br-inst-decoded", "Count the number of branch instructions decoded." }, + { "p6-br-inst-retired", "Count the number of branch instructions retired." }, + { "p6-br-miss-pred-retired", "Count the number of mispredicted branch instructions retired." }, + { "p6-br-miss-pred-taken-ret", "Count the number of taken mispredicted branches retired." }, + { "p6-br-taken-retired", "Count the number of taken branches retired." }, + { "p6-btb-misses", "Count the number of branches for which the BTB did not produce a prediction. "}, + { "p6-bus-bnr-drv", "Count the number of bus clock cycles during which this processor is driving the BNR# pin." }, + { "p6-bus-data-rcv", "Count the number of bus clock cycles during which this processor is receiving data." }, + { "p6-bus-drdy-clocks", "Count the number of clocks during which DRDY# is asserted." }, + { "p6-bus-hit-drv", "Count the number of bus clock cycles during which this processor is driving the HIT# pin." }, + { "p6-bus-hitm-drv", "Count the number of bus clock cycles during which this processor is driving the HITM# pin." }, + { "p6-bus-lock-clocks", "Count the number of clocks during which LOCK# is asserted on the external system bus." }, + { "p6-bus-req-outstanding", "Count the number of bus requests outstanding in any given cycle." }, + { "p6-bus-snoop-stall", "Count the number of clock cycles during which the bus is snoop stalled." }, + { "p6-bus-tran-any", "Count the number of completed bus transactions of any kind." }, + { "p6-bus-tran-brd", "Count the number of burst read transactions." }, + { "p6-bus-tran-burst", "Count the number of completed burst transactions." }, + { "p6-bus-tran-def", "Count the number of completed deferred transactions." }, + { "p6-bus-tran-ifetch", "Count the number of completed instruction fetch transactions." }, + { "p6-bus-tran-inval", "Count the number of completed invalidate transactions." 
}, + { "p6-bus-tran-mem", "Count the number of completed memory transactions." }, + { "p6-bus-tran-pwr", "Count the number of completed partial write transactions." }, + { "p6-bus-tran-rfo", "Count the number of completed read-for-ownership transactions." }, + { "p6-bus-trans-io", "Count the number of completed I/O transactions." }, + { "p6-bus-trans-p", "Count the number of completed partial transactions." }, + { "p6-bus-trans-wb", "Count the number of completed write-back transactions." }, + { "p6-cpu-clk-unhalted", "Count the number of cycles during which the processor was not halted." }, + { "p6-cycles-div-busy", "Count the number of cycles during which the divider is busy and cannot accept new divides." }, + { "p6-cycles-in-pending-and-masked", "Count the number of processor cycles for which interrupts were disabled and interrupts were pending." }, + { "p6-cycles-int-masked", "Count the number of processor cycles for which interrupts were disabled." }, + { "p6-data-mem-refs", "Count all loads and all stores using any memory type, including internal retries." }, + { "p6-dcu-lines-in", "Count the total lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-in", "Count the number of M state lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-out", "Count the number of M state lines evicted from the data cache unit." }, + { "p6-dcu-miss-outstanding", "Count the weighted number of cycles while a data cache unit miss is outstanding, incremented by the number of outstanding cache misses at any time."}, + { "p6-div", "Count the number of integer and floating-point divides including speculative divides." }, + { "p6-flops", "Count the number of computational floating point operations retired." }, + { "p6-fp-assist", "Count the number of floating point exceptions handled by microcode." }, + { "p6-fp-comps-ops-exe", "Count the number of computation floating point operations executed." 
}, + { "p6-hw-int-rx", "Count the number of hardware interrupts received." }, + { "p6-ifu-fetch", "Count the number of instruction fetches, both cacheable and non-cacheable." }, + { "p6-ifu-fetch-miss", "Count the number of instruction fetch misses" }, + { "p6-ifu-mem-stall", "Count the number of cycles instruction fetch is stalled for any reason." }, + { "p6-ild-stall", "Count the number of cycles the instruction length decoder is stalled." }, + { "p6-inst-decoded", "Count the number of instructions decoded." }, + { "p6-inst-retired", "Count the number of instructions retired." }, + { "p6-itlb-miss", "Count the number of instruction TLB misses." }, + { "p6-l2-ads", "Count the number of L2 address strobes." }, + { "p6-l2-dbus-busy", "Count the number of cycles during which the L2 cache data bus was busy." }, + { "p6-l2-dbus-busy-rd", "Count the number of cycles during which the L2 cache data bus was busy transferring read data from L2 to the processor." }, + { "p6-l2-ifetch", "Count the number of L2 instruction fetches." }, + { "p6-l2-ld", "Count the number of L2 data loads." }, + { "p6-l2-lines-in", "Count the number of L2 lines allocated." }, + { "p6-l2-lines-out", "Count the number of L2 lines evicted." }, + { "p6-l2-m-lines-inm", "Count the number of modified lines allocated in L2 cache." }, + { "p6-l2-m-lines-outm", "Count the number of L2 M-state lines evicted." }, + { "p6-l2-rqsts", "Count the total number of L2 requests." }, + { "p6-l2-st", "Count the number of L2 data stores." }, + { "p6-ld-blocks", "Count the number of load operations delayed due to store buffer blocks." }, + { "p6-misalign-mem-ref", "Count the number of misaligned data memory references (crossing a 64 bit boundary)." }, + { "p6-mul", "Count the number of floating point multiplies, including speculative multiplies" }, + { "p6-partial-rat-stalls", "Count the number of cycles or events for partial stalls." 
}, + { "p6-resource-stalls", "Count the number of cycles there was a resource related stall of any kind." }, + { "p6-sb-drains", "Count the number of cycles the store buffer is draining." }, + { "p6-segment-reg-loads", "Count the number of segment register loads." }, + { "p6-uops-retired", "Count the number of micro-ops retired."}, + /* Specific Pentium 3 counters */ + { "p6-fp-mmx-trans", "Count the number of transitions between MMX and floating-point instructions." }, + { "p6-mmx-assist", "Count the number of MMX assists executed" }, + { "p6-mmx-instr-exec", "Count the number of MMX instructions executed" }, + { "p6-mmx-instr-ret", "Count the number of MMX instructions retired." }, + { "p6-mmx-sat-instr-exec", "Count the number of MMX saturating instructions executed" }, + { "p6-mmx-uops-exec", "Count the number of MMX micro-ops executed" }, + { "p6-ret-seg-renames", "Count the number of segment register rename events retired." }, + { "p6-seg-rename-stalls", "Count the number of segment register renaming stalls" }, + { "p6-emon-kni-comp-inst-ret", "Count the number of SSE computational instructions retired" }, + { "p6-emon-kni-inst-retired", "Count the number of SSE instructions retired." }, + { "p6-emon-kni-pref-dispatched", "Count the number of SSE prefetch or weakly ordered instructions dispatched." }, + { "p6-emon-kni-pref-miss", "Count the number of prefetch or weakly ordered instructions that miss all caches." 
}, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-p6-3.h b/src/freebsd/map-p6-3.h new file mode 100644 index 0000000..e080a93 --- /dev/null +++ b/src/freebsd/map-p6-3.h @@ -0,0 +1,104 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6-3.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_P6_3 +#define FreeBSD_MAP_P6_3 +/* Native event codes for the P6_3 tables, numbered consecutively from PAPI_NATIVE_MASK; the enumerator order follows the entry order of P6_3_Processor_info. */ +enum NativeEvent_Value_P6_3_Processor { + /* P6 common events */ + PNE_P6_3_BACLEARS = PAPI_NATIVE_MASK, + PNE_P6_3_BR_BOGUS, + PNE_P6_3_BR_INST_DECODED, + PNE_P6_3_BR_INST_RETIRED, + PNE_P6_3_BR_MISS_PRED_RETIRED, + PNE_P6_3_BR_MISS_PRED_TAKEN_RET, + PNE_P6_3_BR_TAKEN_RETIRED, + PNE_P6_3_BTB_MISSES, + PNE_P6_3_BUS_BNR_DRV, + PNE_P6_3_BUS_DATA_RCV, + PNE_P6_3_BUS_DRDY_CLOCKS, + PNE_P6_3_BUS_HIT_DRV, + PNE_P6_3_BUS_HITM_DRV, + PNE_P6_3_BUS_LOCK_CLOCKS, + PNE_P6_3_BUS_REQ_OUTSTANDING, + PNE_P6_3_BUS_SNOOP_STALL, + PNE_P6_3_BUS_TRAN_ANY, + PNE_P6_3_BUS_TRAN_BRD, + PNE_P6_3_BUS_TRAN_BURST, + PNE_P6_3_BUS_TRAN_DEF, + PNE_P6_3_BUS_TRAN_IFETCH, + PNE_P6_3_BUS_TRAN_INVAL, + PNE_P6_3_BUS_TRAN_MEM, + PNE_P6_3_BUS_TRAN_POWER, + PNE_P6_3_BUS_TRAN_RFO, + PNE_P6_3_BUS_TRANS_IO, + PNE_P6_3_BUS_TRANS_P, + PNE_P6_3_BUS_TRANS_WB, + PNE_P6_3_CPU_CLK_UNHALTED, + PNE_P6_3_CYCLES_DIV_BUSY, + PNE_P6_3_CYCLES_IN_PENDING_AND_MASKED, + PNE_P6_3_CYCLES_INT_MASKED, + PNE_P6_3_DATA_MEM_REFS, + PNE_P6_3_DCU_LINES_IN, + PNE_P6_3_DCU_M_LINES_IN, + PNE_P6_3_DCU_M_LINES_OUT, + PNE_P6_3_DCU_MISS_OUTSTANDING, + PNE_P6_3_DIV, + PNE_P6_3_FLOPS, + PNE_P6_3_FP_ASSIST, + PNE_P6_3_FTP_COMPS_OPS_EXE, + PNE_P6_3_HW_INT_RX, + PNE_P6_3_IFU_FETCH, + PNE_P6_3_IFU_FETCH_MISS, + PNE_P6_3_IFU_MEM_STALL, + PNE_P6_3_ILD_STALL, + PNE_P6_3_INST_DECODED, + PNE_P6_3_INST_RETIRED, + PNE_P6_3_ITLB_MISS, + PNE_P6_3_L2_ADS, + PNE_P6_3_L2_DBUS_BUSY, + PNE_P6_3_L2_DBUS_BUSY_RD, + PNE_P6_3_L2_IFETCH, + PNE_P6_3_L2_LD, + PNE_P6_3_L2_LINES_IN, + PNE_P6_3_L2_LINES_OUT, + PNE_P6_3_L2M_LINES_INM, + 
PNE_P6_3_L2M_LINES_OUTM, + PNE_P6_3_L2_RQSTS, + PNE_P6_3_L2_ST, + PNE_P6_3_LD_BLOCKS, + PNE_P6_3_MISALIGN_MEM_REF, + PNE_P6_3_MUL, + PNE_P6_3_PARTIAL_RAT_STALLS, + PNE_P6_3_RESOURCE_STALL, + PNE_P6_3_SB_DRAINS, + PNE_P6_3_SEGMENT_REG_LOADS, + PNE_P6_3_UOPS_RETIRED, + /* Pentium 3 specific events */ + PNE_P6_3_FP_MMX_TRANS, + PNE_P6_3_MMX_ASSIST, + PNE_P6_3_MMX_INSTR_EXEC, + PNE_P6_3_MMX_INSTR_RET, + PNE_P6_3_MMX_SAT_INSTR_EXEC, + PNE_P6_3_MMX_UOPS_EXEC, + PNE_P6_3_RET_SEG_RENAMES, + PNE_P6_3_SEG_RENAME_STALLS, + PNE_P6_3_EMON_KNI_COMP_INST_RET, + PNE_P6_3_EMON_KNI_INST_RETIRED, + PNE_P6_3_EMON_KNI_PREF_DISPATCHED, + PNE_P6_3_EMON_KNI_PREF_MISS, + PNE_P6_3_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t P6_3_Processor_info[]; +extern hwi_search_t P6_3_Processor_map[]; + +#endif diff --git a/src/freebsd/map-p6-c.c b/src/freebsd/map-p6-c.c new file mode 100644 index 0000000..a6a936b --- /dev/null +++ b/src/freebsd/map-p6-c.c @@ -0,0 +1,102 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6-c.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + +/**************************************************************************** + P6_C SUBSTRATE + P6_C SUBSTRATE + P6_C SUBSTRATE (aka Celeron) + P6_C SUBSTRATE + P6_C SUBSTRATE +****************************************************************************/ + +/* + Each entry of P6_C_Processor_info is the label/description pair of one native event; the entries must stay in the same order as the enumerators of NativeEvent_Value_P6_C_Processor (map-p6-c.h). A { NULL, NULL } entry closes the table. +*/ + +Native_Event_LabelDescription_t P6_C_Processor_info[] = +{ + /* Common P6 counters */ + { "p6-baclears", "Count the number of times a static branch prediction was made by the branch decoder because the BTB did not have a prediction." }, + { "p6-br-bogus", "Count the number of bogus branches." }, + { "p6-br-inst-decoded", "Count the number of branch instructions decoded." 
}, + { "p6-br-inst-retired", "Count the number of branch instructions retired." }, + { "p6-br-miss-pred-retired", "Count the number of mispredicted branch instructions retired." }, + { "p6-br-miss-pred-taken-ret", "Count the number of taken mispredicted branches retired." }, + { "p6-br-taken-retired", "Count the number of taken branches retired." }, + { "p6-btb-misses", "Count the number of branches for which the BTB did not produce a prediction. "}, + { "p6-bus-bnr-drv", "Count the number of bus clock cycles during which this processor is driving the BNR# pin." }, + { "p6-bus-data-rcv", "Count the number of bus clock cycles during which this processor is receiving data." }, + { "p6-bus-drdy-clocks", "Count the number of clocks during which DRDY# is asserted." }, + { "p6-bus-hit-drv", "Count the number of bus clock cycles during which this processor is driving the HIT# pin." }, + { "p6-bus-hitm-drv", "Count the number of bus clock cycles during which this processor is driving the HITM# pin." }, + { "p6-bus-lock-clocks", "Count the number of clocks during which LOCK# is asserted on the external system bus." }, + { "p6-bus-req-outstanding", "Count the number of bus requests outstanding in any given cycle." }, + { "p6-bus-snoop-stall", "Count the number of clock cycles during which the bus is snoop stalled." }, + { "p6-bus-tran-any", "Count the number of completed bus transactions of any kind." }, + { "p6-bus-tran-brd", "Count the number of burst read transactions." }, + { "p6-bus-tran-burst", "Count the number of completed burst transactions." }, + { "p6-bus-tran-def", "Count the number of completed deferred transactions." }, + { "p6-bus-tran-ifetch", "Count the number of completed instruction fetch transactions." }, + { "p6-bus-tran-inval", "Count the number of completed invalidate transactions." }, + { "p6-bus-tran-mem", "Count the number of completed memory transactions." }, + { "p6-bus-tran-pwr", "Count the number of completed partial write transactions." 
}, + { "p6-bus-tran-rfo", "Count the number of completed read-for-ownership transactions." }, + { "p6-bus-trans-io", "Count the number of completed I/O transactions." }, + { "p6-bus-trans-p", "Count the number of completed partial transactions." }, + { "p6-bus-trans-wb", "Count the number of completed write-back transactions." }, + { "p6-cpu-clk-unhalted", "Count the number of cycles during which the processor was not halted." }, + { "p6-cycles-div-busy", "Count the number of cycles during which the divider is busy and cannot accept new divides." }, + { "p6-cycles-in-pending-and-masked", "Count the number of processor cycles for which interrupts were disabled and interrupts were pending." }, + { "p6-cycles-int-masked", "Count the number of processor cycles for which interrupts were disabled." }, + { "p6-data-mem-refs", "Count all loads and all stores using any memory type, including internal retries." }, + { "p6-dcu-lines-in", "Count the total lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-in", "Count the number of M state lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-out", "Count the number of M state lines evicted from the data cache unit." }, + { "p6-dcu-miss-outstanding", "Count the weighted number of cycles while a data cache unit miss is outstanding, incremented by the number of outstanding cache misses at any time."}, + { "p6-div", "Count the number of integer and floating-point divides including speculative divides." }, + { "p6-flops", "Count the number of computational floating point operations retired." }, + { "p6-fp-assist", "Count the number of floating point exceptions handled by microcode." }, + { "p6-fp-comps-ops-exe", "Count the number of computation floating point operations executed." }, + { "p6-hw-int-rx", "Count the number of hardware interrupts received." }, + { "p6-ifu-fetch", "Count the number of instruction fetches, both cacheable and non-cacheable." 
}, + { "p6-ifu-fetch-miss", "Count the number of instruction fetch misses" }, + { "p6-ifu-mem-stall", "Count the number of cycles instruction fetch is stalled for any reason." }, + { "p6-ild-stall", "Count the number of cycles the instruction length decoder is stalled." }, + { "p6-inst-decoded", "Count the number of instructions decoded." }, + { "p6-inst-retired", "Count the number of instructions retired." }, + { "p6-itlb-miss", "Count the number of instruction TLB misses." }, + { "p6-l2-ads", "Count the number of L2 address strobes." }, + { "p6-l2-dbus-busy", "Count the number of cycles during which the L2 cache data bus was busy." }, + { "p6-l2-dbus-busy-rd", "Count the number of cycles during which the L2 cache data bus was busy transferring read data from L2 to the processor." }, + { "p6-l2-ifetch", "Count the number of L2 instruction fetches." }, + { "p6-l2-ld", "Count the number of L2 data loads." }, + { "p6-l2-lines-in", "Count the number of L2 lines allocated." }, + { "p6-l2-lines-out", "Count the number of L2 lines evicted." }, + { "p6-l2-m-lines-inm", "Count the number of modified lines allocated in L2 cache." }, + { "p6-l2-m-lines-outm", "Count the number of L2 M-state lines evicted." }, + { "p6-l2-rqsts", "Count the total number of L2 requests." }, + { "p6-l2-st", "Count the number of L2 data stores." }, + { "p6-ld-blocks", "Count the number of load operations delayed due to store buffer blocks." }, + { "p6-misalign-mem-ref", "Count the number of misaligned data memory references (crossing a 64 bit boundary)." }, + { "p6-mul", "Count the number of floating point multiplies, including speculative multiplies." }, + { "p6-partial-rat-stalls", "Count the number of cycles or events for partial stalls." }, + { "p6-resource-stalls", "Count the number of cycles there was a resource related stall of any kind." }, + { "p6-sb-drains", "Count the number of cycles the store buffer is draining." 
}, + { "p6-segment-reg-loads", "Count the number of segment register loads." }, + { "p6-uops-retired", "Count the number of micro-ops retired."}, + /* Specific Celeron counters */ + { "p6-mmx-instr-exec", "Count the number of MMX instructions executed" }, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-p6-c.h b/src/freebsd/map-p6-c.h new file mode 100644 index 0000000..cc1e1da --- /dev/null +++ b/src/freebsd/map-p6-c.h @@ -0,0 +1,93 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6-c.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_P6_C +#define FreeBSD_MAP_P6_C +/* Native event codes for the P6_C tables, numbered consecutively from PAPI_NATIVE_MASK; the enumerator order follows the entry order of P6_C_Processor_info. */ +enum NativeEvent_Value_P6_C_Processor { + /* P6 common events */ + PNE_P6_C_BACLEARS = PAPI_NATIVE_MASK, + PNE_P6_C_BR_BOGUS, + PNE_P6_C_BR_INST_DECODED, + PNE_P6_C_BR_INST_RETIRED, + PNE_P6_C_BR_MISS_PRED_RETIRED, + PNE_P6_C_BR_MISS_PRED_TAKEN_RET, + PNE_P6_C_BR_TAKEN_RETIRED, + PNE_P6_C_BTB_MISSES, + PNE_P6_C_BUS_BNR_DRV, + PNE_P6_C_BUS_DATA_RCV, + PNE_P6_C_BUS_DRDY_CLOCKS, + PNE_P6_C_BUS_HIT_DRV, + PNE_P6_C_BUS_HITM_DRV, + PNE_P6_C_BUS_LOCK_CLOCKS, + PNE_P6_C_BUS_REQ_OUTSTANDING, + PNE_P6_C_BUS_SNOOP_STALL, + PNE_P6_C_BUS_TRAN_ANY, + PNE_P6_C_BUS_TRAN_BRD, + PNE_P6_C_BUS_TRAN_BURST, + PNE_P6_C_BUS_TRAN_DEF, + PNE_P6_C_BUS_TRAN_IFETCH, + PNE_P6_C_BUS_TRAN_INVAL, + PNE_P6_C_BUS_TRAN_MEM, + PNE_P6_C_BUS_TRAN_POWER, + PNE_P6_C_BUS_TRAN_RFO, + PNE_P6_C_BUS_TRANS_IO, + PNE_P6_C_BUS_TRANS_P, + PNE_P6_C_BUS_TRANS_WB, + PNE_P6_C_CPU_CLK_UNHALTED, + PNE_P6_C_CYCLES_DIV_BUSY, + PNE_P6_C_CYCLES_IN_PENDING_AND_MASKED, + PNE_P6_C_CYCLES_INT_MASKED, + PNE_P6_C_DATA_MEM_REFS, + PNE_P6_C_DCU_LINES_IN, + PNE_P6_C_DCU_M_LINES_IN, + PNE_P6_C_DCU_M_LINES_OUT, + PNE_P6_C_DCU_MISS_OUTSTANDING, + PNE_P6_C_DIV, + PNE_P6_C_FLOPS, + PNE_P6_C_FP_ASSIST, + PNE_P6_C_FTP_COMPS_OPS_EXE, + PNE_P6_C_HW_INT_RX, + PNE_P6_C_IFU_FETCH, + PNE_P6_C_IFU_FETCH_MISS, + PNE_P6_C_IFU_MEM_STALL, + PNE_P6_C_ILD_STALL, + 
PNE_P6_C_INST_DECODED, + PNE_P6_C_INST_RETIRED, + PNE_P6_C_ITLB_MISS, + PNE_P6_C_L2_ADS, + PNE_P6_C_L2_DBUS_BUSY, + PNE_P6_C_L2_DBUS_BUSY_RD, + PNE_P6_C_L2_IFETCH, + PNE_P6_C_L2_LD, + PNE_P6_C_L2_LINES_IN, + PNE_P6_C_L2_LINES_OUT, + PNE_P6_C_L2M_LINES_INM, + PNE_P6_C_L2M_LINES_OUTM, + PNE_P6_C_L2_RQSTS, + PNE_P6_C_L2_ST, + PNE_P6_C_LD_BLOCKS, + PNE_P6_C_MISALIGN_MEM_REF, + PNE_P6_C_MUL, + PNE_P6_C_PARTIAL_RAT_STALLS, + PNE_P6_C_RESOURCE_STALL, + PNE_P6_C_SB_DRAINS, + PNE_P6_C_SEGMENT_REG_LOADS, + PNE_P6_C_UOPS_RETIRED, + /* Celeron specific events */ + PNE_P6_C_MMX_INSTR_EXEC, + PNE_P6_C_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t P6_C_Processor_info[]; +extern hwi_search_t P6_C_Processor_map[]; + +#endif diff --git a/src/freebsd/map-p6-m.c b/src/freebsd/map-p6-m.c new file mode 100644 index 0000000..18a91b5 --- /dev/null +++ b/src/freebsd/map-p6-m.c @@ -0,0 +1,139 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6-M.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + +/**************************************************************************** + P6_M SUBSTRATE + P6_M SUBSTRATE + P6_M SUBSTRATE (aka Pentium M) + P6_M SUBSTRATE + P6_M SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_P6_M_Processor must match P6_M_Processor_info: one { name, description } row per enumerator, in the same order, closed by a { NULL, NULL } row +*/ + +Native_Event_LabelDescription_t P6_M_Processor_info[] = +{ + /* Common P6 counters */ + { "p6-baclears", "Count the number of times a static branch prediction was made by the branch decoder because the BTB did not have a prediction." }, + { "p6-br-bogus", "Count the number of bogus branches." }, + { "p6-br-inst-decoded", "Count the number of branch instructions decoded." }, + { "p6-br-inst-retired", "Count the number of branch instructions retired." 
}, + { "p6-br-miss-pred-retired", "Count the number of mispredicted branch instructions retired." }, + { "p6-br-miss-pred-taken-ret", "Count the number of taken mispredicted branches retired." }, + { "p6-br-taken-retired", "Count the number of taken branches retired." }, + { "p6-btb-misses", "Count the number of branches for which the BTB did not produce a prediction. "}, + { "p6-bus-bnr-drv", "Count the number of bus clock cycles during which this processor is driving the BNR# pin." }, + { "p6-bus-data-rcv", "Count the number of bus clock cycles during which this processor is receiving data." }, + { "p6-bus-drdy-clocks", "Count the number of clocks during which DRDY# is asserted." }, + { "p6-bus-hit-drv", "Count the number of bus clock cycles during which this processor is driving the HIT# pin." }, + { "p6-bus-hitm-drv", "Count the number of bus clock cycles during which this processor is driving the HITM# pin." }, + { "p6-bus-lock-clocks", "Count the number of clocks during which LOCK# is asserted on the external system bus." }, + { "p6-bus-req-outstanding", "Count the number of bus requests outstanding in any given cycle." }, + { "p6-bus-snoop-stall", "Count the number of clock cycles during which the bus is snoop stalled." }, + { "p6-bus-tran-any", "Count the number of completed bus transactions of any kind." }, + { "p6-bus-tran-brd", "Count the number of burst read transactions." }, + { "p6-bus-tran-burst", "Count the number of completed burst transactions." }, + { "p6-bus-tran-def", "Count the number of completed deferred transactions." }, + { "p6-bus-tran-ifetch", "Count the number of completed instruction fetch transactions." }, + { "p6-bus-tran-inval", "Count the number of completed invalidate transactions." }, + { "p6-bus-tran-mem", "Count the number of completed memory transactions." }, + { "p6-bus-tran-pwr", "Count the number of completed partial write transactions." 
}, + { "p6-bus-tran-rfo", "Count the number of completed read-for-ownership transactions." }, + { "p6-bus-trans-io", "Count the number of completed I/O transactions." }, + { "p6-bus-trans-p", "Count the number of completed partial transactions." }, + { "p6-bus-trans-wb", "Count the number of completed write-back transactions." }, + /* { "p6-cpu-clk-unhalted", "Count the number of cycles during which the processor was not halted." }, THIS IS DIFFERENT IN PM (Pentium M): the row below adds the thermal-trip condition */ + { "p6-cpu-clk-unhalted", "Count the number of cycles during which the processor was not halted and not in a thermal trip." }, + { "p6-cycles-div-busy", "Count the number of cycles during which the divider is busy and cannot accept new divides." }, + { "p6-cycles-in-pending-and-masked", "Count the number of processor cycles for which interrupts were disabled and interrupts were pending." }, + { "p6-cycles-int-masked", "Count the number of processor cycles for which interrupts were disabled." }, + { "p6-data-mem-refs", "Count all loads and all stores using any memory type, including internal retries." }, + { "p6-dcu-lines-in", "Count the total lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-in", "Count the number of M state lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-out", "Count the number of M state lines evicted from the data cache unit." }, + { "p6-dcu-miss-outstanding", "Count the weighted number of cycles while a data cache unit miss is outstanding, incremented by the number of outstanding cache misses at any time."}, + { "p6-div", "Count the number of integer and floating-point divides including speculative divides." }, + { "p6-flops", "Count the number of computational floating point operations retired." }, + { "p6-fp-assist", "Count the number of floating point exceptions handled by microcode." }, + { "p6-fp-comps-ops-exe", "Count the number of computation floating point operations executed." 
}, + { "p6-hw-int-rx", "Count the number of hardware interrupts received." 
}, + { "p6-ifu-fetch", "Count the number of instruction fetches, both cacheable and non-cacheable." }, + { "p6-ifu-fetch-miss", "Count the number of instruction fetch misses" }, + { "p6-ifu-mem-stall", "Count the number of cycles instruction fetch is stalled for any reason." }, + { "p6-ild-stall", "Count the number of cycles the instruction length decoder is stalled." }, + { "p6-inst-decoded", "Count the number of instructions decoded." }, + { "p6-inst-retired", "Count the number of instructions retired." }, + { "p6-itlb-miss", "Count the number of instruction TLB misses." }, + { "p6-l2-ads", "Count the number of L2 address strobes." }, + { "p6-l2-dbus-busy", "Count the number of cycles during which the L2 cache data bus was busy." }, + { "p6-l2-dbus-busy-rd", "Count the number of cycles during which the L2 cache data bus was busy transferring read data from L2 to the processor." }, + { "p6-l2-ifetch", "Count the number of L2 instruction fetches." }, + { "p6-l2-ld", "Count the number of L2 data loads." }, + { "p6-l2-lines-in", "Count the number of L2 lines allocated." }, + { "p6-l2-lines-out", "Count the number of L2 lines evicted." }, + { "p6-l2-m-lines-inm", "Count the number of modified lines allocated in L2 cache." }, + { "p6-l2-m-lines-outm", "Count the number of L2 M-state lines evicted." }, + { "p6-l2-rqsts", "Count the total number of L2 requests." }, + { "p6-l2-st", "Count the number of L2 data stores." }, + { "p6-ld-blocks", "Count the number of load operations delayed due to store buffer blocks." }, + { "p6-misalign-mem-ref", "Count the number of misaligned data memory references (crossing a 64 bit boundary)." }, + { "p6-mul", "Count the number of floating point multiplies, including speculative multiplies." }, + { "p6-partial-rat-stalls", "Count the number of cycles or events for partial stalls." }, + { "p6-resource-stalls", "Count the number of cycles there was a resource related stall of any kind." 
}, + { "p6-sb-drains", "Count the number of cycles the store buffer is draining." }, + { "p6-segment-reg-loads", "Count the number of segment register loads." }, + { "p6-uops-retired", "Count the number of micro-ops retired."}, + /* Specific Pentium 3 counters */ + { "p6-fp-mmx-trans", "Count the number of transitions between MMX and floating-point instructions." }, + { "p6-mmx-assist", "Count the number of MMX assists executed" }, + { "p6-mmx-instr-exec", "Count the number of MMX instructions executed" }, + { "p6-mmx-instr-ret", "Count the number of MMX instructions retired." }, + { "p6-mmx-sat-instr-exec", "Count the number of MMX saturating instructions executed" }, + { "p6-mmx-uops-exec", "Count the number of MMX micro-ops executed" }, + { "p6-ret-seg-renames", "Count the number of segment register rename events retired." }, + { "p6-seg-rename-stalls", "Count the number of segment register renaming stalls" }, + { "p6-emon-kni-comp-inst-ret", "Count the number of SSE computational instructions retired" }, + { "p6-emon-kni-inst-retired", "Count the number of SSE instructions retired." }, + { "p6-emon-kni-pref-dispatched", "Count the number of SSE prefetch or weakly ordered instructions dispatched." }, + { "p6-emon-kni-pref-miss", "Count the number of prefetch or weakly ordered instructions that miss all caches." }, + /* Specific Pentium M counters */ + { "p6-br-bac-missp-exec", "Count the number of branch instructions executed that were mispredicted at the Front End (BAC)." }, + { "p6-br-call-exec", "Count the number of call instructions executed." }, + { "p6-br-call-missp-exec", "Count the number of call instructions executed that were mispredicted." }, + { "p6-br-cnd-exec", "Count the number of conditional branch instructions executed" }, + { "p6-br-cnd-missp-exec", "Count the number of conditional branch instructions executed that were mispredicted." 
}, + { "p6-br-ind-call-exec", "Count the number of indirect call instructions executed" }, + { "p6-br-ind-exec", "Count the number of indirect branch instructions executed" }, + { "p6-br-ind-missp-exec", "Count the number of indirect branch instructions executed that were mispredicted." }, + { "p6-br-inst-exec", "Count the number of branch instructions executed but not necessarily retired." }, + { "p6-br-missp-exec", "Count the number of branch instructions executed that were mispredicted at execution." }, + { "p6-br-ret-bac-missp-exec", "Count the number of return instructions executed that were mispredicted at the Front End (BAC)." }, + { "p6-br-ret-exec", "Count the number of return instructions executed." }, + { "p6-br-ret-missp-exec", "Count the number of return instructions executed that were mispredicted at execution." }, + { "p6-emon-esp-uops", "Count the total number of micro-ops." }, + { "p6-emon-est-trans", "Count the number of Enhanced Intel SpeedStep transitions" }, + { "p6-emon-fused-uops-ret", "Count the number of retired fused micro-ops." }, + { "p6-emon-pref-rqsts-dn", "Count the number of downward prefetches issued." }, + { "p6-emon-pref-rqsts-up", "Count the number of upward prefetches issued." }, + { "p6-emon-simd-instr-retired", "Count the number of retired MMX instructions." }, + { "p6-emon-sse-sse2-comp-inst-retired", "Count the number of computational SSE instructions retired." }, + { "p6-emon-sse-sse2-inst-retired", "Count the number of SSE instructions retired." }, + { "p6-emon-synch-uops", "Count the number of sync micro-ops." }, + { "p6-emon-thermal-trip", "Count the duration or occurrences of thermal trips." }, + { "p6-emon-unfusion", "Count the number of unfusion events in the reorder buffer." 
}, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-p6-m.h b/src/freebsd/map-p6-m.h new file mode 100644 index 0000000..042a8e1 --- /dev/null +++ b/src/freebsd/map-p6-m.h @@ -0,0 +1,129 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6-M.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_P6_M +#define FreeBSD_MAP_P6_M + +enum NativeEvent_Value_P6_M_Processor { + /* P6 common events */ + PNE_P6_M_BACLEARS = PAPI_NATIVE_MASK, + PNE_P6_M_BR_BOGUS, + PNE_P6_M_BR_INST_DECODED, + PNE_P6_M_BR_INST_RETIRED, + PNE_P6_M_BR_MISS_PRED_RETIRED, + PNE_P6_M_BR_MISS_PRED_TAKEN_RET, + PNE_P6_M_BR_TAKEN_RETIRED, + PNE_P6_M_BTB_MISSES, + PNE_P6_M_BUS_BNR_DRV, + PNE_P6_M_BUS_DATA_RCV, + PNE_P6_M_BUS_DRDY_CLOCKS, + PNE_P6_M_BUS_HIT_DRV, + PNE_P6_M_BUS_HITM_DRV, + PNE_P6_M_BUS_LOCK_CLOCKS, + PNE_P6_M_BUS_REQ_OUTSTANDING, + PNE_P6_M_BUS_SNOOP_STALL, + PNE_P6_M_BUS_TRAN_ANY, + PNE_P6_M_BUS_TRAN_BRD, + PNE_P6_M_BUS_TRAN_BURST, + PNE_P6_M_BUS_TRAN_DEF, + PNE_P6_M_BUS_TRAN_IFETCH, + PNE_P6_M_BUS_TRAN_INVAL, + PNE_P6_M_BUS_TRAN_MEM, + PNE_P6_M_BUS_TRAN_POWER, + PNE_P6_M_BUS_TRAN_RFO, + PNE_P6_M_BUS_TRANS_IO, + PNE_P6_M_BUS_TRANS_P, + PNE_P6_M_BUS_TRANS_WB, + PNE_P6_M_CPU_CLK_UNHALTED, + PNE_P6_M_CYCLES_DIV_BUSY, + PNE_P6_M_CYCLES_IN_PENDING_AND_MASKED, + PNE_P6_M_CYCLES_INT_MASKED, + PNE_P6_M_DATA_MEM_REFS, + PNE_P6_M_DCU_LINES_IN, + PNE_P6_M_DCU_M_LINES_IN, + PNE_P6_M_DCU_M_LINES_OUT, + PNE_P6_M_DCU_MISS_OUTSTANDING, + PNE_P6_M_DIV, + PNE_P6_M_FLOPS, + PNE_P6_M_FP_ASSIST, + PNE_P6_M_FTP_COMPS_OPS_EXE, + PNE_P6_M_HW_INT_RX, + PNE_P6_M_IFU_FETCH, + PNE_P6_M_IFU_FETCH_MISS, + PNE_P6_M_IFU_MEM_STALL, + PNE_P6_M_ILD_STALL, + PNE_P6_M_INST_DECODED, + PNE_P6_M_INST_RETIRED, + PNE_P6_M_ITLB_MISS, + PNE_P6_M_L2_ADS, + PNE_P6_M_L2_DBUS_BUSY, + PNE_P6_M_L2_DBUS_BUSY_RD, + PNE_P6_M_L2_IFETCH, + PNE_P6_M_L2_LD, + PNE_P6_M_L2_LINES_IN, + PNE_P6_M_L2_LINES_OUT, + PNE_P6_M_L2M_LINES_INM, + 
PNE_P6_M_L2M_LINES_OUTM, + PNE_P6_M_L2_RQSTS, + PNE_P6_M_L2_ST, + PNE_P6_M_LD_BLOCKS, + PNE_P6_M_MISALIGN_MEM_REF, + PNE_P6_M_MUL, + PNE_P6_M_PARTIAL_RAT_STALLS, + PNE_P6_M_RESOURCE_STALL, + PNE_P6_M_SB_DRAINS, + PNE_P6_M_SEGMENT_REG_LOADS, + PNE_P6_M_UOPS_RETIRED, + /* Pentium 3 specific events; same order as the "Specific Pentium 3 counters" rows of P6_M_Processor_info */ + PNE_P6_M_FP_MMX_TRANS, + PNE_P6_M_MMX_ASSIST, + PNE_P6_M_MMX_INSTR_EXEC, + PNE_P6_M_MMX_INSTR_RET, + PNE_P6_M_MMX_SAT_INSTR_EXEC, + PNE_P6_M_MMX_UOPS_EXEC, + PNE_P6_M_RET_SEG_RENAMES, + PNE_P6_M_SEG_RENAME_STALLS, + PNE_P6_M_EMON_KNI_COMP_INST_RET, + PNE_P6_M_EMON_KNI_INST_RETIRED, + PNE_P6_M_EMON_KNI_PREF_DISPATCHED, + PNE_P6_M_EMON_KNI_PREF_MISS, + /* Pentium M specific events; same order as the "Specific Pentium M counters" rows of P6_M_Processor_info */ + PNE_P6_M_BR_BAC_MISSP_EXEC, + PNE_P6_M_BR_CALL_EXEC, + PNE_P6_M_BR_CALL_MISSP_EXEC, + PNE_P6_M_BR_CND_EXEC, + PNE_P6_M_BR_CND_MISSP_EXEC, + PNE_P6_M_BR_IND_CALL_EXEC, + PNE_P6_M_BR_IND_EXEC, + PNE_P6_M_BR_IND_MISSP_EXEC, + PNE_P6_M_BR_INST_EXEC, + PNE_P6_M_BR_MISSP_EXEC, + PNE_P6_M_BR_RET_BAC_MISSP_EXEC, + PNE_P6_M_BR_RET_EXEC, + PNE_P6_M_BR_RET_MISSP_EXEC, + PNE_P6_M_EMON_ESP_UOPS, + PNE_P6_M_EMON_EST_TRANS, + PNE_P6_M_EMON_FUSED_UOPS_RET, + PNE_P6_M_EMON_PREF_RQSTS_DN, + PNE_P6_M_EMON_PREF_RQSTS_UP, + PNE_P6_M_EMON_SIMD_INSTR_RETIRD, /* sic: spelled RETIRD; lines up with the "p6-emon-simd-instr-retired" row */ + PNE_P6_M_EMON_SSE_SSE2_COMP_INST_RETIRED, + PNE_P6_M_EMON_SSE_SSE2_INST_RETIRED, + PNE_P6_M_EMON_SYNCH_UOPS, + PNE_P6_M_EMON_THERMAL_TRIP, + PNE_P6_M_EMON_UNFUSION, + PNE_P6_M_NATNAME_GUARD /* last enumerator, one past PNE_P6_M_EMON_UNFUSION; presumably an end-of-range bound rather than an event -- confirm in callers */ +}; + +extern Native_Event_LabelDescription_t P6_M_Processor_info[]; +extern hwi_search_t P6_M_Processor_map[]; + +#endif diff --git a/src/freebsd/map-p6.c b/src/freebsd/map-p6.c new file mode 100644 index 0000000..7c720d4 --- /dev/null +++ b/src/freebsd/map-p6.c @@ -0,0 +1,100 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + + 
+/**************************************************************************** + P6 SUBSTRATE + P6 SUBSTRATE + P6 SUBSTRATE (aka Pentium Pro) + P6 SUBSTRATE + P6 SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_P6Processor must match P6Processor_info: one { name, description } row per enumerator, in the same order, closed by a { NULL, NULL } row +*/ + +Native_Event_LabelDescription_t P6Processor_info[] = +{ + { "p6-baclears", "Count the number of times a static branch prediction was made by the branch decoder because the BTB did not have a prediction." }, + { "p6-br-bogus", "Count the number of bogus branches." }, + { "p6-br-inst-decoded", "Count the number of branch instructions decoded." }, + { "p6-br-inst-retired", "Count the number of branch instructions retired." }, + { "p6-br-miss-pred-retired", "Count the number of mispredicted branch instructions retired." }, + { "p6-br-miss-pred-taken-ret", "Count the number of taken mispredicted branches retired." }, + { "p6-br-taken-retired", "Count the number of taken branches retired." }, + { "p6-btb-misses", "Count the number of branches for which the BTB did not produce a prediction. "}, + { "p6-bus-bnr-drv", "Count the number of bus clock cycles during which this processor is driving the BNR# pin." }, + { "p6-bus-data-rcv", "Count the number of bus clock cycles during which this processor is receiving data." }, + { "p6-bus-drdy-clocks", "Count the number of clocks during which DRDY# is asserted." }, + { "p6-bus-hit-drv", "Count the number of bus clock cycles during which this processor is driving the HIT# pin." }, + { "p6-bus-hitm-drv", "Count the number of bus clock cycles during which this processor is driving the HITM# pin." }, + { "p6-bus-lock-clocks", "Count the number of clocks during which LOCK# is asserted on the external system bus." }, + { "p6-bus-req-outstanding", "Count the number of bus requests outstanding in any given cycle." }, + { "p6-bus-snoop-stall", "Count the number of clock cycles during which the bus is snoop stalled." 
}, + { "p6-bus-tran-any", "Count the number of completed bus transactions of any kind." }, + { "p6-bus-tran-brd", "Count the number of burst read transactions." }, + { "p6-bus-tran-burst", "Count the number of completed burst transactions." }, + { "p6-bus-tran-def", "Count the number of completed deferred transactions." }, + { "p6-bus-tran-ifetch", "Count the number of completed instruction fetch transactions." }, + { "p6-bus-tran-inval", "Count the number of completed invalidate transactions." }, + { "p6-bus-tran-mem", "Count the number of completed memory transactions." }, + { "p6-bus-tran-pwr", "Count the number of completed partial write transactions." }, + { "p6-bus-tran-rfo", "Count the number of completed read-for-ownership transactions." }, + { "p6-bus-trans-io", "Count the number of completed I/O transactions." }, + { "p6-bus-trans-p", "Count the number of completed partial transactions." }, + { "p6-bus-trans-wb", "Count the number of completed write-back transactions." }, + { "p6-cpu-clk-unhalted", "Count the number of cycles during which the processor was not halted." }, + { "p6-cycles-div-busy", "Count the number of cycles during which the divider is busy and cannot accept new divides." }, + { "p6-cycles-in-pending-and-masked", "Count the number of processor cycles for which interrupts were disabled and interrupts were pending." }, + { "p6-cycles-int-masked", "Count the number of processor cycles for which interrupts were disabled." }, + { "p6-data-mem-refs", "Count all loads and all stores using any memory type, including internal retries." }, + { "p6-dcu-lines-in", "Count the total lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-in", "Count the number of M state lines allocated in the data cache unit." }, + { "p6-dcu-m-lines-out", "Count the number of M state lines evicted from the data cache unit." 
}, + { "p6-dcu-miss-outstanding", "Count the weighted number of cycles while a data cache unit miss is outstanding, incremented by the number of outstanding cache misses at any time."}, + { "p6-div", "Count the number of integer and floating-point divides including speculative divides." }, + { "p6-flops", "Count the number of computational floating point operations retired." }, + { "p6-fp-assist", "Count the number of floating point exceptions handled by microcode." }, + { "p6-fp-comps-ops-exe", "Count the number of computation floating point operations executed." }, + { "p6-hw-int-rx", "Count the number of hardware interrupts received." }, + { "p6-ifu-fetch", "Count the number of instruction fetches, both cacheable and non-cacheable." }, + { "p6-ifu-fetch-miss", "Count the number of instruction fetch misses" }, + { "p6-ifu-mem-stall", "Count the number of cycles instruction fetch is stalled for any reason." }, + { "p6-ild-stall", "Count the number of cycles the instruction length decoder is stalled." }, + { "p6-inst-decoded", "Count the number of instructions decoded." }, + { "p6-inst-retired", "Count the number of instructions retired." }, + { "p6-itlb-miss", "Count the number of instruction TLB misses." }, + { "p6-l2-ads", "Count the number of L2 address strobes." }, + { "p6-l2-dbus-busy", "Count the number of cycles during which the L2 cache data bus was busy." }, + { "p6-l2-dbus-busy-rd", "Count the number of cycles during which the L2 cache data bus was busy transferring read data from L2 to the processor." }, + { "p6-l2-ifetch", "Count the number of L2 instruction fetches." }, + { "p6-l2-ld", "Count the number of L2 data loads." }, + { "p6-l2-lines-in", "Count the number of L2 lines allocated." }, + { "p6-l2-lines-out", "Count the number of L2 lines evicted." }, + { "p6-l2-m-lines-inm", "Count the number of modified lines allocated in L2 cache." }, + { "p6-l2-m-lines-outm", "Count the number of L2 M-state lines evicted." 
}, + { "p6-l2-rqsts", "Count the total number of L2 requests." }, + { "p6-l2-st", "Count the number of L2 data stores." }, + { "p6-ld-blocks", "Count the number of load operations delayed due to store buffer blocks." }, + { "p6-misalign-mem-ref", "Count the number of misaligned data memory references (crossing a 64 bit boundary)." }, + { "p6-mul", "Count the number of floating point multiplies, including speculative multiplies." }, + { "p6-partial-rat-stalls", "Count the number of cycles or events for partial stalls." }, + { "p6-resource-stalls", "Count the number of cycles there was a resource related stall of any kind." }, + { "p6-sb-drains", "Count the number of cycles the store buffer is draining." }, + { "p6-segment-reg-loads", "Count the number of segment register loads." }, + { "p6-uops-retired", "Count the number of micro-ops retired." }, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-p6.h b/src/freebsd/map-p6.h new file mode 100644 index 0000000..c5edbe9 --- /dev/null +++ b/src/freebsd/map-p6.h @@ -0,0 +1,90 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-p6.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_P6 +#define FreeBSD_MAP_P6 + +enum NativeEvent_Value_P6Processor { + PNE_P6_BACLEARS = PAPI_NATIVE_MASK, + PNE_P6_BR_BOGUS, + PNE_P6_BR_INST_DECODED, + PNE_P6_BR_INST_RETIRED, + PNE_P6_BR_MISS_PRED_RETIRED, + PNE_P6_BR_MISS_PRED_TAKEN_RET, + PNE_P6_BR_TAKEN_RETIRED, + PNE_P6_BTB_MISSES, + PNE_P6_BUS_BNR_DRV, + PNE_P6_BUS_DATA_RCV, + PNE_P6_BUS_DRDY_CLOCKS, + PNE_P6_BUS_HIT_DRV, + PNE_P6_BUS_HITM_DRV, + PNE_P6_BUS_LOCK_CLOCKS, + PNE_P6_BUS_REQ_OUTSTANDING, + PNE_P6_BUS_SNOOP_STALL, + PNE_P6_BUS_TRAN_ANY, + PNE_P6_BUS_TRAN_BRD, + PNE_P6_BUS_TRAN_BURST, + PNE_P6_BUS_TRAN_DEF, + PNE_P6_BUS_TRAN_IFETCH, + PNE_P6_BUS_TRAN_INVAL, + PNE_P6_BUS_TRAN_MEM, + PNE_P6_BUS_TRAN_POWER, + PNE_P6_BUS_TRAN_RFO, + PNE_P6_BUS_TRANS_IO, + PNE_P6_BUS_TRANS_P, + 
PNE_P6_BUS_TRANS_WB, + PNE_P6_CPU_CLK_UNHALTED, + PNE_P6_CYCLES_DIV_BUSY, + PNE_P6_CYCLES_IN_PENDING_AND_MASKED, + PNE_P6_CYCLES_INT_MASKED, + PNE_P6_DATA_MEM_REFS, + PNE_P6_DCU_LINES_IN, + PNE_P6_DCU_M_LINES_IN, + PNE_P6_DCU_M_LINES_OUT, + PNE_P6_DCU_MISS_OUTSTANDING, + PNE_P6_DIV, + PNE_P6_FLOPS, + PNE_P6_FP_ASSIST, + PNE_P6_FTP_COMPS_OPS_EXE, + PNE_P6_HW_INT_RX, + PNE_P6_IFU_FETCH, + PNE_P6_IFU_FETCH_MISS, + PNE_P6_IFU_MEM_STALL, + PNE_P6_ILD_STALL, + PNE_P6_INST_DECODED, + PNE_P6_INST_RETIRED, + PNE_P6_ITLB_MISS, + PNE_P6_L2_ADS, + PNE_P6_L2_DBUS_BUSY, + PNE_P6_L2_DBUS_BUSY_RD, + PNE_P6_L2_IFETCH, + PNE_P6_L2_LD, + PNE_P6_L2_LINES_IN, + PNE_P6_L2_LINES_OUT, + PNE_P6_L2M_LINES_INM, + PNE_P6_L2M_LINES_OUTM, + PNE_P6_L2_RQSTS, + PNE_P6_L2_ST, + PNE_P6_LD_BLOCKS, + PNE_P6_MISALIGN_MEM_REF, + PNE_P6_MUL, + PNE_P6_PARTIAL_RAT_STALLS, + PNE_P6_RESOURCE_STALL, + PNE_P6_SB_DRAINS, + PNE_P6_SEGMENT_REG_LOADS, + PNE_P6_UOPS_RETIRED, + PNE_P6_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t P6Processor_info[]; +extern hwi_search_t P6Processor_map[]; + +#endif diff --git a/src/freebsd/map-unknown.c b/src/freebsd/map-unknown.c new file mode 100644 index 0000000..63eb69b --- /dev/null +++ b/src/freebsd/map-unknown.c @@ -0,0 +1,37 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-unknown.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + +/**************************************************************************** + UNKNOWN SUBSTRATE + UNKNOWN SUBSTRATE + UNKNOWN SUBSTRATE + UNKNOWN SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_UnknownProcessor must match UnkProcessor_info +*/ + +Native_Event_LabelDescription_t UnkProcessor_info[] = +{ + { "branches", "Measure the number of branches retired." 
}, + { "branch-mispredicts", "Measure the number of retired branches that were mispredicted." }, + /* { "cycles", "Measure processor cycles." }, -- kept disabled so the rows stay aligned with the enum, where PNE_UNK_CYCLES is commented out because libpmc only supports cycles in system wide mode, which requires root privileges */ + { "dc-misses", "Measure the number of data cache misses." }, + { "ic-misses", "Measure the number of instruction cache misses." }, + { "instructions", "Measure the number of instructions retired." }, + { "interrupts", "Measure the number of interrupts seen." }, + { "unhalted-cycles", "Measure the number of cycles the processor is not in a halted or sleep state." }, + { NULL, NULL } +}; diff --git a/src/freebsd/map-unknown.h b/src/freebsd/map-unknown.h new file mode 100644 index 0000000..32885f4 --- /dev/null +++ b/src/freebsd/map-unknown.h @@ -0,0 +1,31 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-unknown.h +* CVS: $Id$ +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef FreeBSD_MAP_UNKNOWN +#define FreeBSD_MAP_UNKNOWN + +enum NativeEvent_Value_UnknownProcessor { + PNE_UNK_BRANCHES = PAPI_NATIVE_MASK, + PNE_UNK_BRANCH_MISPREDICTS, + /* PNE_UNK_CYCLES, -- libpmc only supports cycles in system wide mode and this + requires root privileges */ + PNE_UNK_DC_MISSES, + PNE_UNK_IC_MISSES, + PNE_UNK_INSTRUCTIONS, + PNE_UNK_INTERRUPTS, + PNE_UNK_UNHALTED_CYCLES, + PNE_UNK_NATNAME_GUARD +}; + +extern Native_Event_LabelDescription_t UnkProcessor_info[]; +extern hwi_search_t UnkProcessor_map[]; + +#endif diff --git a/src/freebsd/map-westmere.c b/src/freebsd/map-westmere.c new file mode 100644 index 0000000..fe09111 --- /dev/null +++ b/src/freebsd/map-westmere.c @@ -0,0 +1,524 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-westmere.c +* Author: George Neville-Neil +* gnn@freebsd.org +* Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + + + /**************************************************************************** + 
Westmere SUBSTRATE + Westmere SUBSTRATE + Westmere SUBSTRATE + Westmere SUBSTRATE + Westmere SUBSTRATE +****************************************************************************/ + +/* + NativeEvent_Value_Westmere must match Westmere_info +*/ + +Native_Event_LabelDescription_t WestmereProcessor_info[] = +{ + {"LOAD_BLOCK.OVERLAP_STORE", "Loads that partially overlap an earlier store"}, + {"SB_DRAIN.ANY", "All Store buffer stall cycles"}, + {"MISALIGN_MEMORY.STORE", "All store referenced with misaligned address"}, + {"STORE_BLOCKS.AT_RET", "Counts number of loads delayed with at-Retirement block code. The following loads need to be executed at retirement and wait for all senior stores on the same thread to be drained: load splitting across 4K boundary (page split), load accessing uncacheable (UC or USWC) memory, load lock, and load with page table in UC or USWC memory region."}, + {"STORE_BLOCKS.L1D_BLOCK", "Cacheable loads delayed with L1D block code"}, + {"PARTIAL_ADDRESS_ALIAS", "Counts false dependency due to partial address aliasing"}, + {"DTLB_LOAD_MISSES.ANY", "Counts all load misses that cause a page walk"}, + {"DTLB_LOAD_MISSES.WALK_COMPLETED", "Counts number of completed page walks due to load miss in the STLB."}, + {"DTLB_LOAD_MISSES.WALK_CYCLES", "Cycles PMH is busy with a page walk due to a load miss in the STLB."}, + {"DTLB_LOAD_MISSES.STLB_HIT", "Number of cache load STLB hits"}, + {"DTLB_LOAD_MISSES.PDE_MISS", "Number of DTLB cache load misses where the low part of the linear tophysical address translation was missed."}, + {"MEM_INST_RETIRED.LOADS", "Counts the number of instructions with an architecturally-visible store retired on the architected path. In conjunction with ld_lat facility"}, + {"MEM_INST_RETIRED.STORES", "Counts the number of instructions with an architecturally-visible store retired on the architected path. 
In conjunction with ld_lat facility"}, + {"MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD", "Counts the number of instructions exceeding the latency specified with ld_lat facility. In conjunction with ld_lat facility"}, + {"MEM_STORE_RETIRED.DTLB_MISS", "The event counts the number of retired stores that missed the DTLB. The DTLB miss is not counted if the store operation causes a fault. Does not counter prefetches. Counts both primary and secondary misses to the TLB"}, + {"UOPS_ISSUED.ANY", "Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i.e. the UOPs issued from the front end to the back end."}, + {"UOPS_ISSUED.STALLED_CYCLES", "Counts the number of cycles no Uops issued by the Register Allocation Table to the Reservation Station, i.e. the UOPs issued from the front end to the back end."}, + {"UOPS_ISSUED.FUSED", "Counts the number of fused Uops that were issued from the Register Allocation Table to the Reservation Station."}, + {"MEM_UNCORE_RETIRED.LOCAL_HITM", "Load instructions retired that HIT modified data in sibling core (Precise Event)"}, + {"MEM_UNCORE_RETIRED.LOCAL_DRAM_AND_REMOTE_CACHE_HIT", "Load instructions retired local dram and remote cache HIT data sources (Precise Event)"}, + {"MEM_UNCORE_RETIRED.LOCAL_DRAM", "Load instructions retired with a data source of local DRAM or locally homed remote cache HITM (Precise Event)"}, + {"MEM_UNCORE_RETIRED.REMOTE_DRAM", "Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)"}, + {"MEM_UNCORE_RETIRED.UNCACHEABLE", "Load instructions retired I/O (Precise Event)"}, + {"FP_COMP_OPS_EXE.X87", "Counts the number of FP Computational Uops Executed. The number of FADD, FSUB, FCOM, FMULs, integer MULsand IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. 
This event does not distinguish an FADD used in the middle of a transcendental flow from a separate FADD instruction."}, + {"FP_COMP_OPS_EXE.MMX", "Counts number of MMX Uops executed."}, + {"FP_COMP_OPS_EXE.SSE_FP", "Counts number of SSE and SSE2 FP uops executed."}, + {"FP_COMP_OPS_EXE.SSE2_INTEGER", "Counts number of SSE2 integer uops executed."}, + {"FP_COMP_OPS_EXE.SSE_FP_PACKED", "Counts number of SSE FP packed uops executed."}, + {"FP_COMP_OPS_EXE.SSE_FP_SCALAR", "Counts number of SSE FP scalar uops executed."}, + {"FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION", "Counts number of SSE* FP single precision uops executed."}, + {"FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION", "Counts number of SSE* FP double precision uops executed."}, + {"SIMD_INT_128.PACKED_MPY", "Counts number of 128 bit SIMD integer multiply operations."}, + {"SIMD_INT_128.PACKED_SHIFT", "Counts number of 128 bit SIMD integer shift operations."}, + {"SIMD_INT_128.PACK", "Counts number of 128 bit SIMD integer pack operations."}, + {"SIMD_INT_128.UNPACK", "Counts number of 128 bit SIMD integer unpack operations."}, + {"SIMD_INT_128.PACKED_LOGICAL", "Counts number of 128 bit SIMD integer logical operations."}, + {"SIMD_INT_128.PACKED_ARITH", "Counts number of 128 bit SIMD integer arithmetic operations."}, + {"SIMD_INT_128.SHUFFLE_MOVE", "Counts number of 128 bit SIMD integer shuffle and move operations."}, + {"LOAD_DISPATCH.RS", "Counts number of loads dispatched from the Reservation Station that bypass the Memory Order Buffer."}, + {"LOAD_DISPATCH.RS_DELAYED", "Counts the number of delayed RS dispatches at the stage latch. 
If an RS dispatch can not bypass to LB, it has another chance to dispatch from the one-cycle delayed staging latch before it is written into the LB."}, + {"LOAD_DISPATCH.MOB", "Counts the number of loads dispatched from the Reservation Station to the Memory Order Buffer."}, + {"LOAD_DISPATCH.ANY", "Counts all loads dispatched from the Reservation Station."}, + {"ARITH.CYCLES_DIV_BUSY", "Counts the number of cycles the divider is busy executing divide or square root operations. The divide can be integer, X87 or Streaming SIMD Extensions (SSE). The square root operation can be either X87 or SSE. Set 'edge =1, invert=1, cmask=1' to count the number of divides. Count may be incorrect When SMT is on."}, + {"ARITH.MUL", "Counts the number of multiply operations executed. This includes integer as well as floating point multiply operations but excludes DPPS mul and MPSAD. Count may be incorrect When SMT is on."}, + {"INST_QUEUE_WRITES", "Counts the number of instructions written into the instruction queue every cycle."}, + {"INST_DECODED.DEC0", "Counts number of instructions that require decoder 0 to be decoded. Usually, this means that the instruction maps to more than 1 uop"}, + {"TWO_UOP_INSTS_DECODED", "An instruction that generates two uops was decoded"}, + {"INST_QUEUE_WRITE_CYCLES", "This event counts the number of cycles during which instructions are written to the instruction queue. Dividing this counter by the number of instructions written to the instruction queue (INST_QUEUE_WRITES) yields the average number of instructions decoded each cycle. If this number is less than four and the pipe stalls, this indicates that the decoder is failing to decode enough instructions per cycle to sustain the 4-wide pipeline. If SSE* instructions that are 6 bytes or longer arrive one after another, then front end throughput may limit execution speed. 
"}, + {"LSD_OVERFLOW", "Number of loops that can not stream from the instruction queue."}, + {"L2_RQSTS.LD_HIT", "Counts number of loads that hit the L2 cache. L2 loads include both L1D demand misses as well as L1D prefetches. L2 loads can be rejected for various reasons. Only non rejected loads are counted."}, + {"L2_RQSTS.LD_MISS", "Counts the number of loads that miss the L2 cache. L2 loads include both L1D demand misses as well as L1D prefetches."}, + {"L2_RQSTS.LOADS", "Counts all L2 load requests. L2 loads include both L1D demand misses as well as L1D prefetches."}, + {"L2_RQSTS.RFO_HIT", "Counts the number of store RFO requests that hit the L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Count includes WC memory requests, where the data is not fetched but the permission to write the line is required."}, + {"L2_RQSTS.RFO_MISS", "Counts the number of store RFO requests that miss the L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches."}, + {"L2_RQSTS.RFOS", "Counts all L2 store RFO requests. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches."}, + {"L2_RQSTS.IFETCH_HIT", "Counts number of instruction fetches that hit the L2 cache. L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, + {"L2_RQSTS.IFETCH_MISS", "Counts number of instruction fetches that miss the L2 cache. L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, + {"L2_RQSTS.IFETCHES", "Counts all instruction fetches. 
L2 instruction fetches include both L1I demand misses as well as L1I instruction prefetches."}, + {"L2_RQSTS.PREFETCH_HIT", "Counts L2 prefetch hits for both code and data."}, + {"L2_RQSTS.PREFETCH_MISS", "Counts L2 prefetch misses for both code and data."}, + {"L2_RQSTS.PREFETCHES", "Counts all L2 prefetches for both code and data."}, + {"L2_RQSTS.MISS", "Counts all L2 misses for both code and data."}, + {"L2_RQSTS.REFERENCES", "Counts all L2 requests for both code and data."}, + {"L2_DATA_RQSTS.DEMAND.I_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.DEMAND.S_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the S (shared) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.DEMAND.E_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the E (exclusive) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.DEMAND.M_STATE", "Counts number of L2 data demand loads where the cache line to be loaded is in the M (modified) state. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.DEMAND.MESI", "Counts all L2 data demand requests. L2 demand loads are both L1D demand misses and L1D prefetches."}, + {"L2_DATA_RQSTS.PREFETCH.I_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss."}, + {"L2_DATA_RQSTS.PREFETCH.S_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the S (shared) state. 
A prefetch RFO will miss on an S state line, while a prefetch read will hit on an S state line."}, + {"L2_DATA_RQSTS.PREFETCH.E_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the E (exclusive) state."}, + {"L2_DATA_RQSTS.PREFETCH.M_STATE", "Counts number of L2 prefetch data loads where the cache line to be loaded is in the M (modified) state."}, + {"L2_DATA_RQSTS.PREFETCH.MESI", "Counts all L2 prefetch requests."}, + {"L2_DATA_RQSTS.ANY", "Counts all L2 data requests."}, + {"L2_WRITE.RFO.I_STATE", "Counts number of L2 demand store RFO requests where the cache line to be loaded is in the I (invalid) state, i.e, a cache miss. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, + {"L2_WRITE.RFO.S_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, + {"L2_WRITE.RFO.M_STATE", "Counts number of L2 store RFO requests where the cache line to be loaded is in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, + {"L2_WRITE.RFO.HIT", "Counts number of L2 store RFO requests where the cache line to be loaded is in either the S, E or M states. The L1D prefetcher does not issue a RFO prefetch."}, + {"L2_WRITE.RFO.MESI", "Counts all L2 store RFO requests.The L1D prefetcher does not issue a RFO prefetch. This is a demand RFO request."}, + {"L2_WRITE.LOCK.I_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the I (invalid) state, i.e. 
a cache miss."}, + {"L2_WRITE.LOCK.S_STATE", "Counts number of L2 lock RFO requests where the cache line to be loaded is in the S (shared) state."}, + {"L2_WRITE.LOCK.E_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the E (exclusive) state."}, + {"L2_WRITE.LOCK.M_STATE", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the M (modified) state."}, + {"L2_WRITE.LOCK.HIT", "Counts number of L2 demand lock RFO requests where the cache line to be loaded is in either the S, E, or M state."}, + {"L2_WRITE.LOCK.MESI", "Counts all L2 demand lock RFO requests."}, + {"L1D_WB_L2.I_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the I (invalid) state, i.e. a cache miss."}, + {"L1D_WB_L2.S_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the S state."}, + {"L1D_WB_L2.E_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the E (exclusive) state."}, + {"L1D_WB_L2.M_STATE", "Counts number of L1 writebacks to the L2 where the cache line to be written is in the M (modified) state."}, + {"L1D_WB_L2.MESI", "Counts all L1 writebacks to the L2."}, + {"L3_LAT_CACHE.REFERENCE", "Counts uncore Last Level Cache references. Because cache hierarchy, cache sizes and other implementation-specific characteristics; value comparison to estimate performance differences is not recommended. See Table A-1."}, + {"L3_LAT_CACHE.MISS", "Counts uncore Last Level Cache misses. Because cache hierarchy, cache sizes and other implementation-specific characteristics; value comparison to estimate performance differences is not recommended. See Table A-1."}, + {"CPU_CLK_UNHALTED.THREAD_P", "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
The core frequency may change from time to time due to power or thermal throttling. See Table A-1."}, + {"CPU_CLK_UNHALTED.REF_P", "Increments at the frequency of TSC when not halted. See Table A-1."}, + {"DTLB_MISSES.ANY", "Counts the number of misses in the STLB which causes a page walk."}, + {"DTLB_MISSES.WALK_COMPLETED", "Counts number of misses in the STLB which resulted in a completed page walk."}, + {"DTLB_MISSES.WALK_CYCLES", "Counts cycles of page walk due to misses in the STLB."}, + {"DTLB_MISSES.STLB_HIT", "Counts the number of DTLB first level misses that hit in the second level TLB. This event is only relevant if the core contains multiple DTLB levels."}, + {"DTLB_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to misses in the STLB."}, + {"LOAD_HIT_PRE", "Counts load operations sent to the L1 data cache while a previous SSE prefetch instruction to the same cache line has started prefetching but has not yet finished."}, + {"L1D_PREFETCH.REQUESTS", "Counts number of hardware prefetch requests dispatched out of the prefetch FIFO."}, + {"L1D_PREFETCH.MISS", "Counts number of hardware prefetch requests that miss the L1D. There are two prefetchers in the L1D. A streamer, which predicts lines sequentially after this one should be fetched, and the IP prefetcher that remembers access patterns for the current instruction. The streamer prefetcher stops on an L1D hit, while the IP prefetcher does not."}, + {"L1D_PREFETCH.TRIGGERS", "Counts number of prefetch requests triggered by the Finite State Machine and pushed into the prefetch FIFO. Some of the prefetch requests are dropped due to overwrites or competition between the IP index prefetcher and streamer prefetcher. 
The prefetch FIFO contains 4 entries."}, + {"EPT.WALK_CYCLES", "Counts Extended Page walk cycles."}, + {"L1D.REPL", "Counts the number of lines brought into the L1 data cache.Counter 0, 1 only."}, + {"L1D.M_REPL", "Counts the number of modified lines brought into the L1 data cache. Counter 0, 1 only."}, + {"L1D.M_EVICT", "Counts the number of modified lines evicted from the L1 data cache due to replacement. Counter 0, 1 only."}, + {"L1D.M_SNOOP_EVICT", "Counts the number of modified lines evicted from the L1 data cache due to snoop HITM intervention. Counter 0, 1 only."}, + {"L1D_CACHE_PREFETCH_LOCK_FB_HIT", "Counts the number of cacheable load lock speculated instructions accepted into the fill buffer."}, + {"L1D_CACHE_LOCK_FB_HIT", "Counts the number of cacheable load lock speculated or retired instructions accepted into the fill buffer."}, + {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA", "Counts weighted cycles of offcore demand data read requests. Does not include L2 prefetch requests. Counter 0."}, + {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE", "Counts weighted cycles of offcore demand code read requests. Does not include L2 prefetch requests. Counter 0."}, + {"OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO", "Counts weighted cycles of offcore demand RFO requests. Does not include L2 prefetch requests. Counter 0."}, + {"OFFCORE_REQUESTS_OUTSTANDING.ANY.READ", "Counts weighted cycles of offcore read requests of any kind. Include L2 prefetch requests. Counter 0."}, + {"CACHE_LOCK_CYCLES.L1D_L2", "Cycle count during which the L1D and L2 are locked. A lock is asserted when there is a locked memory access, due to uncacheable memory, a locked operation that spans two cache lines, or a page walk from an uncacheable page table. Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such accesses."}, + {"CACHE_LOCK_CYCLES.L1D", "Counts the number of cycles that cacheline in the L1 data cache unit is locked. 
Counter 0, 1 only."}, + {"IO_TRANSACTIONS", "Counts the number of completed I/O transactions."}, + {"L1I.HITS", "Counts all instruction fetches that hit the L1 instruction cache."}, + {"L1I.MISSES", "Counts all instruction fetches that miss the L1I cache. This includes instruction cache misses, streaming buffer misses, victim cache misses and uncacheable fetches. An instruction fetch miss is counted only once and not once for every cycle it is outstanding."}, + {"L1I.READS", "Counts all instruction fetches, including uncacheable fetches that bypass the L1I."}, + {"L1I.CYCLES_STALLED", "Cycle counts for which an instruction fetch stalls due to a L1I cache miss, ITLB miss or ITLB fault."}, + {"LARGE_ITLB.HIT", "Counts number of large ITLB hits."}, + {"ITLB_MISSES.ANY", "Counts the number of misses in all levels of the ITLB which causes a page walk."}, + {"ITLB_MISSES.WALK_COMPLETED", "Counts number of misses in all levels of the ITLB which resulted in a completed page walk."}, + {"ITLB_MISSES.WALK_CYCLES", "Counts ITLB miss page walk cycles."}, + {"ITLB_MISSES.LARGE_WALK_COMPLETED", "Counts number of completed large page walks due to misses in the STLB."}, + {"ILD_STALL.LCP", "Cycles Instruction Length Decoder stalls due to length changing prefixes: 66, 67 or REX.W (for EM64T) instructions which change the length of the decoded instruction."}, + {"ILD_STALL.MRU", "Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU). 
Most Recently Used (MRU) bypass."}, + {"ILD_STALL.IQ_FULL", "Stall cycles due to a full instruction queue."}, + {"ILD_STALL.REGEN", "Counts the number of regen stalls."}, + {"ILD_STALL.ANY", "Counts any cycles the Instruction Length Decoder is stalled."}, + {"BR_INST_EXEC.COND", "Counts the number of conditional near branch instructions executed, but not necessarily retired."}, + {"BR_INST_EXEC.DIRECT", "Counts all unconditional near branch instructions excluding calls and indirect branches."}, + {"BR_INST_EXEC.INDIRECT_NON_CALL", "Counts the number of executed indirect near branch instructions that are not calls."}, + {"BR_INST_EXEC.NON_CALLS", "Counts all non call near branch instructions executed, but not necessarily retired."}, + {"BR_INST_EXEC.RETURN_NEAR", "Counts indirect near branches that have a return mnemonic."}, + {"BR_INST_EXEC.DIRECT_NEAR_CALL", "Counts unconditional near call branch instructions, excluding non call branch, executed."}, + {"BR_INST_EXEC.INDIRECT_NEAR_CALL", "Counts indirect near calls, including both register and memory indirect, executed."}, + {"BR_INST_EXEC.NEAR_CALLS", "Counts all near call branches executed, but not necessarily retired."}, + {"BR_INST_EXEC.TAKEN", "Counts taken near branches executed, but not necessarily retired."}, + {"BR_INST_EXEC.ANY", "Counts all near executed branches (not necessarily retired). This includes only instructions and not micro-op branches. Frequent branching is not necessarily a major performance issue. 
However frequent branch mispredictions may be a problem."}, + {"BR_MISP_EXEC.COND", "Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired."}, + {"BR_MISP_EXEC.DIRECT", "Counts mispredicted macro unconditional near branch instructions, excluding calls and indirect branches (should always be 0)."}, + {"BR_MISP_EXEC.INDIRECT_NON_CALL", "Counts the number of executed mispredicted indirect near branch instructions that are not calls."}, + {"BR_MISP_EXEC.NON_CALLS", "Counts mispredicted non call near branches executed, but not necessarily retired."}, + {"BR_MISP_EXEC.RETURN_NEAR", "Counts mispredicted indirect branches that have a rear return mnemonic."}, + {"BR_MISP_EXEC.DIRECT_NEAR_CALL", "Counts mispredicted non-indirect near calls executed, (should always be 0)."}, + {"BR_MISP_EXEC.INDIRECT_NEAR_CALL", "Counts mispredicted indirect near calls executed, including both register and memory indirect."}, + {"BR_MISP_EXEC.NEAR_CALLS", "Counts all mispredicted near call branches executed, but not necessarily retired."}, + {"BR_MISP_EXEC.TAKEN", "Counts executed mispredicted near branches that are taken, but not necessarily retired."}, + {"BR_MISP_EXEC.ANY", "Counts the number of mispredicted near branch instructions that were executed, but not necessarily retired."}, + {"RESOURCE_STALLS.ANY", "Counts the number of Allocator resource related stalls. Includes register renaming buffer entries, memory buffer entries. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations. 
Does not include stalls due to SuperQ (off core) queue full, too many cache misses, etc."}, + {"RESOURCE_STALLS.LOAD", "Counts the cycles of stall due to lack of load buffer for load operation."}, + {"RESOURCE_STALLS.RS_FULL", "This event counts the number of cycles when the number of instructions in the pipeline waiting for execution reaches the limit the processor can handle. A high count of this event indicates that there are long latency operations in the pipe (possibly load and store operations that miss the L2 cache, or instructions dependent upon instructions further down the pipeline that have yet to retire). When RS is full, new instructions can not enter the reservation station and start execution."}, + {"RESOURCE_STALLS.STORE", "This event counts the number of cycles that a resource related stall will occur due to the number of store instructions reaching the limit of the pipeline, (i.e. all store buffers are used). The stall ends when a store instruction commits its data to the cache or memory."}, + {"RESOURCE_STALLS.ROB_FULL", "Counts the cycles of stall due to re-order buffer full."}, + {"RESOURCE_STALLS.FPCW", "Counts the number of cycles while execution was stalled due to writing the floating-point unit (FPU) control word."}, + {"RESOURCE_STALLS.MXCSR", "Stalls due to the MXCSR register rename occurring too close to a previous MXCSR rename. The MXCSR provides control and status for the MMX registers."}, + {"RESOURCE_STALLS.OTHER", "Counts the number of cycles while execution was stalled due to other resource issues."}, + {"MACRO_INSTS.FUSIONS_DECODED", "Counts the number of instructions decoded that are macro-fused but not necessarily executed or retired."}, + {"BACLEAR_FORCE_IQ", "Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ is also responsible for providing conditional branch prediction direction based on a static scheme and dynamic data provided by the L2 Branch Prediction Unit. 
If the conditional branch target is not found in the Target Array and the IQ predicts that the branch is taken, then the IQ will force the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline."}, + {"LSD.UOPS", "Counts the number of micro-ops delivered by loop stream detector. Use cmask=1 and invert to count cycles."}, + {"ITLB_FLUSH", "Counts the number of ITLB flushes"}, + {"OFFCORE_REQUESTS.DEMAND.READ_DATA", "Counts number of offcore demand data read requests. Does not count L2 prefetch requests."}, + {"OFFCORE_REQUESTS.DEMAND.READ_CODE", "Counts number of offcore demand code read requests. Does not count L2 prefetch requests."}, + {"OFFCORE_REQUESTS.DEMAND.RFO", "Counts number of offcore demand RFO requests. Does not count L2 prefetch requests."}, + {"OFFCORE_REQUESTS.ANY.READ", "Counts number of offcore read requests. Includes L2 prefetch requests."}, + {"OFFCORE_REQUESTS.ANY.RFO", "Counts number of offcore RFO requests. Includes L2 prefetch requests."}, + {"OFFCORE_REQUESTS.L1D_WRITEBACK", "Counts number of L1D writebacks to the uncore."}, + {"OFFCORE_REQUESTS.ANY", "Counts all offcore requests."}, + {"UOPS_EXECUTED.PORT0", "Counts number of Uops executed that were issued on port 0. Port 0 handles integer arithmetic, SIMD and FP add Uops."}, + {"UOPS_EXECUTED.PORT1", "Counts number of Uops executed that were issued on port 1. Port 1 handles integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops."}, + {"UOPS_EXECUTED.PORT2_CORE", "Counts number of Uops executed that were issued on port 2. Port 2 handles the load Uops. This is a core count only and can not be collected per thread."}, + {"UOPS_EXECUTED.PORT3_CORE", "Counts number of Uops executed that were issued on port 3. Port 3 handles store Uops. 
This is a core count only and can not be collected per thread."}, + {"UOPS_EXECUTED.PORT4_CORE", "Counts number of Uops executed that were issued on port 4. Port 4 handles the value to be stored for the store Uops issued on port 3. This is a core count only and can not be collected per thread."}, + {"UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5", "Counts number of cycles there are one or more uops being executed and were issued on ports 0-4. This is a core count only and can not be collected per thread."}, + {"UOPS_EXECUTED.PORT5", "Counts number of Uops executed that were issued on port 5."}, + {"UOPS_EXECUTED.CORE_ACTIVE_CYCLES", "Counts number of cycles there are one or more uops being executed on any ports. This is a core count only and can not be collected per thread."}, + {"UOPS_EXECUTED.PORT015", "Counts number of Uops executed that were issued on port 0, 1, or 5. Use cmask=1, invert=1 to count stall cycles."}, + {"UOPS_EXECUTED.PORT234", "Counts number of Uops executed that were issued on port 2, 3, or 4."}, + {"OFFCORE_REQUESTS_SQ_FULL", "Counts number of cycles the SQ is full to handle off-core requests."}, + {"SNOOPQ_REQUESTS_OUTSTANDING.DATA", "Counts weighted cycles of snoopq requests for data. Counter 0 only. Use cmask=1 to count cycles not empty."}, + {"SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE", "Counts weighted cycles of snoopq invalidate requests. Counter 0 only. Use cmask=1 to count cycles not empty."}, + {"SNOOPQ_REQUESTS_OUTSTANDING.CODE", "Counts weighted cycles of snoopq requests for code. Counter 0 only. Use cmask=1 to count cycles not empty."}, + {"SNOOPQ_REQUESTS.CODE", "Counts the number of snoop code requests."}, + {"SNOOPQ_REQUESTS.DATA", "Counts the number of snoop data requests."}, + {"SNOOPQ_REQUESTS.INVALIDATE", "Counts the number of snoop invalidate requests."}, + {"OFF_CORE_RESPONSE_0", "See Section 30.6.1.3, Off-core Response Performance Monitoring in the Processor Core. 
Requires programming MSR 01A6H."}, + {"SNOOP_RESPONSE.HIT", "Counts HIT snoop response sent by this thread in response to a snoop request."}, + {"SNOOP_RESPONSE.HITE", "Counts HIT E snoop response sent by this thread in response to a snoop request."}, + {"SNOOP_RESPONSE.HITM", "Counts HIT M snoop response sent by this thread in response to a snoop request."}, + {"OFF_CORE_RESPONSE_1", "See Section 30.6.1.3, Off-core Response Performance Monitoring in the Processor Core. Use MSR 01A7H."}, + {"INST_RETIRED.ANY_P", "See Table A-1 Notes: INST_RETIRED.ANY is counted by a designated fixed counter. INST_RETIRED.ANY_P is counted by a programmable counter and is an architectural performance event. Event is supported if CPUID.A.EBX[1] = 0. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions."}, + {"INST_RETIRED.X87", "Counts the number of floating point computational operations retired floating point computational operations executed by the assist handler and sub-operations of complex floating point instructions like transcendental instructions."}, + {"INST_RETIRED.MMX", "Counts the number of retired: MMX instructions."}, + {"UOPS_RETIRED.ANY", "Counts the number of micro-ops retired, (macro-fused=1, micro- fused=2, others=1; maximum count of 8 per cycle). Most instructions are composed of one or two micro-ops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. 
Use cmask=1 and invert to count active cycles or stalled cycles"}, + {"UOPS_RETIRED.RETIRE_SLOTS", "Counts the number of retirement slots used each cycle"}, + {"UOPS_RETIRED.MACRO_FUSED", "Counts number of macro-fused uops retired."}, + {"MACHINE_CLEARS.CYCLES", "Counts the cycles machine clear is asserted."}, + {"MACHINE_CLEARS.MEM_ORDER", "Counts the number of machine clears due to memory order conflicts."}, + {"MACHINE_CLEARS.SMC", "Counts the number of times that a program writes to a code section. Self-modifying code causes a severe penalty in all Intel 64 and IA-32 processors. The modified cache line is written back to the L2 and L3 caches."}, + {"BR_INST_RETIRED.ANY_P", "See Table A-1"}, + {"BR_INST_RETIRED.CONDITIONAL", "Counts the number of conditional branch instructions retired."}, + {"BR_INST_RETIRED.NEAR_CALL", "Counts the number of direct & indirect near unconditional calls retired."}, + {"BR_INST_RETIRED.ALL_BRANCHES", "Counts the number of branch instructions retired."}, + {"BR_MISP_RETIRED.ANY_P", "See Table A-1."}, + {"BR_MISP_RETIRED.CONDITIONAL", "Counts mispredicted conditional retired calls."}, + {"BR_MISP_RETIRED.NEAR_CALL", "Counts mispredicted direct & indirect near unconditional retired calls."}, + {"BR_MISP_RETIRED.ALL_BRANCHES", "Counts all mispredicted retired calls."}, + {"SSEX_UOPS_RETIRED.PACKED_SINGLE", "Counts SIMD packed single-precision floating point Uops retired."}, + {"SSEX_UOPS_RETIRED.SCALAR_SINGLE", "Counts SIMD scalar single-precision floating point Uops retired."}, + {"SSEX_UOPS_RETIRED.PACKED_DOUBLE", "Counts SIMD packed double-precision floating point Uops retired."}, + {"SSEX_UOPS_RETIRED.SCALAR_DOUBLE", "Counts SIMD scalar double-precision floating point Uops retired."}, + {"SSEX_UOPS_RETIRED.VECTOR_INTEGER", "Counts 128-bit SIMD vector integer Uops retired."}, + {"ITLB_MISS_RETIRED", "Counts the number of retired instructions that missed the ITLB when the instruction was fetched."}, + {"MEM_LOAD_RETIRED.L1D_HIT", 
"Counts number of retired loads that hit the L1 data cache."}, + {"MEM_LOAD_RETIRED.L2_HIT", "Counts number of retired loads that hit the L2 data cache."}, + {"MEM_LOAD_RETIRED.L3_UNSHARED_HIT", "Counts number of retired loads that hit their own, unshared lines in the L3 cache."}, + {"MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM", "Counts number of retired loads that hit in a sibling core's L2 (on die core). Since the L3 is inclusive of all cores on the package, this is an L3 hit. This counts both clean or modified hits."}, + {"MEM_LOAD_RETIRED.L3_MISS", "Counts number of retired loads that miss the L3 cache. The load was satisfied by a remote socket, local memory or an IOH."}, + {"MEM_LOAD_RETIRED.HIT_LFB", "Counts number of retired loads that miss the L1D and the address is located in an allocated line fill buffer and will soon be committed to cache. This is counting secondary L1D misses."}, + {"MEM_LOAD_RETIRED.DTLB_MISS", "Counts the number of retired loads that missed the DTLB. The DTLB miss is not counted if the load operation causes a fault. This event counts loads from cacheable memory only. The event does not count loads by software prefetches. Counts both primary and secondary misses to the TLB."}, + {"FP_MMX_TRANS.TO_FP", "Counts the first floating-point instruction following any MMX instruction. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, + {"FP_MMX_TRANS.TO_MMX", "Counts the first MMX instruction following a floating-point instruction. You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, + {"FP_MMX_TRANS.ANY", "Counts all transitions from floating point to MMX instructions and from MMX instructions to floating point instructions. 
You can use this event to estimate the penalties for the transitions between floating-point and MMX technology states."}, + {"MACRO_INSTS.DECODED", "Counts the number of instructions decoded, (but not necessarily executed or retired)."}, + {"UOPS_DECODED.STALL_CYCLES", "Counts the cycles of decoder stalls."}, + {"UOPS_DECODED.MS", "Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS delivers uops when the instruction is more than 4 uops long or a microcode assist is occurring."}, + {"UOPS_DECODED.ESP_FOLDING", "Counts number of stack pointer (ESP) instructions decoded: push, pop, call, ret, etc. ESP instructions do not generate a Uop to increment or decrement ESP. Instead, they update an ESP_Offset register that keeps track of the delta to the current value of the ESP register."}, + {"UOPS_DECODED.ESP_SYNC", "Counts number of stack pointer (ESP) sync operations where an ESP instruction is corrected by adding the ESP offset register to the current value of the ESP register."}, + {"RAT_STALLS.FLAGS", "Counts the number of cycles during which execution stalled due to several reasons, one of which is a partial flag register stall. A partial register stall may occur when two conditions are met: 1) an instruction modifies some, but not all, of the flags in the flag register and 2) the next instruction, which depends on flags, depends on flags that were not modified by this instruction."}, + {"RAT_STALLS.REGISTERS", "This event counts the number of cycles instruction execution latency became longer than the defined latency because the instruction used a register that was partially written by previous instruction."}, + {"RAT_STALLS.ROB_READ_PORT", "Counts the number of cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the out-of-order pipeline. Note that, at this stage in the pipeline, additional stalls may occur at the same cycle and prevent the stalled micro-ops from entering the pipe. 
In such a case, micro-ops retry entering the execution pipe in the next cycle and the ROB-read port stall is counted again."}, + {"RAT_STALLS.SCOREBOARD", "Counts the cycles where we stall due to microarchitecturally required serialization. Microcode scoreboarding stalls."}, + {"RAT_STALLS.ANY", "Counts all Register Allocation Table stall cycles due to: Cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the execution pipe. Cycles when partial register stalls occurred Cycles when flag stalls occurred Cycles floating-point unit (FPU) status word stalls occurred. To count each of these conditions separately use the events: RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and RAT_STALLS.FPSW."}, + {"SEG_RENAME_STALLS", "Counts the number of stall cycles due to the lack of renaming resources for the ES, DS, FS, and GS segment registers. If a segment is renamed but not retired and a second update to the same segment occurs, a stall occurs in the front- end of the pipeline until the renamed segment retires."}, + {"ES_REG_RENAMES", "Counts the number of times the ES segment register is renamed."}, + {"UOP_UNFUSION", "Counts unfusion events due to floating point exception to a fused uop."}, + {"BR_INST_DECODED", "Counts the number of branch instructions decoded."}, + {"BPU_MISSED_CALL_RET", "Counts number of times the Branch Prediction Unit missed predicting a call or return branch."}, + {"BACLEAR.CLEAR", "Counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end. This can occur if the code has many branches such that they cannot be consumed by the BPU. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline. 
The effect on total execution time depends on the surrounding code."}, + {"BACLEAR.BAD_TARGET", "Counts number of Branch Address Calculator clears (BACLEAR) asserted due to conditional branch instructions in which there was a target hit but the direction was wrong. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline."}, + {"BPU_CLEARS.EARLY", "Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken branch after incorrectly assuming that it was not taken. The BPU clear leads to a 2 cycle bubble in the Front End."}, + {"BPU_CLEARS.LATE", "Counts late Branch Prediction Unit clears due to Most Recently Used conflicts. The BPU clear leads to a 3 cycle bubble in the Front End."}, + {"THREAD_ACTIVE", "Counts cycles threads are active."}, + {"L2_TRANSACTIONS.LOAD", "Counts L2 load operations due to HW prefetch or demand loads."}, + {"L2_TRANSACTIONS.RFO", "Counts L2 RFO operations due to HW prefetch or demand RFOs."}, + {"L2_TRANSACTIONS.IFETCH", "Counts L2 instruction fetch operations due to HW prefetch or demand ifetch."}, + {"L2_TRANSACTIONS.PREFETCH", "Counts L2 prefetch operations."}, + {"L2_TRANSACTIONS.L1D_WB", "Counts L1D writeback operations to the L2."}, + {"L2_TRANSACTIONS.FILL", "Counts L2 cache line fill operations due to load, RFO, L1D writeback or prefetch."}, + {"L2_TRANSACTIONS.WB", "Counts L2 writeback operations to the L3."}, + {"L2_TRANSACTIONS.ANY", "Counts all L2 cache operations."}, + {"L2_LINES_IN.S_STATE", "Counts the number of cache lines allocated in the L2 cache in the S (shared) state."}, + {"L2_LINES_IN.E_STATE", "Counts the number of cache lines allocated in the L2 cache in the E (exclusive) state."}, + {"L2_LINES_IN.ANY", "Counts the number of cache lines allocated in the L2 cache."}, + {"L2_LINES_OUT.DEMAND_CLEAN", "Counts L2 clean cache lines evicted by a demand request."}, + {"L2_LINES_OUT.DEMAND_DIRTY", "Counts L2 dirty (modified) cache lines evicted by a demand 
request."}, + {"L2_LINES_OUT.PREFETCH_CLEAN", "Counts L2 clean cache line evicted by a prefetch request."}, + {"L2_LINES_OUT.PREFETCH_DIRTY", "Counts L2 modified cache line evicted by a prefetch request."}, + {"L2_LINES_OUT.ANY", "Counts all L2 cache lines evicted for any reason."}, + {"SQ_MISC.LRU_HINTS", "Counts number of Super Queue LRU hints sent to L3."}, + {"SQ_MISC.SPLIT_LOCK", "Counts the number of SQ lock splits across a cache line."}, + {"SQ_FULL_STALL_CYCLES", "Counts cycles the Super Queue is full. Neither of the threads on this core will be able to access the uncore."}, + {"FP_ASSIST.ALL", "Counts the number of floating point operations executed that required micro-code assist intervention. Assists are required in the following cases: SSE instructions, (Denormal input when the DAZ flag is off or Underflow result when the FTZ flag is off): x87 instructions, (NaN or denormal are loaded to a register or used as input from memory, Division by 0 or Underflow output)."}, + {"FP_ASSIST.OUTPUT", "Counts number of floating point micro-code assist when the output value (destination register) is invalid."}, + {"FP_ASSIST.INPUT", "Counts number of floating point micro-code assist when the input value (one of the source operands to an FP instruction) is invalid."}, + {"SIMD_INT_64.PACKED_MPY", "Counts number of SID integer 64 bit packed multiply operations."}, + {"SIMD_INT_64.PACKED_SHIFT", "Counts number of SID integer 64 bit packed shift operations."}, + {"SIMD_INT_64.PACK", "Counts number of SID integer 64 bit pack operations."}, + {"SIMD_INT_64.UNPACK", "Counts number of SID integer 64 bit unpack operations."}, + {"SIMD_INT_64.PACKED_LOGICAL", "Counts number of SID integer 64 bit logical operations."}, + {"SIMD_INT_64.PACKED_ARITH", "Counts number of SID integer 64 bit arithmetic operations."}, + {"SIMD_INT_64.SHUFFLE_MOVE", "Counts number of SID integer 64 bit shift or move operations."}, + {"INSTR_RETIRED_ANY", ""}, + {"CPU_CLK_UNHALTED_CORE", ""}, + 
{"CPU_CLK_UNHALTED_REF", ""}, + {"GQ_CYCLES_FULL.READ_TRACKER", "Uncore cycles Global Queue read tracker is full."}, + {"GQ_CYCLES_FULL.WRITE_TRACKER", "Uncore cycles Global Queue write tracker is full."}, + {"GQ_CYCLES_FULL.PEER_PROBE_TRACKER", "Uncore cycles Global Queue peer probe tracker is full. The peer probe tracker queue tracks snoops from the IOH and remote sockets."}, + {"GQ_CYCLES_NOT_EMPTY.READ_TRACKER", "Uncore cycles were Global Queue read tracker has at least one valid entry."}, + {"GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER", "Uncore cycles were Global Queue write tracker has at least one valid entry."}, + {"GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER", "Uncore cycles were Global Queue peer probe tracker has at least one valid entry. The peer probe tracker queue tracks IOH and remote socket snoops."}, + {"GQ_OCCUPANCY.READ_TRACKER", "Increments the number of queue entries (code read, data read, and RFOs) in the tread tracker. The GQ read tracker allocate to deallocate occupancy count is divided by the count to obtain the average read tracker latency."}, + {"GQ_ALLOC.READ_TRACKER", "Counts the number of tread tracker allocate to deallocate entries. The GQ read tracker allocate to deallocate occupancy count is divided by the count to obtain the average read tracker latency."}, + {"GQ_ALLOC.RT_L3_MISS", "Counts the number GQ read tracker entries for which a full cache line read has missed the L3. The GQ read tracker L3 miss to fill occupancy count is divided by this count to obtain the average cache line read L3 miss latency. The latency represents the time after which the L3 has determined that the cache line has missed. The time between a GQ read tracker allocation and the L3 determining that the cache line has missed is the average L3 hit latency. 
The total L3 cache line read miss latency is the hit latency + L3 misslatency."}, + {"GQ_ALLOC.RT_TO_L3_RESP", "Counts the number of GQ read tracker entries that are allocated in the read tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy count is divided by this count to obtain the average L3 hit latency."}, + {"GQ_ALLOC.RT_TO_RTID_ACQUIRED", "Counts the number of GQ read tracker entries that are allocated in the read tracker, have missed in the L3 and have not acquired a Request Transaction ID. The GQ read tracker L3 miss to RTID acquired occupancy count is divided by this count to obtain the average latency for a read L3 miss to acquire an RTID."}, + {"GQ_ALLOC.WT_TO_RTID_ACQUIRED", "Counts the number of GQ write tracker entries that are allocated in the write tracker, have missed in the L3 and have not acquired a Request Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is divided by this count to obtain the average latency for a write L3 miss to acquire an RTID."}, + {"GQ_ALLOC.WRITE_TRACKER", "Counts the number of GQ write tracker entries that are allocated in the write tracker queue that miss the L3. The GQ write tracker occupancy count is divided by the this count to obtain the average L3 write miss latency."}, + {"GQ_ALLOC.PEER_PROBE_TRACKER", "Counts the number of GQ peer probe tracker (snoop) entries that are allocated in the peer probe tracker queue that miss the L3. The GQ peer probe occupancy count is divided by this count to obtain the average L3 peer probe miss latency."}, + {"GQ_DATA.FROM_QPI", "Cycles Global Queue Quickpath Interface input data port is busy importing data from the Quickpath Interface. Each cycle the input port can transfer 8 or 16 bytes of data."}, + {"GQ_DATA.FROM_QMC", "Cycles Global Queue Quickpath Memory Interface input data port is busy importing data from the Quickpath Memory Interface. 
Each cycle the input port can transfer 8 or 16 bytes of data."}, + {"GQ_DATA.FROM_L3", "Cycles GQ L3 input data port is busy importing data from the Last Level Cache. Each cycle the input port can transfer 32 bytes of data."}, + {"GQ_DATA.FROM_CORES_02", "Cycles GQ Core 0 and 2 input data port is busy importing data from processor cores 0 and 2. Each cycle the input port can transfer 32 bytes of data."}, + {"GQ_DATA.FROM_CORES_13", "Cycles GQ Core 1 and 3 input data port is busy importing data from processor cores 1 and 3. Each cycle the input port can transfer 32 bytes of data."}, + {"GQ_DATA.TO_QPI_QMC", "Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath Interface or Quickpath Memory Interface. Each cycle the output port can transfer 32 bytes of data."}, + {"GQ_DATA.TO_L3", "Cycles GQ L3 output data port is busy sending data to the Last Level Cache. Each cycle the output port can transfer 32 bytes of data."}, + {"GQ_DATA.TO_CORES", "Cycles GQ Core output data port is busy sending data to the Cores. Each cycle the output port can transfer 32 bytes of data."}, + {"SNP_RESP_TO_LOCAL_HOME.I_STATE", "Number of snoop responses to the local home that L3 does not have the referenced cache line."}, + {"SNP_RESP_TO_LOCAL_HOME.S_STATE", "Number of snoop responses to the local home that L3 has the referenced line cached in the S state."}, + {"SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE", "Number of responses to code or data read snoops to the local home that the L3 has the referenced cache line in the E state. The L3 cache line state is changed to the S state and the line is forwarded to the local home in the S state."}, + {"SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE", "Number of responses to read invalidate snoops to the local home that the L3 has the referenced cache line in the M state. 
The L3 cache line state is invalidated and the line is forwarded to the local home in the M state."}, + {"SNP_RESP_TO_LOCAL_HOME.CONFLICT", "Number of conflict snoop responses sent to the local home."}, + {"SNP_RESP_TO_LOCAL_HOME.WB", "Number of responses to code or data read snoops to the local home that the L3 has the referenced line cached in the M state."}, + {"SNP_RESP_TO_REMOTE_HOME.I_STATE", "Number of snoop responses to a remote home that L3 does not have the referenced cache line."}, + {"SNP_RESP_TO_REMOTE_HOME.S_STATE", "Number of snoop responses to a remote home that L3 has the referenced line cached in the S state."}, + {"SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE", "Number of responses to code or data read snoops to a remote home that the L3 has the referenced cache line in the E state. The L3 cache line state is changed to the S state and the line is forwarded to the remote home in the S state."}, + {"SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE", "Number of responses to read invalidate snoops to a remote home that the L3 has the referenced cache line in the M state. The L3 cache line state is invalidated and the line is forwarded to the remote home in the M state."}, + {"SNP_RESP_TO_REMOTE_HOME.CONFLICT", "Number of conflict snoop responses sent to the local home."}, + {"SNP_RESP_TO_REMOTE_HOME.WB", "Number of responses to code or data read snoops to a remote home that the L3 has the referenced line cached in the M state."}, + {"SNP_RESP_TO_REMOTE_HOME.HITM", "Number of HITM snoop responses to a remote home."}, + {"L3_HITS.READ", "Number of code read, data read and RFO requests that hit in the L3."}, + {"L3_HITS.WRITE", "Number of writeback requests that hit in the L3. 
Writebacks from the cores will always result in L3 hits due to the inclusive property of the L3."}, + {"L3_HITS.PROBE", "Number of snoops from IOH or remote sockets that hit in the L3."}, + {"L3_HITS.ANY", "Number of reads and writes that hit the L3."}, + {"L3_MISS.READ", "Number of code read, data read and RFO requests that miss the L3."}, + {"L3_MISS.WRITE", "Number of writeback requests that miss the L3. Should always be zero as writebacks from the cores will always result in L3 hits due to the inclusive property of the L3."}, + {"L3_MISS.PROBE", "Number of snoops from IOH or remote sockets that miss the L3."}, + {"L3_MISS.ANY", "Number of reads and writes that miss the L3."}, + {"L3_LINES_IN.M_STATE", "Counts the number of L3 lines allocated in M state. The only time a cache line is allocated in the M state is when the line was forwarded in M state is forwarded due to a Snoop Read Invalidate Own request."}, + {"L3_LINES_IN.E_STATE", "Counts the number of L3 lines allocated in E state."}, + {"L3_LINES_IN.S_STATE", "Counts the number of L3 lines allocated in S state."}, + {"L3_LINES_IN.F_STATE", "Counts the number of L3 lines allocated in F state."}, + {"L3_LINES_IN.ANY", "Counts the number of L3 lines allocated in any state."}, + {"L3_LINES_OUT.M_STATE", "Counts the number of L3 lines victimized that were in the M state. 
When the victim cache line is in M state, the line is written to its home cache agent which can be either local or remote."}, + {"L3_LINES_OUT.E_STATE", "Counts the number of L3 lines victimized that were in the E state."}, + {"L3_LINES_OUT.S_STATE", "Counts the number of L3 lines victimized that were in the S state."}, + {"L3_LINES_OUT.I_STATE", "Counts the number of L3 lines victimized that were in the I state."}, + {"L3_LINES_OUT.F_STATE", "Counts the number of L3 lines victimized that were in the F state."}, + {"L3_LINES_OUT.ANY", "Counts the number of L3 lines victimized in any state."}, + {"GQ_SNOOP.GOTO_S", "Counts the number of remote snoops that have requested a cache line be set to the S state."}, + {"GQ_SNOOP.GOTO_I", "Counts the number of remote snoops that have requested a cache line be set to the I state."}, + {"GQ_SNOOP.GOTO_S_HIT", "Counts the number of remote snoops that have requested a cache line be set to the S state from E state. Requires writing MSR 301H with mask = 2H"}, + {"GQ_SNOOP.GOTO_I_HIT", "Counts the number of remote snoops that have requested a cache line be set to the S state from F (forward) state. 
Requires writing MSR 301H with mask = 8H"}, + {"QHL_REQUESTS.IOH_READS", "Counts number of Quickpath Home Logic read requests from the IOH."}, + {"QHL_REQUESTS.IOH_WRITES", "Counts number of Quickpath Home Logic write requests from the IOH."}, + {"QHL_REQUESTS.REMOTE_READS", "Counts number of Quickpath Home Logic read requests from a remote socket."}, + {"QHL_REQUESTS.REMOTE_WRITES", "Counts number of Quickpath Home Logic write requests from a remote socket."}, + {"QHL_REQUESTS.LOCAL_READS", "Counts number of Quickpath Home Logic read requests from the local socket."}, + {"QHL_REQUESTS.LOCAL_WRITES", "Counts number of Quickpath Home Logic write requests from the local socket."}, + {"QHL_CYCLES_FULL.IOH", "Counts uclk cycles all entries in the Quickpath Home Logic IOH are full."}, + {"QHL_CYCLES_FULL.REMOTE", "Counts uclk cycles all entries in the Quickpath Home Logic remote tracker are full."}, + {"QHL_CYCLES_FULL.LOCAL", "Counts uclk cycles all entries in the Quickpath Home Logic local tracker are full."}, + {"QHL_CYCLES_NOT_EMPTY.IOH", "Counts uclk cycles all entries in the Quickpath Home Logic IOH is busy."}, + {"QHL_CYCLES_NOT_EMPTY.REMOTE", "Counts uclk cycles all entries in the Quickpath Home Logic remote tracker is busy."}, + {"QHL_CYCLES_NOT_EMPTY.LOCAL", "Counts uclk cycles all entries in the Quickpath Home Logic local tracker is busy."}, + {"QHL_OCCUPANCY.IOH", "QHL IOH tracker allocate to deallocate read occupancy."}, + {"QHL_OCCUPANCY.REMOTE", "QHL remote tracker allocate to deallocate read occupancy."}, + {"QHL_OCCUPANCY.LOCAL", "QHL local tracker allocate to deallocate read occupancy."}, + {"QHL_ADDRESS_CONFLICTS.2WAY", "Counts number of QHL Active Address Table (AAT) entries that saw a max of 2 conflicts. The AAT is a structure that tracks requests that are in conflict. The requests themselves are in the home tracker entries. 
The count is reported when an AAT entry deallocates."}, + {"QHL_ADDRESS_CONFLICTS.3WAY", "Counts number of QHL Active Address Table (AAT) entries that saw a max of 3 conflicts. The AAT is a structure that tracks requests that are in conflict. The requests themselves are in the home tracker entries. The count is reported when an AAT entry deallocates."}, + {"QHL_CONFLICT_CYCLES.IOH", "Counts cycles the Quickpath Home Logic IOH Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, + {"QHL_CONFLICT_CYCLES.REMOTE", "Counts cycles the Quickpath Home Logic Remote Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, + {"QHL_CONFLICT_CYCLES.LOCAL", "Counts cycles the Quickpath Home Logic Local Tracker contains two or more requests with an address conflict. A max of 3 requests can be in conflict."}, + {"QHL_TO_QMC_BYPASS", "Counts number or requests to the Quickpath Memory Controller that bypass the Quickpath Home Logic. All local accesses can be bypassed. 
For remote requests, only read requests can be bypassed."}, + {"QMC_ISOC_FULL.READ.CH0", "Counts cycles all the entries in the DRAM channel 0 high priority queue are occupied with isochronous read requests."}, + {"QMC_ISOC_FULL.READ.CH1", "Counts cycles all the entries in the DRAM channel 1 high priority queue are occupied with isochronous read requests."}, + {"QMC_ISOC_FULL.READ.CH2", "Counts cycles all the entries in the DRAM channel 2 high priority queue are occupied with isochronous read requests."}, + {"QMC_ISOC_FULL.WRITE.CH0", "Counts cycles all the entries in the DRAM channel 0 high priority queue are occupied with isochronous write requests."}, + {"QMC_ISOC_FULL.WRITE.CH1", "Counts cycles all the entries in the DRAM channel 1 high priority queue are occupied with isochronous write requests."}, + {"QMC_ISOC_FULL.WRITE.CH2", "Counts cycles all the entries in the DRAM channel 2 high priority queue are occupied with isochronous write requests."}, + {"QMC_BUSY.READ.CH0", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding read request to DRAM channel 0."}, + {"QMC_BUSY.READ.CH1", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding read request to DRAM channel 1."}, + {"QMC_BUSY.READ.CH2", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding read request to DRAM channel 2."}, + {"QMC_BUSY.WRITE.CH0", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding write request to DRAM channel 0."}, + {"QMC_BUSY.WRITE.CH1", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding write request to DRAM channel 1."}, + {"QMC_BUSY.WRITE.CH2", "Counts cycles where Quickpath Memory Controller has at least 1 outstanding write request to DRAM channel 2."}, + {"QMC_OCCUPANCY.CH0", "IMC channel 0 normal read request occupancy."}, + {"QMC_OCCUPANCY.CH1", "IMC channel 1 normal read request occupancy."}, + {"QMC_OCCUPANCY.CH2", "IMC channel 2 normal read request 
occupancy."}, + {"QMC_OCCUPANCY.ANY", "Normal read request occupancy for any channel."}, + {"QMC_ISSOC_OCCUPANCY.CH0", "IMC channel 0 issoc read request occupancy."}, + {"QMC_ISSOC_OCCUPANCY.CH1", "IMC channel 1 issoc read request occupancy."}, + {"QMC_ISSOC_OCCUPANCY.CH2", "IMC channel 2 issoc read request occupancy."}, + {"QMC_ISSOC_READS.ANY", "IMC issoc read request occupancy."}, + {"QMC_NORMAL_READS.CH0", "Counts the number of Quickpath Memory Controller channel 0 medium and low priority read requests. The QMC channel 0 normal read occupancy divided by this count provides the average QMC channel 0 read latency."}, + {"QMC_NORMAL_READS.CH1", "Counts the number of Quickpath Memory Controller channel 1 medium and low priority read requests. The QMC channel 1 normal read occupancy divided by this count provides the average QMC channel 1 read latency."}, + {"QMC_NORMAL_READS.CH2", "Counts the number of Quickpath Memory Controller channel 2 medium and low priority read requests. The QMC channel 2 normal read occupancy divided by this count provides the average QMC channel 2 read latency."}, + {"QMC_NORMAL_READS.ANY", "Counts the number of Quickpath Memory Controller medium and low priority read requests. 
The QMC normal read occupancy divided by this count provides the average QMC read latency."}, + {"QMC_HIGH_PRIORITY_READS.CH0", "Counts the number of Quickpath Memory Controller channel 0 high priority isochronous read requests."}, + {"QMC_HIGH_PRIORITY_READS.CH1", "Counts the number of Quickpath Memory Controller channel 1 high priority isochronous read requests."}, + {"QMC_HIGH_PRIORITY_READS.CH2", "Counts the number of Quickpath Memory Controller channel 2 high priority isochronous read requests."}, + {"QMC_HIGH_PRIORITY_READS.ANY", "Counts the number of Quickpath Memory Controller high priority isochronous read requests."}, + {"QMC_CRITICAL_PRIORITY_READS.CH0", "Counts the number of Quickpath Memory Controller channel 0 critical priority isochronous read requests."}, + {"QMC_CRITICAL_PRIORITY_READS.CH1", "Counts the number of Quickpath Memory Controller channel 1 critical priority isochronous read requests."}, + {"QMC_CRITICAL_PRIORITY_READS.CH2", "Counts the number of Quickpath Memory Controller channel 2 critical priority isochronous read requests."}, + {"QMC_CRITICAL_PRIORITY_READS.ANY", "Counts the number of Quickpath Memory Controller critical priority isochronous read requests."}, + {"QMC_WRITES.FULL.CH0", "Counts number of full cache line writes to DRAM channel 0."}, + {"QMC_WRITES.FULL.CH1", "Counts number of full cache line writes to DRAM channel 1."}, + {"QMC_WRITES.FULL.CH2", "Counts number of full cache line writes to DRAM channel 2."}, + {"QMC_WRITES.FULL.ANY", "Counts number of full cache line writes to DRAM."}, + {"QMC_WRITES.PARTIAL.CH0", "Counts number of partial cache line writes to DRAM channel 0."}, + {"QMC_WRITES.PARTIAL.CH1", "Counts number of partial cache line writes to DRAM channel 1."}, + {"QMC_WRITES.PARTIAL.CH2", "Counts number of partial cache line writes to DRAM channel 2."}, + {"QMC_WRITES.PARTIAL.ANY", "Counts number of partial cache line writes to DRAM."}, + {"QMC_CANCEL.CH0", "Counts number of DRAM channel 0 cancel requests."}, 
+ {"QMC_CANCEL.CH1", "Counts number of DRAM channel 1 cancel requests."}, + {"QMC_CANCEL.CH2", "Counts number of DRAM channel 2 cancel requests."}, + {"QMC_CANCEL.ANY", "Counts number of DRAM cancel requests."}, + {"QMC_PRIORITY_UPDATES.CH0", "Counts number of DRAM channel 0 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, + {"QMC_PRIORITY_UPDATES.CH1", "Counts number of DRAM channel 1 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, + {"QMC_PRIORITY_UPDATES.CH2", "Counts number of DRAM channel 2 priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, + {"QMC_PRIORITY_UPDATES.ANY", "Counts number of DRAM priority updates. A priority update occurs when an ISOC high or critical request is received by the QHL and there is a matching request with normal priority that has already been issued to the QMC. In this instance, the QHL will send a priority update to QMC to expedite the request."}, + {"IMC_RETRY.CH0", "Counts number of IMC DRAM channel 0 retries. DRAM retry only occurs when configured in RAS mode."}, + {"IMC_RETRY.CH1", "Counts number of IMC DRAM channel 1 retries. DRAM retry only occurs when configured in RAS mode."}, + {"IMC_RETRY.CH2", "Counts number of IMC DRAM channel 2 retries. 
DRAM retry only occurs when configured in RAS mode."}, + {"IMC_RETRY.ANY", "Counts number of IMC DRAM retries from any channel. DRAM retry only occurs when configured in RAS mode."}, + {"QHL_FRC_ACK_CNFLTS.IOH", "Counts number of Force Acknowledge Conflict messages sent by the Quickpath Home Logic to the IOH."}, + {"QHL_FRC_ACK_CNFLTS.REMOTE", "Counts number of Force Acknowledge Conflict messages sent by the Quickpath Home Logic to the remote home."}, + {"QHL_FRC_ACK_CNFLTS.LOCAL", "Counts number of Force Acknowledge Conflict messages sent by the Quickpath Home Logic to the local home."}, + {"QHL_FRC_ACK_CNFLTS.ANY", "Counts number of Force Acknowledge Conflict messages sent by the Quickpath Home Logic."}, + {"QHL_SLEEPS.IOH_ORDER", "Counts number of occurrences a request was put to sleep due to IOH ordering (write after read) conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, + {"QHL_SLEEPS.REMOTE_ORDER", "Counts number of occurrences a request was put to sleep due to remote socket ordering (write after read) conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, + {"QHL_SLEEPS.LOCAL_ORDER", "Counts number of occurrences a request was put to sleep due to local socket ordering (write after read) conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, + {"QHL_SLEEPS.IOH_CONFLICT", "Counts number of occurrences a request was put to sleep due to IOH address conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, + {"QHL_SLEEPS.REMOTE_CONFLICT", "Counts number of occurrences a request was put to sleep due to remote socket address conflicts. While in the sleep state, the request is not eligible to be scheduled to the QMC."}, + {"QHL_SLEEPS.LOCAL_CONFLICT", "Counts number of occurrences a request was put to sleep due to local socket address conflicts. 
While in the sleep state, the request is not eligible to be scheduled to the QMC."}, + {"ADDR_OPCODE_MATCH.IOH", "Counts number of requests from the IOH, address/opcode of request is qualified by mask value written to MSR 396H. The following mask values are supported: 0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS 40001D00_00000000H:RSPIWB Match opcode/address by writing MSR 396H with mask supported mask value."}, + {"ADDR_OPCODE_MATCH.REMOTE", "Counts number of requests from the remote socket, address/opcode of request is qualified by mask value written to MSR 396H. The following mask values are supported: 0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS 40001D00_00000000H:RSPIWB Match opcode/address by writing MSR 396H with mask supported mask value."}, + {"ADDR_OPCODE_MATCH.LOCAL", "Counts number of requests from the local socket, address/opcode of request is qualified by mask value written to MSR 396H. The following mask values are supported: 0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS 40001D00_00000000H:RSPIWB Match opcode/address by writing MSR 396H with mask supported mask value."}, + {"QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0", "Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0", "Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0", "Counts cycles the Quickpath outbound link 0 non-data response virtual channel is stalled due to lack of a VNA and VN0 credit. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1", "Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1", "Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1", "Counts cycles the Quickpath outbound link 1 non-data response virtual channel is stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.LINK_0", "Counts cycles the Quickpath outbound link 0 virtual channels are stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_SINGLE_FLIT.LINK_1", "Counts cycles the Quickpath outbound link 1 virtual channels are stalled due to lack of a VNA and VN0 credit. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0", "Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is stalled due to lack of VNA and VN0 credits. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0", "Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0", "Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1", "Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1", "Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1", "Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual channel is stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.LINK_0", "Counts cycles the Quickpath outbound link 0 virtual channels are stalled due to lack of VNA and VN0 credits. 
Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_STALLED_MULTI_FLIT.LINK_1", "Counts cycles the Quickpath outbound link 1 virtual channels are stalled due to lack of VNA and VN0 credits. Note that this event does not filter out when a flit would not have been selected for arbitration because another virtual channel is getting arbitrated."}, + {"QPI_TX_HEADER.FULL.LINK_0", "Number of cycles that the header buffer in the Quickpath Interface outbound link 0 is full."}, + {"QPI_TX_HEADER.BUSY.LINK_0", "Number of cycles that the header buffer in the Quickpath Interface outbound link 0 is busy."}, + {"QPI_TX_HEADER.FULL.LINK_1", "Number of cycles that the header buffer in the Quickpath Interface outbound link 1 is full."}, + {"QPI_TX_HEADER.BUSY.LINK_1", "Number of cycles that the header buffer in the Quickpath Interface outbound link 1 is busy."}, + {"QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0", "Number of cycles that snoop packets incoming to the Quickpath Interface link0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) does not have any available entries."}, + {"QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1", "Number of cycles that snoop packets incoming to the Quickpath Interface link 1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT) does not have any available entries."}, + {"DRAM_OPEN.CH0", "Counts number of DRAM Channel 0 open commands issued either for read or write. To read or write data, the referenced DRAM page must first be opened."}, + {"DRAM_OPEN.CH1", "Counts number of DRAM Channel 1 open commands issued either for read or write. To read or write data, the referenced DRAM page must first be opened."}, + {"DRAM_OPEN.CH2", "Counts number of DRAM Channel 2 open commands issued either for read or write. 
To read or write data, the referenced DRAM page must first be opened."}, + {"DRAM_PAGE_CLOSE.CH0", "DRAM channel 0 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, + {"DRAM_PAGE_CLOSE.CH1", "DRAM channel 1 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, + {"DRAM_PAGE_CLOSE.CH2", "DRAM channel 2 command issued to CLOSE a page due to page idle timer expiration. Closing a page is done by issuing a precharge."}, + {"DRAM_PAGE_MISS.CH0", "Counts the number of precharges (PRE) that were issued to DRAM channel 0 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. Closing of the old page is done by issuing a precharge."}, + {"DRAM_PAGE_MISS.CH1", "Counts the number of precharges (PRE) that were issued to DRAM channel 1 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. Closing of the old page is done by issuing a precharge."}, + {"DRAM_PAGE_MISS.CH2", "Counts the number of precharges (PRE) that were issued to DRAM channel 2 because there was a page miss. A page miss refers to a situation in which a page is currently open and another page from the same bank needs to be opened. The new page experiences a page miss. 
Closing of the old page is done by issuing a precharge."}, + {"DRAM_READ_CAS.CH0", "Counts the number of times a read CAS command was issued on DRAM channel 0."}, + {"DRAM_READ_CAS.AUTOPRE_CH0", "Counts the number of times a read CAS command was issued on DRAM channel 0 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_READ_CAS.CH1", "Counts the number of times a read CAS command was issued on DRAM channel 1."}, + {"DRAM_READ_CAS.AUTOPRE_CH1", "Counts the number of times a read CAS command was issued on DRAM channel 1 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_READ_CAS.CH2", "Counts the number of times a read CAS command was issued on DRAM channel 2."}, + {"DRAM_READ_CAS.AUTOPRE_CH2", "Counts the number of times a read CAS command was issued on DRAM channel 2 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_WRITE_CAS.CH0", "Counts the number of times a write CAS command was issued on DRAM channel 0."}, + {"DRAM_WRITE_CAS.AUTOPRE_CH0", "Counts the number of times a write CAS command was issued on DRAM channel 0 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_WRITE_CAS.CH1", "Counts the number of times a write CAS command was issued on DRAM channel 1."}, + {"DRAM_WRITE_CAS.AUTOPRE_CH1", "Counts the number of times a write CAS command was issued on DRAM channel 1 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_WRITE_CAS.CH2", "Counts the number of times a write CAS command was issued on DRAM channel 2."}, + {"DRAM_WRITE_CAS.AUTOPRE_CH2", "Counts the number of times a write CAS command was issued on DRAM channel 2 where the command issued used the auto-precharge (auto page close) mode."}, + {"DRAM_REFRESH.CH0", "Counts number of DRAM channel 0 refresh commands. DRAM loses data content over time. 
In order to keep correct data content, the data values have to be refreshed periodically."}, + {"DRAM_REFRESH.CH1", "Counts number of DRAM channel 1 refresh commands. DRAM loses data content over time. In order to keep correct data content, the data values have to be refreshed periodically."}, + {"DRAM_REFRESH.CH2", "Counts number of DRAM channel 2 refresh commands. DRAM loses data content over time. In order to keep correct data content, the data values have to be refreshed periodically."}, + {"DRAM_PRE_ALL.CH0", "Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close all open pages in a rank. PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, + {"DRAM_PRE_ALL.CH1", "Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close all open pages in a rank. PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, + {"DRAM_PRE_ALL.CH2", "Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close all open pages in a rank. 
PREALL is issued when the DRAM needs to be refreshed or needs to go into a power down mode."}, + {"DRAM_THERMAL_THROTTLED", "Uncore cycles DRAM was throttled due to its temperature being above the thermal throttling threshold."}, + {"THERMAL_THROTTLING_TEMP.CORE_0", "Cycles that the PCU records that core 0 is above the thermal throttling threshold temperature."}, + {"THERMAL_THROTTLING_TEMP.CORE_1", "Cycles that the PCU records that core 1 is above the thermal throttling threshold temperature."}, + {"THERMAL_THROTTLING_TEMP.CORE_2", "Cycles that the PCU records that core 2 is above the thermal throttling threshold temperature."}, + {"THERMAL_THROTTLING_TEMP.CORE_3", "Cycles that the PCU records that core 3 is above the thermal throttling threshold temperature."}, + {"THERMAL_THROTTLED_TEMP.CORE_0", "Cycles that the PCU records that core 0 is in the power throttled state due to cores temperature being above the thermal throttling threshold."}, + {"THERMAL_THROTTLED_TEMP.CORE_1", "Cycles that the PCU records that core 1 is in the power throttled state due to cores temperature being above the thermal throttling threshold."}, + {"THERMAL_THROTTLED_TEMP.CORE_2", "Cycles that the PCU records that core 2 is in the power throttled state due to cores temperature being above the thermal throttling threshold."}, + {"THERMAL_THROTTLED_TEMP.CORE_3", "Cycles that the PCU records that core 3 is in the power throttled state due to cores temperature being above the thermal throttling threshold."}, + {"PROCHOT_ASSERTION", "Number of system assertions of PROCHOT indicating the entire processor has exceeded the thermal limit."}, + {"THERMAL_THROTTLING_PROCHOT.CORE_0", "Cycles that the PCU records that core 0 is a low power state due to the system asserting PROCHOT the entire processor has exceeded the thermal limit."}, + {"THERMAL_THROTTLING_PROCHOT.CORE_1", "Cycles that the PCU records that core 1 is a low power state due to the system asserting PROCHOT the entire processor has 
exceeded the thermal limit."}, + {"THERMAL_THROTTLING_PROCHOT.CORE_2", "Cycles that the PCU records that core 2 is a low power state due to the system asserting PROCHOT the entire processor has exceeded the thermal limit."}, + {"THERMAL_THROTTLING_PROCHOT.CORE_3", "Cycles that the PCU records that core 3 is a low power state due to the system asserting PROCHOT the entire processor has exceeded the thermal limit."}, + {"TURBO_MODE.CORE_0", "Uncore cycles that core 0 is operating in turbo mode."}, + {"TURBO_MODE.CORE_1", "Uncore cycles that core 1 is operating in turbo mode."}, + {"TURBO_MODE.CORE_2", "Uncore cycles that core 2 is operating in turbo mode."}, + {"TURBO_MODE.CORE_3", "Uncore cycles that core 3 is operating in turbo mode."}, + {"CYCLES_UNHALTED_L3_FLL_ENABLE", "Uncore cycles that at least one core is unhalted and all L3 ways are enabled."}, + {"CYCLES_UNHALTED_L3_FLL_DISABLE", "Uncore cycles that at least one core is unhalted and all L3 ways are disabled."}, + { NULL, NULL } +}; + diff --git a/src/freebsd/map-westmere.h b/src/freebsd/map-westmere.h new file mode 100644 index 0000000..720fd3a --- /dev/null +++ b/src/freebsd/map-westmere.h @@ -0,0 +1,511 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: map-westmere.h +* Author: George Neville-Neil +* gnn@freebsd.org +*/ + +#ifndef FreeBSD_MAP_WESTMERE +#define FreeBSD_MAP_WESTMERE + +enum NativeEvent_Value_WestmereProcessor { + PNE_WESTMERE_LOAD_BLOCK_OVERLAP_STORE= PAPI_NATIVE_MASK , + PNE_WESTMERE_SB_DRAIN_ANY, + PNE_WESTMERE_MISALIGN_MEMORY_STORE, + PNE_WESTMERE_STORE_BLOCKS_AT_RET, + PNE_WESTMERE_STORE_BLOCKS_L1D_BLOCK, + PNE_WESTMERE_PARTIAL_ADDRESS_ALIAS, + PNE_WESTMERE_DTLB_LOAD_MISSES_ANY, + PNE_WESTMERE_DTLB_LOAD_MISSES_WALK_COMPLETED, + PNE_WESTMERE_DTLB_LOAD_MISSES_WALK_CYCLES, + PNE_WESTMERE_DTLB_LOAD_MISSES_STLB_HIT, + PNE_WESTMERE_DTLB_LOAD_MISSES_PDE_MISS, + PNE_WESTMERE_MEM_INST_RETIRED_LOADS, + 
PNE_WESTMERE_MEM_INST_RETIRED_STORES, + PNE_WESTMERE_MEM_INST_RETIRED_LATENCY_ABOVE_THRESHOLD, + PNE_WESTMERE_MEM_STORE_RETIRED_DTLB_MISS, + PNE_WESTMERE_UOPS_ISSUED_ANY, + PNE_WESTMERE_UOPS_ISSUED_STALLED_CYCLES, + PNE_WESTMERE_UOPS_ISSUED_FUSED, + PNE_WESTMERE_MEM_UNCORE_RETIRED_LOCAL_HITM, + PNE_WESTMERE_MEM_UNCORE_RETIRED_LOCAL_DRAM_AND_REMOTE_CACHE_HIT, + PNE_WESTMERE_MEM_UNCORE_RETIRED_LOCAL_DRAM, + PNE_WESTMERE_MEM_UNCORE_RETIRED_REMOTE_DRAM, + PNE_WESTMERE_MEM_UNCORE_RETIRED_UNCACHEABLE, + PNE_WESTMERE_FP_COMP_OPS_EXE_X87, + PNE_WESTMERE_FP_COMP_OPS_EXE_MMX, + PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_FP, + PNE_WESTMERE_FP_COMP_OPS_EXE_SSE2_INTEGER, + PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_FP_PACKED, + PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_FP_SCALAR, + PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_SINGLE_PRECISION, + PNE_WESTMERE_FP_COMP_OPS_EXE_SSE_DOUBLE_PRECISION, + PNE_WESTMERE_SIMD_INT_128_PACKED_MPY, + PNE_WESTMERE_SIMD_INT_128_PACKED_SHIFT, + PNE_WESTMERE_SIMD_INT_128_PACK, + PNE_WESTMERE_SIMD_INT_128_UNPACK, + PNE_WESTMERE_SIMD_INT_128_PACKED_LOGICAL, + PNE_WESTMERE_SIMD_INT_128_PACKED_ARITH, + PNE_WESTMERE_SIMD_INT_128_SHUFFLE_MOVE, + PNE_WESTMERE_LOAD_DISPATCH_RS, + PNE_WESTMERE_LOAD_DISPATCH_RS_DELAYED, + PNE_WESTMERE_LOAD_DISPATCH_MOB, + PNE_WESTMERE_LOAD_DISPATCH_ANY, + PNE_WESTMERE_ARITH_CYCLES_DIV_BUSY, + PNE_WESTMERE_ARITH_MUL, + PNE_WESTMERE_INST_QUEUE_WRITES, + PNE_WESTMERE_INST_DECODED_DEC0, + PNE_WESTMERE_TWO_UOP_INSTS_DECODED, + PNE_WESTMERE_INST_QUEUE_WRITE_CYCLES, + PNE_WESTMERE_LSD_OVERFLOW, + PNE_WESTMERE_L2_RQSTS_LD_HIT, + PNE_WESTMERE_L2_RQSTS_LD_MISS, + PNE_WESTMERE_L2_RQSTS_LOADS, + PNE_WESTMERE_L2_RQSTS_RFO_HIT, + PNE_WESTMERE_L2_RQSTS_RFO_MISS, + PNE_WESTMERE_L2_RQSTS_RFOS, + PNE_WESTMERE_L2_RQSTS_IFETCH_HIT, + PNE_WESTMERE_L2_RQSTS_IFETCH_MISS, + PNE_WESTMERE_L2_RQSTS_IFETCHES, + PNE_WESTMERE_L2_RQSTS_PREFETCH_HIT, + PNE_WESTMERE_L2_RQSTS_PREFETCH_MISS, + PNE_WESTMERE_L2_RQSTS_PREFETCHES, + PNE_WESTMERE_L2_RQSTS_MISS, + PNE_WESTMERE_L2_RQSTS_REFERENCES, + 
PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_I_STATE, + PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_S_STATE, + PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_E_STATE, + PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_M_STATE, + PNE_WESTMERE_L2_DATA_RQSTS_DEMAND_MESI, + PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_I_STATE, + PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_S_STATE, + PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_E_STATE, + PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_M_STATE, + PNE_WESTMERE_L2_DATA_RQSTS_PREFETCH_MESI, + PNE_WESTMERE_L2_DATA_RQSTS_ANY, + PNE_WESTMERE_L2_WRITE_RFO_I_STATE, + PNE_WESTMERE_L2_WRITE_RFO_S_STATE, + PNE_WESTMERE_L2_WRITE_RFO_M_STATE, + PNE_WESTMERE_L2_WRITE_RFO_HIT, + PNE_WESTMERE_L2_WRITE_RFO_MESI, + PNE_WESTMERE_L2_WRITE_LOCK_I_STATE, + PNE_WESTMERE_L2_WRITE_LOCK_S_STATE, + PNE_WESTMERE_L2_WRITE_LOCK_E_STATE, + PNE_WESTMERE_L2_WRITE_LOCK_M_STATE, + PNE_WESTMERE_L2_WRITE_LOCK_HIT, + PNE_WESTMERE_L2_WRITE_LOCK_MESI, + PNE_WESTMERE_L1D_WB_L2_I_STATE, + PNE_WESTMERE_L1D_WB_L2_S_STATE, + PNE_WESTMERE_L1D_WB_L2_E_STATE, + PNE_WESTMERE_L1D_WB_L2_M_STATE, + PNE_WESTMERE_L1D_WB_L2_MESI, + PNE_WESTMERE_L3_LAT_CACHE_REFERENCE, + PNE_WESTMERE_L3_LAT_CACHE_MISS, + PNE_WESTMERE_CPU_CLK_UNHALTED_THREAD_P, + PNE_WESTMERE_CPU_CLK_UNHALTED_REF_P, + PNE_WESTMERE_DTLB_MISSES_ANY, + PNE_WESTMERE_DTLB_MISSES_WALK_COMPLETED, + PNE_WESTMERE_DTLB_MISSES_WALK_CYCLES, + PNE_WESTMERE_DTLB_MISSES_STLB_HIT, + PNE_WESTMERE_DTLB_MISSES_LARGE_WALK_COMPLETED, + PNE_WESTMERE_LOAD_HIT_PRE, + PNE_WESTMERE_L1D_PREFETCH_REQUESTS, + PNE_WESTMERE_L1D_PREFETCH_MISS, + PNE_WESTMERE_L1D_PREFETCH_TRIGGERS, + PNE_WESTMERE_EPT_WALK_CYCLES, + PNE_WESTMERE_L1D_REPL, + PNE_WESTMERE_L1D_M_REPL, + PNE_WESTMERE_L1D_M_EVICT, + PNE_WESTMERE_L1D_M_SNOOP_EVICT, + PNE_WESTMERE_L1D_CACHE_PREFETCH_LOCK_FB_HIT, + PNE_WESTMERE_L1D_CACHE_LOCK_FB_HIT, + PNE_WESTMERE_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_READ_DATA, + PNE_WESTMERE_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_READ_CODE, + PNE_WESTMERE_OFFCORE_REQUESTS_OUTSTANDING_DEMAND_RFO, + 
PNE_WESTMERE_OFFCORE_REQUESTS_OUTSTANDING_ANY_READ, + PNE_WESTMERE_CACHE_LOCK_CYCLES_L1D_L2, + PNE_WESTMERE_CACHE_LOCK_CYCLES_L1D, + PNE_WESTMERE_IO_TRANSACTIONS, + PNE_WESTMERE_L1I_HITS, + PNE_WESTMERE_L1I_MISSES, + PNE_WESTMERE_L1I_READS, + PNE_WESTMERE_L1I_CYCLES_STALLED, + PNE_WESTMERE_LARGE_ITLB_HIT, + PNE_WESTMERE_ITLB_MISSES_ANY, + PNE_WESTMERE_ITLB_MISSES_WALK_COMPLETED, + PNE_WESTMERE_ITLB_MISSES_WALK_CYCLES, + PNE_WESTMERE_ITLB_MISSES_LARGE_WALK_COMPLETED, + PNE_WESTMERE_ILD_STALL_LCP, + PNE_WESTMERE_ILD_STALL_MRU, + PNE_WESTMERE_ILD_STALL_IQ_FULL, + PNE_WESTMERE_ILD_STALL_REGEN, + PNE_WESTMERE_ILD_STALL_ANY, + PNE_WESTMERE_BR_INST_EXEC_COND, + PNE_WESTMERE_BR_INST_EXEC_DIRECT, + PNE_WESTMERE_BR_INST_EXEC_INDIRECT_NON_CALL, + PNE_WESTMERE_BR_INST_EXEC_NON_CALLS, + PNE_WESTMERE_BR_INST_EXEC_RETURN_NEAR, + PNE_WESTMERE_BR_INST_EXEC_DIRECT_NEAR_CALL, + PNE_WESTMERE_BR_INST_EXEC_INDIRECT_NEAR_CALL, + PNE_WESTMERE_BR_INST_EXEC_NEAR_CALLS, + PNE_WESTMERE_BR_INST_EXEC_TAKEN, + PNE_WESTMERE_BR_INST_EXEC_ANY, + PNE_WESTMERE_BR_MISP_EXEC_COND, + PNE_WESTMERE_BR_MISP_EXEC_DIRECT, + PNE_WESTMERE_BR_MISP_EXEC_INDIRECT_NON_CALL, + PNE_WESTMERE_BR_MISP_EXEC_NON_CALLS, + PNE_WESTMERE_BR_MISP_EXEC_RETURN_NEAR, + PNE_WESTMERE_BR_MISP_EXEC_DIRECT_NEAR_CALL, + PNE_WESTMERE_BR_MISP_EXEC_INDIRECT_NEAR_CALL, + PNE_WESTMERE_BR_MISP_EXEC_NEAR_CALLS, + PNE_WESTMERE_BR_MISP_EXEC_TAKEN, + PNE_WESTMERE_BR_MISP_EXEC_ANY, + PNE_WESTMERE_RESOURCE_STALLS_ANY, + PNE_WESTMERE_RESOURCE_STALLS_LOAD, + PNE_WESTMERE_RESOURCE_STALLS_RS_FULL, + PNE_WESTMERE_RESOURCE_STALLS_STORE, + PNE_WESTMERE_RESOURCE_STALLS_ROB_FULL, + PNE_WESTMERE_RESOURCE_STALLS_FPCW, + PNE_WESTMERE_RESOURCE_STALLS_MXCSR, + PNE_WESTMERE_RESOURCE_STALLS_OTHER, + PNE_WESTMERE_MACRO_INSTS_FUSIONS_DECODED, + PNE_WESTMERE_BACLEAR_FORCE_IQ, + PNE_WESTMERE_LSD_UOPS, + PNE_WESTMERE_ITLB_FLUSH, + PNE_WESTMERE_OFFCORE_REQUESTS_DEMAND_READ_DATA, + PNE_WESTMERE_OFFCORE_REQUESTS_DEMAND_READ_CODE, + 
PNE_WESTMERE_OFFCORE_REQUESTS_DEMAND_RFO, + PNE_WESTMERE_OFFCORE_REQUESTS_ANY_READ, + PNE_WESTMERE_OFFCORE_REQUESTS_ANY_RFO, + PNE_WESTMERE_OFFCORE_REQUESTS_L1D_WRITEBACK, + PNE_WESTMERE_OFFCORE_REQUESTS_ANY, + PNE_WESTMERE_UOPS_EXECUTED_PORT0, + PNE_WESTMERE_UOPS_EXECUTED_PORT1, + PNE_WESTMERE_UOPS_EXECUTED_PORT2_CORE, + PNE_WESTMERE_UOPS_EXECUTED_PORT3_CORE, + PNE_WESTMERE_UOPS_EXECUTED_PORT4_CORE, + PNE_WESTMERE_UOPS_EXECUTED_CORE_ACTIVE_CYCLES_NO_PORT5, + PNE_WESTMERE_UOPS_EXECUTED_PORT5, + PNE_WESTMERE_UOPS_EXECUTED_CORE_ACTIVE_CYCLES, + PNE_WESTMERE_UOPS_EXECUTED_PORT015, + PNE_WESTMERE_UOPS_EXECUTED_PORT234, + PNE_WESTMERE_OFFCORE_REQUESTS_SQ_FULL, + PNE_WESTMERE_SNOOPQ_REQUESTS_OUTSTANDING_DATA, + PNE_WESTMERE_SNOOPQ_REQUESTS_OUTSTANDING_INVALIDATE, + PNE_WESTMERE_SNOOPQ_REQUESTS_OUTSTANDING_CODE, + PNE_WESTMERE_SNOOPQ_REQUESTS_CODE, + PNE_WESTMERE_SNOOPQ_REQUESTS_DATA, + PNE_WESTMERE_SNOOPQ_REQUESTS_INVALIDATE, + PNE_WESTMERE_OFF_CORE_RESPONSE_0, + PNE_WESTMERE_SNOOP_RESPONSE_HIT, + PNE_WESTMERE_SNOOP_RESPONSE_HITE, + PNE_WESTMERE_SNOOP_RESPONSE_HITM, + PNE_WESTMERE_OFF_CORE_RESPONSE_1, + PNE_WESTMERE_INST_RETIRED_ANY_P, + PNE_WESTMERE_INST_RETIRED_X87, + PNE_WESTMERE_INST_RETIRED_MMX, + PNE_WESTMERE_UOPS_RETIRED_ANY, + PNE_WESTMERE_UOPS_RETIRED_RETIRE_SLOTS, + PNE_WESTMERE_UOPS_RETIRED_MACRO_FUSED, + PNE_WESTMERE_MACHINE_CLEARS_CYCLES, + PNE_WESTMERE_MACHINE_CLEARS_MEM_ORDER, + PNE_WESTMERE_MACHINE_CLEARS_SMC, + PNE_WESTMERE_BR_INST_RETIRED_ANY_P, + PNE_WESTMERE_BR_INST_RETIRED_CONDITIONAL, + PNE_WESTMERE_BR_INST_RETIRED_NEAR_CALL, + PNE_WESTMERE_BR_INST_RETIRED_ALL_BRANCHES, + PNE_WESTMERE_BR_MISP_RETIRED_ANY_P, + PNE_WESTMERE_BR_MISP_RETIRED_CONDITIONAL, + PNE_WESTMERE_BR_MISP_RETIRED_NEAR_CALL, + PNE_WESTMERE_BR_MISP_RETIRED_ALL_BRANCHES, + PNE_WESTMERE_SSEX_UOPS_RETIRED_PACKED_SINGLE, + PNE_WESTMERE_SSEX_UOPS_RETIRED_SCALAR_SINGLE, + PNE_WESTMERE_SSEX_UOPS_RETIRED_PACKED_DOUBLE, + PNE_WESTMERE_SSEX_UOPS_RETIRED_SCALAR_DOUBLE, + 
PNE_WESTMERE_SSEX_UOPS_RETIRED_VECTOR_INTEGER, + PNE_WESTMERE_ITLB_MISS_RETIRED, + PNE_WESTMERE_MEM_LOAD_RETIRED_L1D_HIT, + PNE_WESTMERE_MEM_LOAD_RETIRED_L2_HIT, + PNE_WESTMERE_MEM_LOAD_RETIRED_L3_UNSHARED_HIT, + PNE_WESTMERE_MEM_LOAD_RETIRED_OTHER_CORE_L2_HIT_HITM, + PNE_WESTMERE_MEM_LOAD_RETIRED_L3_MISS, + PNE_WESTMERE_MEM_LOAD_RETIRED_HIT_LFB, + PNE_WESTMERE_MEM_LOAD_RETIRED_DTLB_MISS, + PNE_WESTMERE_FP_MMX_TRANS_TO_FP, + PNE_WESTMERE_FP_MMX_TRANS_TO_MMX, + PNE_WESTMERE_FP_MMX_TRANS_ANY, + PNE_WESTMERE_MACRO_INSTS_DECODED, + PNE_WESTMERE_UOPS_DECODED_STALL_CYCLES, + PNE_WESTMERE_UOPS_DECODED_MS, + PNE_WESTMERE_UOPS_DECODED_ESP_FOLDING, + PNE_WESTMERE_UOPS_DECODED_ESP_SYNC, + PNE_WESTMERE_RAT_STALLS_FLAGS, + PNE_WESTMERE_RAT_STALLS_REGISTERS, + PNE_WESTMERE_RAT_STALLS_ROB_READ_PORT, + PNE_WESTMERE_RAT_STALLS_SCOREBOARD, + PNE_WESTMERE_RAT_STALLS_ANY, + PNE_WESTMERE_SEG_RENAME_STALLS, + PNE_WESTMERE_ES_REG_RENAMES, + PNE_WESTMERE_UOP_UNFUSION, + PNE_WESTMERE_BR_INST_DECODED, + PNE_WESTMERE_BPU_MISSED_CALL_RET, + PNE_WESTMERE_BACLEAR_CLEAR, + PNE_WESTMERE_BACLEAR_BAD_TARGET, + PNE_WESTMERE_BPU_CLEARS_EARLY, + PNE_WESTMERE_BPU_CLEARS_LATE, + PNE_WESTMERE_THREAD_ACTIVE, + PNE_WESTMERE_L2_TRANSACTIONS_LOAD, + PNE_WESTMERE_L2_TRANSACTIONS_RFO, + PNE_WESTMERE_L2_TRANSACTIONS_IFETCH, + PNE_WESTMERE_L2_TRANSACTIONS_PREFETCH, + PNE_WESTMERE_L2_TRANSACTIONS_L1D_WB, + PNE_WESTMERE_L2_TRANSACTIONS_FILL, + PNE_WESTMERE_L2_TRANSACTIONS_WB, + PNE_WESTMERE_L2_TRANSACTIONS_ANY, + PNE_WESTMERE_L2_LINES_IN_S_STATE, + PNE_WESTMERE_L2_LINES_IN_E_STATE, + PNE_WESTMERE_L2_LINES_IN_ANY, + PNE_WESTMERE_L2_LINES_OUT_DEMAND_CLEAN, + PNE_WESTMERE_L2_LINES_OUT_DEMAND_DIRTY, + PNE_WESTMERE_L2_LINES_OUT_PREFETCH_CLEAN, + PNE_WESTMERE_L2_LINES_OUT_PREFETCH_DIRTY, + PNE_WESTMERE_L2_LINES_OUT_ANY, + PNE_WESTMERE_SQ_MISC_LRU_HINTS, + PNE_WESTMERE_SQ_MISC_SPLIT_LOCK, + PNE_WESTMERE_SQ_FULL_STALL_CYCLES, + PNE_WESTMERE_FP_ASSIST_ALL, + PNE_WESTMERE_FP_ASSIST_OUTPUT, + PNE_WESTMERE_FP_ASSIST_INPUT, + 
PNE_WESTMERE_SIMD_INT_64_PACKED_MPY, + PNE_WESTMERE_SIMD_INT_64_PACKED_SHIFT, + PNE_WESTMERE_SIMD_INT_64_PACK, + PNE_WESTMERE_SIMD_INT_64_UNPACK, + PNE_WESTMERE_SIMD_INT_64_PACKED_LOGICAL, + PNE_WESTMERE_SIMD_INT_64_PACKED_ARITH, + PNE_WESTMERE_SIMD_INT_64_SHUFFLE_MOVE, + PNE_WESTMERE_INSTR_RETIRED_ANY, + PNE_WESTMERE_CPU_CLK_UNHALTED_CORE, + PNE_WESTMERE_CPU_CLK_UNHALTED_REF, + PNE_WESTMERE_GQ_CYCLES_FULL_READ_TRACKER, + PNE_WESTMERE_GQ_CYCLES_FULL_WRITE_TRACKER, + PNE_WESTMERE_GQ_CYCLES_FULL_PEER_PROBE_TRACKER, + PNE_WESTMERE_GQ_CYCLES_NOT_EMPTY_READ_TRACKER, + PNE_WESTMERE_GQ_CYCLES_NOT_EMPTY_WRITE_TRACKER, + PNE_WESTMERE_GQ_CYCLES_NOT_EMPTY_PEER_PROBE_TRACKER, + PNE_WESTMERE_GQ_OCCUPANCY_READ_TRACKER, + PNE_WESTMERE_GQ_ALLOC_READ_TRACKER, + PNE_WESTMERE_GQ_ALLOC_RT_L3_MISS, + PNE_WESTMERE_GQ_ALLOC_RT_TO_L3_RESP, + PNE_WESTMERE_GQ_ALLOC_RT_TO_RTID_ACQUIRED, + PNE_WESTMERE_GQ_ALLOC_WT_TO_RTID_ACQUIRED, + PNE_WESTMERE_GQ_ALLOC_WRITE_TRACKER, + PNE_WESTMERE_GQ_ALLOC_PEER_PROBE_TRACKER, + PNE_WESTMERE_GQ_DATA_FROM_QPI, + PNE_WESTMERE_GQ_DATA_FROM_QMC, + PNE_WESTMERE_GQ_DATA_FROM_L3, + PNE_WESTMERE_GQ_DATA_FROM_CORES_02, + PNE_WESTMERE_GQ_DATA_FROM_CORES_13, + PNE_WESTMERE_GQ_DATA_TO_QPI_QMC, + PNE_WESTMERE_GQ_DATA_TO_L3, + PNE_WESTMERE_GQ_DATA_TO_CORES, + PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_I_STATE, + PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_S_STATE, + PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_FWD_S_STATE, + PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_FWD_I_STATE, + PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_CONFLICT, + PNE_WESTMERE_SNP_RESP_TO_LOCAL_HOME_WB, + PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_I_STATE, + PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_S_STATE, + PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_FWD_S_STATE, + PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_FWD_I_STATE, + PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_CONFLICT, + PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_WB, + PNE_WESTMERE_SNP_RESP_TO_REMOTE_HOME_HITM, + PNE_WESTMERE_L3_HITS_READ, + PNE_WESTMERE_L3_HITS_WRITE, + PNE_WESTMERE_L3_HITS_PROBE, + 
PNE_WESTMERE_L3_HITS_ANY, + PNE_WESTMERE_L3_MISS_READ, + PNE_WESTMERE_L3_MISS_WRITE, + PNE_WESTMERE_L3_MISS_PROBE, + PNE_WESTMERE_L3_MISS_ANY, + PNE_WESTMERE_L3_LINES_IN_M_STATE, + PNE_WESTMERE_L3_LINES_IN_E_STATE, + PNE_WESTMERE_L3_LINES_IN_S_STATE, + PNE_WESTMERE_L3_LINES_IN_F_STATE, + PNE_WESTMERE_L3_LINES_IN_ANY, + PNE_WESTMERE_L3_LINES_OUT_M_STATE, + PNE_WESTMERE_L3_LINES_OUT_E_STATE, + PNE_WESTMERE_L3_LINES_OUT_S_STATE, + PNE_WESTMERE_L3_LINES_OUT_I_STATE, + PNE_WESTMERE_L3_LINES_OUT_F_STATE, + PNE_WESTMERE_L3_LINES_OUT_ANY, + PNE_WESTMERE_GQ_SNOOP_GOTO_S, + PNE_WESTMERE_GQ_SNOOP_GOTO_I, + PNE_WESTMERE_GQ_SNOOP_GOTO_S_HIT, + PNE_WESTMERE_GQ_SNOOP_GOTO_I_HIT, + PNE_WESTMERE_QHL_REQUESTS_IOH_READS, + PNE_WESTMERE_QHL_REQUESTS_IOH_WRITES, + PNE_WESTMERE_QHL_REQUESTS_REMOTE_READS, + PNE_WESTMERE_QHL_REQUESTS_REMOTE_WRITES, + PNE_WESTMERE_QHL_REQUESTS_LOCAL_READS, + PNE_WESTMERE_QHL_REQUESTS_LOCAL_WRITES, + PNE_WESTMERE_QHL_CYCLES_FULL_IOH, + PNE_WESTMERE_QHL_CYCLES_FULL_REMOTE, + PNE_WESTMERE_QHL_CYCLES_FULL_LOCAL, + PNE_WESTMERE_QHL_CYCLES_NOT_EMPTY_IOH, + PNE_WESTMERE_QHL_CYCLES_NOT_EMPTY_REMOTE, + PNE_WESTMERE_QHL_CYCLES_NOT_EMPTY_LOCAL, + PNE_WESTMERE_QHL_OCCUPANCY_IOH, + PNE_WESTMERE_QHL_OCCUPANCY_REMOTE, + PNE_WESTMERE_QHL_OCCUPANCY_LOCAL, + PNE_WESTMERE_QHL_ADDRESS_CONFLICTS_2WAY, + PNE_WESTMERE_QHL_ADDRESS_CONFLICTS_3WAY, + PNE_WESTMERE_QHL_CONFLICT_CYCLES_IOH, + PNE_WESTMERE_QHL_CONFLICT_CYCLES_REMOTE, + PNE_WESTMERE_QHL_CONFLICT_CYCLES_LOCAL, + PNE_WESTMERE_QHL_TO_QMC_BYPASS, + PNE_WESTMERE_QMC_ISOC_FULL_READ_CH0, + PNE_WESTMERE_QMC_ISOC_FULL_READ_CH1, + PNE_WESTMERE_QMC_ISOC_FULL_READ_CH2, + PNE_WESTMERE_QMC_ISOC_FULL_WRITE_CH0, + PNE_WESTMERE_QMC_ISOC_FULL_WRITE_CH1, + PNE_WESTMERE_QMC_ISOC_FULL_WRITE_CH2, + PNE_WESTMERE_QMC_BUSY_READ_CH0, + PNE_WESTMERE_QMC_BUSY_READ_CH1, + PNE_WESTMERE_QMC_BUSY_READ_CH2, + PNE_WESTMERE_QMC_BUSY_WRITE_CH0, + PNE_WESTMERE_QMC_BUSY_WRITE_CH1, + PNE_WESTMERE_QMC_BUSY_WRITE_CH2, + PNE_WESTMERE_QMC_OCCUPANCY_CH0, + 
PNE_WESTMERE_QMC_OCCUPANCY_CH1, + PNE_WESTMERE_QMC_OCCUPANCY_CH2, + PNE_WESTMERE_QMC_OCCUPANCY_ANY, + PNE_WESTMERE_QMC_ISSOC_OCCUPANCY_CH0, + PNE_WESTMERE_QMC_ISSOC_OCCUPANCY_CH1, + PNE_WESTMERE_QMC_ISSOC_OCCUPANCY_CH2, + PNE_WESTMERE_QMC_ISSOC_READS_ANY, + PNE_WESTMERE_QMC_NORMAL_READS_CH0, + PNE_WESTMERE_QMC_NORMAL_READS_CH1, + PNE_WESTMERE_QMC_NORMAL_READS_CH2, + PNE_WESTMERE_QMC_NORMAL_READS_ANY, + PNE_WESTMERE_QMC_HIGH_PRIORITY_READS_CH0, + PNE_WESTMERE_QMC_HIGH_PRIORITY_READS_CH1, + PNE_WESTMERE_QMC_HIGH_PRIORITY_READS_CH2, + PNE_WESTMERE_QMC_HIGH_PRIORITY_READS_ANY, + PNE_WESTMERE_QMC_CRITICAL_PRIORITY_READS_CH0, + PNE_WESTMERE_QMC_CRITICAL_PRIORITY_READS_CH1, + PNE_WESTMERE_QMC_CRITICAL_PRIORITY_READS_CH2, + PNE_WESTMERE_QMC_CRITICAL_PRIORITY_READS_ANY, + PNE_WESTMERE_QMC_WRITES_FULL_CH0, + PNE_WESTMERE_QMC_WRITES_FULL_CH1, + PNE_WESTMERE_QMC_WRITES_FULL_CH2, + PNE_WESTMERE_QMC_WRITES_FULL_ANY, + PNE_WESTMERE_QMC_WRITES_PARTIAL_CH0, + PNE_WESTMERE_QMC_WRITES_PARTIAL_CH1, + PNE_WESTMERE_QMC_WRITES_PARTIAL_CH2, + PNE_WESTMERE_QMC_WRITES_PARTIAL_ANY, + PNE_WESTMERE_QMC_CANCEL_CH0, + PNE_WESTMERE_QMC_CANCEL_CH1, + PNE_WESTMERE_QMC_CANCEL_CH2, + PNE_WESTMERE_QMC_CANCEL_ANY, + PNE_WESTMERE_QMC_PRIORITY_UPDATES_CH0, + PNE_WESTMERE_QMC_PRIORITY_UPDATES_CH1, + PNE_WESTMERE_QMC_PRIORITY_UPDATES_CH2, + PNE_WESTMERE_QMC_PRIORITY_UPDATES_ANY, + PNE_WESTMERE_IMC_RETRY_CH0, + PNE_WESTMERE_IMC_RETRY_CH1, + PNE_WESTMERE_IMC_RETRY_CH2, + PNE_WESTMERE_IMC_RETRY_ANY, + PNE_WESTMERE_QHL_FRC_ACK_CNFLTS_IOH, + PNE_WESTMERE_QHL_FRC_ACK_CNFLTS_REMOTE, + PNE_WESTMERE_QHL_FRC_ACK_CNFLTS_LOCAL, + PNE_WESTMERE_QHL_FRC_ACK_CNFLTS_ANY, + PNE_WESTMERE_QHL_SLEEPS_IOH_ORDER, + PNE_WESTMERE_QHL_SLEEPS_REMOTE_ORDER, + PNE_WESTMERE_QHL_SLEEPS_LOCAL_ORDER, + PNE_WESTMERE_QHL_SLEEPS_IOH_CONFLICT, + PNE_WESTMERE_QHL_SLEEPS_REMOTE_CONFLICT, + PNE_WESTMERE_QHL_SLEEPS_LOCAL_CONFLICT, + PNE_WESTMERE_ADDR_OPCODE_MATCH_IOH, + PNE_WESTMERE_ADDR_OPCODE_MATCH_REMOTE, + 
PNE_WESTMERE_ADDR_OPCODE_MATCH_LOCAL, + PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_HOME_LINK_0, + PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_SNOOP_LINK_0, + PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_NDR_LINK_0, + PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_HOME_LINK_1, + PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_SNOOP_LINK_1, + PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_NDR_LINK_1, + PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_LINK_0, + PNE_WESTMERE_QPI_TX_STALLED_SINGLE_FLIT_LINK_1, + PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_DRS_LINK_0, + PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_NCB_LINK_0, + PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_NCS_LINK_0, + PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_DRS_LINK_1, + PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_NCB_LINK_1, + PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_NCS_LINK_1, + PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_LINK_0, + PNE_WESTMERE_QPI_TX_STALLED_MULTI_FLIT_LINK_1, + PNE_WESTMERE_QPI_TX_HEADER_FULL_LINK_0, + PNE_WESTMERE_QPI_TX_HEADER_BUSY_LINK_0, + PNE_WESTMERE_QPI_TX_HEADER_FULL_LINK_1, + PNE_WESTMERE_QPI_TX_HEADER_BUSY_LINK_1, + PNE_WESTMERE_QPI_RX_NO_PPT_CREDIT_STALLS_LINK_0, + PNE_WESTMERE_QPI_RX_NO_PPT_CREDIT_STALLS_LINK_1, + PNE_WESTMERE_DRAM_OPEN_CH0, + PNE_WESTMERE_DRAM_OPEN_CH1, + PNE_WESTMERE_DRAM_OPEN_CH2, + PNE_WESTMERE_DRAM_PAGE_CLOSE_CH0, + PNE_WESTMERE_DRAM_PAGE_CLOSE_CH1, + PNE_WESTMERE_DRAM_PAGE_CLOSE_CH2, + PNE_WESTMERE_DRAM_PAGE_MISS_CH0, + PNE_WESTMERE_DRAM_PAGE_MISS_CH1, + PNE_WESTMERE_DRAM_PAGE_MISS_CH2, + PNE_WESTMERE_DRAM_READ_CAS_CH0, + PNE_WESTMERE_DRAM_READ_CAS_AUTOPRE_CH0, + PNE_WESTMERE_DRAM_READ_CAS_CH1, + PNE_WESTMERE_DRAM_READ_CAS_AUTOPRE_CH1, + PNE_WESTMERE_DRAM_READ_CAS_CH2, + PNE_WESTMERE_DRAM_READ_CAS_AUTOPRE_CH2, + PNE_WESTMERE_DRAM_WRITE_CAS_CH0, + PNE_WESTMERE_DRAM_WRITE_CAS_AUTOPRE_CH0, + PNE_WESTMERE_DRAM_WRITE_CAS_CH1, + PNE_WESTMERE_DRAM_WRITE_CAS_AUTOPRE_CH1, + PNE_WESTMERE_DRAM_WRITE_CAS_CH2, + PNE_WESTMERE_DRAM_WRITE_CAS_AUTOPRE_CH2, + PNE_WESTMERE_DRAM_REFRESH_CH0, + PNE_WESTMERE_DRAM_REFRESH_CH1, + 
PNE_WESTMERE_DRAM_REFRESH_CH2, + PNE_WESTMERE_DRAM_PRE_ALL_CH0, + PNE_WESTMERE_DRAM_PRE_ALL_CH1, + PNE_WESTMERE_DRAM_PRE_ALL_CH2, + PNE_WESTMERE_DRAM_THERMAL_THROTTLED, + PNE_WESTMERE_THERMAL_THROTTLING_TEMP_CORE_0, + PNE_WESTMERE_THERMAL_THROTTLING_TEMP_CORE_1, + PNE_WESTMERE_THERMAL_THROTTLING_TEMP_CORE_2, + PNE_WESTMERE_THERMAL_THROTTLING_TEMP_CORE_3, + PNE_WESTMERE_THERMAL_THROTTLED_TEMP_CORE_0, + PNE_WESTMERE_THERMAL_THROTTLED_TEMP_CORE_1, + PNE_WESTMERE_THERMAL_THROTTLED_TEMP_CORE_2, + PNE_WESTMERE_THERMAL_THROTTLED_TEMP_CORE_3, + PNE_WESTMERE_PROCHOT_ASSERTION, + PNE_WESTMERE_THERMAL_THROTTLING_PROCHOT_CORE_0, + PNE_WESTMERE_THERMAL_THROTTLING_PROCHOT_CORE_1, + PNE_WESTMERE_THERMAL_THROTTLING_PROCHOT_CORE_2, + PNE_WESTMERE_THERMAL_THROTTLING_PROCHOT_CORE_3, + PNE_WESTMERE_TURBO_MODE_CORE_0, + PNE_WESTMERE_TURBO_MODE_CORE_1, + PNE_WESTMERE_TURBO_MODE_CORE_2, + PNE_WESTMERE_TURBO_MODE_CORE_3, + PNE_WESTMERE_CYCLES_UNHALTED_L3_FLL_ENABLE, + PNE_WESTMERE_CYCLES_UNHALTED_L3_FLL_DISABLE, + PNE_WESTMERE_PNE_WESTMERE_NATNAME_GUARD, +}; + +extern Native_Event_LabelDescription_t WestmereProcessor_info[]; +extern hwi_search_t WestmereProcessor_map[]; + +#endif diff --git a/src/freebsd/map.c b/src/freebsd/map.c new file mode 100644 index 0000000..953b39f --- /dev/null +++ b/src/freebsd/map.c @@ -0,0 +1,48 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: freebsd-map.c +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#include "freebsd.h" +#include "papiStdEventDefs.h" +#include "map.h" + +/** See other freebsd-map*.* for more details! 
**/ + +Native_Event_Info_t _papi_hwd_native_info[CPU_LAST+1]; + +void init_freebsd_libpmc_mappings (void) +{ + _papi_hwd_native_info[CPU_UNKNOWN].info = UnkProcessor_info; + _papi_hwd_native_info[CPU_P6].info = P6Processor_info; + _papi_hwd_native_info[CPU_P6_C].info = P6_C_Processor_info; + _papi_hwd_native_info[CPU_P6_2].info = P6_2_Processor_info; + _papi_hwd_native_info[CPU_P6_3].info = P6_3_Processor_info; + _papi_hwd_native_info[CPU_P6_M].info = P6_M_Processor_info; + _papi_hwd_native_info[CPU_P4].info = P4Processor_info; + _papi_hwd_native_info[CPU_K7].info = K7Processor_info; + _papi_hwd_native_info[CPU_K8].info = K8Processor_info; + _papi_hwd_native_info[CPU_ATOM].info = AtomProcessor_info; + _papi_hwd_native_info[CPU_CORE].info = CoreProcessor_info; + _papi_hwd_native_info[CPU_CORE2].info = Core2Processor_info; + _papi_hwd_native_info[CPU_CORE2EXTREME].info = Core2ExtremeProcessor_info; + _papi_hwd_native_info[CPU_COREI7].info = i7Processor_info; + _papi_hwd_native_info[CPU_COREWESTMERE].info = WestmereProcessor_info; + + _papi_hwd_native_info[CPU_LAST].info = NULL; +} + +int freebsd_number_of_events (int processortype) +{ + int counter = 0; + + while (_papi_hwd_native_info[processortype].info[counter].name != NULL) + counter++; + + return counter; +} diff --git a/src/freebsd/map.h b/src/freebsd/map.h new file mode 100644 index 0000000..871b874 --- /dev/null +++ b/src/freebsd/map.h @@ -0,0 +1,70 @@ +/****************************/ +/* THIS IS OPEN SOURCE CODE */ +/****************************/ + +/* +* File: freebsd-map.h +* Author: Harald Servat +* redcrash@gmail.com +*/ + +#ifndef _FreeBSD_MAP_H_ +#define _FreeBSD_MAP_H_ + +#include "../papi.h" +#include "../papi_internal.h" +#include "../papi_vector.h" + +enum +{ + CPU_UNKNOWN = 0, + CPU_P6, + CPU_P6_C, + CPU_P6_2, + CPU_P6_3, + CPU_P6_M, + CPU_P4, + CPU_K7, + CPU_K8, + CPU_ATOM, + CPU_CORE, + CPU_CORE2, + CPU_CORE2EXTREME, + CPU_COREI7, + CPU_COREWESTMERE, + CPU_LAST +}; + +typedef struct 
Native_Event_LabelDescription +{ + char *name; + char *description; +} Native_Event_LabelDescription_t; + +typedef struct Native_Event_Info +{ + /* Name and description for all native events */ + Native_Event_LabelDescription_t *info; +} Native_Event_Info_t; + +extern Native_Event_Info_t _papi_hwd_native_info[CPU_LAST+1]; +extern void init_freebsd_libpmc_mappings (void); +extern int freebsd_number_of_events (int processortype); + +#include "map-unknown.h" +#include "map-p6.h" +#include "map-p6-c.h" +#include "map-p6-2.h" +#include "map-p6-3.h" +#include "map-p6-m.h" +#include "map-p4.h" +#include "map-k7.h" +#include "map-k8.h" +#include "map-atom.h" +#include "map-core.h" +#include "map-core2.h" +#include "map-core2-extreme.h" +#include "map-i7.h" +#include "map-westmere.h" + +#endif /* _FreeBSD_MAP_H_ */ diff --git a/src/freebsd_events.csv b/src/freebsd_events.csv new file mode 100644 index 0000000..56bee0c --- /dev/null +++ b/src/freebsd_events.csv @@ -0,0 +1,294 @@ +# +# FreeBSD presets +# these are needed as event names are different than those in libpfm4 +# + +CPU,UNKNOWN +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES +PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS +PRESET,PAPI_BR_INS,NOT_DERIVED,BRANCHES +PRESET,PAPI_HW_INT,NOT_DERIVED,INTERRUPTS +PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCH_MISPREDICTS +PRESET,PAPI_L2_DCM,NOT_DERIVED,DC_MISSES +PRESET,PAPI_L2_ICM,NOT_DERIVED,IC_MISSES +PRESET,PAPI_L2_TCM,DERIVED_ADD, IC_MISSES,DC_MISSES + +CPU,INTEL_P6 +CPU,INTEL_PII +CPU,INTEL_PIII +CPU,INTEL_CL +CPU,INTEL_PM +PRESET,PAPI_L1_DCM,NOT_DERIVED,DCU_LINES_IN +# L2_IFETCH defaults to MESI +PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_IFETCH +# BUS_TRAN_IFETCH defaults to SELF +PRESET,PAPI_L2_DCM,DERIVED_SUB,L2_LINES_IN,BUS_TRAN_IFETCH +# BUS_TRAN_IFETCH defaults to SELF +PRESET,PAPI_L2_ICM,NOT_DERIVED,BUS_TRAN_IFETCH +PRESET,PAPI_L1_TCM,NOT_DERIVED,L2_RQSTS +PRESET,PAPI_L2_TCM,NOT_DERIVED,L2_LINES_IN +PRESET,PAPI_CA_CLN,NOT_DERIVED,BUS_TRAN_RFO
+PRESET,PAPI_CA_ITV,NOT_DERIVED,BUS_TRAN_INVAL +PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISS +PRESET,PAPI_L1_LDM,NOT_DERIVED,L2_LD +PRESET,PAPI_L1_STM,NOT_DERIVED,L2_ST +PRESET,PAPI_L2_LDM,DERIVED_SUB,L2_LINES_IN,L2M_LINES_INM +PRESET,PAPI_L2_STM,NOT_DERIVED,L2M_LINES_INM +PRESET,PAPI_BTAC_M,NOT_DERIVED,BTB_MISSES +PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RX +PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_TAKEN_RETIRED +PRESET,PAPI_BR_NTK,DERIVED_SUB,BR_INST_RETIRED,BR_TAKEN_RETIRED +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISS_PRED_RETIRED +PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED,BR_MISS_PRED_RETIRED +PRESET,PAPI_TOT_IIS,NOT_DERIVED,INST_DECODED +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED +PRESET,PAPI_FP_INS,NOT_DERIVED,FLOPS +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED +PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALL +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED +PRESET,PAPI_LST_INS,DERIVED_ADD,L2_LD,L2_ST +PRESET,PAPI_L1_DCH,DERIVED_SUB,DATA_MEM_REFS, DCU_LINES_IN +PRESET,PAPI_L1_DCA,NOT_DERIVED,DATA_MEM_REFS +PRESET,PAPI_L2_DCA,DERIVED_ADD,L2_LD, L2_ST +PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_LD +PRESET,PAPI_L2_DCW,NOT_DERIVED,L2_ST +PRESET,PAPI_L1_ICH,DERIVED_SUB,IFU_FETCH, L2_IFETCH +PRESET,PAPI_L2_ICH,DERIVED_SUB,L2_IFETCH, BUS_TRAN_IFETCH +PRESET,PAPI_L1_ICA,NOT_DERIVED,IFU_FETCH +PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_IFETCH +PRESET,PAPI_L1_ICR,NOT_DERIVED,IFU_FETCH +PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_IFETCH +PRESET,PAPI_L2_TCH,DERIVED_SUB,L2_RQSTS, L2_LINES_IN +PRESET,PAPI_L1_TCA,DERIVED_ADD,DATA_MEM_REFS, IFU_FETCH +PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS +PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_LD, L2_IFETCH +PRESET,PAPI_L2_TCW,NOT_DERIVED,L2_ST +PRESET,PAPI_FML_INS,NOT_DERIVED,MUL +PRESET,PAPI_FDV_INS,NOT_DERIVED,DIV +PRESET,PAPI_FP_OPS,NOT_DERIVED,FLOPS + +CPU,INTEL_PM +PRESET,PAPI_VEC_INS,DERIVED_ADD,MMX_INSTR_RET, EMON_SSE_SSE2_INST_RETIRED + +CPU,INTEL_PIII +PRESET,PAPI_VEC_INS,DERIVED_ADD,MMX_INSTR_RET, EMON_KNI_INST_RETIRED + 
+CPU,INTEL_CL +PRESET,PAPI_VEC_INS,NOT_DERIVED,MMX_INSTR_EXEC + +CPU,AMD_K7 +PRESET,PAPI_L1_DCM,DERIVED_ADD,DC_REFILLS_FROM_SYSTEM, DC_REFILLS_FROM_L2 +PRESET,PAPI_L1_ICM,NOT_DERIVED,IC_MISSES +PRESET,PAPI_L2_DCM,NOT_DERIVED,DC_REFILLS_FROM_SYSTEM +PRESET,PAPI_L1_TCM,DERIVED_ADD,DC_REFILLS_FROM_SYSTEM, DC_REFILLS_FROM_L2, IC_MISSES +PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_AND_L2_DTLB_MISSES +PRESET,PAPI_TLB_IM,NOT_DERIVED,L1_AND_L2_ITLB_MISSES +PRESET,PAPI_TLB_TL,DERIVED_ADD,L1_AND_L2_DTLB_MISSES, L1_AND_L2_ITLB_MISSES +PRESET,PAPI_L1_LDM,NOT_DERIVED,DC_REFILLS_FROM_L2_OES +PRESET,PAPI_L1_STM,NOT_DERIVED,DC_REFILLS_FROM_L2_M +PRESET,PAPI_L2_LDM,NOT_DERIVED,DC_REFILLS_FROM_SYSTEM_OES +PRESET,PAPI_L2_STM,NOT_DERIVED,DC_REFILLS_FROM_SYSTEM_M +PRESET,PAPI_HW_INT,NOT_DERIVED,HARDWARE_INTERRUPTS +PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_FAR_CONTROL_TRANSFERS +PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_BRANCHES +PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCHES +PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCHES, RETIRED_TAKEN_BRANCHES +PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCHES_MISPREDICTED +PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_BRANCHES, RETIRED_BRANCHES_MISPREDICTED +PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS +PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_TAKEN_BRANCHES +PRESET,PAPI_L1_DCA,NOT_DERIVED,DC_ACCESSES +PRESET,PAPI_L2_DCA,DERIVED_ADD,DC_REFILLS_FROM_SYSTEM, DC_REFILLS_FROM_L2 +PRESET,PAPI_L1_ICA,NOT_DERIVED,IC_FETCHES +PRESET,PAPI_L2_ICA,NOT_DERIVED,IC_MISSES +PRESET,PAPI_L1_ICR,NOT_DERIVED,IC_FETCHES +PRESET,PAPI_L1_TCA,DERIVED_ADD,DC_ACCESSES, IC_FETCHES + +CPU,AMD_K8 +PRESET,PAPI_BR_INS,NOT_DERIVED,FR_RETIRED_BRANCHES +PRESET,PAPI_RES_STL,NOT_DERIVED,FR_DISPATCH_STALLS +PRESET,PAPI_TOT_CYC,NOT_DERIVED,BU_CPU_CLK_UNHALTED +PRESET,PAPI_TOT_INS,NOT_DERIVED,FR_RETIRED_X86_INSTRUCTIONS +PRESET,PAPI_STL_ICY,NOT_DERIVED,FR_DECODER_EMPTY +PRESET,PAPI_HW_INT,NOT_DERIVED,FR_RETIRED_TAKEN_HARDWARE_INTERRUPTS +PRESET,PAPI_BR_TKN,NOT_DERIVED,FR_RETIRED_TAKEN_BRANCHES 
+PRESET,PAPI_BR_MSP,NOT_DERIVED,FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED +PRESET,PAPI_TLB_DM,NOT_DERIVED,DC_L1_DTLB_MISS_AND_L2_DTLB_MISS +PRESET,PAPI_TLB_IM,NOT_DERIVED,IC_L1_ITLB_MISS_AND_L2_ITLB_MISS +PRESET,PAPI_TLB_TL,DERIVED_ADD,DC_L1_DTLB_MISS_AND_L2_DTLB_MISS,IC_L1_ITLB_MISS_AND_L2_ITLB_MISS +PRESET,PAPI_L1_DCA,NOT_DERIVED,DC_ACCESS +PRESET,PAPI_L1_ICA,NOT_DERIVED,IC_FETCH +PRESET,PAPI_L1_TCA,DERIVED_ADD,DC_ACCESS, IC_FETCH +PRESET,PAPI_L1_ICR,NOT_DERIVED,IC_FETCH +PRESET,PAPI_L2_ICH,NOT_DERIVED,IC_REFILL_FROM_L2 +PRESET,PAPI_L2_DCH,NOT_DERIVED,DC_REFILL_FROM_L2 +PRESET,PAPI_L2_DCM,NOT_DERIVED,DC_REFILL_FROM_SYSTEM_MOES +PRESET,PAPI_L2_DCA,DERIVED_ADD,DC_REFILL_FROM_SYSTEM_MOES, DC_REFILL_FROM_L2_MOES +PRESET,PAPI_L2_ICM,NOT_DERIVED,IC_REFILL_FROM_SYSTEM +PRESET,PAPI_L2_DCR,NOT_DERIVED,DC_REFILL_FROM_L2_OES +PRESET,PAPI_L2_DCW,NOT_DERIVED,DC_REFILL_FROM_L2_M +PRESET,PAPI_L2_DCH,NOT_DERIVED,DC_REFILL_FROM_L2_MOES +PRESET,PAPI_L1_LDM,NOT_DERIVED,DC_REFILL_FROM_L2_OES +PRESET,PAPI_L1_STM,NOT_DERIVED,DC_REFILL_FROM_L2_M +PRESET,PAPI_L2_LDM,NOT_DERIVED,DC_REFILL_FROM_SYSTEM_OES +PRESET,PAPI_L2_STM,NOT_DERIVED,DC_REFILL_FROM_SYSTEM_M +PRESET,PAPI_L1_DCM,DERIVED_ADD,DC_REFILL_FROM_SYSTEM_MOES, DC_REFILL_FROM_L2_MOES +PRESET,PAPI_L1_ICM,DERIVED_ADD,IC_REFILL_FROM_L2, IC_REFILL_FROM_SYSTEM +PRESET,PAPI_L1_TCM,DERIVED_ADD,DC_REFILL_FROM_SYSTEM_MOES,DC_REFILL_FROM_L2_MOES,IC_REFILL_FROM_SYSTEM,IC_REFILL_FROM_L2 +PRESET,PAPI_L2_TCM,DERIVED_ADD,DC_REFILL_FROM_SYSTEM_MOES,IC_REFILL_FROM_SYSTEM +PRESET,PAPI_L2_ICA,DERIVED_ADD,IC_REFILL_FROM_SYSTEM,IC_REFILL_FROM_L2 +PRESET,PAPI_L2_TCH,DERIVED_ADD,IC_REFILL_FROM_L2,DC_REFILL_FROM_L2_MOES +PRESET,PAPI_L2_TCA,DERIVED_ADD,IC_REFILL_FROM_L2,IC_REFILL_FROM_SYSTEM,DC_REFILL_FROM_L2_MOES,DC_REFILL_FROM_SYSTEM_MOES +PRESET,PAPI_FML_INS,NOT_DERIVED,FP_DISPATCHED_FPU_MULS +PRESET,PAPI_FAD_INS,NOT_DERIVED,FP_DISPATCHED_FPU_ADDS +PRESET,PAPI_FP_OPS,NOT_DERIVED,FP_DISPATCHED_FPU_ADDS_AND_MULS 
+PRESET,PAPI_FP_INS,NOT_DERIVED,FR_RETIRED_FPU_INSTRUCTIONS +PRESET,PAPI_FPU_IDL,NOT_DERIVED,FP_CYCLES_WITH_NO_FPU_OPS_RETIRED + +CPU,INTEL_PIV +PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALL +PRESET,PAPI_TOT_CYC,NOT_DERIVED,GLOBAL_POWER_EVENTS +PRESET,PAPI_L1_ICM,NOT_DERIVED,BPU_FETCH_REQUEST +PRESET,PAPI_L1_ICA,NOT_DERIVED,UOP_QUEUE_WRITES_TC_BUILD_DELIVER +PRESET,PAPI_TLB_DM,NOT_DERIVED,PAGE_WALK_TYPE_D +PRESET,PAPI_TLB_IM,NOT_DERIVED,PAGE_WALK_TYPE_I +PRESET,PAPI_TLB_TL,NOT_DERIVED,PAGE_WALK_TYPE +PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTR_RETIRED_NON_BOGUS +PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_TYPE +PRESET,PAPI_BR_TKN,NOT_DERIVED,BRANCH_RETIRED_TAKEN +PRESET,PAPI_BR_NTK,NOT_DERIVED,BRANCH_RETIRED_NOT_TAKEN +PRESET,PAPI_BR_MSP,NOT_DERIVED,BRANCH_RETIRED_MISPREDICTED +PRESET,PAPI_BR_PRC,NOT_DERIVED,BRANCH_RETIRED_PREDICTED +PRESET,PAPI_L2_TCH,NOT_DERIVED,BSQ_CACHE_REFERENCE_2L_HITS +PRESET,PAPI_L2_TCM,NOT_DERIVED,BSQ_CACHE_REFERENCE_2L_MISSES +PRESET,PAPI_L2_TCA,NOT_DERIVED,BSQ_CACHE_REFERENCE_2L_ACCESSES +PRESET,PAPI_L3_TCH,NOT_DERIVED,BSQ_CACHE_REFERENCE_3L_HITS +PRESET,PAPI_L3_TCM,NOT_DERIVED,BSQ_CACHE_REFERENCE_3L_MISSES +PRESET,PAPI_L3_TCA,NOT_DERIVED,BSQ_CACHE_REFERENCE_3L_ACCESSES +PRESET,PAPI_FP_INS,NOT_DERIVED,X87_FP_UOP + +CPU,INTEL_ATOM +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES +PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_ALL_REF +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS +PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_ALL_REF, L1I_READS +PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS +PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISSES +PRESET,PAPI_TLB_DM,NOT_DERIVED,DATA_TLB_MISSES.DTLB_MISS +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_EXEC +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED.TAKEN +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISSP_EXEC +PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS.ANY +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED.BUS +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED.ANY_P +PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RCV +PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_LD 
+PRESET,PAPI_FP_INS,NOT_DERIVED,X87_OPS_RETIRED.ANY +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES +PRESET,PAPI_L2_DCM,NOT_DERIVED,MEM_LOAD_RETIRED_L2_MISS +PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB.MISSES + +CPU,INTEL_CORE +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INSTR_RET +PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALL +PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES +PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTR_RET +PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RX +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INSTR_RET +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISSP_EXEC +PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISS +PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISSES +PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISS, ITLB.MISSES +PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_LD + +CPU,INTEL_CORE2 +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED.ANY +PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS.ANY +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED.BUS +PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED.ANY_P +PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RCV +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED.TAKEN +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISSP_EXEC +PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISSES.ANY +PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISSES +PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB.MISSES +PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_ALL_REF +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS +PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_ALL_REF, L1I_READS +# PAPI_L2_ICH seems not to work +PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_IFETCH +PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_LD +PRESET,PAPI_FP_INS,NOT_DERIVED,X87_OPS_RETIRED.ANY +PRESET,PAPI_L1_DCM,NOT_DERIVED,MEM_LOAD_RETIRED_L1D_MISS +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES +PRESET,PAPI_L1_TCM,DERIVED_ADD,MEM_LOAD_RETIRED_L1D_MISS, L1I_MISSES +PRESET,PAPI_L2_DCM,NOT_DERIVED,MEM_LOAD_RETIRED_L2_MISS + +CPU,INTEL_CORE2EXTREME +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED.ANY +PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS.ANY +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED.BUS 
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED.ANY_P +PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RCV +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED.TAKEN +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISSP_EXEC +PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISSES.ANY +PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB.MISSES +PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB.MISSES +PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_ALL_REF +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS +PRESET,PAPI_L1_TCA, DERIVED_ADD, L1D_ALL_REF, L1I_READS +# PAPI_L2_ICH seems not to work +PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_IFETCH +PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_LD +PRESET,PAPI_FP_INS,NOT_DERIVED,X87_OPS_RETIRED.ANY +PRESET,PAPI_L1_DCM,NOT_DERIVED,MEM_LOAD_RETIRED.L1D_MISS +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES +PRESET,PAPI_L1_TCM,DERIVED_ADD,MEM_LOAD_RETIRED.L1D_MISS, L1I_MISSES +PRESET,PAPI_L2_DCM,NOT_DERIVED,MEM_LOAD_RETIRED.L2_MISS + +CPU,INTELCOREI7 +PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED.ALL_BRANCHES +PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS.ANY +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED.CORE +PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTR.RETIRED_ANY +PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INT_RCV +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_MISP_EXEC_TAKEN +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_EXEC_ANY +PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISSES.ANY +PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES_ANY +PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB_MISSES_ANY +PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_ALL_REF_ANY +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS +PRESET,PAPI_L1_TCA, DERIVED_ADD, L1D_ALL_REF_ANY, L1I_READS +# PAPI_L2_ICH seems not to work +PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_IFETCH +PRESET,PAPI_L2_DCH,NOT_DERIVED,MEM_LOAD_RETIRED.L2_HIT +PRESET,PAPI_FP_INS,NOT_DERIVED,INST_RETIRED.X87 +PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_PREFETCH_MISS +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES +PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_PREFETCH_MISS, L1I_MISSES +PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS_MISS + +CPU,INTEL_WESTMERE 
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED.ALL_BRANCHES +PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS.ANY +PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED.CORE +PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTR.RETIRED_ANY +PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_MISP_EXEC_TAKEN +PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_EXEC_ANY +PRESET,PAPI_TLB_DM,NOT_DERIVED,DTLB_MISSES.ANY +PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES_ANY +PRESET,PAPI_TLB_TL,DERIVED_ADD,DTLB_MISSES.ANY, ITLB_MISSES_ANY +PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_READS +# PAPI_L2_ICH seems not to work +PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_IFETCH +PRESET,PAPI_L2_DCH,NOT_DERIVED,MEM_LOAD_RETIRED.L2_HIT +PRESET,PAPI_FP_INS,NOT_DERIVED,INST_RETIRED.X87 +PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_PREFETCH_MISS +PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_MISSES +PRESET,PAPI_L1_TCM, DERIVED_ADD, L1D_PREFETCH_MISS, L1I_MISSES +PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS_MISS + + diff --git a/src/ftests/Makefile b/src/ftests/Makefile new file mode 100644 index 0000000..315e1e7 --- /dev/null +++ b/src/ftests/Makefile @@ -0,0 +1,31 @@ +# File: ftests/Makefile + +include Makefile.target + +INCLUDE = -I../testlib -I. -I.. +FFLAGS = $(CFLAGS) -ffixed-line-length-132 +testlibdir=../testlib +TESTLIB= $(testlibdir)/libtestlib.a +DOLOOPS= $(testlibdir)/do_loops.o + +ifneq ($(F77),) +include Makefile.recipies + +install: default + @echo "Fortran tests (DATADIR) being installed in: \"$(DATADIR)\""; + -mkdir -p $(DATADIR)/ftests + -chmod go+rx $(DATADIR) + -chmod go+rx $(DATADIR)/ftests + -find . -perm -100 -type f -exec cp {} $(DATADIR)/ftests \; + -chmod go+rx $(DATADIR)/ftests/* + -find . -name "*.[Ffh]" -type f -exec cp {} $(DATADIR)/ftests \; + -cp Makefile.target $(DATADIR)/ftests/Makefile + -cat Makefile.recipies >> $(DATADIR)/ftests/Makefile + +else +all: + @echo "Install Fortran compiler to build and run Fortran tests" + +install: + @echo "No Fortran tests to install." 
+endif diff --git a/src/ftests/Makefile.recipies b/src/ftests/Makefile.recipies new file mode 100644 index 0000000..9791db2 --- /dev/null +++ b/src/ftests/Makefile.recipies @@ -0,0 +1,82 @@ +ALL = strtest zero zeronamed first second tenth description fdmemtest accum highlevel cost \ + case1 case2 clockres eventname fmatrixlowpapi fmatrixpapi fmatrixpapi2 flops fmultiplex1 \ + johnmay2 fmultiplex2 avail + +.PHONY : all default ftests ftest clean install + +all default ftests ftest: $(ALL) + +clockres: clockres.F $(TESTLIB) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) clockres.F $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o clockres + +avail: avail.F $(TESTLIB) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) avail.F $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o avail + +eventname: eventname.F $(TESTLIB) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) eventname.F $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o eventname + +case1: case1.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) case1.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o case1 + +case2: case2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) case2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o case2 + +fdmemtest: fdmemtest.F $(TESTLIB) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) fdmemtest.F $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o fdmemtest + +fmatrixlowpapi: fmatrixlowpapi.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) fmatrixlowpapi.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o fmatrixlowpapi + +fmatrixpapi: fmatrixpapi.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) fmatrixpapi.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o fmatrixpapi + +fmatrixpapi2: fmatrixpapi2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) fmatrixpapi2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o fmatrixpapi2 + +strtest: strtest.F $(TESTLIB) $(PAPILIB) + $(F77) 
$(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) strtest.F $(TESTLIB) $(PAPILIB) $(LDFLAGS) -o strtest + +flops: flops.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) flops.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o flops + +description: description.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) description.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) $(LDFLAGS) -o description + +accum: accum.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) accum.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o accum $(LDFLAGS) + +highlevel: highlevel.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) highlevel.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o highlevel $(LDFLAGS) + +zero: zero.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) zero.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o zero $(LDFLAGS) + +zeronamed: zeronamed.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) zeronamed.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o zeronamed $(LDFLAGS) + +first: first.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) first.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o first $(LDFLAGS) + +second: second.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) second.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o second $(LDFLAGS) + +tenth: tenth.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) tenth.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o tenth $(LDFLAGS) + +cost: cost.F $(TESTLIB) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) cost.F $(TESTLIB) $(PAPILIB) -o cost $(LDFLAGS) + +johnmay2: johnmay2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) johnmay2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o johnmay2 $(LDFLAGS) + +fmultiplex1: fmultiplex1.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) fmultiplex1.F $(TESTLIB) $(DOLOOPS) 
$(PAPILIB) -o fmultiplex1 $(LDFLAGS) + +fmultiplex2: fmultiplex2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) + $(F77) $(INCLUDE) $(FFLAGS) $(FTOPTFLAGS) fmultiplex2.F $(TESTLIB) $(DOLOOPS) $(PAPILIB) -o fmultiplex2 $(LDFLAGS) + +clean: + rm -f *.o *genmod.f90 *genmod.mod *.stderr *.stdout core *~ $(ALL) + +distclean clobber: clean + rm -f Makefile.target diff --git a/src/ftests/Makefile.target.in b/src/ftests/Makefile.target.in new file mode 100644 index 0000000..718586e --- /dev/null +++ b/src/ftests/Makefile.target.in @@ -0,0 +1,23 @@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ +datarootdir = @datarootdir@ +datadir = @datadir@/${PACKAGE_TARNAME} +testlibdir = $(datadir)/testlib +DATADIR = $(DESTDIR)$(datadir) +INCLUDE = -I. -I@includedir@ -I$(testlibdir) +LIBDIR = @libdir@ +LIBRARY = @LIBRARY@ +SHLIB=@SHLIB@ +PAPILIB = ../@LINKLIB@ +TESTLIB = $(testlibdir)/libtestlib.a +LDFLAGS = @LDL@ +CC = @CC@ +F77 = @F77@ +CC_R = @CC_R@ +CFLAGS = @CFLAGS@ +OMPCFLGS = @OMPCFLGS@ +FFLAGS = @FFLAGS@ +TOPTFLAGS= @TOPTFLAGS@ +FTOPTFLAGS= @TOPTFLAGS@ + diff --git a/src/ftests/accum.F b/src/ftests/accum.F new file mode 100644 index 0000000..48fdba8 --- /dev/null +++ b/src/ftests/accum.F @@ -0,0 +1,141 @@ +#include "fpapi_test.h" + + program accum + implicit integer (p) + + integer es1, number, i + integer*8 values(10) + integer events(2) + character*PAPI_MAX_STR_LEN name + integer retval + integer tests_quiet, get_quiet + external get_quiet + integer last_char, n + external last_char + + tests_quiet = get_quiet() + es1 = PAPI_NULL + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_create_eventset(es1, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + number=2 + + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval .NE. 
PAPI_OK) then + events(1) = PAPI_TOT_INS + else + events(1) = PAPI_FP_INS + end if + + events(2) = PAPI_TOT_CYC + call PAPIf_add_events( es1, events, number, retval ) + if ( retval.LT.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_events', retval) + end if + + do i=1,10 + values(i)=0 + end do + + call PAPIf_start(es1, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_accum(es1, values(7), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_accum', retval) + end if + + values(1)=values(7) + values(2)=values(8) + call PAPIf_stop(es1, values(3), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_start(es1, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call fdo_flops(NUM_FLOPS) + call PAPIf_accum(es1, values(7), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_accum', retval) + end if + + values(5)=values(7) + values(6)=values(8) + + call fdo_flops(NUM_FLOPS) + + call PAPIf_accum(es1, values(7), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_accum', retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_stop(es1, values(9), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_remove_events( es1, events, number, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_events', retval) + end if + + if (tests_quiet .EQ. 
0) then + call PAPIf_event_code_to_name (events(1), name, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_event_code_to_name', retval) + end if + n=last_char(name) + print *, "Test case accum: Test of PAPI_add_events, ", + * "PAPI_remove_events, PAPI_accum" + print *, "------------------------------------------", + * "------------------------" + write (*,100) "Test type", 1, 2, 3, 4, 5 + write (*,100) name(1:n), values(1), values(3), + * values(5), values(7), values(9) + write (*,100) "PAPI_TOT_CYC", values(2), values(4), + * values(6), values(8), values(10) + print *, "------------------------------------------", + * "------------------------" + 100 format(a15, ":", i10, i10, i10, i10, i10) + print * + print *, "Verification:" + print *, "Column 2 approximately equals to 0;" + print *, "Column 3 approximately equals 2 * Column 1;" + print *, "Column 4 approximately equals 3 * Column 1;" + print *, "Column 5 approximately equals Column 1." + end if + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/avail.F b/src/ftests/avail.F new file mode 100644 index 0000000..ecb242f --- /dev/null +++ b/src/ftests/avail.F @@ -0,0 +1,88 @@ +C This file performs the following tests: +C Hardware info +#include "fpapi_test.h" + + program avail + IMPLICIT integer (p) + + INTEGER ncpu,nnodes,totalcpus,vendor,model, check, handle, n + CHARACTER*(PAPI_MAX_STR_LEN) vstring, mstring + REAL revision, mhz + integer last_char + external last_char + integer i, avail_flag, flags,k,l + CHARACTER*(PAPI_MAX_STR_LEN) event_name, event_descr, + *event_label, event_note + CHARACTER*(10) avail_str, flags_str + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + handle=0 + check = PAPI_VER_CURRENT + call PAPIf_library_init(check) + if ( check.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', check) + end if + + call PAPIf_get_hardware_info( ncpu,nnodes,totalcpus,vendor, + . 
vstring, model, mstring, revision, mhz ) + + if (tests_quiet .EQ. 0) then + print *, 'Hardware information and available events' + print *, '--------------------------------------'// + .'---------------------------------------' + n=last_char(vstring) + print *, 'Vendor string and code : ',vstring(1:n), + &' (',vendor,')' + n=last_char(mstring) + print *, 'Model string and code : ',mstring(1:n),' (',model,')' + print *, 'CPU revision : ',revision + print *, 'CPU Megahertz : ',mhz + print *, 'CPUs in an SMP node : ',ncpu + print *, 'Nodes in the system : ',nnodes + print *, 'Total CPUs in the system : ',totalcpus + print *, '--------------------------------------'// + .'---------------------------------------' + + write (*,200) 'Name', 'Code', 'Avail', 'Deriv', + *'Description', '(note)' + 200 format(A8, A12, A9, A6, A25, A30) + end if + event_name=' ' + do i=0, PAPI_MAX_PRESET_EVENTS-1 +C PAPI_L1_DCM is the first event in the list + call papif_get_event_info(PAPI_L1_DCM+i, event_name, + * event_descr, event_label, avail_flag, event_note, flags, check) + if (avail_flag.EQ.1) then + avail_str = 'Yes' + else + avail_str = 'No' + end if + + if (flags.EQ.1) then + flags_str = 'Yes' + else + flags_str = 'No' + end if + + if (check.EQ.PAPI_OK .and. tests_quiet .EQ. 0) then + l=1 + do k=len(event_note),1,-1 + if(l.EQ.1.AND.event_note(k:k).NE.' ') l=k + end do +C PAPI_L1_DCM is the first event in the list + write (6, 100) event_name, PAPI_L1_DCM+i, avail_str, + * flags_str, event_descr, event_note(1:l) + 100 format(A12, '0x', z8, 2x, A5, 1x, A5, A45, 1x,'(', A, ')') + end if + end do + if (tests_quiet .EQ. 0) then + print *, '--------------------------------------'// + .'---------------------------------------' + end if + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/case1.F b/src/ftests/case1.F new file mode 100644 index 0000000..c76dbad --- /dev/null +++ b/src/ftests/case1.F @@ -0,0 +1,108 @@ +C From Dave McNamara at PSRV. Thanks! 
+C Ported to Fortran by Kevin London +C If you try to add an event that doesn't exist, you get the correct error +C message, yet you get subsequent Seg. Faults when you try to do PAPI_start +C and PAPI_stop. I would expect some bizarre behavior if I had no events +C added to the event set and then tried to PAPI_start but if I had +C successfully added one event, then the 2nd one get an error when I +C tried to add it, is it possible for PAPI_start to work but just +C count the first event? + +#include "fpapi_test.h" + + program case1 + IMPLICIT integer (p) + + INTEGER EventSet + INTEGER retval + INTEGER i,j + INTEGER*8 gl(2) + INTEGER n + REAL c,a,b + + INTEGER last_char + EXTERNAL last_char + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + n = 1000 + a = 0.999 + b = 1.001 + j = 0 + i = 0 + EventSet = PAPI_NULL + + retval = PAPI_VER_CURRENT + call PAPIf_library_init( retval ) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_create_eventset( EventSet, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_query_event(PAPI_L2_TCM, retval) + if (retval .EQ. PAPI_OK) then + j = j + 1 + end if + if (j .NE. 0) then + call PAPIf_add_event( EventSet, PAPI_L2_TCM, retval ) + if (retval .NE. PAPI_OK) then + if (retval .NE. PAPI_ECNFLCT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', + *retval) + else + j = j - 1 + end if + end if + end if + i = j + + call PAPIf_query_event(PAPI_L2_DCM, retval) + if (retval .EQ. PAPI_OK) then + j = j + 1 + end if + if (j .EQ. i+1) then + call PAPIf_add_event( EventSet, PAPI_L2_DCM, retval ) + if (retval .NE. PAPI_OK) then + if (retval .NE. PAPI_ECNFLCT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', + *retval) + else + j = j - 1 + end if + end if + end if + + if (J .GT. 
0) then + call PAPIf_start( EventSet, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + end if + + + do i=1, n + c = a * b + end do + + if (j .GT. 0) then + call PAPIf_stop( EventSet, gl, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + end if + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/case2.F b/src/ftests/case2.F new file mode 100644 index 0000000..859751e --- /dev/null +++ b/src/ftests/case2.F @@ -0,0 +1,119 @@ +C From Dave McNamara at PSRV. Thanks! +C Ported to fortran by Kevin London +C If an event is countable but you've exhausted the counter resources +C and you try to add an event, it seems subsequent PAPI_start and/or +C PAPI_stop will causes a Seg. Violation. + +C I got around this by calling PAPI to get the # of countable events, +C then making sure that I didn't try to add more than these number of +C events. I still have a problem if someone adds Level 2 cache misses +C and then adds FLOPS 'cause I didn't count FLOPS as actually requiring +C 2 counters. + +#include "fpapi_test.h" + + program case2 + IMPLICIT integer (p) + + REAL c,a,b + INTEGER n + INTEGER EventSet + INTEGER retval + INTEGER I,j + INTEGER*8 gl(3) + + INTEGER last_char + EXTERNAL last_char + + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + a=0.999 + b=1.001 + n=1000 + i=0 + j=0 + EventSet = PAPI_NULL + + retval = PAPI_VER_CURRENT + call PAPIf_library_init( retval ) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_create_eventset( EventSet, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_query_event(PAPI_BR_CN, retval) + if (retval .EQ. PAPI_OK) then + j = j + 1 + end if + if (j .NE. 
0) then + call PAPIf_add_event( EventSet, PAPI_BR_CN, retval ) + if ( retval .NE. PAPI_OK ) then + if (tests_quiet .EQ. 0) then + call PAPIf_perror( 'PAPIf_add_event' ) + endif + end if + end if + + i = j + + call PAPIf_query_event(PAPI_TOT_CYC, retval) + if (retval .EQ. PAPI_OK) then + j = j + 1 + end if + if (j .EQ. i+1) then + call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK )then + if (tests_quiet .EQ. 0) then + call PAPIf_perror( 'PAPIf_add_event' ) + end if + end if + end if + + i = j + call PAPIf_query_event(PAPI_TOT_INS, retval) + if (retval .EQ. PAPI_OK) then + j = j + 1 + end if + if (j .EQ. i+1) then + call PAPIf_add_event(EventSet,PAPI_TOT_INS,retval) + if ( retval .NE. PAPI_OK )then + if ( retval .NE. PAPI_ECNFLCT ) then + if (tests_quiet .EQ. 0) then + call PAPIf_perror( 'PAPIf_add_event' ) + end if + end if + end if + end if + + if (J .GT. 0) then + call PAPIf_start(EventSet, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + end if + + do i=1,n + c = a * b + end do + + if (J .GT. 0) then + call PAPIf_stop( EventSet, gl, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + end if + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/clockres.F b/src/ftests/clockres.F new file mode 100644 index 0000000..b453b13 --- /dev/null +++ b/src/ftests/clockres.F @@ -0,0 +1,56 @@ +#include "fpapi_test.h" +#define ITERS 100000 + + program clockres + IMPLICIT integer (p) + + INTEGER*8 elapsed_usec(ITERS), elapsed_cyc(ITERS) + INTEGER*8 total_usec, total_cyc + INTEGER i,handle + INTEGER retval + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + total_usec=0 + total_cyc=0 + handle=0 + + retval = PAPI_VER_CURRENT + call PAPIf_library_init( retval ) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPI_library_init', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, 'Test case: Clock resolution.' + print *,'-----------------------------------------------' + end if + + do i=1,ITERS + call PAPIf_get_real_cyc( elapsed_cyc(i) ) + end do + + do i=2,ITERS + if ((elapsed_cyc(i)-elapsed_cyc(i-1)).LT.0 ) stop + total_cyc =total_cyc+(elapsed_cyc(i) - elapsed_cyc(i-1)) + end do + + do i=1,ITERS + call PAPIf_get_real_usec(elapsed_usec(i)) + end do + + do i=2,ITERS + if ((elapsed_usec(i) - elapsed_usec(i-1)).LT.0) stop + total_usec=total_usec+(elapsed_usec(i) - elapsed_usec(i-1)) + end do + + if (tests_quiet .EQ. 0) then + print *,'PAPIf_get_real_cyc : ',(total_cyc/(ITERS-1)) + print *,'PAPIf_get_real_usec: ',(total_usec/(ITERS-1)) + end if + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/cost.F b/src/ftests/cost.F new file mode 100644 index 0000000..e8c7029 --- /dev/null +++ b/src/ftests/cost.F @@ -0,0 +1,143 @@ +#include "fpapi_test.h" + + program cost + implicit integer (p) + + integer es + integer*8 values(10) + integer*8 ototcyc, ntotcyc + integer*4 i + integer retval + + Integer last_char + External last_char + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + es = PAPI_NULL + + if (tests_quiet .EQ. 0) then + print *, "Cost of execution for PAPI start/stop", + *" and PAPI read." + print *, "This test takes a while. Please be patient..." + end if + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_query_event(PAPI_TOT_CYC, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_query_event', retval) + end if + + call PAPIf_query_event(PAPI_TOT_INS, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPIf_query_event', retval) + end if + + call PAPIf_create_eventset(es, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( es, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( es, PAPI_TOT_INS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, "Performing start/stop test..." + end if + + call PAPIf_start(es, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call PAPIf_stop(es, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_get_real_cyc(ototcyc) + + do i=0, 50000 + call PAPIf_start(es, retval) + call PAPIf_stop(es, values(1), retval) + end do + + call PAPIf_get_real_cyc(ntotcyc) + ntotcyc=ntotcyc-ototcyc + + if (tests_quiet .EQ. 0) then + print * + print * + print *, "Total cost for PAPI_start/stop(2 counters) over", + *" 50000 iterations:" + write (*, 100) ntotcyc, "total cyc" + write (*, 200) REAL(ntotcyc)/50001.0, "cyc/call pair" + print * + print * + +C Start the read val + + print *, "Performing read test..." + end if + + call PAPIf_start(es, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call PAPIf_get_real_cyc(ototcyc) + + do i=0, 50000 + call PAPIf_read(es, values(1), retval) + end do + + call PAPIf_stop(es, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_get_real_cyc(ntotcyc) + + + ntotcyc=ntotcyc-ototcyc + + if (tests_quiet .EQ. 
0) then + print * + print *, "User level cost for PAPI_read(2 counters) over", + *" 50000 iterations:" + print * + print *, "Total cost for PAPI_read(2 counters) over ", + *"50000 iterations:" + write (*, 100) ntotcyc, "total cyc" + write (*, 200) REAL(ntotcyc)/50001.0, "cyc/call" + end if + + 100 format (I15, A15) + 200 format (F15.6, A15) + call ftests_pass(__FILE__) + end + diff --git a/src/ftests/description.F b/src/ftests/description.F new file mode 100644 index 0000000..c814509 --- /dev/null +++ b/src/ftests/description.F @@ -0,0 +1,185 @@ +#include "fpapi_test.h" + + program description + implicit integer (p) + + integer es1, number + integer*8 values(10) + integer events(2), eventlist(2) + integer eventtotal + integer i + character*PAPI_MAX_STR_LEN name + integer status + integer retval + + Integer last_char + External last_char + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + es1 = PAPI_NULL + + if (tests_quiet .EQ. 0) then + print *, "Test case descriptions: Test of functions:" + print *, " PAPI_add_events, PAPI_remove_events," + print *, " PAPI_list_events, PAPI_describe_event," + print *, " PAPI_state" + end if + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + *'PAPI_library_init', retval) + end if + + call PAPIf_create_eventset(es1, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + number=2 + + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval .NE. 
PAPI_OK) then + events(1) = PAPI_TOT_INS + else + events(1) = PAPI_FP_INS + end if + events(2) = PAPI_TOT_CYC + + call PAPIf_add_events( es1, events, number, retval ) + if ( retval.LT.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_add_event', retval) + end if + + eventtotal=5 + call PAPIf_list_events(es1, eventlist, eventtotal, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_list_events', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, " " + print *, "Event List:" + print *, "---------------------------------------", + * "---------------------------" + print *, "Event Name Code" + end if + do i = 1, eventtotal + call PAPIf_event_code_to_name (eventlist(i), name, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_event_code_to_name', retval) + end if + if (tests_quiet .EQ. 0) then + write (*, 100) name, eventlist(i) + end if + 100 format(A12,O12) + end do + if (tests_quiet .EQ. 0) then + print *, "---------------------------------------", + *"---------------------------" + end if + + call PAPIf_state(es1, status, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_state', retval) + end if + + if (status .NE. PAPI_STOPPED) then + print *, "PAPI_state Error" + stop + end if + if (tests_quiet .EQ. 0) then + print *, "PAPI_state: PAPI_STOPPED" + end if + + call PAPIf_start(es1, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + if (tests_quiet .EQ. 0) then + print *, "PAPI_start" + end if + + call PAPIf_state(es1, status, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_state', retval) + end if + + if (status .NE. PAPI_RUNNING) then + print *, "PAPI_state Error" + stop + end if + if (tests_quiet .EQ. 
0) then + print *, "PAPI_state: PAPI_RUNNING" + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_stop(es1, values, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, "PAPI_stop" + end if + + call PAPIf_state(es1, status, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_state', retval) + end if + + if (status .NE. PAPI_STOPPED) then + print *, "PAPI_state Error" + stop + end if + if (tests_quiet .EQ. 0) then + print *, "PAPI_state: PAPI_STOPPED" + end if + + call PAPIf_remove_events( es1, events, number, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_remove_events', retval) + end if + + if (tests_quiet .EQ. 0) then + call PAPIf_event_code_to_name (eventlist(1), name, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_event_code_to_name', retval) + end if + print *, " " + print *, "Results:" + print *, "---------------------------------------", + * "---------------------------" + print *, "Test type : 1" + print *, name, " : ", values(1) + print *, "PAPI_TOT_CYC : ", values(2) + print *, "---------------------------------------", + * "---------------------------" + + print *, " " + print *, "Verification:" + print *, "1. The events listed by PAPI_describe_event", + * "should be exactly the same events added by PAPI_add_events." + print *, "2. The PAPI_state should be PAPI_RUNNING after ", + * "PAPI_start and before PAPI_stop." + print *, "It should be PAPI_STOPPED at other time." 
+ end if + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/eventname.F b/src/ftests/eventname.F new file mode 100644 index 0000000..2204db1 --- /dev/null +++ b/src/ftests/eventname.F @@ -0,0 +1,34 @@ +#include "fpapi_test.h" + + program eventname + IMPLICIT integer (p) + + INTEGER retval, handle + INTEGER preset + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + handle = 0 + retval = PAPI_VER_CURRENT + call PAPIf_library_init( retval ) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_event_name_to_code( 'PAPI_FP_INS',preset,retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_event_name_to_code', + *retval) + end if + + if (tests_quiet .EQ. 0) then + write (*, 100) preset + 100 format ('PAPI_FP_INS code is', Z10) + end if + call ftests_pass(__FILE__) + end + diff --git a/src/ftests/fdmemtest.F b/src/ftests/fdmemtest.F new file mode 100644 index 0000000..5312097 --- /dev/null +++ b/src/ftests/fdmemtest.F @@ -0,0 +1,41 @@ +#include "fpapi_test.h" + program dmemtest + IMPLICIT integer (p) + + INTEGER retval + INTEGER*8 dmeminfo(PAPIF_DMEM_MAXVAL) + + integer tests_quiet, get_quiet + external get_quiet + real EventSet + + tests_quiet = get_quiet() + EventSet = PAPI_NULL + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + + CALL PAPIf_get_dmem_info(dmeminfo, retval) + if ( retval.NE.PAPI_OK) then + stop + end if + + if (tests_quiet .EQ. 
0) then + print *, "Mem Size: ", dmeminfo(PAPIF_DMEM_VMSIZE) + print *, "Mem Resident: ", dmeminfo(PAPIF_DMEM_RESIDENT) + print *, "Mem High Water: ", dmeminfo(PAPIF_DMEM_HIGH_WATER) + print *, "Mem Shared: ", dmeminfo(PAPIF_DMEM_SHARED) + print *, "Mem Text: ", dmeminfo(PAPIF_DMEM_TEXT) + print *, "Mem Library: ", dmeminfo(PAPIF_DMEM_LIBRARY) + print *, "Mem Heap: ", dmeminfo(PAPIF_DMEM_HEAP) + print *, "Mem Locked: ", dmeminfo(PAPIF_DMEM_LOCKED) + print *, "Mem Stack: ", dmeminfo(PAPIF_DMEM_STACK) + print *, "Mem Pagesize: ", dmeminfo(PAPIF_DMEM_PAGESIZE) + end if + call ftests_pass(__FILE__) + end diff --git a/src/ftests/first.F b/src/ftests/first.F new file mode 100644 index 0000000..fb8d3af --- /dev/null +++ b/src/ftests/first.F @@ -0,0 +1,191 @@ +#include "fpapi_test.h" + + program first + IMPLICIT integer (p) + + integer event1 + INTEGER retval + INTEGER*8 values(10) + INTEGER*8 max, min + INTEGER EventSet + integer domain, granularity + character*(PAPI_MAX_STR_LEN) domainstr, grnstr + character*(PAPI_MAX_STR_LEN) name + + Integer last_char, n + External last_char + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + EventSet = PAPI_NULL + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval .NE. PAPI_OK) then + event1 = PAPI_TOT_INS + else + event1 = PAPI_FP_INS + end if + + call PAPIf_create_eventset(EventSet, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( EventSet, event1, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', + *retval) + end if + + call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPIf_add_event', + *retval) + end if + + call PAPIf_start(EventSet, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_read(EventSet, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_read', retval) + end if + + call PAPIf_reset(EventSet, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_reset', retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_read(EventSet, values(3), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_read', retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_read(EventSet, values(5), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_read', retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_stop(EventSet, values(7), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_read(EventSet, values(9), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_read', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, 'TEST CASE 1: Non-overlapping start, stop, read.' + print *, '--------------------------------------------------'// + * '--------------------------------' + end if + + call PAPIf_get_domain(EventSet, domain, PAPI_DEFDOM, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_domain', retval) + end if + + call stringify_domain(domain, domainstr) + if (tests_quiet .EQ. 0) then + write (*,900) 'Default domain is:', domain, domainstr + 900 format(a20, i3, ' ', a70) + end if + + call PAPIf_get_granularity(eventset, granularity, PAPI_DEFGRN, + *retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPIf_get_granularity', + *retval) + end if + + call stringify_granularity(granularity, grnstr) + if (tests_quiet .EQ. 0) then + call PAPIf_event_code_to_name (event1, name, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_event_code_to_name', retval) + end if + n=last_char(name) + write (*,800) 'Default granularity is:', granularity, grnstr + 800 format(a25, i3, ' ', a20) + + print *, 'Using', NUM_FLOPS, ' iterations of c += b*c' + print *, '-----------------------------------------------'// + * '-----------------------------------' + + write (*,100) 'Test type', 1, 2, 3, 4, 5 + write (*,100) name(1:n), values(1), values(3), + * values(5), values(7), values(9) + + write (*,100) 'PAPI_TOT_CYC', values(2), values(4), + * values(6), values(8), values(10) + + 100 format(a13, ': ', i11, i11, i11, i11, i11) + print *, '-----------------------------------------------'// + * '-----------------------------------' + + + print *, 'Verification:' + print *, 'Column 1 approximately equals column 2' + print *, 'Column 3 approximately equals 2 * column 2' + print *, 'Column 4 approximately equals 3 * column 2' + print *, 'Column 4 exactly equals column 5' + end if + + min = INT(REAL(values(3))*0.8) + max = INT(REAL(values(3))*1.2) + + if ((values(1).gt.max) .OR. (values(1).lt.min) .OR. + *(values(5).gt.(max*2)) .OR. (values(5).lt.(min*2)) .OR. + *(values(7).gt.(max*3)) .OR. (values(7).lt.(min*3)) .OR. + *(values(7).NE.values(9))) then + call ftest_fail(__FILE__, __LINE__, + . name, 1) + end if + + min = INT(REAL(values(4))*0.65) + max = INT(REAL(values(4))*1.35) + if ((values(2).gt.max) .OR. (values(2).lt.min) .OR. + *(values(6).gt.(max*2)) .OR. (values(6).lt.(min*2)) .OR. + *(values(8).gt.(max*3)) .OR. (values(8).lt.(min*3)) .OR. + *(values(8).NE.values(10))) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPI_TOT_CYC', 1) + end if + + call ftests_pass(__FILE__) + + end diff --git a/src/ftests/flops.F b/src/ftests/flops.F new file mode 100644 index 0000000..a693046 --- /dev/null +++ b/src/ftests/flops.F @@ -0,0 +1,73 @@ +C A simple example of using PAPI. The number of flops you should get is +C about INDEX^3 on machines that count an add and multiply as one flop +C (such as SGI), and 2*(INDEX^3) on those that do not (such as INTEL). +C -Kevin London + +#include "fpapi_test.h" + + program flops + implicit integer (p) + integer index + + PARAMETER(index=100) + REAL*4 matrixa(index,index),matrixb(index,index),mres(index,index) + REAL*4 proc_time, mflops, real_time + INTEGER*8 flpins + INTEGER i,j,k, retval + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval .NE. PAPI_OK) then + call ftest_skip(__FILE__, __LINE__, 'PAPI_FP_INS', PAPI_ENOEVNT) + end if + +C Initialize the Matrix arrays + do i=1,index + do j=1,index + matrixa(i,j) = i+j + matrixb(i,j) = j-i + mres(i,j) = 0.0 + end do + end do + +C Setup PAPI library and begin collecting data from the counters + call PAPIf_flips( real_time, proc_time, flpins, mflops, retval ) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, 'PAPIf_flips', retval) + end if + +C Matrix-Matrix Multiply + do i=1,index + do j=1,index + do k=1,index + mres(i,j) = mres(i,j) + matrixa(i,k)*matrixb(k,j) + end do + end do + end do + +C Collect the data into the Variables passed in + call PAPIf_flips( real_time, proc_time, flpins, mflops, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, 'PAPIf_flips', retval) + end if + if (tests_quiet .EQ. 
0) then + print *, 'Real_time: ', real_time + print *, ' Proc_time: ', proc_time + print *, ' Total flpins: ', flpins + print *, ' MFLOPS: ', mflops + end if + call dummy(mres) + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/fmatrixlowpapi.F b/src/ftests/fmatrixlowpapi.F new file mode 100644 index 0000000..e4d617d --- /dev/null +++ b/src/ftests/fmatrixlowpapi.F @@ -0,0 +1,173 @@ +C **************************************************************************** +C +C fmatrixlowpapi.F +C An example of matrix-matrix multiplication that uses the PAPI low-level +C interface to look at the performance. Written by Kevin London, +C March 2000 +C **************************************************************************** + +#include "fpapi_test.h" + + program fmatrixlowpapi + implicit integer (p) + + INTEGER ncols1,nrows1,ncols2,nrows2 + PARAMETER(nrows1=175,ncols1=225,nrows2=ncols1,ncols2=150) + INTEGER i,j,k,retval,nchr,numevents,EventSet + CHARACTER*(PAPI_MAX_STR_LEN) vstring,mstring +C PAPI values of the counters + INTEGER event + INTEGER*8 values(2) + INTEGER*8 starttime,stoptime + REAL*8 finaltime + INTEGER ncpu,nnodes,totalcpus,vendor,model + REAL revision, mhz + REAL*8 p(nrows1,ncols1),q(nrows2,ncols2), + & r(nrows1,ncols2) + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + EventSet = PAPI_NULL + +C Setup default values + numevents=0 + starttime=0 + stoptime=0 + + retval = PAPI_VER_CURRENT + call PAPIf_library_init( retval ) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + *'PAPI_library_init', retval) + end if + +C Create the eventset + call PAPIf_create_eventset(EventSet,retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_create_eventset', retval) + end if + +C Total cycles + call PAPIf_add_event(EventSet,PAPI_TOT_CYC,retval) + if ( retval .NE. 
PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_add_event PAPI_TOT_CYC', retval) + end if + +C Total [floating point] instructions + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval .NE. PAPI_OK) then + event = PAPI_TOT_INS + else + event = PAPI_FP_INS + end if + + call PAPIf_add_event(EventSet,event,retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_add_event PAPI_TOT_INS', retval) + end if + +C Grab the hardware info + call PAPIf_get_hardware_info( ncpu, nnodes, totalcpus, vendor, + . vstring, model, mstring, revision, mhz ) + do i=len(mstring),1,-1 + if(mstring(i:i).NE.' ') goto 10 + end do + 10 if(i.LT.1)then + nchr=1 + else + nchr=i + end if + + if (tests_quiet .EQ. 0) then + print * + print 100, totalcpus,mstring(1:nchr), mhz + print * + print 101,'ncpu',ncpu, 'nnodes',nnodes, 'totalcpus',totalcpus + print 102,'mhz',mhz,'revision',revision + print 103,'vendor',vendor,'vstring',vstring + print 104,'model',model,'mstring',mstring + print * + end if + 100 format(i5,' CPU(s) ',a,' at ',f7.2,' MHz') + 101 format(a9,' =',i6,7x,a9,' =',i5,5x,a9,'=',i5) + 102 format(a9,' =',f7.2,6x,a9,' =',f15.5) + 103 format(a9,' =',i6,7x,a9,' =',a40) + 104 format(a9,' =',i6,7x,a9,' =',a40) +C Open matrix file number 1 for reading +C OPEN(UNIT=1,FILE='fmt1',STATUS='OLD') +C Open matrix file number 2 for reading +C OPEN(UNIT=2,FILE='fmt2',STATUS='OLD') + +C matrix 1: read in the matrix values + do i=1, nrows1 + do j=1,ncols1 + p(i,j) = i*j*1.0 + end do + end do + +C matrix 2: read in the matrix values + do i=1, nrows2 + do j=1,ncols2 + q(i,j) = i*j*1.0 + end do + end do + +C Initialize the result matrix + do i=1,nrows1 + do j=1, ncols2 + r(i,j) = i*j*1.0 + end do + end do + +C Grab the beginning time + call PAPIf_get_real_usec( starttime ) + +C Start the event counters + call PAPIf_start( EventSet, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPIf_start', retval) + end if + +C Compute the matrix-matrix multiplication + do i=1,nrows1 + do j=1,ncols2 + do k=1,ncols1 + r(i,j)=r(i,j) + p(i,k)*q(k,j) + end do + end do + end do + +C Stop the counters and put the results in the array values + call PAPIf_stop(EventSet,values,retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_get_real_usec( stoptime ) + finaltime=(REAL(stoptime)/1000000.0)-(REAL(starttime)/1000000.0) + +C Make sure the compiler does not optimize away the multiplication + call dummy(r) + + if (tests_quiet .EQ. 0) then + print *, 'Time: ', finaltime, 'seconds' + print *, 'Cycles: ', values(1) + + if (event .EQ. PAPI_TOT_INS) then + print *, 'Total Instructions: ', values(2) + else + print *, 'FP Instructions: ', values(2) + write(*,'(a,f9.6)') ' Efficiency (fp/cycle):', + & real(values(2))/real(values(1)) + end if + end if + + call ftests_pass(__FILE__) + end + diff --git a/src/ftests/fmatrixpapi.F b/src/ftests/fmatrixpapi.F new file mode 100644 index 0000000..f04206f --- /dev/null +++ b/src/ftests/fmatrixpapi.F @@ -0,0 +1,146 @@ +C**************************************************************************** +C +C fmatrixpapi.f +C An example of matrix-matrix multiplication and using PAPI high level to +C look at the performance. 
written by Kevin London +C March 2000 +C**************************************************************************** + +#include "fpapi_test.h" + + program fmatrixpapi + IMPLICIT integer (p) + + INTEGER ncols1,nrows1,ncols2,nrows2 + PARAMETER(nrows1=175,ncols1=225,nrows2=ncols1,ncols2=150) + INTEGER i,j,k,num_events,retval +C PAPI standardized event to be monitored + INTEGER event(2) +C PAPI values of the counters + INTEGER*8 values(2) + REAL*8 p(nrows1,ncols1),q(nrows2,ncols2), + & r(nrows1,ncols2),tmp + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + +C Setup default values + num_events=0 + +C Open matrix file number 1 for reading +C OPEN(UNIT=1,FILE='fmt1',STATUS='OLD') +C Open matrix file number 2 for reading +C OPEN(UNIT=2,FILE='fmt2',STATUS='OLD') + +C See how many hardware events at one time are supported +C This also initializes the PAPI library + call PAPIf_num_counters( num_events ) + if ( num_events .LT. 2 ) then + print *,'This example program requries the architecture to ', + . 'support 2 simultaneous hardware events...shutting down.' + call ftest_skip(__FILE__, __LINE__, + * 'too few counters', num_events) + end if + + if (tests_quiet .EQ. 0) then + print *, 'Number of hardware counters supported: ', num_events + end if + + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval .NE. PAPI_OK) then + event(1) = PAPI_TOT_INS + else +C Total floating point operations + event(1) = PAPI_FP_INS + end if + +C Time used + event(2) = PAPI_TOT_CYC + +C matrix 1: read in the matrix values + do i=1, nrows1 + do j=1,ncols1 + p(i,j) = i*j*1.0 + end do + end do + +C matrix 2: read in the matrix values + do i=1, nrows2 + do j=1,ncols2 + q(i,j) = i*j*1.0 + end do + end do + +C Initialize the result matrix + do i=1,nrows1 + do j=1, ncols2 + r(i,j) = i*j*1.0 + end do + end do + +C Set up the counters + num_events = 2 + call PAPIf_start_counters( event, num_events, retval) + if ( retval .NE. 
PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_start_counters', retval) + end if + +C Clear the counter values + call PAPIf_read_counters(values, num_events,retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_read_counters', retval) + end if + +C Compute the matrix-matrix multiplication + do i=1,nrows1 + do j=1,ncols2 + do k=1,ncols1 + r(i,j)=r(i,j) + p(i,k)*q(k,j) + end do + end do + end do + +C Stop the counters and put the results in the array values + call PAPIf_stop_counters(values,num_events,retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_stop_counters', retval) + end if + +C Make sure the compiler does not optimize away the multiplication + call dummy(r) + + if (tests_quiet .EQ. 0) then + + if (event(1) .EQ. PAPI_TOT_INS) then + print *, 'TOT Instructions: ',values(1) + else + print *, 'FP Instructions: ',values(1) + end if + + print *, 'Cycles: ',values(2) + + if (event(1) .EQ. PAPI_FP_INS) then + write(*,'(a,f9.6)') ' Efficiency (flops/cycles):', + & real(values(1))/real(values(2)) +C Compare measured FLOPS to expected value + tmp=2.0*real(nrows1)*real(ncols2)*real(ncols1) + if(abs(values(1)-tmp).gt.tmp*0.05)then +C Maybe we are counting FMAs? + tmp=tmp/2.0 + if(abs(values(1)-tmp).gt.tmp*0.05)then + print *,'Expected operation count:',2.0*tmp + print *,'Or possibly (using FMA): ',tmp + print *,'Instead I got: ',values(1) + call ftest_fail(__FILE__, __LINE__, + * 'Unexpected FLOP count (check vector operations)', 1) + end if + end if + end if + end if + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/fmatrixpapi2.F b/src/ftests/fmatrixpapi2.F new file mode 100644 index 0000000..b379574 --- /dev/null +++ b/src/ftests/fmatrixpapi2.F @@ -0,0 +1,195 @@ +C **************************************************************************** +C +C fmatrixpapi2.f +C An example of matrix-matrix multiplication and using PAPI high level to +C look at the performance. 
The example illustrates how PAPIF_read_counters +C and PAPIF_accum_counters can be used to selectively measure parts of a +C code without having to use the low-level interface. +C +C Derived from an example written by Kevin London March 2000 +C **************************************************************************** + +#include "fpapi_test.h" + + program fmatrixpapi + IMPLICIT integer (p) + + INTEGER ncols1,nrows1,ncols2,nrows2 + PARAMETER(nrows1=175,ncols1=225,nrows2=ncols1,ncols2=150) + INTEGER i,j,num_events,retval +C PAPI standardized event to be monitored + INTEGER event(2) +C PAPI values of the counters + INTEGER*8 values(2), dummies(2) + REAL*8 p(nrows1,ncols1),q(nrows2,ncols2), + & r(nrows1,ncols2) + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + +C Setup default values + num_events=0 + +C Open matrix file number 1 for reading +C OPEN(UNIT=1,FILE='fmt1',STATUS='OLD') +C Open matrix file number 2 for reading +C OPEN(UNIT=2,FILE='fmt2',STATUS='OLD') + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + +C Total floating point operations + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval .NE. PAPI_OK) then + event(1) = PAPI_TOT_INS + else +C Total floating point operations + event(1) = PAPI_FP_INS + end if + +C Time used + event(2) = PAPI_TOT_CYC + +C See how many hardware events at one time are supported + call PAPIf_num_counters( num_events ) + if ( num_events .LT. 2 ) then + print *,'This example program requries the architecture ', + . 'to support 2 simultaneous hardware events...shutting down.' + stop + end if + + if (tests_quiet .EQ. 
0) then + print *, 'Number of hardware counters supported: ', num_events + end if + +C matrix 1: read in the matrix values + do i=1, nrows1 + do j=1,ncols1 + p(i,j) = i*j*1.0 + end do + end do + +C matrix 2: read in the matrix values + do i=1, nrows2 + do j=1,ncols2 + q(i,j) = i*j*1.0 + end do + end do + +C Initialize the result matrix + do i=1,nrows1 + do j=1, ncols2 + r(i,j) = i*j*1.0 + end do + end do + +C Set up the counters + num_events = 2 + call PAPIf_start_counters( event, num_events, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_start_counters', retval) + end if + +C We wish to count the events for this call + call Adding_MatMult(p,q,r,nrows1,ncols1,ncols2) + +C Read and clear the counter values + call PAPIf_read_counters(values, num_events,retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_read_counters', retval) + end if + + if (tests_quiet .EQ. 0) then + print * + if (event(1) .EQ. PAPI_TOT_INS) then + print *, 'TOT Instructions: ',values(1) + else + print *, 'FP Instructions: ',values(1) + end if + + print *, 'Cycles: ',values(2) + + if (event(1) .EQ. PAPI_FP_INS) then + write(*,'(a,f9.6)') ' Efficiency (flops/cycles):', + & real(values(1))/real(values(2)) + end if + end if + +C We don't wish to count the events for this call + call Adding_MatMult(p,q,r,nrows1,ncols1,ncols2) + +C Clear the counter values + call PAPIf_read_counters(dummies, num_events,retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_read_counters', retval) + end if + +C We wish to count the events for this call + call Adding_MatMult(p,q,r,nrows1,ncols1,ncols2) + +C Read the counter values + call PAPIf_accum_counters(values, num_events,retval) + if ( retval .NE. 
PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_accum_counters', retval) + end if + +C Stop the counters and put the results in the array values + call PAPIf_stop_counters(dummies,num_events,retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + *'PAPIf_stop_counters', retval) + end if + + if (tests_quiet .EQ. 0) then + print * + if (event(1) .EQ. PAPI_TOT_INS) then + print *, 'TOT Instructions: ',values(1) + else + print *, 'FP Instructions: ',values(1) + end if + + print *, 'Cycles: ',values(2) + + if (event(1) .EQ. PAPI_FP_INS) then + write(*,'(a,f9.6)') ' Efficiency (flops/cycles):', + & real(values(1))/real(values(2)) + end if + + print * + print *,'----------------------------------------------------' + print *,'The second instruction and cycle counts should be' + print *,'approximately twice the first ones. The efficiency' + print *,'metric should be fairly equal between the cases.' + end if + + call ftests_pass(__FILE__) + end + + subroutine Adding_MatMult(p,q,r,ni,nk,nj) + implicit integer (p) + integer ni,nk,nj + real*8 p(ni,*),q(nk,*),r(ni,nj) + + integer i,j,k +C Compute the matrix-matrix multiplication + do i=1,ni + do j=1,nj + do k=1,nk + r(i,j)=r(i,j) + p(i,k)*q(k,j) + end do + end do + end do + +C Make sure the compiler does not optimize away the multiplication + call dummy(r) + + end diff --git a/src/ftests/fmultiplex1.F b/src/ftests/fmultiplex1.F new file mode 100644 index 0000000..6143d54 --- /dev/null +++ b/src/ftests/fmultiplex1.F @@ -0,0 +1,418 @@ +#include "fpapi_test.h" + + program multiplex1 + IMPLICIT integer (p) + + integer retval + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + if (tests_quiet .EQ. 0) then + write (*, 100) NUM_ITERS + 100 FORMAT ("multiplex1: Using ", I3, " iterations") + write (*,*) "case1: Does PAPI_multiplex_init() not break", + *" regular operation?" + end if + call case1(retval, tests_quiet) + if (tests_quiet .EQ. 
0) then + write (*,*) "case2: Does setmpx/add work?" + end if + call case2(retval, tests_quiet) + if (tests_quiet .EQ. 0) then + write (*,*) "case3: Does add/setmpx work?" + end if + call case3(retval, tests_quiet) + if (tests_quiet .EQ. 0) then + write (*,*) "case4: Does add/setmpx/add work?" + end if + call case4(retval, tests_quiet) + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPI_library_init', retval) + end if + call ftests_pass(__FILE__) + + end + + subroutine init_papi(event) + IMPLICIT integer (p) + integer retval + integer event + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPI_library_init', retval) + end if + call PAPIf_query_event(PAPI_TOT_INS, retval) + if (retval .NE. PAPI_OK) then + event = PAPI_TOT_CYC + else + event = PAPI_TOT_INS + end if + end + +C Tests that PAPI_multiplex_init does not mess with normal operation. + subroutine case1(ret, tests_quiet) + IMPLICIT integer (p) + integer ret, tests_quiet, event + integer retval, EventSet + INTEGER*8 values(4) + integer fd + + EventSet = PAPI_NULL + call init_papi(event) + call init_multiplex() + + call PAPIf_create_eventset(EventSet, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_create_eventset', retval) + end if + + call PAPIf_add_event( EventSet, event, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call PAPIf_add_event( EventSet, PAPI_TOT_IIS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + end if + + if(tests_quiet .EQ. 
0) then + write(*,*) 'Event set list' + call PrintEventSet(EventSet) + end if + + call do_stuff() + call PAPIf_start(EventSet, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_start', retval) + end if + + fd = 1 + call do_stuff() + call PAPIf_stop(EventSet, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_stop', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, "case1: ", values(1), values(2) + end if + + call PAPIf_cleanup_eventset(EventSet, retval) + if (retval .NE. PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_cleanup_eventset', retval) + end if + + call PAPIF_shutdown() + ret = SUCCESS + end + +C Tests that PAPI_set_multiplex() works before adding events + subroutine case2(ret, tests_quiet) + IMPLICIT integer (p) + integer ret, tests_quiet, event + integer retval, EventSet + INTEGER*8 values(4) + integer fd + + EventSet = PAPI_NULL + call init_papi(event) + call init_multiplex() + + call PAPIf_create_eventset(EventSet, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_create_eventset', retval) + end if + + call PAPIf_assign_eventset_component(EventSet, 0, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_assign_eventset_component', retval) + end if + + call PAPIf_set_multiplex(EventSet, retval) + if ( retval.EQ.PAPI_ENOSUPP) then + call ftest_skip(__FILE__, __LINE__, + & 'Multiplex not implemented', 1) + end if + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'papif_set_multiplex', retval) + end if + + call PAPIf_add_event( EventSet, event, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call PAPIf_add_event( EventSet, PAPI_TOT_IIS, retval) + if ( retval .NE. 
PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + end if + +C NOTE: this print-out was once disabled (guard value -4711) until +C PAPIf_list_event worked for multiplexed event sets; the guard is now 0 + if(tests_quiet .EQ. 0) then + write(*,*) 'Event set list' + call PrintEventSet(EventSet) + endif + + call PAPIf_start(EventSet, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_start', retval) + end if + + fd = 1 + call do_stuff() + call PAPIf_stop(EventSet, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_stop', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, "case2: ", values(1), values(2) + end if + + call PAPIf_cleanup_eventset(EventSet, retval) + if (retval .NE. PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_cleanup_eventset', retval) + end if + + call PAPIF_shutdown() + ret = SUCCESS + end + +C Tests that PAPI_set_multiplex() works after adding events + subroutine case3(ret, tests_quiet) + IMPLICIT integer (p) + integer ret, tests_quiet, event + integer retval, EventSet + INTEGER*8 values(4) + integer fd + + EventSet = PAPI_NULL + call init_papi(event) + call init_multiplex() + + call PAPIf_create_eventset(EventSet, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_create_eventset', retval) + end if + + call PAPIf_add_event( EventSet, event, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + + if(tests_quiet .EQ. 
0) then + write(*,*) 'Event set before call to PAPIf_set_multiplex:' + call PrintEventSet(EventSet) + endif + + call PAPIf_set_multiplex(EventSet, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'papif_set_multiplex', retval) + end if + + if(tests_quiet .EQ. 0) then + write(*,*) 'Event set after call to PAPIf_set_multiplex:' + call PrintEventSet(EventSet) + endif + + call PAPIf_start(EventSet, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_start', retval) + end if + + fd = 1 + call do_stuff() + call PAPIf_stop(EventSet, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_stop', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, "case3: ", values(1), values(2) + end if + + call PAPIf_cleanup_eventset(EventSet, retval) + if (retval .NE. PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_cleanup_eventset', retval) + end if + + call PAPIF_shutdown() + ret = SUCCESS + end + +C Tests that PAPI_set_multiplex() works in between adding events, +C i.e. that PAPI_add_event() still works after +C PAPI_add_event()/PAPI_set_multiplex() + subroutine case4(ret, tests_quiet) + IMPLICIT integer (p) + integer ret, tests_quiet, event + integer retval, EventSet + INTEGER*8 values(4) + integer fd + + EventSet = PAPI_NULL + call init_papi(event) + call init_multiplex() + + call PAPIf_create_eventset(EventSet, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_create_eventset', retval) + end if + + call PAPIf_add_event( EventSet, event, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( EventSet, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call PAPIf_add_event( EventSet, PAPI_TOT_IIS, retval ) + if ( retval .NE. 
PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + end if + + if(tests_quiet .EQ. 0) then + write(*,*) 'Event set before call to PAPIf_set_multiplex:' + call PrintEventSet(EventSet) + endif + + call PAPIf_set_multiplex(EventSet, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'papif_set_multiplex', retval) + end if + + if(tests_quiet .EQ. 0) then + write(*,*) 'Event set after call to PAPIf_set_multiplex:' + call PrintEventSet(EventSet) + endif + +#if (defined(i386)&&defined(linux))||defined(mips) || (defined(__ia64__) && defined(linux)) || (SUBSTR==aix-power) + + call PAPIf_add_event( EventSet, PAPI_L1_DCM, retval ) +C Try alternative event if the above is not possible to use... + if ( retval .EQ. PAPI_ECNFLCT .OR. retval .EQ. PAPI_ENOEVNT ) then + call PAPIf_add_event( EventSet, PAPI_L2_DCM, retval ) + end if + if ( retval .EQ. PAPI_ECNFLCT .OR. retval .EQ. PAPI_ENOEVNT ) then + call PAPIf_add_event( EventSet, PAPI_L2_TCM, retval ) + end if + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( EventSet, PAPI_L1_ICM, retval ) +C Try alternative event if the above is not possible to use... + if ( retval .EQ. PAPI_ECNFLCT .OR. retval .EQ. PAPI_ENOEVNT ) then + call PAPIf_add_event( EventSet, PAPI_L1_LDM, retval ) + end if + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + +#elif (defined(sparc) && defined(sun)) + call PAPIf_add_event( EventSet, PAPI_LD_INS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + call PAPIf_add_event( EventSet, PAPI_SR_INS, retval ) + if ( retval .NE. 
PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + +#elif (defined(__alpha)&&defined(__osf__)) + call PAPIf_add_event( EventSet, PAPI_TLB_DM, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_add_event', retval) + end if + +#else + print *,'*** Did not match in event selection ***' +#endif + + if(tests_quiet .EQ. 0) then + write(*,*) 'Updated event set list:' + call PrintEventSet(EventSet) + endif + + call PAPIf_start(EventSet, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_start', retval) + end if + + fd = 1 + call do_stuff() + call PAPIf_stop(EventSet, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_stop', retval) + end if + + if (tests_quiet .EQ. 0) then + write (*, *) "case4: ", values(1), values(2), values(3), + * values(4) + end if + + call PAPIf_cleanup_eventset(EventSet, retval) + if (retval .NE. PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_cleanup_eventset', retval) + end if + + call PAPIF_shutdown() + ret = SUCCESS + end + diff --git a/src/ftests/fmultiplex2.F b/src/ftests/fmultiplex2.F new file mode 100644 index 0000000..83346fa --- /dev/null +++ b/src/ftests/fmultiplex2.F @@ -0,0 +1,161 @@ +#include "fpapi_test.h" + +#define MAX_TO_ADD 5 + + program multiplex2 + IMPLICIT integer (p) + + integer retval + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + if (tests_quiet .EQ. 0) then + write (*, 100) NUM_ITERS + 100 FORMAT ("multiplex2: Using ", I3, " iterations") + write (*,*) "case1: Does PAPI_multiplex_init() handle", + * " lots of events?" 
+ end if + call case1(tests_quiet, retval) + call ftests_pass(__FILE__) + end + + subroutine init_papi() + IMPLICIT integer (p) + integer retval + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + end + + subroutine case1(tests_quiet, ret) + IMPLICIT integer (p) + integer tests_quiet + integer retval + integer i, ret, fd + integer EventCode + character*(PAPI_MAX_STR_LEN) event_name, event_descr, + * event_label, event_note + integer avail_flag, flags, check + integer EventSet,mask1 + integer*8 values(MAX_TO_ADD*2) + + EventSet = PAPI_NULL + call init_papi() + + call init_multiplex() + + call PAPIf_create_eventset(EventSet, retval) + if ( retval.NE.PAPI_OK) then + + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + * retval) + end if + + call PAPIf_assign_eventset_component(EventSet, 0, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + & 'PAPIf_assign_eventset_component', retval) + end if + + call PAPIf_set_multiplex(EventSet, retval) + if ( retval.EQ.PAPI_ENOSUPP) then + call ftest_skip(__FILE__, __LINE__, + . 'Multiplex not implemented', retval) + end if + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'papif_set_multiplex', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, "Checking for available events..." + end if + + EventCode = 0 + i = 1 + do while (i .LE. MAX_TO_ADD) + avail_flag=0 + do while ((avail_flag.EQ.0).AND. + * (EventCode.LT.PAPI_MAX_PRESET_EVENTS)) + mask1 = ((PAPI_L1_DCM)+EventCode) + if (mask1.NE.PAPI_TOT_CYC) then + call papif_get_event_info(mask1, + * event_name, event_descr, event_label, avail_flag, + * event_note, flags, check) + end if + EventCode = EventCode + 1 + end do + + if ( EventCode.EQ.PAPI_MAX_PRESET_EVENTS .AND. + * i .LT. 
MAX_TO_ADD ) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_add_event', retval) + end if + + if (tests_quiet .EQ. 0) then + write (*, 200) " Adding Event ", event_name + 200 FORMAT(A22, A12) + end if + + mask1 = ((PAPI_L1_DCM)+EventCode) + mask1 = mask1 - 1 + call PAPIf_add_event( EventSet, mask1, retval ) + if ( retval .NE. PAPI_OK .AND. retval .NE. PAPI_ECNFLCT) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_add_event', retval) + stop + end if + + if (tests_quiet .EQ. 0) then + if (retval .EQ. PAPI_OK) then + write (*, 200) " Added Event ", event_name + else + write (*, 200) " Could not add Event ", event_name + end if + end if + + if (retval .EQ. PAPI_OK) then + i = i + 1 + end if + end do + + call PAPIf_start(EventSet, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + fd = 1 + call do_stuff() + + call PAPIf_stop(EventSet, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_cleanup_eventset(EventSet, retval) + if (retval .NE. PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_cleanup_eventset', + * retval) + end if + + call PAPIf_destroy_eventset(EventSet, retval) + if (retval .NE. PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_destroy_eventset', + * retval) + end if + + ret = SUCCESS + + end + diff --git a/src/ftests/highlevel.F b/src/ftests/highlevel.F new file mode 100644 index 0000000..fe2689f --- /dev/null +++ b/src/ftests/highlevel.F @@ -0,0 +1,85 @@ +#include "fpapi_test.h" + + program highlevel + implicit integer (p) + + integer*8 values(10) + integer events(2) + integer eventnum + integer availcounters + integer retval + integer tests_quiet, get_quiet + external get_quiet + character*PAPI_MAX_STR_LEN name + integer last_char, n + external last_char + + tests_quiet = get_quiet() + + eventnum = 2 + call PAPIf_num_counters(availcounters) + if (eventnum .GT. 
availcounters) then + print *, "Not enough hardware counters!" + stop + end if + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval .NE. PAPI_OK) then + events(1)=PAPI_TOT_INS + else + events(1)=PAPI_FP_INS + end if + events(2)=PAPI_TOT_CYC + + call PAPIf_start_counters(events, eventnum, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start_counters', + *retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_read_counters(values(1), eventnum, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_read_counters', + *retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_stop_counters(values(3), eventnum, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop_counters', + *retval) + end if + + if (tests_quiet .EQ. 0) then + call PAPIf_event_code_to_name (events(1), name, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_event_code_to_name', retval) + end if + n=last_char(name) + print *, "Test case highlevel: Test of high-level APIs." 
+ print *, "---------------------------------------------", + * "---------------------" + write (*,100) "Test type", 1, 2 + write (*,100) name(1:n), values(1), values(3) + write (*,100) "PAPI_TOT_CYC", values(2), values(4) + 100 format(a15, ":", i12, i12) + print *, "---------------------------------------------", + * "---------------------" + end if + + call ftests_pass(__FILE__) + End diff --git a/src/ftests/johnmay2.F b/src/ftests/johnmay2.F new file mode 100644 index 0000000..e4e0bdc --- /dev/null +++ b/src/ftests/johnmay2.F @@ -0,0 +1,134 @@ +#include "fpapi_test.h" + + program johnmay2 + implicit integer (p) + + integer*8 values(10) + integer es, event + integer retval + character*PAPI_MAX_STR_LEN name + + Integer last_char, n + External last_char + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + es = PAPI_NULL + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval.EQ.PAPI_OK) then + event = PAPI_FP_INS + else + call PAPIf_query_event(PAPI_TOT_INS, retval) + if ( retval.EQ.PAPI_OK) then + event = PAPI_TOT_INS + else + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_query_event', retval) + end if + end if + + call PAPIf_create_eventset(es, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( es, event, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_start(es, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call PAPIf_cleanup_eventset(es, retval) + if (retval .NE. PAPI_EISRUN) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPIf_cleanup_eventset', + *retval) + end if + + call PAPIf_destroy_eventset(es, retval) + if (retval .NE. PAPI_EISRUN) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_destroy_eventset', + *retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_stop(es, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_destroy_eventset(es, retval) + if (retval .NE. PAPI_EINVAL) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_destroy_eventset', + *retval) + end if + + call PAPIf_cleanup_eventset(es, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_cleanup_eventset', + *retval) + end if + + call PAPIf_destroy_eventset(es, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_destroy_eventset', + *retval) + end if + + if (es .NE. PAPI_NULL) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_destroy_eventset', + *retval) + end if + + if (tests_quiet .EQ. 0) then + call PAPIf_event_code_to_name (event, name, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_event_code_to_name', retval) + end if + n=last_char(name) + print *, "Test case John May 2: cleanup / ", + * "destroy eventset." 
+ print *, "--------------------------------", + * "-----------------" + print *, "Test run : 1" + print *, name(1:n), " : ", values(1) + print *, "----------------------------------", + * "---------------" + + print *, "Verification:" + print *, "These error messages:" + print *, "PAPI Error Code -10: PAPI_EISRUN: ", + * "EventSet is currently counting" + print *, "PAPI Error Code -10: PAPI_EISRUN: ", + * "EventSet is currently counting" + print *, "PAPI Error Code -1: PAPI_EINVAL: ", + * "Invalid argument" + end if + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/nineth.F b/src/ftests/nineth.F new file mode 100644 index 0000000..dbcca3b --- /dev/null +++ b/src/ftests/nineth.F @@ -0,0 +1,174 @@ +#include "fpapi_test.h" + + program nineth + implicit integer (p) + + integer es1, es2 + integer*8 values(10),tvalues(10) + integer domain, granularity + character*(PAPI_MAX_STR_LEN) domainstr, grnstr + integer retval + integer clockrate + real*8 test_flops, min, max + + Integer last_char + External last_char + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_query_event(PAPI_FP_OPS, retval) + if (retval.NE.PAPI_OK) then + call ftest_skip(__FILE__, __LINE__, 'PAPI_FP_OPS', PAPI_ENOEVNT) + end if + + call PAPIf_create_eventset(es1, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( es1, PAPI_FP_OPS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( es1, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPIf_add_event', retval) + end if + + call PAPIf_create_eventset(es2, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( es2, PAPI_FLOPS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_get_clockrate(clockrate) + if (tests_quiet .EQ. 0) then + print *, 'Clockrate:', clockrate + end if + + call PAPIf_start(es1, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call do_flops(NUM_FLOPS) + + call PAPIf_stop(es1, tvalues(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_start(es2, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call do_flops(NUM_FLOPS) + + call PAPIf_stop(es2, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_remove_event( es1, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + call PAPIf_remove_event( es1, PAPI_FP_OPS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + call PAPIf_remove_event( es2, PAPI_FLOPS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + test_flops = tvalues(1)*clockrate*1000000.0 + if ( tvalues(2) .NE. 0) then + test_flops = test_flops / tvalues(2) + else + test_flops = 0.0 + end if + + if (tests_quiet .EQ. 
0) then + print *, "Test case 9: start, stop for derived event PAPI_FLOPS" + print *, "---------------------------------------------" + end if + call PAPIf_get_domain(es1, domain, PAPI_DEFDOM, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_domain', retval) + end if + + call stringify_domain(domain, domainstr) + if (tests_quiet .EQ. 0) then + write (*,900) "Default domain is:", domain, domainstr + 900 format(a20, i3, " ", a70) + end if + + call PAPIf_get_granularity(es1, granularity, PAPI_DEFGRN, + *retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_granularity', + *retval) + end if + + call stringify_granularity(granularity, grnstr) + if (tests_quiet .EQ. 0) then + write (*,800) "Default granularity is:", granularity, grnstr + 800 format(a25, i3, " ", a20) + + print *, " Using", NUM_FLOPS, " iterations of c += b*c" + print *, "---------------------------------------------" + + write (*,810) "Test type :", 1, 2 + write (*,810) "PAPI_FP_OPS :", tvalues(1), 0 + write (*,810) "PAPI_TOT_CYC:", tvalues(2), 0 + write (*,810) "PAPI_FLOPS :", 0, values(1) + print *, "---------------------------------------------" + 810 format(a15, i15, i15) + + print *, "Verification:" + print *, "Last number in row 3 approximately equals", test_flops + end if + + min = values(1) * 0.9 + max = values(1) * 1.1 + if ((test_flops.gt.max) .OR. (test_flops.lt.min)) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPI_FLOPS', 1) + end if + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/second.F b/src/ftests/second.F new file mode 100644 index 0000000..0603448 --- /dev/null +++ b/src/ftests/second.F @@ -0,0 +1,289 @@ +#include "fpapi_test.h" + + program second + implicit integer (p) + integer domain, granularity + character*(PAPI_MAX_STR_LEN) domainstr, grnstr + integer*8 values(10), max, min + integer es1, es2, es3 + integer retval + + Integer last_char + External last_char + integer tests_quiet, get_quiet + external get_quiet +#if (defined(sgi) && defined(host_mips)) + integer id + integer*4 getuid +#endif + +#if (defined(sgi) && defined(host_mips)) + id = getuid() +#endif + + tests_quiet = get_quiet() + es1 = PAPI_NULL + es2 = PAPI_NULL + es3 = PAPI_NULL + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_query_event(PAPI_TOT_INS, retval) + if (retval.NE.PAPI_OK) then + call ftest_skip(__FILE__, __LINE__, 'PAPI_FP_INS', PAPI_ENOEVNT) + end if + + call PAPIf_create_eventset(es1, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( es1, PAPI_TOT_INS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( es1, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_create_eventset(es2, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( es2, PAPI_TOT_INS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( es2, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_create_eventset(es3, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( es3, PAPI_TOT_INS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( es3, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_set_event_domain(es1, PAPI_DOM_ALL, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_set_domain', retval) + end if + + call PAPIf_set_event_domain(es2, PAPI_DOM_KERNEL, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_set_domain', retval) + end if + + call PAPIf_set_event_domain(es3, PAPI_DOM_USER, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_set_domain', retval) + end if + + call PAPIf_start(es1, retval) + + call fdo_flops(NUM_FLOPS) + + if (retval.eq.PAPI_OK) then + call PAPIf_stop(es1, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + end if + + call PAPIf_start(es2, retval) + + call fdo_flops(NUM_FLOPS) + + if (retval.eq.PAPI_OK) then + call PAPIf_stop(es2, values(3), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + end if + + call PAPIf_start(es3, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_stop(es3, values(5), retval) + if ( retval .NE. 
PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_remove_event( es1, PAPI_TOT_INS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + call PAPIf_remove_event( es1, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + call PAPIf_remove_event( es2, PAPI_TOT_INS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + call PAPIf_remove_event( es2, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + call PAPIf_remove_event( es3, PAPI_TOT_INS, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + call PAPIf_remove_event( es3, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + if (tests_quiet .EQ. 0) then + print *, 'Test case 2: Non-overlapping start, stop, read', + *' for all 3 domains.' + print *, '-------------------------------------------------'// + * '------------------------------' + end if + + call PAPIf_get_domain(es1, domain, PAPI_DEFDOM, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_domain', retval) + end if + + call stringify_domain(domain, domainstr) + if (tests_quiet .EQ. 0) then + write (*,900) 'Default domain is:', domain, domainstr + end if + 900 format(a20, i3, ' ', a70) + + call PAPIf_get_granularity(es1, granularity, PAPI_DEFGRN, + *retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_granularity', + *retval) + end if + + call stringify_granularity(granularity, grnstr) + if (tests_quiet .EQ. 
0) then + write (*,800) 'Default granularity is:', granularity, grnstr + end if + 800 format(a25, i3, ' ', a20) + + if (tests_quiet .EQ. 0) then + print *, 'Using', NUM_FLOPS, ' iterations of c += b*c' + + print *, '-------------------------------------------------'// + * '------------------------------' + + print *, 'Test type : PAPI_DOM_ALL PAPI_DOM_KERNEL', + *' PAPI_DOM_USER' + write (*,200) 'PAPI_TOT_INS', values(1), values(3), values(5) + write (*,200) 'PAPI_TOT_CYC', values(2), values(4), values(6) + 200 format(A15, ': ', I15, I15, I15) + + print *, '-------------------------------------------------'// + * '------------------------------' + + print *, 'Verification:' + print *, 'Row 1 approximately equals N 0 N' + print *, 'Column 1 approximately equals column 2 plus column 3' +#if defined(sgi) && defined(host_mips) + print * + print *, '* IRIX requires root for PAPI_DOM_KERNEL', + *' and PAPI_DOM_ALL.' + print *, '* The first two columns will be invalid if not', + *' run as root for IRIX.' +#endif + end if + +#if (defined(sgi) && defined(host_mips)) + if (id.NE.0) then + min = NUM_FLOPS*0.9 + max = NUM_FLOPS*1.1 + if ((values(5) .lt. min) .OR. (values(5) .gt. max)) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_FP_INS', 1) + end if + else + min = values(5)*0.9 + max = values(5)*1.1 + if ((values(1) .lt. min) .OR. (values(1) .gt. max)) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_FP_INS', 1) + end if + min = values(2)*0.9 + max = values(2)*1.1 + if (((values(4)+values(6)) .lt. min) .OR. + * ((values(4)+values(6)) .gt. max)) then + call ftest_fail(__FILE__, __LINE__, 'PAPI_TOT_CYC', 1) + end if + endif +#else + min = INT(REAL(values(5))*0.9) + max = INT(REAL(values(5))*1.1) + if ((values(1) .lt. min) .OR. (values(1) .gt. max)) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_FP_INS', 1) + end if + + min = INT(REAL(values(2))*0.8) + max = INT(REAL(values(2))*1.2) + if (((values(4)+values(6)) .lt. min) .OR. + * ((values(4)+values(6)) .gt. 
max)) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_TOT_CYC', 1) + end if +#endif + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/strtest.F b/src/ftests/strtest.F new file mode 100644 index 0000000..5a06704 --- /dev/null +++ b/src/ftests/strtest.F @@ -0,0 +1,366 @@ +C Strtest - Perform some basic tests of the functionality of the +C string passing to and from the PAPI Fortran interface. +C +C Test 1: Look up an event name from an event code. Use this name +C to try and locate the event code using the name received. +C Long, short and too short strings are used in the tests +C +C Test 2: Look up a PAPI error string. Use long, short and too +C short strings to store the result. +C +C Test 3: Look up and display event descriptions +C using PAPIf_get_event_info. +C +C Comments: +C When using the Fortran interface it may not always be possible to +C use the PAPI predefined constants as actual arguments. Due to the +C values of these constants, compilers might occasionally cast them into the +C wrong type. In the code below the line code=MSGCODE is used to +C make sure that the event code gets the right type. +C +#include "fpapi_test.h" +C Set MSGLEN to the number of characters in the named event in MSGCODE +#define MSGLEN 11 +#define MSGCODE PAPI_L1_DCM +#define ERRCODE PAPI_EINVAL + + program strtest + implicit integer (p) + + CHARACTER*(PAPI_MAX_STR_LEN) papistr + CHARACTER*(PAPI_MAX_STR_LEN*2) papidblstr + + CHARACTER*(PAPI_MAX_STR_LEN) ckstr + CHARACTER*(MSGLEN) invstr1 + CHARACTER*(MSGLEN+1) invstr2 + CHARACTER*(MSGLEN+2) invstr3 + CHARACTER*(MSGLEN-1) invstr4 + CHARACTER*(MSGLEN-2) invstr5 + + integer check,lastchar + integer code,papicode + integer getstrlen + external getstrlen + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + + check=PAPI_VER_CURRENT + call PAPIF_library_init(check) + if ( check.NE.PAPI_VER_CURRENT) then + call PAPIF_perror( 'PAPI_library_init' ) + call ftest_fail(__FILE__, __LINE__, + . 
'PAPI_library_init', check) + end if + + code=MSGCODE + + if (tests_quiet .EQ. 0) then + print *,'---------------------------------------------------' + print *,' Testing PAPIF_name_to_code/PAPIF_code_to_name ' + print *,'---------------------------------------------------' + print *,' These tests look up an event name and event code' + print *,' On no occasion should a NULL character be found(+)' + print *,' When strings are too short, the lookup should fail' + print * + print *,' Tests use the event code ',code + print * + end if + + lastchar=PAPI_MAX_STR_LEN + call checkstr(code,ckstr,check,lastchar,tests_quiet) + lastchar=getstrlen(ckstr) + + call checkstr(code,invstr1,check,lastchar,tests_quiet) + + call checkstr(code,invstr2,check,lastchar,tests_quiet) + + call checkstr(code,invstr3,check,lastchar,tests_quiet) + + call checkstr(code,invstr4,check,lastchar,tests_quiet) + + call checkstr(code,invstr5,check,lastchar,tests_quiet) + + if (tests_quiet .EQ. 0) then + print *,'---------------------------------------------------' + print *,' Testing PAPIF_descr_event ' + print *,'---------------------------------------------------' + print *,' These tests should return a PAPI description for' + print *,' various event names and argument shapes.' + print *,' On no occasion should a NULL character be found(+)' + print * + + print 200,'Test 1' + end if + + papistr=" " + papicode=PAPI_L1_DCM + call test_papif_descr(papistr,papicode,papidblstr, + . check,tests_quiet) + call checkcode(papicode,PAPI_L1_DCM,tests_quiet) + + if (tests_quiet .EQ. 0) then + print * + print 200,'Test 2' + end if + + papistr=" " + papicode=PAPI_L2_DCM + call test_papif_descr(papistr,papicode,papidblstr, + . check,tests_quiet) + call checkname(papistr,"PAPI_L2_DCM",tests_quiet) + + if (tests_quiet .EQ. 0) then + print * + print 200,'Test 3' + end if + + invstr1=" " + papicode=PAPI_L1_ICM + call test_papif_descr(invstr1,papicode,papidblstr, + . 
check,tests_quiet) + call checkcode(papicode,PAPI_L1_ICM,tests_quiet) + + if (tests_quiet .EQ. 0) then + print * + print 200,'Test 4' + end if + + invstr1=" " + papicode=PAPI_L2_ICM + call test_papif_descr(invstr1,papicode,papidblstr, + . check,tests_quiet) + call checkname(invstr1,"PAPI_L2_ICM",tests_quiet) + + if (tests_quiet .EQ. 0) then + print * + print 200,'Test 5 (This should get a truncated description)' + end if + + invstr2=" " + papicode=PAPI_L3_DCM + call test_papif_descr(invstr2,papicode,invstr1, + . check,tests_quiet) + call checkcode(papicode,PAPI_L3_DCM,tests_quiet) + + if (tests_quiet .EQ. 0) then + print * + print 200,'Test 6 (This should get a truncated description)' + end if + + invstr2=" " + papicode=PAPI_L3_ICM + call test_papif_descr(invstr2,papicode,invstr1, + . check,tests_quiet) + call checkname(invstr2,"PAPI_L3_ICM",tests_quiet) + + if (tests_quiet .EQ. 0) then + print * + print 200,'Test 7 (This should get a truncated name)' + end if + + invstr4=" " + papicode=PAPI_L1_DCM + call test_papif_descr(invstr4,papicode,papistr, + . check,tests_quiet) + if (tests_quiet .EQ. 0) then + call checkname(invstr4,"PAPI_L1_DCM",tests_quiet) + end if + + 200 format(t1,a) + + if (tests_quiet .EQ. 0) then + print *,'---------------------------------------------------' + print *,'(+) Fortran implementations that do not provide the' + print *,' string argument length might show NULL '// + . 'characters.' + print *,' This may or may not be OK depending on the '// + . 'Fortran' + print *,' compiler. See papi_fwrappers.c and your Fortran' + print *,' compiler reference manual.' + end if + + call ftests_pass(__FILE__) + end + + subroutine checkstr(incode,string,check,lastchar,quiet) + implicit integer (P) + integer incode + integer check,lastchar, quiet + character*(*) string + integer code + integer getstrlen + external getstrlen + + 100 format(t1,a,i4) + + if (quiet .EQ. 
0) then + print 100,"Testing string length ",len(string) + if(len(string).lt.lastchar)then + print *,'This call should return an error code.' + end if + end if + + code=incode + call PAPIF_event_code_to_name(code,string,check) + if(check.ne.PAPI_OK)then + if (len(string).ge.lastchar)then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIF_event_code_to_name', check) + else + if (quiet .EQ. 0) then + call PAPIF_perror( 'PAPIF_event_code_to_name' ) + print *,'*ERROR* ' + print *,'******* '//'Error in checkstr using '// + $ 'PAPIF_event_code_to_name' + end if + end if + end if + + 200 format(t1,a,'"',a,'"') + if (quiet .EQ. 0) then + print 200,'The event name is: ',string(1:getstrlen(string)) + end if + + call PAPIF_event_name_to_code(string,code,check) + if(check.ne.PAPI_OK)then + if (len(string).ge.lastchar)then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIF_event_name_to_code', check) + else + if (quiet .EQ. 0) then + call PAPIF_perror( 'PAPIF_event_name_to_code' ) + print *,'*ERROR* ' + print *,'******* '//'Error in checkstr using '// + $ 'PAPIF_event_name_to_code' + end if + end if + end if + + call findnull(string,quiet) + + if (quiet .EQ. 0) then + print * + end if + + return + end + + subroutine test_papif_descr(name,code,string,check,quiet) + implicit integer (P) + integer code,count,flags + integer check,quiet + character*(*) name,string + + character*(PAPI_MAX_STR_LEN) label,note + integer getstrlen + external getstrlen + +C This API was deprecated with PAPI 3 +C call PAPIF_describe_event(name,code,string,check) + call PAPIF_get_event_info(code,name,string,label,count, + $ note,flags,check) + 100 format(t1,a,'"',a,'"') + if (quiet .EQ. 0) then + print 100,'The event description is: ', + $ string(1:getstrlen(string)) + end if + + if(check.ne.PAPI_OK)then + if (quiet .EQ. 
0) then + call PAPIF_perror( 'PAPI_get_event_info' ) + print *,'*ERROR* ' + print *,'******* '//'Error in test_papif_descr using '// + $ 'PAPIF_get_event_info' + else + call ftest_fail(__FILE__, __LINE__, + . 'PAPIF_get_event_info', check) + end if + end if + + call findnull(string,quiet) + call findnull(name,quiet) + + return + end + + integer function getstrlen(string) + implicit integer (P) + character*(*) string + integer i + + do i=len(string),1,-1 + if(string(i:i).ne.' ') then + goto 20 + end if + end do + getstrlen=0 + return + + 20 continue + getstrlen=i + return + end + + subroutine findnull(string,quiet) + implicit integer (P) + integer quiet,i + character*(*) string + + i=index(string,char(0)) + if(i.gt.0)then + if(quiet.EQ.0)then + print *,'NULL character found in string!!!' + else + call ftest_fail(__FILE__, __LINE__, + . 'NULL character found in string!!!', 0) + end if + end if + + return + end + + + subroutine checkcode(code,check,quiet) + implicit integer (P) + integer code + integer check,quiet + + if(code.ne.check)then + if(quiet.EQ.0)then + print 100,'Code look up failed?' + else + call ftest_fail(__FILE__, __LINE__, + . 'Code look up failed?', 0) + end if + end if + 100 format(t2,a) + + return + end + + subroutine checkname(name,check,quiet) + implicit integer (P) + character*(*) name + character*(*) check + integer i,quiet + integer getstrlen + + i=getstrlen(name) + if(name(1:i).ne.check)then + if (quiet .eq. 0) then + print 100,'PAPI name incorrect?' + print 110,'Got: ',name(1:i) + print 110,'Expected: ',check + else + call ftest_fail(__FILE__, __LINE__, + . 
'PAPI name incorrect?', 0) + end if + end if + + 100 format(t2,a) + 110 format(a12,'"',a,'"') + + return + end diff --git a/src/ftests/tenth.F b/src/ftests/tenth.F new file mode 100644 index 0000000..742cbdc --- /dev/null +++ b/src/ftests/tenth.F @@ -0,0 +1,244 @@ +#include "fpapi_test.h" +#define ITERS 100 + +#if defined(sun) && defined(sparc) +#define CACHE_LEVEL "PAPI_L2_TCM" +#define EVT1 PAPI_L2_TCM +#define EVT2 PAPI_L2_TCA +#define EVT3 PAPI_L2_TCH +#define EVT1_STR "PAPI_L2_TCM" +#define EVT2_STR "PAPI_L2_TCA" +#define EVT3_STR "PAPI_L2_TCH" +#else +#if defined(__powerpc__) +#define CACHE_LEVEL "PAPI_L1_DCA" +#define EVT1 PAPI_L1_DCA +#define EVT2 PAPI_L1_DCW +#define EVT3 PAPI_L1_DCR +#define EVT1_STR "PAPI_L1_DCA" +#define EVT2_STR "PAPI_L1_DCW" +#define EVT3_STR "PAPI_L1_DCR" +#else +#define CACHE_LEVEL "PAPI_L1_TCM" +#define EVT1 PAPI_L1_TCM +#define EVT2 PAPI_L1_ICM +#define EVT3 PAPI_L1_DCM +#define EVT1_STR "PAPI_L1_TCM" +#define EVT2_STR "PAPI_L1_ICM" +#define EVT3_STR "PAPI_L1_DCM" +#endif +#endif + + program tenth + implicit integer (p) + + integer*8 values(10) + integer es1, es2, es3 + integer*4 mask1, mask2, mask3 + integer domain, granularity + character*(PAPI_MAX_STR_LEN) domainstr, grnstr + integer retval + + Integer last_char + External last_char + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + es1 = PAPI_NULL + es2 = PAPI_NULL + es3 = PAPI_NULL + + mask1 = EVT1 + mask2 = EVT2 + mask3 = EVT3 + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPI_library_init', retval) + end if + + call PAPIf_query_event(mask1, retval) + if ( retval.NE.PAPI_OK) then + call ftest_skip(__FILE__, __LINE__, + .'PAPIf_query_event', retval) + end if + call PAPIf_query_event(mask2, retval) + if ( retval.NE.PAPI_OK) then + call ftest_skip(__FILE__, __LINE__, + .'PAPIf_query_event', retval) + end if + call PAPIf_query_event(mask3, retval) + if ( retval.NE.PAPI_OK) then + call ftest_skip(__FILE__, __LINE__, + .'PAPIf_query_event', retval) + end if + + call PAPIf_create_eventset(es1, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( es1, mask1, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_create_eventset(es2, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + *retval) + end if + + call PAPIf_add_event( es2, mask2, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_create_eventset(es3, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + * retval) + end if + + call PAPIf_add_event( es3, mask3, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call fdo_l1misses(ITERS) + + call PAPIf_start(es1, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call fdo_l1misses(ITERS) + + call PAPIf_stop(es1, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + + call PAPIf_start(es2, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPIf_start', retval) + end if + + call fdo_l1misses(ITERS) + call PAPIf_stop(es2, values(3), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_start(es3, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call fdo_l1misses(ITERS) + + call PAPIf_stop(es3, values(5), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_remove_event( es1, mask1, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + call PAPIf_remove_event( es2, mask2, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + call PAPIf_remove_event( es3, mask3, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_remove_event', retval) + end if + + if (tests_quiet .EQ. 0) then +#if (defined(sun) && defined(sparc)) + print *, "Test case 10: start, stop for derived event ", + *"PAPI_L2_TCM." +#else + print *, "Test case 10: start, stop for derived event ", + *"PAPI_L1_TCM." +#endif + + print *, "------------------------------------------------------" + end if + call PAPIf_get_domain(es1, domain, PAPI_DEFDOM, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_domain', retval) + end if + + call stringify_domain(domain, domainstr) + if (tests_quiet .EQ. 0) then + write (*,900) "Default domain is:", domain, domainstr + 900 format(a20, i3, " ", a70) + end if + + call PAPIf_get_granularity(es1, granularity, PAPI_DEFGRN, + *retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_granularity', + *retval) + end if + + call stringify_granularity(granularity, grnstr) + if (tests_quiet .EQ. 
0) then + write (*,800) "Default granularity is:", granularity, grnstr + 800 format(a25, i3, " ", a20) + print *, "Using", NUM_FLOPS, " iterations of c += b*c" + print *, "------------------------------------------------------" + + write (*,500) "Test type", 1, 2, 3 +#if (defined(sun) && defined(sparc)) + write (*,500) EVT1_STR, values(1), 0, 0 + write (*,500) EVT2_STR, 0, values(3), 0 + write (*,500) EVT3_STR, 0, 0, values(5) + print *, "------------------------------------------------", + *"------" + + print *, "Verification:" + print *, "First number row 1 approximately equals (2,2) - (3,3) ", + *"or ",(values(3)-values(5)) +#else + write (*,500) EVT1_STR, values(1), 0, 0 + write (*,500) EVT2_STR, 0, values(3), 0 + write (*,500) EVT3_STR, 0, 0, values(5) + print *, "------------------------------------------------", + *"------" + + print *, "Verification:" + print *, "First number row 1 approximately equals (2,2) + (3,3) ", + *"or ", (values(3)+values(5)) +#endif + end if + 500 format(A13, ": ", I10, I10, I10) + + call ftests_pass(__FILE__) + end diff --git a/src/ftests/zero.F b/src/ftests/zero.F new file mode 100644 index 0000000..4e0e14d --- /dev/null +++ b/src/ftests/zero.F @@ -0,0 +1,130 @@ +#include "fpapi_test.h" + + program zero + + integer*8 values(10) + integer es, event + integer*8 uso, usn, cyco, cycn + integer domain, granularity + character*(PAPI_MAX_STR_LEN) domainstr, grnstr + character*(PAPI_MAX_STR_LEN) name + integer retval + + Integer last_char, n + External last_char + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + es = PAPI_NULL + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_query_event(PAPI_FP_INS, retval) + if (retval .NE. 
PAPI_OK) then + event = PAPI_TOT_INS + else + event = PAPI_FP_INS + end if + + call PAPIf_create_eventset(es, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_create_eventset', + * retval) + end if + + call PAPIf_add_event( es, event, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_add_event( es, PAPI_TOT_CYC, retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event', retval) + end if + + call PAPIf_get_real_usec(uso) + call PAPIf_get_real_cyc(cyco) + + call PAPIf_start(es, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_stop(es, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_get_real_usec(usn) + call PAPIf_get_real_cyc(cycn) + + if (tests_quiet .EQ. 0) then + print *, "Test case 0: start, stop." + print *, "-----------------------------------------------", + * "--------------------------" + end if + + call PAPIf_get_domain(es, domain, PAPI_DEFDOM, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_domain', retval) + end if + + call stringify_domain(domain, domainstr) + if (tests_quiet .EQ. 0) then + write (*,800) "Default domain is :", domain, domainstr + end if + + call PAPIf_get_granularity(es, granularity, PAPI_DEFGRN, + * retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_granularity', + * retval) + end if + + call stringify_granularity(granularity, grnstr) + if (tests_quiet .EQ. 
0) then + call PAPIf_event_code_to_name (event, name, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + * 'PAPIf_event_code_to_name', retval) + end if + n=last_char(name) + write (*,800) "Default granularity is:", granularity, grnstr + 800 format(a25, i3, " ", a70) + write (*,810) "Using", NUM_FLOPS, + $ " iterations of c = c + a * b" + 810 format(a7, i9, a) + + print *, "-----------------------------------------------", + * "--------------------------" + + write (*,100) "Test type", 1 + write (*,100) name(1:n), values(1) + write (*,100) "PAPI_TOT_CYC", values(2) + write (*,100) "Real usec", usn-uso + write (*,100) "Real cycles", cycn-cyco + 100 format(a13, ":", i12) + print *, "-----------------------------------------------", + * "--------------------------" + + print *, "Verification: none" + endif + + call ftests_pass(__FILE__) + end + diff --git a/src/ftests/zeronamed.F b/src/ftests/zeronamed.F new file mode 100644 index 0000000..e07e1b4 --- /dev/null +++ b/src/ftests/zeronamed.F @@ -0,0 +1,130 @@ +#include "fpapi_test.h" + + program zero + + integer*8 values(10) + integer es, event + integer*8 uso, usn, cyco, cycn + integer domain, granularity + character*(PAPI_MAX_STR_LEN) domainstr, grnstr + character*(PAPI_MAX_STR_LEN) name + integer retval + + Integer last_char + External last_char + integer tests_quiet, get_quiet + external get_quiet + + tests_quiet = get_quiet() + es = PAPI_NULL + + retval = PAPI_VER_CURRENT + call PAPIf_library_init(retval) + if ( retval.NE.PAPI_VER_CURRENT) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPI_library_init', retval) + end if + + call PAPIf_query_named_event('PAPI_TOT_CYC', retval) + if (retval .NE. PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_query_named_event: PAPI_TOT_CYC', retval) + end if + + call PAPIf_query_named_event('PAPI_TOT_INS', retval) + if (retval .NE. PAPI_OK) then + call ftest_fail(__FILE__, __LINE__, + . 
'PAPIf_query_named_event: PAPI_TOT_INS', retval) + end if + + call PAPIf_create_eventset(es, retval) + if ( retval.NE.PAPI_OK) then + call ftest_fail( __FILE__, __LINE__, + . 'PAPIf_create_eventset', retval ) + end if + + call PAPIf_add_named_event( es, 'PAPI_TOT_CYC', retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event: PAPI_TOT_CYC', retval) + end if + + call PAPIf_add_named_event( es, 'PAPI_TOT_INS', retval ) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_add_event: PAPI_TOT_INS', retval) + end if + + call PAPIf_get_real_usec(uso) + call PAPIf_get_real_cyc(cyco) + + call PAPIf_start(es, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_start', retval) + end if + + call fdo_flops(NUM_FLOPS) + + call PAPIf_stop(es, values(1), retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_stop', retval) + end if + + call PAPIf_get_real_usec(usn) + call PAPIf_get_real_cyc(cycn) + + if (tests_quiet .EQ. 0) then + print *, "PAPI_{query, add, remove}_named_event API test." + print *, "-----------------------------------------------", + * "--------------------------" + end if + + call PAPIf_get_domain(es, domain, PAPI_DEFDOM, retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_domain', retval) + end if + + call stringify_domain(domain, domainstr) + if (tests_quiet .EQ. 0) then + write (*,800) "Default domain is :", domain, domainstr + end if + + call PAPIf_get_granularity(es, granularity, PAPI_DEFGRN, + * retval) + if ( retval .NE. PAPI_OK ) then + call ftest_fail(__FILE__, __LINE__, + . 'PAPIf_get_granularity', + * retval) + end if + + call stringify_granularity(granularity, grnstr) + if (tests_quiet .EQ. 
0) then + call PAPIf_event_code_to_name (event, name, retval) + write (*,800) "Default granularity is:", granularity, grnstr + 800 format(a25, i3, " ", a70) + write (*,810) "Using", NUM_FLOPS, + $ " iterations of c = c + a * b" + 810 format(a7, i9, a) + + print *, "-----------------------------------------------", + * "--------------------------" + + write (*,100) "Test type", 1 + write (*,100) "PAPI_TOT_CYC", values(1) + write (*,100) "PAPI_TOT_INS", values(2) + write (*,100) "Real usec", usn-uso + write (*,100) "Real cycles", cycn-cyco + 100 format(a13, ":", i12) + print *, "-----------------------------------------------", + * "--------------------------" + + print *, "Verification: PAPI_TOT_CYC should be roughly ", + * "real_cycles" + endif + + call ftests_pass(__FILE__) + end + diff --git a/src/genpapifdef.c b/src/genpapifdef.c new file mode 100644 index 0000000..60a1eda --- /dev/null +++ b/src/genpapifdef.c @@ -0,0 +1,293 @@ +/* This file generates the #defines needed for Fortran examples of PAPI. + Its output is usually directed to fpapi.h. See Makefile.inc for details. +*/ + +/* Modified to produce any of cpp, f77, or f90-style include files. + Accepts an optional command-line argument, one of -c, -f77, or -f90 + (-c default, as in original version of the program). + The Fortran versions are fixed-format (source starts in column 7) + Note: No check is made to ensure that lines don't extend past 72 columns. + Date: 1/26/02 + Rick Kufrin, NCSA/Univ of Illinois +*/ + +/* Modified to eliminate reliance on libpapi.a. + Now it relies only on a small collection of papi header files. +*/ + +/* Modified to always generate a symbolic representation for the + maximum negative number. This is a work-around for a compiler + limitation that first showed up on Cray X1 and then spread to + GNU Fortran 4.3.2. Thanks to Jim Rosinski (ORNL) for + identification and testing on this issue. 
+ Date: 06/03/08 +*/ + +#include +#include + +#include +#include +#include +#include + +#include "papi.h" + +#undef NDEBUG +#include + +#define PAPI_EVENTS_IN_DERIVED_EVENT 8 /* to satisfy papi_preset.h */ +#include "papi_preset.h" +#include "papi_common_strings.h" +/* + The following array is used to create a series of defines + for use with PAPI in Fortran programs. + The value/name pairs come straight from papi.h. + They should be manually synchronized with papi.h when changes are made. + The definition of hwi_describe_t is in papi_preset.h +*/ + +const hwi_describe_t _papi_def[] = { + {PAPI_NULL, "PAPI_NULL", NULL}, + {PAPI_VER_CURRENT, "PAPI_VER_CURRENT", NULL}, + {PAPI_VERSION, "PAPI_VERSION", NULL}, + {PAPI_MAX_PRESET_EVENTS, "PAPI_MAX_PRESET_EVENTS", NULL}, + + {PAPI_NOT_INITED, "PAPI_NOT_INITED", NULL}, + {PAPI_LOW_LEVEL_INITED, "PAPI_LOW_LEVEL_INITED", NULL}, + {PAPI_HIGH_LEVEL_INITED, "PAPI_HIGH_LEVEL_INITED", NULL}, + {PAPI_THREAD_LEVEL_INITED, "PAPI_THREAD_LEVEL_INITED", NULL}, + + {PAPI_DOM_USER, "PAPI_DOM_USER", NULL}, + {PAPI_DOM_KERNEL, "PAPI_DOM_KERNEL", NULL}, + {PAPI_DOM_OTHER, "PAPI_DOM_OTHER", NULL}, + {PAPI_DOM_SUPERVISOR, "PAPI_DOM_SUPERVISOR", NULL}, + {PAPI_DOM_ALL, "PAPI_DOM_ALL", NULL}, + {PAPI_DOM_MIN, "PAPI_DOM_MIN", NULL}, + {PAPI_DOM_MAX, "PAPI_DOM_MAX", NULL}, + {PAPI_DOM_HWSPEC, "PAPI_DOM_HWSPEC", NULL}, + + {PAPI_STOPPED, "PAPI_STOPPED", NULL}, + {PAPI_RUNNING, "PAPI_RUNNING", NULL}, + {PAPI_PAUSED, "PAPI_PAUSED", NULL}, + {PAPI_NOT_INIT, "PAPI_NOT_INIT", NULL}, + {PAPI_OVERFLOWING, "PAPI_OVERFLOWING", NULL}, + {PAPI_PROFILING, "PAPI_PROFILING", NULL}, + {PAPI_MULTIPLEXING, "PAPI_MULTIPLEXING", NULL}, + {PAPI_ATTACHED, "PAPI_ATTACHED", NULL}, + {PAPI_CPU_ATTACHED, "PAPI_CPU_ATTACHED", NULL}, + + {PAPI_QUIET, "PAPI_QUIET", NULL}, + {PAPI_VERB_ECONT, "PAPI_VERB_ECONT", NULL}, + {PAPI_VERB_ESTOP, "PAPI_VERB_ESTOP", NULL}, + + {PAPI_MIN_STR_LEN, "PAPI_MIN_STR_LEN", NULL}, + {PAPI_HUGE_STR_LEN, "PAPI_HUGE_STR_LEN", NULL}, + 
{PAPI_MAX_STR_LEN, "PAPI_MAX_STR_LEN", NULL}, + {PAPI_NUM_ERRORS, "PAPI_NUM_ERRORS", NULL}, + + {PAPI_MULTIPLEX_DEFAULT, "PAPI_MULTIPLEX_DEFAULT", NULL}, + {PAPI_MULTIPLEX_FORCE_SW, "PAPI_MULTIPLEX_FORCE_SW", NULL}, + + {PAPI_DEBUG, "PAPI_DEBUG", NULL}, + {PAPI_MULTIPLEX, "PAPI_MULTIPLEX", NULL}, + {PAPI_DEFDOM, "PAPI_DEFDOM", NULL}, + {PAPI_DOMAIN, "PAPI_DOMAIN", NULL}, + {PAPI_DEFGRN, "PAPI_DEFGRN", NULL}, + {PAPI_GRANUL, "PAPI_GRANUL", NULL}, + {PAPI_DEF_MPX_NS, "PAPI_DEF_MPX_NS", NULL}, + // {PAPI_EDGE_DETECT, "PAPI_EDGE_DETECT", NULL}, + //{PAPI_INVERT, "PAPI_INVERT", NULL}, + {PAPI_MAX_MPX_CTRS, "PAPI_MAX_MPX_CTRS", NULL}, + {PAPI_PROFIL, "PAPI_PROFIL", NULL}, + {PAPI_PRELOAD, "PAPI_PRELOAD", NULL}, + {PAPI_CLOCKRATE, "PAPI_CLOCKRATE", NULL}, + {PAPI_MAX_HWCTRS, "PAPI_MAX_HWCTRS", NULL}, + {PAPI_HWINFO, "PAPI_HWINFO", NULL}, + {PAPI_EXEINFO, "PAPI_EXEINFO", NULL}, + {PAPI_MAX_CPUS, "PAPI_MAX_CPUS", NULL}, + {PAPI_ATTACH, "PAPI_ATTACH", NULL}, + {PAPI_SHLIBINFO, "PAPI_SHLIBINFO", NULL}, + {PAPI_LIB_VERSION, "PAPI_LIB_VERSION", NULL}, + {PAPI_COMPONENTINFO, "PAPI_COMPONENTINFO", NULL}, + {PAPI_DETACH, "PAPI_DETACH", NULL}, + + {PAPI_GRN_THR, "PAPI_GRN_THR", NULL}, + {PAPI_GRN_MIN, "PAPI_GRN_MIN", NULL}, + {PAPI_GRN_PROC, "PAPI_GRN_PROC", NULL}, + {PAPI_GRN_PROCG, "PAPI_GRN_PROCG", NULL}, + {PAPI_GRN_SYS, "PAPI_GRN_SYS", NULL}, + {PAPI_GRN_SYS_CPU, "PAPI_GRN_SYS_CPU", NULL}, + {PAPI_GRN_MAX, "PAPI_GRN_MAX", NULL}, + + {PAPI_DERIVED, "PAPI_DERIVED", NULL}, + + {PAPI_PROFIL_POSIX, "PAPI_PROFIL_POSIX", NULL}, + {PAPI_PROFIL_RANDOM, "PAPI_PROFIL_RANDOM", NULL}, + {PAPI_PROFIL_WEIGHTED, "PAPI_PROFIL_WEIGHTED", NULL}, + {PAPI_PROFIL_COMPRESS, "PAPI_PROFIL_COMPRESS", NULL}, + {PAPI_PROFIL_BUCKET_16, "PAPI_PROFIL_BUCKET_16", NULL}, + {PAPI_PROFIL_BUCKET_32, "PAPI_PROFIL_BUCKET_32", NULL}, + {PAPI_PROFIL_BUCKET_64, "PAPI_PROFIL_BUCKET_64", NULL}, + + {PAPI_USR1_LOCK, "PAPI_USR1_LOCK", NULL}, + {PAPI_USR2_LOCK, "PAPI_USR2_LOCK", NULL}, + {PAPI_LOCK_USR1, "PAPI_LOCK_USR1", 
NULL}, + {PAPI_LOCK_USR2, "PAPI_LOCK_USR2", NULL}, + {PAPI_LOCK_NUM, "PAPI_LOCK_NUM", NULL}, + + {PAPI_MH_MAX_LEVELS, "PAPI_MH_MAX_LEVELS", NULL}, + + {PAPI_USR1_TLS, "PAPI_USR1_TLS", NULL}, + {PAPI_USR2_TLS, "PAPI_USR2_TLS", NULL}, + {PAPI_TLS_USR1, "PAPI_TLS_USR1", NULL}, + {PAPI_TLS_USR2, "PAPI_TLS_USR2", NULL}, + {PAPI_TLS_HIGH_LEVEL, "PAPI_TLS_HIGH_LEVEL", NULL}, + {PAPI_TLS_NUM, "PAPI_TLS_NUM", NULL}, + +/* Fortran offsets into PAPI_dmem_info_t structure. */ + {PAPIF_DMEM_VMSIZE, "PAPIF_DMEM_VMSIZE", NULL}, + {PAPIF_DMEM_RESIDENT, "PAPIF_DMEM_RESIDENT", NULL}, + {PAPIF_DMEM_HIGH_WATER, "PAPIF_DMEM_HIGH_WATER", NULL}, + {PAPIF_DMEM_SHARED, "PAPIF_DMEM_SHARED", NULL}, + {PAPIF_DMEM_TEXT, "PAPIF_DMEM_TEXT", NULL}, + {PAPIF_DMEM_LIBRARY, "PAPIF_DMEM_LIBRARY", NULL}, + {PAPIF_DMEM_HEAP, "PAPIF_DMEM_HEAP", NULL}, + {PAPIF_DMEM_LOCKED, "PAPIF_DMEM_LOCKED", NULL}, + {PAPIF_DMEM_STACK, "PAPIF_DMEM_STACK", NULL}, + {PAPIF_DMEM_PAGESIZE, "PAPIF_DMEM_PAGESIZE", NULL}, + {PAPIF_DMEM_MAXVAL, "PAPIF_DMEM_MAXVAL", NULL}, + +/* PAPI error defines */ + /* 0 */ {PAPI_OK, "PAPI_OK", "No error"}, + /* 1 */ {PAPI_EINVAL, "PAPI_EINVAL", "Invalid argument"}, + /* 2 */ {PAPI_ENOMEM, "PAPI_ENOMEM", "Insufficient memory"}, + /* 3 */ {PAPI_ESYS, "PAPI_ESYS", "A System/C library call failed"}, + /* 4 */ {PAPI_ECMP, "PAPI_ECMP", "Not supported by component"}, + /* 5 */ {PAPI_ECLOST, "PAPI_ECLOST", "Access to the counters was lost or interrupted"}, + /* 6 */ {PAPI_EBUG, "PAPI_EBUG", "Internal error, please send mail to the developers"}, + /* 7 */ {PAPI_ENOEVNT, "PAPI_ENOEVNT", "Event does not exist"}, + /* 8 */ {PAPI_ECNFLCT, "PAPI_ECNFLCT", "Event exists, but cannot be counted due to hardware resource limits"}, + /* 9 */ {PAPI_ENOTRUN, "PAPI_ENOTRUN", "EventSet is currently not running"}, + /*10 */ {PAPI_EISRUN, "PAPI_EISRUN", "EventSet is currently counting"}, + /*11 */ {PAPI_ENOEVST, "PAPI_ENOEVST", "No such EventSet available"}, + /*12 */ {PAPI_ENOTPRESET, "PAPI_ENOTPRESET", "Event in 
argument is not a valid preset"}, + /*13 */ {PAPI_ENOCNTR, "PAPI_ENOCNTR", "Hardware does not support performance counters"}, + /*14 */ {PAPI_EMISC, "PAPI_EMISC", "Unknown error code"}, + /*15 */ {PAPI_EPERM, "PAPI_EPERM", "Permission level does not permit operation"}, + /*16 */ {PAPI_ENOINIT, "PAPI_ENOINIT", "PAPI hasn't been initialized yet"}, + /*17 */ {PAPI_ENOCMP, "PAPI_ENOCMP", "Component Index isn't set"}, + /*18 */ {PAPI_ENOSUPP, "PAPI_ENOSUPP", "Not supported"}, + /*19 */ {PAPI_ENOIMPL, "PAPI_ENOIMPL", "Not implemented"}, + /*20 */ {PAPI_EBUF, "PAPI_EBUF", "Buffer size exceeded"}, + /*21 */ {PAPI_EINVAL_DOM, "PAPI_EINVAL_DOM", "EventSet domain is not supported for the operation"}, + /*22 */ {PAPI_EATTR, "PAPI_EATTR", "Invalid or missing event attributes"}, + /*23 */ {PAPI_ECOUNT, "PAPI_ECOUNT", "Too many events or attributes"}, + /*24 */ {PAPI_ECOMBO, "PAPI_ECOMBO", "Bad combination of features"} + +}; + + +enum deftype_t +{ CDEFINE, F77DEFINE, F90DEFINE }; +static char comment_char = 'C'; + +static void +define_val( const char *val_string, int val, enum deftype_t deftype ) +{ + char value[20]; + /* The Fortran spec defines negative numbers as the negation of a positive number. + Because of that definition, the largest possible 2's complement negative + number cannot be legally expressed in Fortran. Compiler behavior is undefined + and unpredictable on this issue. + Several FORTRAN compilers (GNU Fortran (GCC) > 4.2.3, others?) + will throw errors or warnings for an explicit numeric value of -2147483648, + However, they don't object to an arithmetic evaluation that produces the + desired value. This value happens to be used for the PAPI preset + PAPI_L1_DCM, and PAPI_DOM_HWSPEC. + The hack below works around that limitation. 
+ */ + if ( ( ( unsigned ) val ) == 0x80000000 ) { + sprintf( value, "((-2147483647) - 1)" ); + } else { + sprintf( value, "%d", val ); + } + + switch ( deftype ) { + case CDEFINE: + printf( "#define %-18s %s\n", val_string, value ); + break; + case F77DEFINE: + printf( " INTEGER %-18s\n PARAMETER (%s=%s)\n", val_string, + val_string, value ); + break; + case F90DEFINE: + printf( " INTEGER, PARAMETER :: %-18s = %s\n", val_string, value ); + break; + } +} + +static void +createDef( char *title, const hwi_describe_t * descr, int size, + enum deftype_t deftype ) +{ + int i, j; + /* compute the size of the predefined array */ + j = size / sizeof ( hwi_describe_t ); + + /* create defines for each line in the general arrays */ + printf( "\n%c\n%c %s\n%c\n\n", comment_char, comment_char, title, + comment_char ); + for ( i = 0; i < j; i++ ) + define_val( descr[i].name, descr[i].value, deftype ); +} + + +int +main( int argc, char **argv ) +{ + int i; + enum deftype_t deftype = CDEFINE; + + if ( argc > 1 ) { + if ( strcmp( argv[1], "-f77" ) == 0 ) { + deftype = F77DEFINE; + comment_char = '!'; + } else if ( strcmp( argv[1], "-f90" ) == 0 ) { + deftype = F90DEFINE; + comment_char = '!'; + } else if ( strcmp( argv[1], "-c" ) == 0 ) { + deftype = CDEFINE; + comment_char = 'C'; + } else { + fprintf( stderr, "Usage: %s [ -c | -f77 | -f90 ]\n", argv[0] ); + exit( 1 ); + } + } + + /* print a file header block */ + printf + ( "%c\n%c This file contains defines required by the PAPI Fortran interface.\n", + comment_char, comment_char ); + printf( "%c It is automagically generated by genpapifdef.c\n", + comment_char ); + printf( "%c DO NOT modify its contents and expect the changes to stick.\n", + comment_char ); + printf( "%c Changes MUST be made in genpapifdef.c instead.\n%c\n\n", + comment_char, comment_char ); + + /* create defines for the internal array pairs */ + createDef( "General purpose defines.", _papi_def, sizeof ( _papi_def ), + deftype ); + /* create defines for each 
member of the PRESET array */ + printf( "\n%c\n%c PAPI preset event values.\n%c\n\n", comment_char, + comment_char, comment_char ); + + for ( i = 0; i < PAPI_MAX_PRESET_EVENTS; i++ ) { + if ( _papi_hwi_presets[i].symbol ) { /* if the event is in the preset table */ + define_val( _papi_hwi_presets[i].symbol, + ( i | PAPI_PRESET_MASK ), deftype ); + } + } + exit( 0 ); +} diff --git a/src/libpapi.exp b/src/libpapi.exp new file mode 100644 index 0000000..15ff317 --- /dev/null +++ b/src/libpapi.exp @@ -0,0 +1,61 @@ +PAPI_accum +PAPI_add_event +PAPI_add_events +PAPI_cleanup_eventset +PAPI_create_eventset +PAPI_destroy_eventset +PAPI_enum_event +PAPI_event_code_to_name +PAPI_event_name_to_code +PAPI_get_event_info +PAPI_get_executable_info +PAPI_get_hardware_info +PAPI_get_multiplex +PAPI_get_opt +PAPI_get_real_cyc +PAPI_get_real_usec +PAPI_get_shared_lib_info +PAPI_get_thr_specific +PAPI_get_overflow_event_index +PAPI_get_virt_cyc +PAPI_get_virt_usec +PAPI_is_initialized +PAPI_library_init +PAPI_list_events +PAPI_lock +PAPI_multiplex_init +PAPI_num_hwctrs +PAPI_num_events +PAPI_overflow +PAPI_perror +PAPI_profil +PAPI_query_event +PAPI_read +PAPI_register_thread +PAPI_remove_event +PAPI_remove_events +PAPI_reset +PAPI_set_debug +PAPI_set_domain +PAPI_set_granularity +PAPI_set_multiplex +PAPI_set_opt +PAPI_set_thr_specific +PAPI_shutdown +PAPI_sprofil +PAPI_start +PAPI_state +PAPI_stop +PAPI_strerror +PAPI_thread_id +PAPI_thread_init +PAPI_unlock +PAPI_write +PAPI_accum_counters +PAPI_num_counters +PAPI_read_counters +PAPI_start_counters +PAPI_stop_counters +PAPI_flips +PAPI_flops +PAPI_ipc diff --git a/src/libpfm-3.y/COPYRIGHT b/src/libpfm-3.y/COPYRIGHT new file mode 100644 index 0000000..dc951b5 --- /dev/null +++ b/src/libpfm-3.y/COPYRIGHT @@ -0,0 +1,18 @@ +Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/src/libpfm-3.y/ChangeLog b/src/libpfm-3.y/ChangeLog new file mode 100644 index 0000000..70b6fa2 --- /dev/null +++ b/src/libpfm-3.y/ChangeLog @@ -0,0 +1,333 @@ +2006-08-21 Stephane Eranian + This file will not be updated anymore, Refer to + SF.net CVS log for diff information + +2006-07-10 Stephane Eranian + * removed PFM_FL_X86_INSECURE because it is not needed anymore + * removed perfmon_i386.h and perfmon_mips64.h because empty + +2006-06-28 Stephane Eranian + * added pfmsetup.c (Kevin Corry IBM) + * fixed pfmsetup.c to correctly handle sampling format uuid + +2006-06-28 Stephane Eranian + * added libpfm_montecito.3 man page + * updated libpfm_itanium2.3 man page + * removed pfm_print_event_info() and related calls from library + * removed unused pfmlib_mont_ipear_mode_t struct + * remove etb_ds from Montecito ETB struct as it can only have one value + * added showevtinfo.c example + * added PFMLIB_ITA2_EVT_NO_SET to pfmlib_itanium2.h + * added PFMLIB_MONT_EVT_NO_SET to pfmlib_montecito.h + * replaced pfm_mont_get_event_caf() by pfm_mont_get_event_type() + * added missing perfmon_compat.h from include install (Will Cohen) + * fortify showreginfo.c for FC6 (Will Cohen) + +2006-06-13 Stephane Eranian + * added generic support or event umask (Kevin Corry from IBM) + * changed detect_pmcs.c to use pfm-getinfo_evtsets() + * updated all examples to use the new detect_unavailable_pmcs() + * the examples require 2.6.17-rc6 to run + +2006-05-22 Stephane Eranian + * corrected architected IA-32 PMU detection code, e.g., PIC assembly + * fixed counter width of IA-32 architected PMU to 32 + * fixed definition of perfevtsel to 64-bit wide for IA-32 architected PMU + +2006-05-11 Stephane Eranian + * added support for IA-32 architected PMU as specified + in the latest IA-32 architecure manuals. 
There is enough + to support minimal functionalities on Core Duo/Solo processors + + * updated system call number to match those used with 2.6.17-rc4 + * enhanced i386_p6 model detection code + +2006-04-25 Stephane Eranian + * updated pfmlib_gen_mips64.c with latest code from Phil Mucci + * introduced get_event_code_counter() internal method to handle + the fact that on some PMU (MIPS) an event may have a different + value based on the counter it is assigned to. This is a superset + of the previous get_event_code(). added PFMLIB_CNT_FIRST to ask + for first value (or don't care) + +2006-04-05 Stephane Eranian + * added support for install_prefix in makefile + * fixed broken ETB_EVENT (not report has ETB event) + * added BRANCH_EVENT as alias to ETB_EVENT for Montecito + * added support for unavailable PMC registers to pfm_dispatch_events() + * added detect_pmcs.c, detect_pmcs.h in examples + * updated all generic examples to use detect_unavail_pmcs() helper function + * updated pfm_dispatch_events() man pages + * cleanup PFMLIB_REGMASK_*, change to pfm_regmask_* + * created a separate set of man pages for all pfm_regmask_* functions + +2006-04-04 Stephane Eranian + * fixed makefile in include to install + perfmon_i386.h for x86_64 install (Will Cohen from Redhat) + * install pfmlib_montecito.h on IA64 + +2006-04-05 Stephane Eranian + * updated system call numbers to 2.6.17-rc1 + * incorporated a type change for reg_value + in pfmlib.h (Kevin Corry from IBM) + +2006-03-22 Stephane Eranian + * changed HT detection for PEBS examples + +2006-03-07 Stephane Eranian + * updated to 2.6.16-rc5 new perfmon code base support + * added preliminary Montecito support + * incorporated AMD provided event list for X86-64 (Ray Bryant) + * renamed all GEN_X86_64 gen_x86_64 to amd_x86_64 + * removed PFM_32BIT_ABI_64BIT_OS, ABI now supports ILP32,LP64 + without special compilation + +2006-01-16 Stephane Eranian + * added PFM_32BIT_ABI_64BIT_OS to allow + 32-bit compile (32-bit ABI) 
for a 64-bit OS + * added C++ support to perfmon header files + * added MIPS64 (5K,20K) support (provided by Phil Mucci) + * restructured *_standalone.c examples + * added pfm_get_event_code_counter() and man page + * changed implementation of pfm_get_num_pm*() + * remove non-sense example task_view.c + * added support for MIPS in some examples + +2006-01-09 Stephane Eranian + * examples code cleanups + * example support up to 2048 CPU (syst.c) + * portable sampling examples support more than 64 PMDs +2005-12-15 Stephane Eranian + * updated all examples to new pfm_create_context() prototype + * fixed some type mismatch in pfmlib_itanium2.c + * required for 2.6.15-rc5-git3 kernel patch +2005-10-18 Stephane Eranian + * forced perfsel.en bit to 1 for X86-64 and i386/p6 + * inverted reset mask to be more familiar in examples/showreginfo.c + * updated P4 examples to force enable bit to 1 + +2005-09-28 Stephane Eranian + * split p6/pentium M event tables. Pentium M + adds a few more events and changes the semantic + of some. + * added smpl_standalone.c, notify_standalone.c + and ia32/smpl_pebs.c + * cleanup the examples some more + * updated multiplex. to match structure of multiplex2.c + * updated perfmon2 kernel headers to match 2.6.14-rc2-mm1 + release + * added man pages for libpfm_p6 and libpfm_x86_64 + * fixed handling of edge field for P6 + +2005-08-01 Stephane Eranian + * switch all examples in examples/dir to use the + multi system call interface. + * updated perfmon.h/perfmon_compat.h to latest + kernel interface (multi syscall) + +2004-06-24 Stephane Eranian + * fixed Itanium2 events tables L2_FORCE_RECIRC_* + and L2_L3ACCESS_* events can only be measured by PMC4 + + * fixed pfm_*_get_event_counters(). It would always + return the counter mask for event index 0. 
+ +2004-06-24 Stephane Eranian + * fixed pfm_print_event_info_*() because it would not print + the PMC/PMD mask correctly + * updated pfm_dispatch_*ear() for Itanium2 + * updated pfm_dispatch_irange() for Itanium2 + * updated pfm_ita2_print_info() + * updated pfm_ita2_num_pmcs() and pfm_ita2_num_pmds() + +2004-02-12 Stephane Eranian + * fixed a bug in pfmlib_itanium2.c which caused measurements + using opcode matching with an event different from + IA64_TAGGED_INST_RETIRED* to return wrong results, i.e., + opcode filter was ignored. + +2003-11-21 Stephane Eranian + * changed interface to pfm_get_impl_*() to use + a cleaner definition for bitmasks. pfmlib_regmask_t is + now a struct and applications must use accessor macros + PFMLIB_REGMASK_*() + * added pfm_get_num_pmcs(), pfm_get_num_pmds(), pfm_get_num_counters() + * updated man pages to reflect changes + * cleanup all examples to reflect bitmask changes + +2003-10-24 Stephane Eranian + * added reserved fields to the key pfmlib structure for future + extensions (recompilation from beta required). + +2003-10-24 Stephane Eranian + * released beta of version 3.0 + * some of the changes not reported by older entries: + * removed freesmpl.c example + * added ita2_btb.c, ita2_dear.c, ita_dear.c, multiplex.c + * added task_attach.c, task_attach_timeout.c, task_smpl.c + * added missing itanium2 events, mostly subevent combinations for + SYLL_NOT_DISPERSED, EXTERN_DP_PINS_0_TO_3, and EXTERN_DP_PINS_4_TO_5 + * got rid of pfm_get_first_event(), pfm_get_next_event(). 
First valid + index is always 0, use pfm_get_num_events() to find last event index + * renamed pfm_stop() to pfm_self_stop(), pfm_start() + to pfm_self_start() + * updated all examples to perfmon2 interface + * added notify_self2.c, notify_self3.c examples + * updated perfmon.h/perfmon_default_smpl.h to reflect latest + perfmon-2 changes (2.6.0-test8) + +2003-08-25 Stephane Eranian + * allowed multiple EAR/BTB events + * really implemented the 4 different ways + of programming EAR/BTB + +2003-07-30 Stephane Eranian + * updated all man pages to reflect changes for 3.0 + * more cleanups in the examples to make all package compile + without warning with ecc + +2003-07-29 Stephane Eranian + * fixed a limitation in the iod_table[] used in dispatch_drange(). + Pure Opc mode is possible using the IBR/Opc mode. Reported by + Geoff Kent at UIUC. + * cleaned up all functions using a bitmask as arguments + +2003-06-30 Stephane Eranian + * added pfm_get_max_event_name_len() + * unsigned vs. int cleanups + * introduced pfm_*_pmc_reg_t and pfm_*_pmd_reg_t + * cleaned up calls using bitmasks + * renamed PMU_MAX_* to PFMLIB_MAX_* + * got rid of PMU_FIRST_COUNTER + * introduced pfmlib_counter_t + * internal interface changes, renaming: pmu_name vs name + * got rid of char **name and replaced with char *name, int maxlen + * added pfm_start(), pfm_stop() as real functions + * changed interface of pfm_dispatch_events to make input vs. output + parameters more explicit + * model-specific input/output to pfm_dispatch_event() now arguments + instead of being linked from the generic argument. + +2003-06-27 Stephane Eranian + * added missing const to char arguments for + pfm_find_event, pfm_find_event_byname, + pfm_print_event_info. Suggestion by Hans + * renamed pfp_pc to pfp_pmc + * renamed pfp_pc_count to pfp_pmc_count + +2003-06-11 Stephane Eranian + * updated manuals to reflect library changes + * updated all examples to match the new Linux/ia64 + kernel interface (perfmon2). 
+ +2003-06-10 Stephane Eranian + * fix pfmlib_itanium.c: dispatch_dear(), dispatch_iear() + to setup EAR when there is an EAR event but no + detailed setting in ita_param. + * added pfm_ita_ear_mode_t to pfmlib_itanium.h + * added pfm_ita_get_ear_mode() to pfmlib_itanium.h + +2003-06-06 Stephane Eranian + * add a generic call to return hardware counter + width: pfm_get hw_counter_width() + * updated perfmon.h to perfmon2 + * added flag to itanium/itanium2 specific parameter + to tell the library to ignore per-even qualifier + constraints. + see PFMLIB_ITA_FL_CNT_NO_QUALCHECK and + PFMLIB_ITA2_FL_CNT_NO_QUALCHECK. + +2003-05-06 Stephane Eranian + * got rid of all connections to perfmon.h. + the library is now fully self-contained. + pfarg_reg_t has been replaced by pfmlib_reg_t. + +2002-03-20 Stephane Eranian + * fix %x vs. %lx for pmc8/9 in pfmlib_itanium.c + and pfmlib_itanium2.c + +2002-12-20 Stephane Eranian + * added PFM_FL_EXCL_IDLE to perfmon.h + +2002-12-18 Stephane Eranian + * clear ig_ad, inv fields in PMC8,9 when no code range + restriction is used. + +2002-12-17 Stephane Eranian + * update pfm_initialize.3 to clarify when this + function needs to be called. + +2002-12-10 Stephane Eranian + * changed _SYS_PERFMON.h to _PERFMON_PERFMON.h + +2002-12-06 Stephane Eranian + * integrated Peter Chubb's Debian script fixes + * fixed the Debian script to include the examples + +2002-12-05 Stephane Eranian + * added man pages for pfm_start() and pfm_stop() + * release 2.0 beta for review + +2002-12-04 Stephane Eranian + * the pfmlib_param_t structure now contains + the pmc array (pfp_pc[]) as well as a counter representing the + number of valid entries written to pfp_pc[]. cleaned up all + modules and headers to reflect changes. + * added pfm_ita2_is_fine_mode() to test whether or not fine + mode was used for code ranges. 
+ +2002-12-03 Stephane Eranian + * removed pfm_ita_ism from pfmlib_ita_param_t + * removed pfm_ita2_ism from pfmlib_ita2_param_t + * added libpfm.3, libpfm_itanium.3, libpfm_itanium2.3 + * enabled per-range privilege level mask in pfmlib_itanium.c + and pfmlib_itanium2.c + +2002-11-21 Stephane Eranian + * added pfmlib_generic.h to cleanup pfmlib.h + * dropped retry argument to pfm_find_event() + * got rid of the pfm_find_byvcode*() interface (internal only) + * cleanup up interface code is int not unsigned long + * added man pages in docs/man for the generic library interface + * moved the PMU specific handy shortcuts for register struct to + module specific file. Avoid possible conflicts in applications + using different PMU models in one source file. + +2002-11-20 Stephane Eranian + * separated the library, headers, examples from the pfmon tool + * changed license of library to MIT-style license + * set version number to 2.0 + * added support to generate a shared version of libpfm + * fix pfm_dispatch_opcm() to check for effective use of + IA64_TAGGED_INST_IBRPX_PMCY before setting the bits in PMC15 + (spotted by UIUC Impact Team). + * cleaned up error messages in the examples + * fix bug in pfm_ita2_print_info() which caused extra umask bits to be + displayed for EAR. + +2002-11-19 Stephane Eranian + * added pfm_get_impl_counters() to library interface and PMU models + * added missing support for pfm_get_impl_pmds(), pfm_get_impl_pmcs() + to pfmlib_generic.c + * created pfmlib_compiler.h to encapsulate inline assembly differences + between compilers. + * created pfmlib_compiler_priv.h to encapsulate the inline assembly + differences for library private code. + +2002-11-13 Stephane Eranian + * fixed definition of pmc10 in pfmlib_itanium2.h to account for + a layout difference between cache and TLB mode (spotted by UIUC Impact Team). + Was causing problems with some latency values in IEAR cache mode. 
+ * fixed initialization of pmc10 in pfmlib_itanium2.c to reflect above + change. + +2002-10-14 Stephane Eranian + * fixed impl_pmds[] in pfmlib_itanium.c and pfmlib_itanium2.c. + PMD17 was missing. + +2002-09-09 Stephane Eranian + * updated include/perfmon/perfmon.h to include sampling period + randomization. + +2002-08-14 Stephane Eranian + * fix bitfield length for pmc14_ita2_reg and pmd3_ita2_reg in pfmlib_itanium2.h (David Mosberger) diff --git a/src/libpfm-3.y/Makefile b/src/libpfm-3.y/Makefile new file mode 100644 index 0000000..032b045 --- /dev/null +++ b/src/libpfm-3.y/Makefile @@ -0,0 +1,76 @@ +# +# Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. +# Contributed by Stephane Eranian +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# + +# +# Look in config.mk for options +# +TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi) + +include config.mk + +DIRS=lib include docs + +EXAMPLES_DIRS = examples_v2.x + +ifneq ($(CONFIG_PFMLIB_OLD_PFMV2),y) +EXAMPLES_DIRS += examples_v3.x +endif + +ifeq ($(ARCH),ia64) +DIRS +=examples_ia64_v2.0 +endif + +ifeq ($(SYS),Linux) +DIRS +=libpfms +endif + +DIRS += $(EXAMPLES_DIRS) + +all: + @echo Compiling for \'$(ARCH)\' target + @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done + +lib: + $(MAKE) -C lib + +clean: + @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done + +distclean: clean + +depend: + @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done + +tar: clean + a=`basename $$PWD`; cd ..; tar zcf $$a.tar.gz $$a; echo generated ../$$a.tar.gz; + +tarcvs: clean + a=`basename $$PWD`; cd ..; tar --exclude=CVS -zcf $$a.tar.gz $$a; echo generated ../$$a.tar.gz; +install: + @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done + +install_examples: + @set -e ; for d in $(EXAMPLES_DIRS) ; do $(MAKE) -C $$d $@ ; done + +.PHONY: tar tarcvs lib + +# DO NOT DELETE diff --git a/src/libpfm-3.y/README b/src/libpfm-3.y/README new file mode 100644 index 0000000..630cd40 --- /dev/null +++ b/src/libpfm-3.y/README @@ -0,0 +1,103 @@ + ------------------------------------------------------ + libpfm-3.10: + a helper library to program the Performance Monitoring Unit (PMU) + ------------------------------------------------------ + Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P. + Contributed by Stephane Eranian + + +This package provides a library, called libpfm, which can be used +to develop monitoring tools which use the Performance Monitoring Unit (PMU) +of several modern processors. 
+ + +This version of libpfm supports: + - For Intel IA-64: + Itanium (Merced), Itanium 2 (McKinley, Madison, Deerfield), + Itanium 2 9000/9100 (Montecito, Montvale) and Generic + - For AMD X86: + AMD64 (K8, family 10h) + - For Intel X86: + Intel P6 (Pentium II, Pentium Pro, Pentium III, Pentium M) + Intel Yonah (Core Duo/Core Solo), + Intel Netburst (Pentium 4, Xeon) + Intel Core (Merom, Penryn, Dunnington) Core 2 and Quad + Intel Atom + Intel Nehalem (Nehalem, Westmere) + Intel architectural perfmon v1, v2, v3 + - For MIPS: + 5K, 20K, 25KF, 34K, 5KC, 74K, R10000, R12000, RM7000, RM9000, + SB1, VR5432, VR5500, SiCortex ICA9A/ICE9B + - For Cray: + XT3, XT4, XT5, XT5h, X2 + - For IBM: + IBM Cell processor + POWER: PPC970, PPC970MP, POWER4+, POWER5, POWER5+, POWER6, POWER7 + - For Sun: + Sparc: Ultra12, Ultra3, Ultra3i, Ultra3Plus, Ultra4Plus, + Sparc: Niagara1, Niagara2 + +The core library is generic and does not depend on the perfmon interface. It is +possible to use it on other operating systems. + +WHAT'S THERE +------------- + - the library source code including support for all processors listed + above + + - a set of examples showing how the library can be used with the + perfmon2 and perfmon3 kernel interface. + + - a set of older examples for IA-64 only using the legacy perfmon2 + interface (v2.0). + + - a set of library header files and the perfmon2 and perfmon3 kernel + interface headers + + - libpfms: a simple library to help setup SMP system-wide monitoring + sessions. It comes with a simple example. This library is not part + of libpfm. + + - man pages for all the library entry points + + - Python bindings for libpfm and the perfmon interface (experimental). + +INSTALLATION +------------ + - edit config.mk to : + - update some of the configuration variables + - make your compiler options + + - type make + - type make install + + - To compile and install the Python bindings, you need to go to the + python sub-directory and type make. 
Python is not systematically + built + + - to compile the library for another ABI (e.g. 32-bit x86 on a + 64-bit x86) system, you can pass the ABI flag to the compiler as + follows (assuming you have the multilib version of gcc): + $ make OPTION="-m32 -O2" + +REQUIREMENTS: +------------- + - to run the programs in the examples_v3.x subdir, you MUST be using a linux + kernel with perfmon3. Perfmon3 is available as a branch of the + perfmon kernel GIT tree on kernel.org. + + - to run the programs in the examples_v2.x subdir, you MUST be using a + linux kernel with perfmon2. Perfmon2 is available as the main branch + of the perfmon kernel GIT tree on kernel.org. + + - On IA-64, the examples in examples_ia64_v2.0 work with + any 2.6.x kernels. + + - to compile the Python bindings, you need to have SWIG and the python + development packages installed + +DOCUMENTATION +------------- + - man pages for all entry points + - More information can be found on library web site: + http://perfmon2.sf.net diff --git a/src/libpfm-3.y/TODO b/src/libpfm-3.y/TODO new file mode 100644 index 0000000..567e6a8 --- /dev/null +++ b/src/libpfm-3.y/TODO @@ -0,0 +1,10 @@ +TODO list: +---------- +- add Linux/ia64 perfmon support to GNU libc, this would avoid + having the perfmon.h perfmon_default_smpl.h headers here. + +- add library interface to help setup system-wide mode SMP + on Linux/ia64 + +- add support for cumulative calls to pfm_dispatch_events() + diff --git a/src/libpfm-3.y/config.mk b/src/libpfm-3.y/config.mk new file mode 100644 index 0000000..71cd8ff --- /dev/null +++ b/src/libpfm-3.y/config.mk @@ -0,0 +1,207 @@ +# +# Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
+# Contributed by Stephane Eranian +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# This file is part of libpfm, a performance monitoring support library for +# applications on Linux. +# + +# +# This file defines the global compilation settings. 
+# It is included by every Makefile +# +# +SYS := $(shell uname -s) +ARCH := $(shell uname -m) +ifeq (i686,$(findstring i686,$(ARCH))) +override ARCH=ia32 +endif +ifeq (i586,$(findstring i586,$(ARCH))) +override ARCH=ia32 +endif +ifeq (i486,$(findstring i486,$(ARCH))) +override ARCH=ia32 +endif +ifeq (i386,$(findstring i386,$(ARCH))) +override ARCH=ia32 +endif +ifeq (ppc,$(findstring ppc,$(ARCH))) +override ARCH=powerpc +endif +ifeq (sparc64,$(findstring sparc64,$(ARCH))) +override ARCH=sparc +endif + +# +# CONFIG_PFMLIB_SHARED: y=compile static and shared versions, n=static only +# CONFIG_PFMLIB_OLD_PFMV2: enable old ( 2.x, x <=4) perfmon2 (mutually exclusive with v3 support) +CONFIG_PFMLIB_SHARED?=y +CONFIG_PFMLIB_OLD_PFMV2?=n + +# +# Cray-X2 is cross-compiled. Check the programming environment +# +PE := $(shell echo $${CRAY_PE_TARGET}) +ifeq (cray-x2,$(PE)) +override ARCH=crayx2 +endif + +# +# Cell Broadband Engine is reported as PPC but needs special handling. +# +ifeq ($(SYS),Linux) +MACHINE := $(shell grep -q 'Cell Broadband Engine' /proc/cpuinfo && echo cell) +ifeq (cell,$(MACHINE)) +override ARCH=cell +endif +endif + +# +# Library version +# +VERSION=3 +REVISION=10 +AGE=0 + +# +# Where should things (lib, headers, man) go in the end. 
+# +install_prefix?=/usr/local +PREFIX?=$(install_prefix) +LIBDIR=$(PREFIX)/lib +INCDIR=$(PREFIX)/include +MANDIR=$(PREFIX)/share/man +EXAMPLESDIR=$(PREFIX)/share/doc/libpfm-$(VERSION).$(REVISION).$(AGE)/examples + +# +# Configuration Paramaters for libpfm library +# +ifeq ($(ARCH),ia64) +CONFIG_PFMLIB_ARCH_IA64=y +endif + +ifeq ($(ARCH),x86_64) +CONFIG_PFMLIB_ARCH_X86_64=y +endif + +ifeq ($(ARCH),ia32) +CONFIG_PFMLIB_ARCH_I386=y +endif + +ifeq ($(ARCH),mips64) +CONFIG_PFMLIB_ARCH_MIPS64=y +# +# SiCortex/Linux +# +MACHINE := $(shell test -f /etc/sicortex-release && echo sicortex) +ifeq (sicortex,$(MACHINE)) +CONFIG_PFMLIB_ARCH_SICORTEX=y +endif +endif + +ifeq ($(ARCH),powerpc) +CONFIG_PFMLIB_ARCH_POWERPC=y +endif + +ifeq ($(ARCH),sparc) +CONFIG_PFMLIB_ARCH_SPARC=y +endif + +ifeq ($(XTPE_COMPILE_TARGET),linux) +CONFIG_PFMLIB_ARCH_CRAYXT=y +CONFIG_PFMLIB_SHARED=n +CONFIG_PFMLIB_OLD_PFMV2=y +endif + +ifeq ($(XTPE_COMPILE_TARGET),catamount) +CONFIG_PFMLIB_ARCH_CRAYXT=y +CONFIG_PFMLIB_SHARED=n +CONFIG_PFMLIB_OLD_PFMV2=y +endif + +ifeq ($(ARCH),crayx2) +CONFIG_PFMLIB_ARCH_CRAYX2=y +CONFIG_PFMLIB_SHARED=n +CONFIG_PFMLIB_OLD_PFMV2=y +endif + +ifeq ($(ARCH),cell) +CONFIG_PFMLIB_CELL=y +endif + +# handle special cases for 64-bit builds +ifeq ($(BITMODE),64) +ifeq ($(ARCH),powerpc) +CONFIG_PFMLIB_ARCH_POWERPC64=y +endif +endif + +# +# you shouldn't have to touch anything beyond this point +# + +# +# The entire package can be compiled using +# icc the Intel Itanium Compiler (7.x,8.x, 9.x) +# or GNU C +#CC=icc +CC?=gcc +LIBS= +INSTALL=install +LN?=ln -sf +PFMINCDIR=$(TOPDIR)/include +PFMLIBDIR=$(TOPDIR)/lib +DBG?=-g -Wall -Werror +# gcc/mips64 bug +ifeq ($(CONFIG_PFMLIB_ARCH_SICORTEX),y) +OPTIM?=-O +else +OPTIM?=-O2 +endif +CFLAGS+=$(OPTIM) $(DBG) -I$(PFMINCDIR) +MKDEP=makedepend +PFMLIB=$(PFMLIBDIR)/libpfm.a + +# Reset options for Cray XT +ifeq ($(CONFIG_PFMLIB_ARCH_CRAYXT),y) +LDFLAGS+=-static +CONFIG_PFMLIB_OLD_PFMV2=y +endif + +# Reset the compiler for Cray-X2 (load x2-gcc 
module) +ifeq ($(CONFIG_PFMLIB_ARCH_CRAYX2),y) +CC=craynv-cray-linux-gnu-gcc +LDFLAGS+=-static +CONFIG_PFMLIB_OLD_PFMV2=y +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_SICORTEX),y) +CONFIG_PFMLIB_OLD_PFMV2=y +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_POWERPC64),y) +CFLAGS+= -m64 +LDFLAGS+= -m64 +LIBDIR=$(PREFIX)/lib64 +endif + +ifeq ($(CONFIG_PFMLIB_OLD_PFMV2),y) +CFLAGS +=-DPFMLIB_OLD_PFMV2 +endif diff --git a/src/libpfm-3.y/docs/Makefile b/src/libpfm-3.y/docs/Makefile new file mode 100644 index 0000000..525c466 --- /dev/null +++ b/src/libpfm-3.y/docs/Makefile @@ -0,0 +1,72 @@ +# +# Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. +# Contributed by Stephane Eranian +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + +TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi)/.. 
+ +include $(TOPDIR)/config.mk +include $(TOPDIR)/rules.mk + +ifeq ($(CONFIG_PFMLIB_ARCH_IA64),y) +ARCH_MAN=libpfm_itanium.3 libpfm_itanium2.3 libpfm_montecito.3 +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_I386),y) +ARCH_MAN=libpfm_p6.3 libpfm_core.3 libpfm_amd64.3 libpfm_atom.3 libpfm_nehalem.3 +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_X86_64),y) +ARCH_MAN=libpfm_amd64.3 libpfm_core.3 libpfm_atom.3 libpfm_nehalem.3 +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_MIPS64),y) +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_POWERPC),y) +ARCH_MAN=libpfm_powerpc.3 +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_CRAYXT),y) +endif + +ifeq ($(CONFIG_PFMLIB_CELL),y) +endif + +GEN_MAN= libpfm.3 pfm_dispatch_events.3 pfm_find_event.3 pfm_find_event_bycode.3 \ + pfm_find_event_bycode_next.3 pfm_find_event_mask.3 pfm_find_full_event.3 \ + pfm_force_pmu.3 pfm_get_cycle_event.3 pfm_get_event_code.3 pfm_get_event_code_counter.3 \ + pfm_get_event_counters.3 pfm_get_event_description.3 pfm_get_event_mask_code.3 \ + pfm_get_event_mask_description.3 pfm_get_event_mask_name.3 pfm_get_event_name.3 \ + pfm_get_full_event_name.3 pfm_get_hw_counter_width.3 pfm_get_impl_counters.3 \ + pfm_get_impl_pmcs.3 pfm_get_impl_pmds.3 pfm_get_inst_retired.3 pfm_get_max_event_name_len.3 \ + pfm_get_num_counters.3 pfm_get_num_events.3 pfm_get_num_pmcs.3 \ + pfm_get_num_pmds.3 pfm_get_pmu_name.3 pfm_get_pmu_name_bytype.3 \ + pfm_get_pmu_type.3 pfm_get_version.3 pfm_initialize.3 \ + pfm_list_supported_pmus.3 pfm_pmu_is_supported.3 pfm_regmask_and.3 \ + pfm_regmask_clr.3 pfm_regmask_copy.3 pfm_regmask_eq.3 pfm_regmask_isset.3 \ + pfm_regmask_or.3 pfm_regmask_set.3 pfm_regmask_weight.3 pfm_set_options.3 \ + pfm_strerror.3 + +MAN=$(GEN_MAN) $(ARCH_MAN) +install: + + -mkdir -p $(DESTDIR)$(MANDIR)/man3 + ( cd man3; $(INSTALL) -m 644 $(MAN) $(DESTDIR)$(MANDIR)/man3 ) diff --git a/src/libpfm-3.y/docs/man3/libpfm.3 b/src/libpfm-3.y/docs/man3/libpfm.3 new file mode 100644 index 0000000..4e0dad7 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm.3 @@ -0,0 
+1,105 @@ +.TH LIBPFM 3 "March, 2008" "" "Linux Programmer's Manual" +.SH NAME +libpfm \- a helper library to program Hardware Performance Units (PMUs) +.SH SYNOPSIS +.nf +.B #include +.SH DESCRIPTION +The libpfm library is a helper library which is used by applications to +help program the Performance Monitoring Unit (PMU), i.e., the hardware +performance counters of modern processors. It provides a generic and portable +programming interface to help setup the PMU configuration registers given a +list of events to measure. + +A diversity of PMU hardware is supported, a list can be found below +under \fBSUPPORTED HARDWARE\fR. The library is primarily designed to be used in +conjunction with the Perfmon2 Linux kernel interface. However, at its core, +it is totally independent of that interface and could as well be used on other +operating systems. It is important to realize that the library does not make the +actual kernel calls to program the PMU, it simply helps applications figure out +which PMU registers to use to measure certain events or access certain advanced +PMU features. + +The library logically divides PMU registers into two categories. The +performance monitoring data registers (PMD) are used to collect results, e.g., +counts. The performance monitoring configuration registers (PMCS) are used +to indicate what events to measure or what feature to enable. Programming the +PMU consists in setting up the PMC registers and collecting the results in the +PMD registers. The central piece of the library is the \fBpfm_dispatch_events\fR +function. + +The number of PMC and PMD registers varies between architectures and +CPU models. The association of PMC to PMD can also change. Moreover +the number and encodings of events can also widely change. Finally, the +structure of a PMC register can also change. All these factors make it +quite difficult to write monitoring tools. 
+ +This library is designed to simplify the programming of the PMC registers by +hiding the complexity behind a simple interface. The library does this without +limiting accessibility to model specific features by using a layered design. + +The library is structured in two layers. The common layer provides an interface +that is shared across all PMU models. This layer is good enough to setup simple +monitoring sessions which count occurrences of events. Then, there is a +model-specific layer which gives access to the model-specific features. +For instance, on Itanium, applications can use the library to setup the +registers for the Branch Trace Buffer. Model-specific interfaces have the +abbreviated PMU model name in their names. For instance, +\fBpfm_ita2_get_event_umask()\fR is an Itanium2 (ita2) specific function. + +When the library is initialized, it automatically probes the host CPU and +enables the right set of interfaces. + +The common interface is defined in the \fBpfmlib.h\fR header file. +Model-specific interfaces are defined in model-specific header files. +For instance, \fBpfmlib_amd64.h\fR provides the AMD64 interface. +.SH ENVIRONMENT VARIABLES +It is possible to enable certain debug output of the library using environment +variables. The following variables are defined: +.TP +.B LIBPFM_VERBOSE +Enable verbose output. Value must be 0 or 1. When not set, verbosity level +can be controlled with the \fBpfm_set_options\fR function. +.TP +.B LIBPFM_DEBUG +Enable debug output. Value must be 0 or 1. When not set, debug level +can be controlled with the \fBpfm_set_options\fR function. +.TP +.B LIBPFM_DEBUG_STDOUT +Redirect verbose and debug output to the standard output file descriptor (stdout). +By default, the output is directed to the standard error file descriptor (stderr). +.sp +Alternatively, it is possible to control verbosity and debug output using +the \fBpfm_set_options\fR function. 
+.LP +.SH SUPPORTED HARDWARE +.nf +libpfm_amd64(3) AMD64 processors K8 and Barcelona (families 0Fh and 10h) +libpfm_core(3) Intel Core processor family +libpfm_atom(3) Intel Atom processor family +libpfm_itanium(3) Intel Itanium +libpfm_itanium2(3) Intel Itanium 2 +libpfm_montecito(3) Intel dual-core Itanium 2 9000 (Montecito) +libpfm_p6(3) P6 processor family including the Pentium M processor +libpfm_powerpc(3) IBM PowerPC and POWER processor families + (PPC970(FX,GX), PPC970MP, POWER4, POWER4+, POWER5, + POWER5+, and POWER6) +.fi +.SH AUTHORS +.nf +Stephane Eranian +Robert Richter +.fi +.PP +.SH SEE ALSO +libpfm(3), libpfm_amd64(3), libpfm_core(3), libpfm_itanium2(3), +libpfm_itanium(3), libpfm_montecito(3), libpfm_p6(3), +libpfm_powerpc(3). +.nf +pfm_dispatch_events(3), pfm_find_event(3), pfm_set_options(3), +pfm_get_cycle_event(3), pfm_get_event_name(3), pfm_get_impl_pmcs(3), +pfm_get_pmu_name(3), pfm_get_version(3), pfm_initialize(3), +pfm_regmask_set(3), pfm_set_options(3), pfm_strerror(3). +.fi +.sp +Examples shipped with the library diff --git a/src/libpfm-3.y/docs/man3/libpfm_amd64.3 b/src/libpfm-3.y/docs/man3/libpfm_amd64.3 new file mode 100644 index 0000000..99e6412 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_amd64.3 @@ -0,0 +1,158 @@ +.TH LIBPFM 3 "April, 2008" "" "Linux Programmer's Manual" +.SH NAME +libpfm_amd64 - support for AMD64 processors +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.B #include <perfmon/pfmlib_amd64.h> +.sp +.SH DESCRIPTION +The libpfm library provides full support for the AMD64 processor +families 0Fh and 10h (K8, Barcelona, Phenom) when running in either +32-bit or 64-bit mode. The interface is defined in +\fBpfmlib_amd64.h\fR. It consists of a set of functions and structures +which describe and allow access to the AMD64 specific PMU +features. Note that it only supports AMD processors.
+.sp +When AMD64 processor-specific features are needed to support a +measurement, their descriptions must be passed as model-specific input +arguments to the \fBpfm_dispatch_events()\fR function. The AMD64 +processor-specific input arguments are described in the +\fBpfmlib_amd64_input_param_t\fR structure and the output parameters +in \fBpfmlib_amd64_output_param_t\fR. They are defined as follows: +.sp +.nf +typedef struct { + uint32_t cnt_mask; + uint32_t flags; +} pfmlib_amd64_counter_t; + +typedef struct { + unsigned int maxcnt; + unsigned int options; +} ibs_param_t; + +typedef struct { + pfmlib_amd64_counter_t pfp_amd64_counters[PMU_AMD64_MAX_COUNTERS]; + uint32_t flags; + uint32_t reserved1; + ibs_param_t ibsfetch; + ibs_param_t ibsop; + uint64_t reserved2; +} pfmlib_amd64_input_param_t; + +typedef struct { + uint32_t ibsfetch_base; + uint32_t ibsop_base; + uint64_t reserved[7]; +} pfmlib_amd64_output_param_t; +.fi +.LP +The \fBflags\fR field of \fBpfmlib_amd64_input_param_t\fR describes +which features of the PMU to use. Following use flags exist: +.TP +.B PFMLIB_AMD64_USE_IBSFETCH +Profile IBS fetch performance (see below under \fBINSTRUCTION BASED +SAMPLING\fR) +.TP +.B PFMLIB_AMD64_USE_IBSOP +Profile IBS execution performance (see below under \fBINSTRUCTION BASED +SAMPLING\fR) +.LP +Multiple features can be selected. Note that there are no use flags +needed for \fBADDITIONAL PER-EVENT FEATURES\fR. +.LP +Various typedefs for MSR encoding and decoding are available. See +\fBpfmlib_amd64.h\fR for details. +.SS ADDITIONAL PER-EVENT FEATURES +AMD64 processors provide a few additional per-event features for +counters: thresholding, inversion, edge detection, +virtualization. They can be set using the \fBpfp_amd64_counters\fR +data structure for each event. 
The \fBflags\fR field of +\fBpfmlib_amd64_counter_t\fR can be initialized as follows: +.TP +.B PFMLIB_AMD64_SEL_INV +Inverse the results of the \fBcnt_mask\fR comparison when set +.TP +.B PFMLIB_AMD64_SEL_EDGE +Enables edge detection of events. +.TP +.B PFMLIB_AMD64_SEL_GUEST +On AMD64 Family 10h processors only. Event is only measured when +processor is in guest mode. +.TP +.B PFMLIB_AMD64_SEL_HOST +On AMD64 Family 10h processors only. Event is only measured when +processor is in host mode. +.LP +The \fBcnt_mask\fR field is used to set the event threshold. The +value of the counter is incremented each time the number of +occurrences per cycle of the event is greater or equal to the value of +the field. When zero all occurrences are counted. +.SS INSTRUCTION BASED SAMPLING (IBS) +The libpfm_amd64 provides access to the model specific feature +Instruction Based Sampling (IBS). IBS has been introduced with family +10h. +.LP +The IBS setup is using the structures \fBpfmlib_amd64_input_param_t\fR +and \fBpfmlib_amd64_output_param_t\fR with its members \fBflags\fR, +\fBibsfetch\fR, \fBibsop\fR, \fBibsfetch_base\fR, +\fBibsop_base\fR. The input arguments \fBibsop\fR and \fBibsfetch\fR +can be set in inp_mod (type \fBpfmlib_amd64_input_param_t\fR). The +corresponding \fBflags\fR must be set to enable a feature. +.LP +Both, IBS execution profiling and IBS fetch profiling, require a +maximum count value of the periodic counter (\fBmaxcnt\fR) as +parameter. This is a 20 bit value, bits 3:0 are always set to +zero. Additionally, there is an option (\fBoptions\fR) to enable +randomization (\fBIBS_OPTIONS_RANDEN\fR) for IBS fetch profiling. +.LP +The IBS registers IbsFetchCtl (0xC0011030) and IbsOpCtl (0xC0011033) +are available as PMC and PMD in Perfmon. The function +\fBpfm_dispatch_events()\fR initializes these registers according to +the input parameters in \fBpfmlib_amd64_input_param_t\fR. 
+.LP +Also, \fBpfm_dispatch_events()\fR passes back the index in pfp_pmds[] +of the IbsOpCtl and IbsFetchCtl registers. For this there are the +entries \fBibsfetch_base\fR and \fBibsop_base\fR in +\fBpfmlib_amd64_output_param_t\fR. The index may vary depending on +other PMU settings, especially counter settings. If using the PMU with +only one IBS feature and no counters, the index of the base register +is 0. +.LP +Example code: +.LP +.nf + /* initialize IBS */ + inp_mod.ibsop.maxcnt = 0xFFFF0; + inp_mod.flags |= PFMLIB_AMD64_USE_IBSOP; + ret = pfm_dispatch_events(NULL, &inp_mod, &outp, &outp_mod); + if (ret != PFMLIB_SUCCESS) { ... } + + /* setup PMU */ + /* PMC_IBSOPCTL */ + pc[0].reg_num = outp.pfp_pmcs[0].reg_num; + pc[0].reg_value = outp.pfp_pmcs[0].reg_value; + /* PMD_IBSOPCTL */ + pd[0].reg_num = outp.pfp_pmds[0].reg_num; + pd[0].reg_value = 0; + + /* setup sampling */ + pd[0].reg_flags = PFM_REGFL_OVFL_NOTIFY; + /* add range check here */ + pd[0].reg_smpl_pmds[0] = + ((1UL << PMD_IBSOP_NUM) - 1) << outp.pfp_pmds[0].reg_num; + + /* write pc and pd to PMU */ + ... +.fi +.SH ERRORS +Refer to the description of the \fBpfm_dispatch_events()\fR function for errors. +.SH SEE ALSO +pfm_dispatch_events(3) and set of examples shipped with the library +.SH AUTHORS +.nf +Stephane Eranian +Robert Richter +.fi +.PP diff --git a/src/libpfm-3.y/docs/man3/libpfm_atom.3 b/src/libpfm-3.y/docs/man3/libpfm_atom.3 new file mode 100644 index 0000000..d86dd0e --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_atom.3 @@ -0,0 +1,70 @@ +.TH LIBPFM 3 "November, 2006" "" "Linux Programmer's Manual" +.SH NAME +libpfm_atom - support for Intel Atom processor family +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.B #include <perfmon/pfmlib_intel_atom.h> +.sp +.SH DESCRIPTION +The libpfm library provides full support for the Intel Atom processor. This +processor implements Intel architectural perfmon v3 with Precise Event-Based +Sampling (PEBS) support.
It also implements all architected events to which +it adds lots of Atom specific events. +.sp +The libpfm interface is defined in \fBpfmlib_intel_atom.h\fR. It consists +of a set of functions and structures which describe and allow access to the +Intel Atom processor specific PMU features. +.sp +When Intel Atom processor specific features are needed to support a measurement, their descriptions +must be passed as model-specific input arguments to the \fBpfm_dispatch_events()\fR function. The Intel +Atom processor specific input arguments are described in the \fBpfmlib_intel_atom_input_param_t\fR structure. +No output parameters are currently defined. The input parameters are defined as follows: +.sp +.nf +typedef struct { + unsigned int cnt_mask; + unsigned int flags; +} pfmlib_intel_atom_counter_t; + +typedef struct { + pfmlib_intel_atom_counter_t pfp_intel_atom_counters[PMU_INTEL_ATOM_NUM_COUNTERS]; + unsigned int pfp_intel_atom_pebs_used; + uint64_t reserved[4]; +} pfmlib_intel_atom_input_param_t; +.fi +.sp +.sp +The Intel Atom processor provides several additional per-event features for +counters: thresholding, inversion, edge detection, monitoring both threads. They +can be set using the \fBpfp_intel_atom_counters\fR data structure for each event. +The \fBflags\fR field can be initialized with any combination of the following +values: +.TP +.B PFMLIB_INTEL_ATOM_SEL_INV +Invert the result of the \fBcnt_mask\fR comparison when set +.TP +.B PFMLIB_INTEL_ATOM_SEL_EDGE +Enable edge detection of events. +.TP +.B PFMLIB_INTEL_ATOM_SEL_ANYTHR +Enable measuring the event in any of the two threads. By default only the current thread is measured. +.LP +The \fBcnt_mask\fR field is used to set the event threshold. +The value of the counter is incremented each time the number of occurrences +per cycle of the event is greater than or equal to the value of the field. +Thus the event is modified to actually measure the number of qualifying cycles.
+When zero all occurrences are counted (this is the default). +.sp +.SH Support for Precise-Event Based Sampling (PEBS) +The library can be used to setup the PMC registers when using PEBS. In this case, +the \fBpfp_intel_atom_pebs_used\fR field must be set to 1. When using PEBS, it is +not possible to use more than one event. +.LP +.SH ERRORS +Refer to the description of the \fBpfm_dispatch_events()\fR function for errors. +.SH SEE ALSO +pfm_dispatch_events(3) and set of examples shipped with the library +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/libpfm_core.3 b/src/libpfm-3.y/docs/man3/libpfm_core.3 new file mode 100644 index 0000000..b738434 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_core.3 @@ -0,0 +1,72 @@ +.TH LIBPFM 3 "November, 2006" "" "Linux Programmer's Manual" +.SH NAME +libpfm_core - support for Intel Core processor family +.SH SYNOPSIS +.nf +.B #include +.B #include +.sp +.SH DESCRIPTION +The libpfm library provides full support for the Intel Core processor family, including +the Core 2 Duo and Quad series. The interface is defined in \fBpfmlib_core.h\fR. It consists +of a set of functions and structures which describe and allow access to the +Intel Core processors specific PMU features. +.sp +When Intel Core processor specific features are needed to support a measurement, their descriptions +must be passed as model-specific input arguments to the \fBpfm_dispatch_events()\fR function. The Intel Core +processors specific input arguments are described in the \fBpfmlib_core_input_param_t\fR structure. No +output parameters are currently defined. 
The input parameters are defined as follows: +.sp +.nf +typedef struct { + unsigned int cnt_mask; + unsigned int flags; +} pfmlib_core_counter_t; + +typedef struct { + unsigned int pebs_used; +} pfmlib_core_pebs_t; + +typedef struct { + pfmlib_core_counter_t pfp_core_counters[PMU_CORE_NUM_COUNTERS]; + pfmlib_core_pebs_t pfp_core_pebs; + uint64_t reserved[4]; +} pfmlib_core_input_param_t; +.fi +.sp +.sp +The Intel Core processor provides a few additional per-event features for +counters: thresholding, inversion, edge detection. They can be set using the +\fBpfp_core_counters\fR data structure for each event. The \fBflags\fR +field can be initialized with any combinations of the following values: +.TP +.B PFMLIB_CORE_SEL_INV +Inverse the results of the \fBcnt_mask\fR comparison when set +.TP +.B PFMLIB_CORE_SEL_EDGE +Enables edge detection of events. +.LP +The \fBcnt_mask\fR field is used to set the event threshold. +The value of the counter is incremented each time the number of occurrences +per cycle of the event is greater or equal to the value of the field. +Thus the event is modified to actually measure the number of qualifying cycles. +When zero all occurrences are counted (this is the default). +.sp +.SH Support for Precise-Event Based Sampling (PEBS) +The library can be used to setup the PMC registers when using PEBS. In this case, +the \fBpfp_core_pebs\fR structure must be used and the \fBpebs_used\fR field must +be set to 1. When using PEBS, it is not possible to use more than one event. +.SH Support for Intel Core 2 Duo and Quad processors +The Intel Core 2 Duo and Quad processors are based on the Intel Core micro-architecture. +They implement the Intel architectural PMU and some extensions such as PEBS. +They support all the architectural events and a lot more Core 2 specific events. +The library auto-detects the processor and provides access to Core 2 events whenever +possible. 
+.LP +.SH ERRORS +Refer to the description of the \fBpfm_dispatch_events()\fR function for errors. +.SH SEE ALSO +pfm_dispatch_events(3) and set of examples shipped with the library +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/libpfm_itanium.3 b/src/libpfm-3.y/docs/man3/libpfm_itanium.3 new file mode 100644 index 0000000..298fba2 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_itanium.3 @@ -0,0 +1,396 @@ +.TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" +.SH NAME +libpfm_itanium - support for Itanium specific PMU features +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.B #include <perfmon/pfmlib_itanium.h> +.sp +.BI "int pfm_ita_is_ear(unsigned int " i ");" +.BI "int pfm_ita_is_dear(unsigned int " i ");" +.BI "int pfm_ita_is_dear_tlb(unsigned int " i ");" +.BI "int pfm_ita_is_dear_cache(unsigned int " i ");" +.BI "int pfm_ita_is_iear(unsigned int " i ");" +.BI "int pfm_ita_is_iear_tlb(unsigned int " i ");" +.BI "int pfm_ita_is_iear_cache(unsigned int " i ");" +.BI "int pfm_ita_is_btb(unsigned int " i ");" +.BI "int pfm_ita_support_opcm(unsigned int " i ");" +.BI "int pfm_ita_support_iarr(unsigned int " i ");" +.BI "int pfm_ita_support_darr(unsigned int " i ");" +.BI "int pfm_ita_get_event_maxincr(unsigned int " i ", unsigned int *"maxincr ");" +.BI "int pfm_ita_get_event_umask(unsigned int " i ", unsigned long *"umask ");" +.sp +.SH DESCRIPTION +The libpfm library provides full support for all the Itanium specific features +of the PMU. The interface is defined in \fBpfmlib_itanium.h\fR. It consists +of a set of functions and structures which describe and allow access to the +Itanium specific PMU features. +.sp +The Itanium specific functions presented here are mostly used to retrieve +the characteristics of an event. Given an opaque event descriptor, obtained +by the \fBpfm_find_event()\fR or its derivative functions, they return a boolean value +indicating whether this event supports this feature or is of a particular +kind.
+.sp +The \fBpfm_ita_is_ear()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a EAR event, i.e., an Event Address Register +type of events. Otherwise 0 is returned. For instance, \fBDATA_EAR_CACHE_LAT4\fR is an ear event, but +\fBCPU_CYCLES\fR is not. It can be a data or instruction EAR event. +.sp +The \fBpfm_ita_is_dear()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an Data EAR event. Otherwise 0 is returned. +It can be a cache or TLB EAR event. +.sp +The \fBpfm_ita_is_dear_tlb()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a Data EAR TLB event. Otherwise 0 is returned. +.sp +The \fBpfm_ita_is_dear_cache()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a Data EAR cache event. Otherwise 0 is returned. +.sp +The \fBpfm_ita_is_iear()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an instruction EAR event. Otherwise 0 is returned. +It can be a cache or TLB instruction EAR event. +.sp +The \fBpfm_ita_is_iear_tlb()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an instruction EAR TLB event. Otherwise 0 is returned. +.sp +The \fBpfm_ita_is_iear_cache()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an instruction EAR cache event. Otherwise 0 is returned. +.sp +The \fBpfm_ita_support_opcm()\fR function returns 1 if the event +designated by \fBi\fR supports opcode matching, i.e., can this event be measured accurately +when opcode matching via PMC8/PMC9 is active. Not all events supports this feature. +.sp +The \fBpfm_ita_support_iarr()\fR function returns 1 if the event +designated by \fBi\fR supports code address range restrictions, i.e., can this event be measured accurately when +code range restriction is active. Otherwise 0 is returned. Not all events supports this feature. 
+.sp +The \fBpfm_ita_support_darr()\fR function returns 1 if the event +designated by \fBi\fR supports data address range restrictions, i.e., can this event be measured accurately when +data range restriction is active. Otherwise 0 is returned. Not all events supports this feature. +.sp +The \fBpfm_ita_get_event_maxincr()\fR function returns in \fBmaxincr\fR the maximum number of +occurrences per cycle for the event designated by \fBi\fR. Certain Itanium events can occur more than +once per cycle. When an event occurs more than once per cycle, the PMD counter will be incremented accordingly. +It is possible to restrict measurement when event occur more than once per cycle. For instance, +\fBNOPS_RETIRED\fR can happen up to 6 times/cycle which means that the threshold can be adjusted between 0 and 5, +where 5 would mean that the PMD counter would be incremented by 1 only when the nop instruction is executed more +than 5 times/cycle. This function returns the maximum number of occurrences of the event per cycle, and +is the non-inclusive upper bound for the threshold to program in the PMC register. +.sp +The \fBpfm_ita_get_event_umask()\fR function returns in \fBumask\fR the umask for the event +designated by \fBi\fR. +.sp + +When the Itanium specific features are needed to support a measurement their descriptions must be passed +as model-specific input arguments to the \fBpfm_dispatch_events()\fR function. The Itanium specific +input arguments are described in the \fBpfmlib_ita_input_param_t\fR structure and the output +parameters in \fBpfmlib_ita_output_param_t\fR. 
They are defined as follows: +.sp +.nf +typedef enum { + PFMLIB_ITA_ISM_BOTH=0, + PFMLIB_ITA_ISM_IA32=1, + PFMLIB_ITA_ISM_IA64=2 +} pfmlib_ita_ism_t; + +typedef struct { + unsigned int flags; + unsigned int thres; + pfmlib_ita_ism_t ism; +} pfmlib_ita_counter_t; + +typedef struct { + unsigned char opcm_used; + unsigned long pmc_val; +} pfmlib_ita_opcm_t; + +typedef struct { + unsigned char btb_used; + + unsigned char btb_tar; + unsigned char btb_tac; + unsigned char btb_bac; + unsigned char btb_tm; + unsigned char btb_ptm; + unsigned char btb_ppm; + unsigned int btb_plm; +} pfmlib_ita_btb_t; + +typedef enum { + PFMLIB_ITA_EAR_CACHE_MODE= 0, + PFMLIB_ITA_EAR_TLB_MODE = 1, +} pfmlib_ita_ear_mode_t; + +typedef struct { + unsigned char ear_used; + + pfmlib_ita_ear_mode_t ear_mode; + pfmlib_ita_ism_t ear_ism; + unsigned int ear_plm; + unsigned long ear_umask; +} pfmlib_ita_ear_t; + +typedef struct { + unsigned int rr_plm; + unsigned long rr_start; + unsigned long rr_end; +} pfmlib_ita_input_rr_desc_t; + +typedef struct { + unsigned long rr_soff; + unsigned long rr_eoff; +} pfmlib_ita_output_rr_desc_t; + + +typedef struct { + unsigned int rr_flags; + pfmlib_ita_input_rr_desc_t rr_limits[4]; + unsigned char rr_used; +} pfmlib_ita_input_rr_t; + +typedef struct { + unsigned int rr_nbr_used; + pfmlib_ita_output_rr_desc_t rr_infos[4]; + pfmlib_reg_t rr_br[8]; +} pfmlib_ita_output_rr_t; + +typedef struct { + pfmlib_ita_counter_t pfp_ita_counters[PMU_ITA_NUM_COUNTERS]; + + unsigned long pfp_ita_flags; + + pfmlib_ita_opcm_t pfp_ita_pmc8; + pfmlib_ita_opcm_t pfp_ita_pmc9; + pfmlib_ita_ear_t pfp_ita_iear; + pfmlib_ita_ear_t pfp_ita_dear; + pfmlib_ita_btb_t pfp_ita_btb; + pfmlib_ita_input_rr_t pfp_ita_drange; + pfmlib_ita_input_rr_t pfp_ita_irange; +} pfmlib_ita_input_param_t; + +typedef struct { + pfmlib_ita_output_rr_t pfp_ita_drange; + pfmlib_ita_output_rr_t pfp_ita_irange; +} pfmlib_ita_output_param_t; +.fi +.sp +.SH INSTRUCTION SET +.sp +The Itanium processor provides two 
additional per-event features for +counters: thresholding and instruction set selection. They can be set using the +\fBpfp_ita_counters\fR data structure for each event. The \fBism\fR +field can be initialized as follows: +.TP +.B PFMLIB_ITA_ISM_BOTH +The event will be monitored during IA-64 and IA-32 execution +.TP +.B PFMLIB_ITA_ISM_IA32 +The event will only be monitored during IA-32 execution +.TP +.B PFMLIB_ITA_ISM_IA64 +The event will only be monitored during IA-64 execution +.sp +.LP +If \fBism\fR has a value of zero, it will default to PFMLIB_ITA_ISM_BOTH. +.sp +The \fBthres\fR field indicates the threshold for the event. A threshold of \fBn\fR means +that the counter will be incremented by one only when the event occurs more than \fBn\fR +times per cycle. + +The \fBflags\fR field contains event-specific flags. The currently defined flags are: +.sp +.TP +.B PFMLIB_ITA_FL_EVT_NO_QUALCHECK +When this flag is set it indicates that the library should ignore the qualifier constraints +for this event. Qualifiers include opcode matching, code and data range restrictions. When an +event is marked as not supporting a particular qualifier, it usually means that it is ignored, i.e., +the extra level of filtering is ignored. For instance, the CPU_CYCLES event does not support code +range restrictions and by default the library will refuse to program it if range restriction is also +requested. Using the flag will override the check and the call to the \fBpfm_dispatch_events()\fR function will succeed. +In this case, CPU_CYCLES will be measured for the entire program and not just for the code range requested. +For certain measurements this is perfectly acceptable as the range restriction will only be applied +to events which support it. Make sure you understand which events do not support certain qualifiers before +using this flag.
+.LP + +.SH OPCODE MATCHING +.sp +The \fBpfp_ita_pmc8\fR and \fBpfp_ita_pmc9\fR fields of type \fBpfmlib_ita_opcm_t\fR contain +the description of what to do with the opcode matchers. Itanium supports opcode matching via +PMC8 and PMC9. When this feature is used the \fBopcm_used\fR field must be set to 1, otherwise +it is ignored by the library. The \fBpmc_val\fR simply contains the raw value to store in +PMC8 or PMC9. The library does not modify the values for PMC8 and PMC9, they will be stored in +the \fBpfp_pmcs\fR table of the generic output parameters. + +.SH EVENT ADDRESS REGISTERS +.sp +The \fBpfp_ita_iear\fR field of type \fBpfmlib_ita_ear_t\fR describes what to do with instruction +Event Address Registers (I-EARs). Again if this feature is used the \fBear_used\fR must be set to 1, +otherwise it will be ignored by the library. The \fBear_mode\fR must be set to either +\fBPFMLIB_ITA_EAR_TLB_MODE\fR or \fBPFMLIB_ITA_EAR_CACHE_MODE\fR to indicate the type of EAR to program. +The umask to store into PMC10 must be in \fBear_umask\fR. The privilege level mask at which the I-EAR will be +monitored must be set in \fBear_plm\fR which can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, +\fBPFM_PLM2\fR, \fBPFM_PLM3\fR. If \fBear_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. +Finally the instruction set for which to monitor is in \fBear_ism\fR and can be any one of +\fBPFMLIB_ITA_ISM_BOTH\fR, \fBPFMLIB_ITA_ISM_IA32\fR, or \fBPFMLIB_ITA_ISM_IA64\fR. +.sp +The \fBpfp_ita_dear\fR field of type \fBpfmlib_ita_ear_t\fR describes what to do with data Event Address +Registers (D-EARs). The description is identical to the I-EARs except that it applies to PMC11. + +In general, there are four different methods to program the EAR (data or instruction): +.TP +.B Method 1 +There is an EAR event in the list of events to monitor and \fBear_used\fR is cleared.
In this +case the EAR will be programmed (PMC10 or PMC11) based on the information encoded in the event. +A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to count DATA_EAR_EVENT or INSTRUCTION_EAR_EVENTS +depending on the type of EAR. +.TP +.B Method 2 +There is an EAR event in the list of events to monitor and \fBear_used\fR is set. In this +case the EAR will be programmed (PMC10 or PMC11) using the information in the \fBpfp_ita_iear\fR or +\fBpfp_ita_dear\fR structure because it contains more detailed information, such as privilege level and +instruction set. A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to count DATA_EAR_EVENT or +INSTRUCTION_EAR_EVENTS depending on the type of EAR. +.TP +.B Method 3 +There is no EAR event in the list of events to monitor and \fBear_used\fR is cleared. In this case +no EAR is programmed. +.TP +.B Method 4 +There is no EAR event in the list of events to monitor and \fBear_used\fR is set. In this case, +the EAR will be programmed (PMC10 or PMC11) using the information in the \fBpfp_ita_iear\fR or +\fBpfp_ita_dear\fR structure. This is the free running mode for the EAR. +.sp +.SH BRANCH TRACE BUFFER +The \fBpfp_ita_btb\fR field of type \fBpfmlib_ita_btb_t\fR is used to configure the Branch Trace Buffer (BTB). If the +\fBbtb_used\fR is set, then the library will take the configuration into account, otherwise any BTB configuration will be ignored. +The various fields in this structure provide means to filter out the kind of branches that get recorded in the BTB. +Each one represents an element of the branch architecture of the Itanium processor. Refer to the Itanium specific +documentation for more details on the branch architecture. The fields are as follows: +.TP +.B btb_tar +If the value of this field is 1, then branches predicted by the Target Address Register (TAR) predictions are captured. If 0 no branch +predicted by the TAR is included.
+.TP +.B btb_tac +If this field is 1, then branches predicted by the Target Address Cache (TAC) are captured. If 0 no branch predicted by the TAC +is included. +.TP +.B btb_bac +If this field is 1, then branches predicted by the Branch Address Corrector (BAC) are captured. If 0 no branch predicted by the BAC +is included. +.TP +.B btb_tm +If this field is 0, then no branch is captured. If this field is 1, then non taken branches are captured. If this field is 2, then +taken branches are captured. Finally if this field is 3 then all branches are captured. +.TP +.B btb_ptm +If this field is 0, then no branch is captured. If this field is 1, then branches with a mispredicted target address are captured. If this field +is 2, then branches with correctly predicted target address are captured. Finally if this field is 3 then all branches are captured regardless of +target address prediction. +.TP +.B btb_ppm +If this field is 0, then no branch is captured. If this field is 1, then branches with a mispredicted path (taken/non taken) are captured. If this field +is 2, then branches with correctly predicted path are captured. Finally if this field is 3 then all branches are captured regardless of +their path prediction. +.TP +.B btb_plm +This is the privilege level mask at which the BTB captures branches. It can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, +\fBPFM_PLM3\fR. If \fBbtb_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. +.sp +There are 4 methods to program the BTB and they are as follows: +.sp +.TP +.B Method 1 +The \fBBRANCH_EVENT\fR is in the list of events to monitor and \fBbtb_used\fR is cleared. In this case, +the BTB will be configured (PMC12) to record ALL branches. A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to +count \fBBRANCH_EVENT\fR. +.TP +.B Method 2 +The \fBBRANCH_EVENT\fR is in the list of events to monitor and \fBbtb_used\fR is set. 
In this case, +the BTB will be configured (PMC12) using the information in the \fBpfp_ita_btb\fR structure. A counting monitor +(PMC4/PMD4-PMC7/PMD7) will be programmed to count \fBBRANCH_EVENT\fR. +.TP +.B Method 3 +The \fBBRANCH_EVENT\fR is not in the list of events to monitor and \fBbtb_used\fR is set. In this case, +the BTB will be configured (PMC12) using the information in the \fBpfp_ita_btb\fR structure. This is the +free running mode for the BTB. +.TP +.B Method 4 +The \fBBRANCH_EVENT\fR is not in the list of events to monitor and \fBbtb_used\fR is cleared. In this case, +the BTB is not programmed. +.sp +.SH DATA AND CODE RANGE RESTRICTIONS +The \fBpfp_ita_drange\fR and \fBpfp_ita_irange\fR fields control the range restrictions for the data and +code respectively. The idea is that the application passes a set of ranges, each designated by a start +and end address. Upon return from the \fBpfm_dispatch_events()\fR function, the application gets back the set of +registers and their values that needs to be programmed via a kernel interface. + +Range restriction is implemented using the debug registers. There is a limited number of debug registers +and they go in pair. With 8 data debug registers, a maximum of 4 distinct ranges can be specified. The same +applies to code range restrictions. Moreover, there are some severe constraints on the alignment and size +of the range. Given that the size range is specified using a bitmask, there can be situations where the actual +range is larger than the requested range. The library will make the best effort to cover only what is requested. +It will never cover less than what is requested. The algorithm uses more than one pair of debug registers to +get a more precise range if necessary. Hence, up to the 4 pairs can be used to describe a single range. The library +returns the start and end offsets of the actual range compared to the requested range. 
+ +If range restriction is to be used, the \fBrr_used\fR field must be set to one, otherwise settings will be ignored. +The ranges are described by the \fBpfmlib_ita_input_rr_t\fR structure. Up to 4 ranges can be defined. Each +range is described by an entry in \fBrr_limits\fR. + +The \fBpfmlib_ita_input_rr_desc_t\fR structure is defined as follows: +.TP +.B rr_plm +The privilege level at which the range is active. It can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, \fBPFM_PLM3\fR. +If \fBrr_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. The privilege level is only relevant +for code ranges, data ranges ignore the setting. +.TP +.B rr_start +This is the start address of the range. Any address is supported but for code range it +must be bundle aligned, i.e., 16-byte aligned. +.TP +.B rr_end +This is the end address of the range. Any address is supported but for code range it +must be bundle aligned, i.e., 16-byte aligned. +.LP +.sp +The library will provide the values for the debug registers as well as some information +about the actual ranges in the output parameters and more precisely in the \fBpfmlib_ita_output_rr_t\fR +structure for each range. The structure is defined as follows: +.TP +.B rr_nbr_used +Contains the number of debug registers used to cover the range. This is necessarily an even number +as debug registers always go in pairs. The value of this field is between 0 and 7. +.TP +.B rr_br +This table contains the list of debug registers necessary to cover the ranges. Each element is +of type \fBpfmlib_reg_t\fR. The \fBreg_num\fR field contains the debug register index while +\fBreg_value\fR contains the debug register value. Both the index and value must be copied +into the kernel specific argument to program the debug registers. The library never programs them. +.TP +.B rr_infos +Contains information about the ranges defined.
Because of alignment restrictions, the actual range +covered by the debug registers may be larger than the requested range. This table describes the differences +between the requested and actual ranges expressed as offsets: +.TP +.B rr_soff +Contains the start offset of the actual range described by the debug registers. If zero, it means +the library was able to match exactly the beginning of the range. Otherwise it represents the number +of bytes by which the actual range precedes the requested range. +.TP +.B rr_eoff +Contains the end offset of the actual range described by the debug registers. If zero, it means +the library was able to match exactly the end of the range. Otherwise it represents the number of +bytes by which the actual range exceeds the requested range. +.sp +.LP +.SH ERRORS +Refer to the description of the \fBpfm_dispatch_events()\fR function for errors when using the Itanium +specific input and output arguments. +.SH SEE ALSO +pfm_dispatch_events(3) and set of examples shipped with the library +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/libpfm_itanium2.3 b/src/libpfm-3.y/docs/man3/libpfm_itanium2.3 new file mode 100644 index 0000000..edb05ba --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_itanium2.3 @@ -0,0 +1,456 @@ +.TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" +.SH NAME +libpfm_itanium2 - support for Itanium2 specific PMU features +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.B #include <perfmon/pfmlib_itanium2.h> +.sp +.BI "int pfm_ita2_is_ear(unsigned int " i ");" +.BI "int pfm_ita2_is_dear(unsigned int " i ");" +.BI "int pfm_ita2_is_dear_tlb(unsigned int " i ");" +.BI "int pfm_ita2_is_dear_cache(unsigned int " i ");" +.BI "int pfm_ita2_is_dear_alat(unsigned int " i ");" +.BI "int pfm_ita2_is_iear(unsigned int " i ");" +.BI "int pfm_ita2_is_iear_tlb(unsigned int " i ");" +.BI "int pfm_ita2_is_iear_cache(unsigned int " i ");" +.BI "int pfm_ita2_is_btb(unsigned int " i ");" +.BI "int pfm_ita2_support_opcm(unsigned int " i ");" +.BI
"int pfm_ita2_support_iarr(unsigned int " i ");" +.BI "int pfm_ita2_support_darr(unsigned int " i ");" +.BI "int pfm_ita2_get_event_maxincr(unsigned int "i ", unsigned int *"maxincr ");" +.BI "int pfm_ita2_get_event_umask(unsigned int "i ", unsigned long *"umask ");" +.BI "int pfm_ita2_get_event_group(unsigned int "i ", int *"grp ");" +.BI "int pfm_ita2_get_event_set(unsigned int "i ", int *"set ");" +.BI "int pfm_ita2_get_ear_mode(unsigned int "i ", pfmlib_ita2_ear_mode_t *"mode ");" +.BI "int pfm_ita2_irange_is_fine(pfmlib_output_param_t *"outp ", pfmlib_ita2_output_param_t *"mod_out ");" +.sp +.SH DESCRIPTION +The libpfm library provides full support for all the Itanium 2 specific features +of the PMU. The interface is defined in \fBpfmlib_itanium2.h\fR. It consists +of a set of functions and structures which describe and allow access to the +Itanium 2 specific PMU features. +.sp +The Itanium 2 specific functions presented here are mostly used to retrieve +the characteristics of an event. Given a opaque event descriptor, obtained +by the \fBpfm_find_event()\fR or its derivative functions, they return a boolean value +indicating whether this event support this feature or is of a particular +kind. +.sp +The \fBpfm_ita2_is_ear()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a EAR event, i.e., an Event Address Register +type of events. Otherwise 0 is returned. For instance, \fBDATA_EAR_CACHE_LAT4\fR is an ear event, but +\fBCPU_CYCLES\fR is not. It can be a data or instruction EAR event. +.sp +The \fBpfm_ita2_is_dear()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an Data EAR event. Otherwise 0 is returned. +It can be a cache or TLB EAR event. +.sp +The \fBpfm_ita2_is_dear_tlb()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a Data EAR TLB event. Otherwise 0 is returned. 
+.sp +The \fBpfm_ita2_is_dear_cache()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a Data EAR cache event. Otherwise 0 is returned. +.sp +The \fBpfm_ita2_is_dear_alat()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a ALAT EAR cache event. Otherwise 0 is returned. +.sp +The \fBpfm_ita2_is_iear()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an instruction EAR event. Otherwise 0 is returned. +It can be a cache or TLB instruction EAR event. +.sp +The \fBpfm_ita2_is_iear_tlb()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an instruction EAR TLB event. Otherwise 0 is returned. +.sp +The \fBpfm_ita2_is_iear_cache()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an instruction EAR cache event. Otherwise 0 is returned. +.sp +The \fBpfm_ita2_support_opcm()\fR function returns 1 if the event +designated by \fBi\fR supports opcode matching, i.e., can this event be measured accurately +when opcode matching via PMC8/PMC9 is active. Not all events supports this feature. +.sp +The \fBpfm_ita2_support_iarr()\fR function returns 1 if the event +designated by \fBi\fR supports code address range restrictions, i.e., can this event be measured accurately when +code range restriction is active. Otherwise 0 is returned. Not all events supports this feature. +.sp +The \fBpfm_ita2_support_darr()\fR function returns 1 if the event +designated by \fBi\fR supports data address range restrictions, i.e., can this event be measured accurately when +data range restriction is active. Otherwise 0 is returned. Not all events supports this feature. +.sp +The \fBpfm_ita2_get_event_maxincr()\fR function returns in \fBmaxincr\fR the maximum number of +occurrences per cycle for the event designated by \fBi\fR. Certain Itanium 2 events can occur more than +once per cycle. When an event occurs more than once per cycle, the PMD counter will be incremented accordingly. 
+It is possible to restrict measurement to cycles in which an event occurs more than once. For instance, +\fBNOPS_RETIRED\fR can happen up to 6 times/cycle which means that the threshold can be adjusted between 0 and 5, +where 5 would mean that the PMD counter would be incremented by 1 only when the nop instruction is executed more +than 5 times/cycle. This function returns the maximum number of occurrences of the event per cycle, and +is the non-inclusive upper bound for the threshold to program in the PMC register. +.sp +The \fBpfm_ita2_get_event_umask()\fR function returns in \fBumask\fR the umask for the event +designated by \fBi\fR. +.sp +The \fBpfm_ita2_get_event_group()\fR function returns in \fBgrp\fR the group to which the +event designated by \fBi\fR belongs. The notion of group is used for L1 and L2 cache events only. +For all other events, a group is irrelevant and can be ignored. If the event is an L2 +cache event then the value of \fBgrp\fR will be \fBPFMLIB_ITA2_EVT_L2_CACHE_GRP\fR. Similarly, +if the event is an L1 cache event, the value of \fBgrp\fR will be \fBPFMLIB_ITA2_EVT_L1_CACHE_GRP\fR. +In all other cases, the value of \fBgrp\fR will be \fBPFMLIB_ITA2_EVT_NO_GRP\fR. +.sp +The \fBpfm_ita2_get_event_set()\fR function returns in \fBset\fR the set to which the +event designated by \fBi\fR belongs. A set is a subdivision of a group and is therefore +only relevant for L1 and L2 cache events. An event can only belong to one group and +one set. This partitioning of the cache events is due to some hardware limitations which +impose some restrictions on events. For a given group, events from different sets +cannot be measured at the same time. If the event does not belong to a group +then the value of \fBset\fR is \fBPFMLIB_ITA2_EVT_NO_SET\fR. 
+.sp +The \fBpfm_ita2_irange_is_fine()\fR function returns 1 if the configuration description passed +in \fBoutp\fR, the generic output parameters, and \fBmod_out\fR, the Itanium 2 specific output parameters, +uses code range restriction in fine mode. Otherwise the function returns 0. This function can only be +called after a call to the \fBpfm_dispatch_events()\fR function has returned successfully with the data +structures pointed to by \fBoutp\fR and \fBmod_out\fR as output parameters. +.sp +The \fBpfm_ita2_get_ear_mode()\fR function returns in \fBmode\fR the EAR mode of the +event designated by \fBi\fR. If the event is not an EAR event, then \fBPFMLIB_ERR_INVAL\fR +is returned and mode is not updated. Otherwise mode can have the following values: +.TP +.B PFMLIB_ITA2_EAR_TLB_MODE +The event is a TLB EAR. It can be either data or instruction TLB EAR. +.TP +.B PFMLIB_ITA2_EAR_CACHE_MODE +The event is a cache EAR. It can be either data or instruction cache EAR. +.TP +.B PFMLIB_ITA2_EAR_ALAT_MODE +The event is an ALAT EAR. It can only be a data EAR event. +.sp +.LP +When the Itanium 2 specific features are needed to support a measurement, their descriptions must be passed +as model-specific input arguments to the \fBpfm_dispatch_events()\fR function. The Itanium 2 specific +input arguments are described in the \fBpfmlib_ita2_input_param_t\fR structure and the output +parameters in \fBpfmlib_ita2_output_param_t\fR. 
They are defined as follows: +.sp +.nf +typedef enum { + PFMLIB_ITA2_ISM_BOTH=0, + PFMLIB_ITA2_ISM_IA32=1, + PFMLIB_ITA2_ISM_IA64=2 +} pfmlib_ita2_ism_t; + +typedef struct { + unsigned int flags; + unsigned int thres; + pfmlib_ita2_ism_t ism; +} pfmlib_ita2_counter_t; + +typedef struct { + unsigned char opcm_used; + unsigned long pmc_val; +} pfmlib_ita2_opcm_t; + +typedef struct { + unsigned char btb_used; + + unsigned char btb_ds; + unsigned char btb_tm; + unsigned char btb_ptm; + unsigned char btb_ppm; + unsigned char btb_brt; + unsigned int btb_plm; +} pfmlib_ita2_btb_t; + +typedef enum { + PFMLIB_ITA2_EAR_CACHE_MODE= 0, + PFMLIB_ITA2_EAR_TLB_MODE = 1, + PFMLIB_ITA2_EAR_ALAT_MODE = 2 +} pfmlib_ita2_ear_mode_t; + +typedef struct { + unsigned char ear_used; + + pfmlib_ita2_ear_mode_t ear_mode; + pfmlib_ita2_ism_t ear_ism; + unsigned int ear_plm; + unsigned long ear_umask; +} pfmlib_ita2_ear_t; + +typedef struct { + unsigned int rr_plm; + unsigned long rr_start; + unsigned long rr_end; +} pfmlib_ita2_input_rr_desc_t; + +typedef struct { + unsigned long rr_soff; + unsigned long rr_eoff; +} pfmlib_ita2_output_rr_desc_t; + + +typedef struct { + unsigned int rr_flags; + pfmlib_ita2_input_rr_desc_t rr_limits[4]; + unsigned char rr_used; +} pfmlib_ita2_input_rr_t; + +typedef struct { + unsigned int rr_nbr_used; + pfmlib_ita2_output_rr_desc_t rr_infos[4]; + pfmlib_reg_t rr_br[8]; +} pfmlib_ita2_output_rr_t; + +typedef struct { + pfmlib_ita2_counter_t pfp_ita2_counters[PMU_ITA2_NUM_COUNTERS]; + + unsigned long pfp_ita2_flags; + + pfmlib_ita2_opcm_t pfp_ita2_pmc8; + pfmlib_ita2_opcm_t pfp_ita2_pmc9; + pfmlib_ita2_ear_t pfp_ita2_iear; + pfmlib_ita2_ear_t pfp_ita2_dear; + pfmlib_ita2_btb_t pfp_ita2_btb; + pfmlib_ita2_input_rr_t pfp_ita2_drange; + pfmlib_ita2_input_rr_t pfp_ita2_irange; +} pfmlib_ita2_input_param_t; + +typedef struct { + pfmlib_ita2_output_rr_t pfp_ita2_drange; + pfmlib_ita2_output_rr_t pfp_ita2_irange; +} pfmlib_ita2_output_param_t; + +.fi +.sp +.SH PER-EVENT 
OPTIONS +.sp +The Itanium 2 processor provides two additional per-event features for +counters: thresholding and instruction set selection. They can be set using the +\fBpfp_ita2_counters\fR data structure for each event. The \fBism\fR +field can be initialized as follows: +.TP +.B PFMLIB_ITA2_ISM_BOTH +The event will be monitored during IA-64 and IA-32 execution +.TP +.B PFMLIB_ITA2_ISM_IA32 +The event will only be monitored during IA-32 execution +.TP +.B PFMLIB_ITA2_ISM_IA64 +The event will only be monitored during IA-64 execution +.sp +.LP +If \fBism\fR has a value of zero, it will default to PFMLIB_ITA2_ISM_BOTH. + +The \fBthres\fR indicates the threshold for the event. A threshold of \fBn\fR means +that the counter will be incremented by one only when the event occurs more than \fBn\fR +times per cycle. + +The \fBflags\fR field contains event-specific flags. The currently defined flags are: +.sp +.TP +PFMLIB_ITA2_FL_EVT_NO_QUALCHECK +When this flag is set it indicates that the library should ignore the qualifiers constraints +for this event. Qualifiers includes opcode matching, code and data range restrictions. When an +event is marked as not supporting a particular qualifier, it usually means that it is ignored, i.e., +the extra level of filtering is ignored. For instance, the CPU_CYCLES event does not support code +range restrictions and by default the library will refuse to program it if range restriction is also +requested. Using the flag will override the check and the call to the \fBpfm_dispatch_events()\fR function will succeed. +In this case, CPU_CYCLES will be measured for the entire program and not just for the code range requested. +For certain measurements this is perfectly acceptable as the range restriction will only be applied relevant +to events which support it. Make sure you understand which events do not support certain qualifiers before +using this flag. 
+.LP + +.SH OPCODE MATCHING +.sp +The \fBpfp_ita2_pmc8\fR and \fBpfp_ita2_pmc9\fR fields of type \fBpfmlib_ita2_opcm_t\fR contain +the description of what to do with the opcode matchers. Itanium 2 supports opcode matching via +PMC8 and PMC9. When this feature is used the \fBopcm_used\fR field must be set to 1, otherwise +it is ignored by the library. The \fBpmc_val\fR field simply contains the raw value to store in +PMC8 or PMC9. The library may adjust the value to enable/disable some options depending on the set +of features being used. The final value for PMC8 and PMC9 will be stored in the \fBpfp_pmcs\fR +table of the generic output parameters. + +.SH EVENT ADDRESS REGISTERS +.sp +The \fBpfp_ita2_iear\fR field of type \fBpfmlib_ita2_ear_t\fR describes what to do with instruction +Event Address Registers (I-EARs). Again if this feature is used the \fBear_used\fR field must be set to 1, +otherwise it will be ignored by the library. The \fBear_mode\fR field must be set to either +\fBPFMLIB_ITA2_EAR_TLB_MODE\fR or \fBPFMLIB_ITA2_EAR_CACHE_MODE\fR to indicate the type of EAR to program. +The umask to store into PMC10 must be in \fBear_umask\fR. The privilege level mask at which the I-EAR will be +monitored must be set in \fBear_plm\fR which can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, +\fBPFM_PLM2\fR, \fBPFM_PLM3\fR. If \fBear_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. +Finally the instruction set for which to monitor is in \fBear_ism\fR and can be any one of +\fBPFMLIB_ITA2_ISM_BOTH\fR, \fBPFMLIB_ITA2_ISM_IA32\fR, or \fBPFMLIB_ITA2_ISM_IA64\fR. +.sp +The \fBpfp_ita2_dear\fR field of type \fBpfmlib_ita2_ear_t\fR describes what to do with data Event Address +Registers (D-EARs). The description is identical to the I-EARs except that it applies to PMC11 and +that an \fBear_mode\fR of \fBPFMLIB_ITA2_EAR_ALAT_MODE\fR is possible. 
+ +In general, there are four different methods to program the EAR (data or instruction): +.TP +.B Method 1 +There is an EAR event in the list of events to monitor and \fBear_used\fR is cleared. In this +case the EAR will be programmed (PMC10 or PMC11) based on the information encoded in the event. +A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to count \fBDATA_EAR_EVENT\fR or \fBL1I_EAR_EVENTS\fR +depending on the type of EAR. +.TP +.B Method 2 +There is an EAR event in the list of events to monitor and \fBear_used\fR is set. In this +case the EAR will be programmed (PMC10 or PMC11) using the information in the \fBpfp_ita2_iear\fR or +\fBpfp_ita2_dear\fR structure because it contains more detailed information, such as privilege level and +instruction set. A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to count DATA_EAR_EVENT or +L1I_EAR_EVENTS depending on the type of EAR. +.TP +.B Method 3 +There is no EAR event in the list of events to monitor and \fBear_used\fR is cleared. In this case +no EAR is programmed. +.TP +.B Method 4 +There is no EAR event in the list of events to monitor and \fBear_used\fR is set. In this case +the EAR will be programmed (PMC10 or PMC11) using the information in the \fBpfp_ita2_iear\fR or +\fBpfp_ita2_dear\fR structure. This is the free running mode for the EAR. +.sp +.SH BRANCH TRACE BUFFER +The \fBpfp_ita2_btb\fR field of type \fBpfmlib_ita2_btb_t\fR is used to configure the Branch Trace Buffer (BTB). If the +\fBbtb_used\fR field is set, then the library will take the configuration into account, otherwise any BTB configuration will be ignored. +The various fields in this structure provide means to filter out the kinds of branches that get recorded in the BTB. +Each one represents an element of the branch architecture of the Itanium 2 processor. Refer to the Itanium 2 specific +documentation for more details on the branch architecture. 
The fields are as follows: +.TP +.B btb_ds +If the value of this field is 1, then detailed information about the branch prediction are recorded in place of information about the target +address. If the value is 0, then information about the target address of the branch is recorded instead. +.TP +.B btb_tm +If this field is 0, then no branch is captured. If this field is 1, then non taken branches are captured. If this field is 2, then +taken branches are captured. Finally if this field is 3 then all branches are captured. +.TP +.B btb_ptm +If this field is 0, then no branch is captured. If this field is 1, then branches with a mispredicted target address are captured. If this field +is 2, then branches with correctly predicted target address are captured. Finally if this field is 3 then all branches are captured regardless of +target address prediction. +.TP +.B btb_ppm +If this field is 0, then no branch is captured. If this field is 1, then branches with a mispredicted path (taken/non taken) are captured. If this field +is 2, then branches with correctly predicted path are captured. Finally if this field is 3 then all branches are captured regardless of +their path prediction. +.TP +.B btb_brt +If this field is 0, then all branches are captured. If this field is 1, then only IP-relative branches are captured. If this field +is 2, then only return branches are captured. Finally if this field is 3 then only non-return indirect branches are captured. +.TP +.B btb_plm +This is the privilege level mask at which the BTB captures branches. It can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, +\fBPFM_PLM3\fR. If \fBbtb_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. +.sp +There are 4 methods to program the BTB and they are as follows: +.sp +.TP +.B Method 1 +The \fBBRANCH_EVENT\fR is in the list of event to monitor and \fBbtb_used\fR is cleared. 
In this case, +the BTB will be configured (PMC12) to record ALL branches. A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to +count \fBBRANCH_EVENT\fR. +.TP +.B Method 2 +The \fBBRANCH_EVENT\fR is in the list of events to monitor and \fBbtb_used\fR is set. In this case, +the BTB will be configured (PMC12) using the information in the \fBpfp_ita2_btb\fR structure. A counting monitor +(PMC4/PMD4-PMC7/PMD7) will be programmed to count \fBBRANCH_EVENT\fR. +.TP +.B Method 3 +The \fBBRANCH_EVENT\fR is not in the list of events to monitor and \fBbtb_used\fR is set. In this case, +the BTB will be configured (PMC12) using the information in the \fBpfp_ita2_btb\fR structure. This is the +free running mode for the BTB. +.TP +.B Method 4 +The \fBBRANCH_EVENT\fR is not in the list of events to monitor and \fBbtb_used\fR is cleared. In this case, +the BTB is not programmed. + +.SH DATA AND CODE RANGE RESTRICTIONS +The \fBpfp_ita2_drange\fR and \fBpfp_ita2_irange\fR fields control the range restrictions for the data and +code respectively. The idea is that the application passes a set of ranges, each designated by a start +and end address. Upon return from the \fBpfm_dispatch_events()\fR function, the application gets back the set of +registers and their values that needs to be programmed via a kernel interface. + +Range restriction is implemented using the debug registers. There is a limited number of debug registers and they go in pair. With +8 data debug registers, a maximum of 4 distinct ranges can be specified. The same applies to code range restrictions. Moreover, there +are some severe constraints on the alignment and size of the ranges. Given that the size of a range is specified using a bitmask, there can +be situations where the actual range is larger than the requested range. For code ranges, the Itanium 2 processor can use what is called a fine mode, +where a range is designated using two pairs of code debug registers. 
In this mode, the bitmask is not used; the start and end +addresses are directly specified. Not all code ranges qualify for fine mode: the size of the range must be 4KB or less and the range +cannot cross a 4KB page boundary. The library will make a best effort in choosing the right mode for each range. For code ranges, +it will try the fine mode first and will default to using the bitmask mode otherwise. Fine mode applies to all code debug +registers or none, i.e., you cannot have a range using fine mode and another using the bitmask. The Itanium 2 processor somehow limits the use +of multiple pairs to accurately cover a code range. This can only be done for \fBIA64_INST_RETIRED\fR and even then, you need several +events to collect the counts. For all other events, only one pair can be used, which leads to more inaccuracy due to +approximation. Data ranges can use multiple debug register pairs to gain more accuracy. The library will never cover less than what is +requested. The algorithm will use more than one pair of debug registers +whenever possible to get a more precise range. Hence, up to 4 pairs can be used to describe a single range. + +If range restriction is to be used, the \fBrr_used\fR field must be set to one, otherwise the settings will be ignored. +The ranges are described by the \fBpfmlib_ita2_input_rr_t\fR structure. Up to 4 ranges can be defined. Each +range is described by an entry in \fBrr_limits\fR. Some flags for all ranges can be defined in \fBrr_flags\fR. +Currently defined flags are: +.sp +.TP +.B PFMLIB_ITA2_RR_INV +Invert the code ranges. The qualifying events will be measured when executing outside the specified +ranges. +.TP +.B PFMLIB_ITA2_RR_NO_FINE_MODE +Force non fine mode for all code ranges (mostly for debug) +.sp +.LP +The \fBpfmlib_ita2_input_rr_desc_t\fR structure is defined as follows: + +.TP +.B rr_plm +The privilege level at which the range is active. 
It can be any combination of +\fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, \fBPFM_PLM3\fR. If \fBrr_plm\fR is 0 then the +default privilege level mask in \fBpfp_dfl_plm\fR is used. The privilege level is only relevant +for code ranges; data ranges ignore the setting. +.TP +.B rr_start +This is the start address of the range. Any address is supported but for code range it +must be bundle aligned, i.e., 16-byte aligned. +.TP +.B rr_end +This is the end address of the range. Any address is supported but for code range it +must be bundle aligned, i.e., 16-byte aligned. +.sp +.LP + +The library will provide the values for the debug registers as well as some information +about the actual ranges in the output parameters and more precisely in the \fBpfmlib_ita2_output_rr_t\fR +structure for each range. The structure is defined as follows: +.TP +.B rr_nbr_used +Contains the number of debug registers used to cover the range. This is necessarily an even number +as debug registers always go in pairs. The value of this field is between 0 and 7. +.TP +.B rr_br +This table contains the list of debug registers necessary to cover the ranges. Each element is +of type \fBpfmlib_reg_t\fR. The \fBreg_num\fR field contains the debug register index while +\fBreg_value\fR contains the debug register value. Both the index and value must be copied +into the kernel specific argument to program the debug registers. The library never programs them. +.TP +.B rr_infos +Contains information about the ranges defined. Because of alignment restrictions, the actual range +covered by the debug registers may be larger than the requested range. This table describes the differences +between the requested and actual ranges expressed as offsets: +.TP +.B rr_soff +Contains the start offset of the actual range described by the debug registers. If zero, it means +the library was able to match exactly the beginning of the range. 
Otherwise it represents the number +of bytes by which the actual range precedes the requested range. +.TP +.B rr_eoff +Contains the end offset of the actual range described by the debug registers. If zero, it means +the library was able to match exactly the end of the range. Otherwise it represents the number of +bytes by which the actual range exceeds the requested range. +.sp +.LP +.SH ERRORS +Refer to the description of the \fBpfm_dispatch_events()\fR function for errors when using the Itanium 2 +specific input and output arguments. +.SH SEE ALSO +pfm_dispatch_events(3) and the set of examples shipped with the library +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/libpfm_montecito.3 b/src/libpfm-3.y/docs/man3/libpfm_montecito.3 new file mode 100644 index 0000000..d376ed7 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_montecito.3 @@ -0,0 +1,496 @@ +.TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" +.SH NAME +libpfm_montecito - support for Itanium 2 9000 (Montecito) processor specific PMU features +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.B #include <perfmon/pfmlib_montecito.h> +.sp +.BI "int pfm_mont_is_ear(unsigned int " i ");" +.BI "int pfm_mont_is_dear(unsigned int " i ");" +.BI "int pfm_mont_is_dear_tlb(unsigned int " i ");" +.BI "int pfm_mont_is_dear_cache(unsigned int " i ");" +.BI "int pfm_mont_is_dear_alat(unsigned int " i ");" +.BI "int pfm_mont_is_iear(unsigned int " i ");" +.BI "int pfm_mont_is_iear_tlb(unsigned int " i ");" +.BI "int pfm_mont_is_iear_cache(unsigned int " i ");" +.BI "int pfm_mont_is_etb(unsigned int " i ");" +.BI "int pfm_mont_support_opcm(unsigned int " i ");" +.BI "int pfm_mont_support_iarr(unsigned int " i ");" +.BI "int pfm_mont_support_darr(unsigned int " i ");" +.BI "int pfm_mont_get_event_maxincr(unsigned int "i ", unsigned int *"maxincr ");" +.BI "int pfm_mont_get_event_umask(unsigned int "i ", unsigned long *"umask ");" +.BI "int pfm_mont_get_event_group(unsigned int "i ", int *"grp ");" +.BI "int 
pfm_mont_get_event_set(unsigned int "i ", int *"set ");" +.BI "int pfm_mont_get_event_type(unsigned int "i ", int *"type ");" +.BI "int pfm_mont_get_ear_mode(unsigned int "i ", pfmlib_mont_ear_mode_t *"mode ");" +.BI "int pfm_mont_irange_is_fine(pfmlib_output_param_t *"outp ", pfmlib_mont_output_param_t *"mod_out ");" +.sp +.SH DESCRIPTION +The libpfm library provides full support for all the Itanium 2 9000 (Montecito) +processor specific features of the PMU. The interface is defined in \fBpfmlib_montecito.h\fR. +It consists of a set of functions and structures which describe and allow access to the +model specific PMU features. +.sp +The Itanium 2 9000 (Montecito) processor specific functions presented here are mostly used to retrieve +the characteristics of an event. Given an opaque event descriptor, obtained +by the \fBpfm_find_event()\fR or its derivative functions, they return a boolean value +indicating whether this event supports this feature or is of a particular +kind. +.sp +The \fBpfm_mont_is_ear()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an EAR event, i.e., an Event Address Register +type of event. Otherwise 0 is returned. For instance, \fBDATA_EAR_CACHE_LAT4\fR is an EAR event, but +\fBCPU_OP_CYCLES_ALL\fR is not. It can be a data or instruction EAR event. +.sp +The \fBpfm_mont_is_dear()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a Data EAR event. Otherwise 0 is returned. +It can be a cache or TLB EAR event. +.sp +The \fBpfm_mont_is_dear_tlb()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a Data EAR TLB event. Otherwise 0 is returned. +.sp +The \fBpfm_mont_is_dear_cache()\fR function returns 1 if the event +designated by \fBi\fR corresponds to a Data EAR cache event. Otherwise 0 is returned. +.sp +The \fBpfm_mont_is_dear_alat()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an ALAT EAR cache event. Otherwise 0 is returned. 
+.sp +The \fBpfm_mont_is_iear()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an instruction EAR event. Otherwise 0 is returned. +It can be a cache or TLB instruction EAR event. +.sp +The \fBpfm_mont_is_iear_tlb()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an instruction EAR TLB event. Otherwise 0 is returned. +.sp +The \fBpfm_mont_is_iear_cache()\fR function returns 1 if the event +designated by \fBi\fR corresponds to an instruction EAR cache event. Otherwise 0 is returned. +.sp +The \fBpfm_mont_support_opcm()\fR function returns 1 if the event +designated by \fBi\fR supports opcode matching, i.e., can this event be measured accurately +when opcode matching via PMC32/PMC34 is active. Not all events supports this feature. +.sp +The \fBpfm_mont_support_iarr()\fR function returns 1 if the event +designated by \fBi\fR supports code address range restrictions, i.e., can this event be measured accurately when +code range restriction is active. Otherwise 0 is returned. Not all events supports this feature. +.sp +The \fBpfm_mont_support_darr()\fR function returns 1 if the event +designated by \fBi\fR supports data address range restrictions, i.e., can this event be measured accurately when +data range restriction is active. Otherwise 0 is returned. Not all events supports this feature. +.sp +The \fBpfm_mont_get_event_maxincr()\fR function returns in \fBmaxincr\fR the maximum number of +occurrences per cycle for the event designated by \fBi\fR. Certain Itanium 2 9000 (Montecito) events +can occur more than once per cycle. When an event occurs more than once per cycle, the PMD counter will be incremented accordingly. +It is possible to restrict measurement when event occur more than once per cycle. 
For instance, +\fBNOPS_RETIRED\fR can happen up to 6 times/cycle which means that the threshold can be adjusted between 0 and 5, +where 5 would mean that the PMD counter would be incremented by 1 only when the nop instruction is executed more +than 5 times/cycle. This function returns the maximum number of occurrences of the event per cycle, and +is the non-inclusive upper bound for the threshold to program in the PMC register. +.sp +The \fBpfm_mont_get_event_umask()\fR function returns in \fBumask\fR the umask for the event +designated by \fBi\fR. +.sp +The \fBpfm_mont_get_event_group()\fR function returns in \fBgrp\fR the group to which the +event designated by \fBi\fR belongs. The notion of group is used for L1D and L2D cache events only. +For all other events, a group is irrelevant and can be ignored. If the event is +an L2D cache event then the value of \fBgrp\fR will be \fBPFMLIB_MONT_EVT_L2D_CACHE_GRP\fR. Similarly, +if the event is an L1D cache event, the value of \fBgrp\fR will be \fBPFMLIB_MONT_EVT_L1D_CACHE_GRP\fR. +In all other cases, the value of \fBgrp\fR will be \fBPFMLIB_MONT_EVT_NO_GRP\fR. +.sp +The \fBpfm_mont_get_event_set()\fR function returns in \fBset\fR the set to which the +event designated by \fBi\fR belongs. A set is a subdivision of a group and is therefore +only relevant for L1D and L2D cache events. An event can only belong to one group and +one set. This partitioning of the cache events is due to some hardware limitations which +impose some restrictions on events. For a given group, events from different sets +cannot be measured at the same time. If the event does not belong to a group +then the value of \fBset\fR is \fBPFMLIB_MONT_EVT_NO_SET\fR. +.sp +The \fBpfm_mont_get_event_type()\fR function returns in \fBtype\fR the type of +the event designated by \fBi\fR. 
The Itanium 2 9000 (Montecito) events +can have any one of the following types: +.sp +.TP +.B PFMLIB_MONT_EVT_ACTIVE +The event can only occur when the processor thread that generated it is +currently active +.TP +.B PFMLIB_MONT_EVT_FLOATING +The event can be generated when the processor thread is inactive +.TP +.B PFMLIB_MONT_EVT_CAUSAL +The event does not belong to a processor thread +.TP +.B PFMLIB_MONT_EVT_SELF_FLOATING +Hybrid event. It is floating if measured with .me. It is causal otherwise. +.LP +.sp +The \fBpfm_mont_irange_is_fine()\fR function returns 1 if the configuration description passed +in \fBoutp\fR, the generic output parameters, and \fBmod_out\fR, the Itanium 2 +9000 (Montecito) specific output parameters, +uses code range restriction in fine mode. Otherwise the function returns 0. This function can only be +called after a call to the \fBpfm_dispatch_events()\fR function has returned successfully with the data +structures pointed to by \fBoutp\fR and \fBmod_out\fR as output parameters. +.sp +The \fBpfm_mont_get_ear_mode()\fR function returns in \fBmode\fR the EAR mode of the +event designated by \fBi\fR. If the event is not an EAR event, then \fBPFMLIB_ERR_INVAL\fR +is returned and mode is not updated. Otherwise mode can have the following values: +.TP +.B PFMLIB_MONT_EAR_TLB_MODE +The event is a TLB EAR. It can be either data or instruction TLB EAR. +.TP +.B PFMLIB_MONT_EAR_CACHE_MODE +The event is a cache EAR. It can be either data or instruction cache EAR. +.TP +.B PFMLIB_MONT_EAR_ALAT_MODE +The event is an ALAT EAR. It can only be a data EAR event. +.sp +.LP +When the Itanium 2 9000 (Montecito) specific features are needed to support a measurement, their +descriptions must be passed as model-specific input arguments to the \fBpfm_dispatch_events()\fR function. 
+The Itanium 2 9000 (Montecito) specific input arguments are described in the \fBpfmlib_mont_input_param_t\fR +structure and the output parameters in \fBpfmlib_mont_output_param_t\fR. They are defined as follows: +.sp +.nf +typedef struct { + unsigned int flags; + unsigned int thres; +} pfmlib_mont_counter_t; + +typedef struct { + unsigned char opcm_used; + unsigned char opcm_m; + unsigned char opcm_i; + unsigned char opcm_f; + unsigned char opcm_b; + unsigned long opcm_match; + unsigned long opcm_mask; +} pfmlib_mont_opcm_t; + +typedef struct { + unsigned char etb_used; + unsigned int etb_plm; + unsigned char etb_ds; + unsigned char etb_tm; + unsigned char etb_ptm; + unsigned char etb_ppm; + unsigned char etb_brt; +} pfmlib_mont_etb_t; + +typedef struct { + unsigned char ipear_used; + unsigned int ipear_plm; + unsigned short ipear_delay; +} pfmlib_mont_ipear_t; + +typedef enum { + PFMLIB_MONT_EAR_CACHE_MODE= 0, + PFMLIB_MONT_EAR_TLB_MODE = 1, + PFMLIB_MONT_EAR_ALAT_MODE = 2 +} pfmlib_mont_ear_mode_t; + +typedef struct { + unsigned char ear_used; + pfmlib_mont_ear_mode_t ear_mode; + unsigned int ear_plm; + unsigned long ear_umask; +} pfmlib_mont_ear_t; + +typedef struct { + unsigned int rr_plm; + unsigned long rr_start; + unsigned long rr_end; +} pfmlib_mont_input_rr_desc_t; + +typedef struct { + unsigned long rr_soff; + unsigned long rr_eoff; +} pfmlib_mont_output_rr_desc_t; + + +typedef struct { + unsigned int rr_flags; + pfmlib_mont_input_rr_desc_t rr_limits[4]; + unsigned char rr_used; +} pfmlib_mont_input_rr_t; + +typedef struct { + unsigned int rr_nbr_used; + pfmlib_mont_output_rr_desc_t rr_infos[4]; + pfmlib_reg_t rr_br[8]; +} pfmlib_mont_output_rr_t; + +typedef struct { + pfmlib_mont_counter_t pfp_mont_counters[PMU_MONT_NUM_COUNTERS]; + + unsigned long pfp_mont_flags; + + pfmlib_mont_opcm_t pfp_mont_opcm1; + pfmlib_mont_opcm_t pfp_mont_opcm2; + pfmlib_mont_ear_t pfp_mont_iear; + pfmlib_mont_ear_t pfp_mont_dear; + pfmlib_mont_ipear_t pfp_mont_ipear; + 
pfmlib_mont_etb_t pfp_mont_etb; + pfmlib_mont_input_rr_t pfp_mont_drange; + pfmlib_mont_input_rr_t pfp_mont_irange; +} pfmlib_mont_input_param_t; + +typedef struct { + pfmlib_mont_output_rr_t pfp_mont_drange; + pfmlib_mont_output_rr_t pfp_mont_irange; +} pfmlib_mont_output_param_t; + +.fi +.sp +.SH PER-EVENT OPTIONS +.sp +The Itanium 2 9000 (Montecito) processor provides one per-event feature for +counters: thresholding. It can be set using the +\fBpfp_mont_counters\fR data structure for each event. +.sp +The \fBthres\fR indicates the threshold for the event. A threshold of \fBn\fR means +that the counter will be incremented by one only when the event occurs more than \fBn\fR +times per cycle. + +The \fBflags\fR field contains event-specific flags. The currently defined flags are: +.sp +.TP +PFMLIB_MONT_FL_EVT_NO_QUALCHECK +When this flag is set it indicates that the library should ignore the qualifiers constraints +for this event. Qualifiers includes opcode matching, code and data range restrictions. When an +event is marked as not supporting a particular qualifier, it usually means that it is ignored, i.e., +the extra level of filtering is ignored. For instance, the FE_BUBBLE_ALL event does not support code +range restrictions and by default the library will refuse to program it if range restriction is also +requested. Using the flag will override the check and the call to the \fBpfm_dispatch_events()\fR function will succeed. +In this case, FE_BUBBLE_ALL will be measured for the entire program and not just for the code range requested. +For certain measurements this is perfectly acceptable as the range restriction will only be applied relevant +to events which support it. Make sure you understand which events do not support certain qualifiers before +using this flag. +.LP + +.SH OPCODE MATCHING +.sp +The \fBpfp_mont_opcm1\fR and \fBpfp_mont_opcm2\fR fields of type \fBpfmlib_mont_opcm_t\fR contain +the description of what to do with the opcode matchers. 
The Itanium 2 9000 (Montecito) processor supports +opcode matching via PMC32 and PMC34. When this feature is used the \fBopcm_used\fR field must be set to 1, +otherwise it is ignored by the library. The Itanium 2 9000 (Montecito) processor implements two full +41-bit opcode matchers. As such, it is possible to match all instructions individually. +It is possible to match a single instruction or an instruction pattern based on +opcode or slot type. The slots are specified in: +.TP +.B opcm_m +Match when the instruction is in an M-slot (memory) +.TP +.B opcm_i +Match when the instruction is in an I-slot (ALU) +.TP +.B opcm_f +Match when the instruction is in an F-slot (FPU) +.TP +.B opcm_b +Match when the instruction is in a B-slot (Branch) +.sp +.LP +Any combination of slot settings is supported. To match all slot types, simply +set all fields to 1. +.sp +The 41-bit opcode is specified in \fBopcm_match\fR and a 41-bit mask is passed in +\fBopcm_mask\fR. When a bit is set in \fBopcm_mask\fR the corresponding bit is +ignored in \fBopcm_match\fR. + +.SH EVENT ADDRESS REGISTERS +.sp +The \fBpfp_mont_iear\fR field of type \fBpfmlib_mont_ear_t\fR describes what to do with instruction +Event Address Registers (I-EARs). Again if this feature is used the \fBear_used\fR must be set to 1, +otherwise it will be ignored by the library. The \fBear_mode\fR must be set to either +\fBPFMLIB_MONT_EAR_TLB_MODE\fR or \fBPFMLIB_MONT_EAR_CACHE_MODE\fR to indicate the type of EAR to program. +The umask to store into PMC10 must be in \fBear_umask\fR. The privilege level mask at which the I-EAR will be +monitored must be set in \fBear_plm\fR which can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, +\fBPFM_PLM2\fR, \fBPFM_PLM3\fR. If \fBear_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. +.sp +The \fBpfp_mont_dear\fR field of type \fBpfmlib_mont_ear_t\fR describes what to do with data Event Address +Registers (D-EARs).
The description is identical to the I-EARs except that it applies to PMC11 and +that a \fBear_mode\fR of \fBPFMLIB_MONT_EAR_ALAT_MODE\fR is possible. + +In general, there are four different methods to program the EAR (data or instruction): +.TP +.B Method 1 +There is an EAR event in the list of events to monitor and \fBear_used\fR is cleared. In this +case the EAR will be programmed (PMC10 or PMC11) based on the information encoded in the event. +A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to count \fBDATA_EAR_EVENT\fR or \fBL1I_EAR_EVENTS\fR +depending on the type of EAR. +.TP +.B Method 2 +There is an EAR event in the list of events to monitor and \fBear_used\fR is set. In this +case the EAR will be programmed (PMC10 or PMC11) using the information in the \fBpfp_mont_iear\fR or +\fBpfp_mont_dear\fR structure because it contains more detailed information, such as privilege level and +instruction set. A counting monitor (PMC4/PMD4-PMC7/PMD7) will be programmed to count DATA_EAR_EVENT or +L1I_EAR_EVENTS depending on the type of EAR. +.TP +.B Method 3 +There is no EAR event in the list of events to monitor and \fBear_used\fR is cleared. In this case +no EAR is programmed. +.TP +.B Method 4 +There is no EAR event in the list of events to monitor and \fBear_used\fR is set. In this case +the EAR will be programmed (PMC10 or PMC11) using the information in the \fBpfp_mont_iear\fR or +\fBpfp_mont_dear\fR structure. This is the free running mode for the EAR. +.sp +.SH EXECUTION TRACE BUFFER +The \fBpfp_mont_etb\fR field of type \fBpfmlib_mont_etb_t\fR is used to configure the Execution Trace Buffer (ETB). If the +\fBetb_used\fR is set, then the library will take the configuration into account, otherwise any ETB configuration will be ignored. +The various fields in this structure provide means to filter out the kind of changes in the control flow (branches, traps, rfi, ...) +that get recorded in the ETB.
Each one represents an element of the branch architecture of the Itanium 2 9000 (Montecito) processor. +Refer to the Itanium 2 9000 (Montecito) specific documentation for more details on the branch architecture. The fields are as follows: +.TP +.B etb_tm +If this field is 0, then no branch is captured. If this field is 1, then non taken branches are captured. If this field is 2, then +taken branches are captured. Finally if this field is 3 then all branches are captured. +.TP +.B etb_ptm +If this field is 0, then no branch is captured. If this field is 1, then branches with a mispredicted target address are captured. If this field +is 2, then branches with correctly predicted target address are captured. Finally if this field is 3 then all branches are captured regardless of +target address prediction. +.TP +.B etb_ppm +If this field is 0, then no branch is captured. If this field is 1, then branches with a mispredicted path (taken/non taken) are captured. If this field +is 2, then branches with correctly predicted path are captured. Finally if this field is 3 then all branches are captured regardless of +their path prediction. +.TP +.B etb_brt +If this field is 0, then no branch is captured. If this field is 1, then only IP-relative branches are captured. If this field +is 2, then only return branches are captured. Finally if this field is 3 then only non-return indirect branches are captured. +.TP +.B etb_plm +This is the privilege level mask at which the ETB captures branches. It can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, +\fBPFM_PLM3\fR. If \fBetb_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. +.sp +There are 4 methods to program the ETB and they are as follows: +.sp +.TP +.B Method 1 +The \fBETB_EVENT\fR is in the list of event to monitor and \fBetb_used\fR is cleared. In this case, +the ETB will be configured (PMC39) to record ALL branches. 
A counting monitor will be programmed to +count \fBETB_EVENT\fR. +.TP +.B Method 2 +The \fBETB_EVENT\fR is in the list of events to monitor and \fBetb_used\fR is set. In this case, +the ETB will be configured (PMC39) using the information in the \fBpfp_mont_etb\fR structure. A counting monitor +(PMC4/PMD4-PMC7/PMD7) will be programmed to count \fBETB_EVENT\fR. +.TP +.B Method 3 +The \fBETB_EVENT\fR is not in the list of events to monitor and \fBetb_used\fR is set. In this case, +the ETB will be configured (PMC39) using the information in the \fBpfp_mont_etb\fR structure. This is the +free running mode for the ETB. +.TP +.B Method 4 +The \fBETB_EVENT\fR is not in the list of events to monitor and \fBetb_used\fR is cleared. In this case, +the ETB is not programmed. + +.SH DATA AND CODE RANGE RESTRICTIONS +The \fBpfp_mont_drange\fR and \fBpfp_mont_irange\fR fields control the range restrictions for the data and +code respectively. The idea is that the application passes a set of ranges, each designated by a start +and end address. Upon return from the \fBpfm_dispatch_events()\fR function, the application gets back the set of +registers and their values that need to be programmed via a kernel interface. + +Range restriction is implemented using the debug registers. There is a limited number of debug registers and they go in pairs. With +8 data debug registers, a maximum of 4 distinct ranges can be specified. The same applies to code range restrictions. Moreover, there +are some severe constraints on the alignment and size of the ranges. Given that the size of a range is specified using a bitmask, there can +be situations where the actual range is larger than the requested range. For code ranges, the Itanium 2 9000 (Montecito) processor can use what is called a fine mode, +where a range is designated using two pairs of code debug registers. In this mode, the bitmask is not used, the start and end +addresses are directly specified.
Not all code ranges qualify for fine mode, the size of the range must be 64KB or less and the range +cannot cross a 64KB page boundary. The library will make a best effort in choosing the right mode for each range. For code ranges, +it will try the fine mode first and will default to using the bitmask mode otherwise. Fine mode applies to all code debug +registers or none, i.e., you cannot have a range using fine mode and another using the bitmask. The Itanium 2 9000 (Montecito) processor +somewhat limits the use of multiple pairs to accurately cover a code range. This can only be done for \fBIA64_INST_RETIRED\fR and even then, you need several +events to collect the counts. For all other events, only one pair can be used, which leads to more inaccuracy due to +approximation. Data ranges can use multiple debug register pairs to gain more accuracy. The library will never cover less than what is requested. +The algorithm will use more than one pair of debug registers whenever possible to get a more precise range. Hence, up to 4 pairs can be used to describe a single range. + +If range restriction is to be used, the \fBrr_used\fR field must be set to one, otherwise settings will be ignored. +The ranges are described by the \fBpfmlib_mont_input_rr_t\fR structure. Up to 4 ranges can be defined. Each +range is described by an entry in \fBrr_limits\fR. Some flags for all ranges can be defined in \fBrr_flags\fR. +Currently defined flags are: +.sp +.TP +.B PFMLIB_MONT_RR_INV +Invert the code ranges. The qualifying events will be measured when executing outside the specified +ranges. +.TP +.B PFMLIB_MONT_RR_NO_FINE_MODE +Force non fine mode for all code ranges (mostly for debug) +.sp +.LP +The \fBpfmlib_mont_input_rr_desc_t\fR structure is defined as follows: + +.TP +.B rr_plm +The privilege level at which the range is active. It can be any combination of +\fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, \fBPFM_PLM3\fR.
If \fBrr_plm\fR is 0 then the +default privilege level mask in \fBpfp_dfl_plm\fR is used. The privilege level is only relevant +for code ranges, data ranges ignore the setting. +.TP +.B rr_start +This is the start address of the range. Any address is supported but for code range it +must be bundle aligned, i.e., 16-byte aligned. +.TP +.B rr_end +This is the end address of the range. Any address is supported but for code range it +must be bundle aligned, i.e., 16-byte aligned. +.sp +.LP + +The library will provide the values for the debug registers as well as some information +about the actual ranges in the output parameters and more precisely in the \fBpfmlib_mont_output_rr_t\fR +structure for each range. The structure is defined as follows: +.TP +.B rr_nbr_used +Contains the number of debug registers used to cover the range. This is necessarily an even number +as debug registers always go in pairs. The value of this field is between 0 and 8. +.TP +.B rr_br +This table contains the list of debug registers necessary to cover the ranges. Each element is +of type \fBpfmlib_reg_t\fR. The \fBreg_num\fR field contains the debug register index while +\fBreg_value\fR contains the debug register value. Both the index and value must be copied +into the kernel specific argument to program the debug registers. The library never programs them. +.TP +.B rr_infos +Contains information about the ranges defined. Because of alignment restrictions, the actual range +covered by the debug registers may be larger than the requested range. This table describes the differences +between the requested and actual ranges expressed as offsets: +.TP +.B rr_soff +Contains the start offset of the actual range described by the debug registers. If zero, it means +the library was able to match exactly the beginning of the range. Otherwise it represents the number +of bytes by which the actual range precedes the requested range.
+.TP +.B rr_eoff +Contains the end offset of the actual range described by the debug registers. If zero, it means +the library was able to match exactly the end of the range. Otherwise it represents the number of +bytes by which the actual range exceeds the requested range. +.sp +.LP +.SH IP EVENT CAPTURE (IP-EAR) +The Execution Trace Buffer (ETB) can be configured to record the addresses of +consecutive retiring instructions. In this case the ETB contains IP addresses +and not branch-related information. This feature cannot be used in conjunction +with regular branch captures as described above. To activate this feature the +\fBipear_used\fR field of the \fBpfmlib_mont_ipear_t\fR must be set to 1. +The other fields in this structure are used as follows: +.sp +.TP +.B ipear_plm +The privilege level of the instructions to capture. It can be any combination of \fBPFM_PLM0\fR, \fBPFM_PLM1\fR, \fBPFM_PLM2\fR, +\fBPFM_PLM3\fR. If \fBipear_plm\fR is 0 then the default privilege level mask in \fBpfp_dfl_plm\fR is used. +.TP +.B ipear_delay +The number of cycles by which to delay the freeze of the ETB after a PMU interrupt (which freezes the rest of the counters). +.LP +.sp +.SH ERRORS +Refer to the description of the \fBpfm_dispatch_events()\fR function for errors when using +the Itanium 2 9000 (Montecito) specific input and output arguments. +.SH SEE ALSO +pfm_dispatch_events(3) and set of examples shipped with the library +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/libpfm_nehalem.3 b/src/libpfm-3.y/docs/man3/libpfm_nehalem.3 new file mode 100644 index 0000000..59a3ce6 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_nehalem.3 @@ -0,0 +1,146 @@ +.TH LIBPFM 3 "January, 2009" "" "Linux Programmer's Manual" +.SH NAME +libpfm_nehalem - support for Intel Nehalem processor family +.SH SYNOPSIS +.nf +.B #include +.B #include +.sp +.SH DESCRIPTION +The libpfm library provides full support for the Intel Nehalem processor family, such as +Intel Core i7.
The interface is defined in \fBpfmlib_intel_nhm.h\fR. It consists of a set +of functions and structures describing the Intel Nehalem processor specific PMU features. +The Intel Nehalem processor is a quad core, dual thread processor. It includes two types +of PMU: core and uncore. The latter measures events at the socket level and is therefore +disconnected from any of the four cores. The core PMU implements Intel architectural +perfmon version 3 with four generic counters and three fixed counters. The uncore has +eight generic counters and one fixed counter. Each Intel Nehalem core also implement +a 16-deep branch trace buffer, called Last Branch Record (LBR), which can be used in +combination with the core PMU. Intel Nehalem implements a newer version of the +Precise Event-Based Sampling (PEBS) mechanism which has the ability to capture +where cache misses occur. + +.sp +When Intel Nehalem processor specific features are needed to support a measurement, their +descriptions must be passed as model-specific input arguments to the +\fBpfm_dispatch_events()\fR function. The Intel Nehalem processors specific input +arguments are described in the \fBpfmlib_nhm_input_param_t\fR structure. No +output parameters are currently defined. The input parameters are defined as follows: +.sp +.nf +typedef struct { + unsigned long cnt_mask; + unsigned int flags; +} pfmlib_nhm_counter_t; + +typedef struct { + unsigned int lbr_used; + unsigned int lbr_plm; + unsigned int lbr_filter; +} pfmlib_nhm_lbr_t; + +typedef struct { + unsigned int pebs_used; + unsigned int ld_lat_thres; +} pfmlib_nhm_pebs_t; + +typedef struct { + pfmlib_nhm_counter_t pfp_nhm_counters[PMU_NHM_NUM_COUNTERS]; + pfmlib_nhm_pebs_t pfp_nhm_pebs; + pfmlib_nhm_lbr_t pfm_nhm_lbr; + uint64_t reserved[4]; +} pfmlib_nhm_input_param_t; +.fi +.sp +.sp +The Intel Nehalem processor provides a few additional per-event features for +counters: thresholding, inversion, edge detection, monitoring of both +threads, occupancy. 
They can be set using the \fBpfp_nhm_counters\fR data +structure for each event. The \fBflags\fR field can be initialized with +the following values, depending on the event: +.TP +.B PFMLIB_NHM_SEL_INV +Inverse the results of the \fBcnt_mask\fR comparison when set. This +flag is supported for core and uncore PMU events. +.TP +.B PFMLIB_NHM_SEL_EDGE +Enables edge detection of events. This +flag is supported for core and uncore PMU events. +.TP +.B PFMLIB_NHM_SEL_ANYTHR +Enable measuring the event in any of the two processor threads assuming hyper-threading +is enabled. By default, only the current thread is measured. This flag is restricted +to core PMU events. +.TP +.B PFMLIB_NHM_SEL_OCC_RST +When set, the queue occupancy counter associated with the event is cleared. This flag +is only available to uncore PMU events. +.LP +The \fBcnt_mask\fR field is used to set the event threshold. +The value of the counter is incremented for each cycle in which the +number of occurrences of the event is greater than or equal to the value of +the field. Thus, the event is modified to actually measure the number +of qualifying cycles. When zero all occurrences are counted (this is the default). +This field is supported for core and uncore PMU events. +.sp +.SH Support for Precise-Event Based Sampling (PEBS) +The library can be used to set up the PMC registers associated with PEBS. In this case, +the \fBpfp_nhm_pebs\fR structure (of type \fBpfmlib_nhm_pebs_t\fR) must be used and the \fBpebs_used\fR field must +be set to 1. +.sp +To enable the PEBS load latency filtering capability, it is necessary to program the +\fBMEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD\fR event into one generic counter. The +latency threshold must be passed to the library in the \fBld_lat_thres\fR field. +It is expressed in core cycles and \fBmust\fR be greater than 3. Note that \fBpebs_used\fR +must be set as well. + +.SH Support for Last Branch Record (LBR) +The library can be used to set up LBR registers.
On Intel Nehalem processors, the +LBR is 16-entry deep and it is possible to filter branches, based on privilege level +or type. To configure the LBR, the \fBpfmlib_nhm_lbr_t\fR structure must be used. +.sp +Like core PMU counters, LBR only distinguishes two privilege levels, 0 and the rest (1,2,3). +When running Linux natively, the kernel is at privilege level 0, applications at level 3. +It is possible to specify the privilege level of LBR using the \fBlbr_plm\fR field. Any attempt +to pass \fBPFM_PLM1\fR or \fBPFM_PLM2\fR will be rejected. If \fBlbr_plm\fR is 0, then the global +value \fBpfp_dfl_plm\fR in \fBpfmlib_input_param_t\fR is used. +.sp +By default, LBR captures all branches. It is possible to filter out branches by passing +a set of flags in \fBlbr_filter\fR. The flags are as follows: +.TP +.B PFMLIB_NHM_LBR_JCC +When set, LBR does not capture conditional branches. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_REL_CALL +When set, LBR does not capture near calls. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_IND_CALL +When set, LBR does not capture indirect calls. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_RET +When set, LBR does not capture return branches. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_IND_JMP +When set, LBR does not capture indirect branches. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_REL_JMP +When set, LBR does not capture relative branches. Default: off. +.TP +.B PFM_NHM_LBR_FAR_BRANCH +When set, LBR does not capture far branches. Default: off. + +.SH Support for uncore PMU + +By nature, the uncore PMU does not distinguish privilege levels, therefore +it captures events at all privilege levels. To avoid any misinterpretation, +the library enforces that uncore events be measured with both \fBPFM_PLM0\fR +and \fBPFM_PLM3\fR set. + +Tools and operating system kernel interfaces may impose further restrictions +on how the uncore PMU can be accessed.
+ +.SH SEE ALSO +pfm_dispatch_events(3) and set of examples shipped with the library +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/libpfm_p6.3 b/src/libpfm-3.y/docs/man3/libpfm_p6.3 new file mode 100644 index 0000000..3a562b3 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_p6.3 @@ -0,0 +1,65 @@ +.TH LIBPFM 3 "September, 2005" "" "Linux Programmer's Manual" +.SH NAME +libpfm_i386_p6 - support for Intel P6 processor family +.SH SYNOPSIS +.nf +.B #include +.B #include +.sp +.SH DESCRIPTION +The libpfm library provides full support for the P6 processor family, including +the Pentium M processor. The interface is defined in \fBpfmlib_i386_p6.h\fR. It consists +of a set of functions and structures which describe and allow access to the +P6 processors specific PMU features. +.sp +When P6 processor specific features are needed to support a measurement, their descriptions must be passed +as model-specific input arguments to the \fBpfm_dispatch_events()\fR function. The P6 processors specific +input arguments are described in the \fBpfmlib_i386_p6_input_param_t\fR structure and the output +parameters in \fBpfmlib_i386_p6_output_param_t\fR. They are defined as follows: +.sp +.nf +typedef struct { + unsigned int cnt_mask; + unsigned int flags; +} pfmlib_i386_p6_counter_t; + +typedef struct { + pfmlib_i386_p6_counter_t pfp_i386_p6_counters[PMU_I386_P6_NUM_COUNTERS]; + uint64_t reserved[4]; +} pfmlib_i386_p6_input_param_t; + +typedef struct { + uint64_t reserved[8]; +} pfmlib_i386_p6_output_param_t; +.fi +.sp +.sp +The P6 processor provides a few additional per-event features for +counters: thresholding, inversion, edge detection. They can be set using the +\fBpfp_i386_p6_counters\fR data structure for each event. The \fBflags\fR +field can be initialized as follows: +.TP +.B PFMLIB_I386_P6_SEL_INV +Inverse the results of the \fBcnt_mask\fR comparison when set +.TP +.B PFMLIB_I386_P6_SEL_EDGE +Enables edge detection of events. 
+.LP +The \fBcnt_mask\fR field is used to set the event threshold. +The value of the counter is incremented each time the number of occurrences +per cycle of the event is greater than or equal to the value of the field. When +zero all occurrences are counted. +.sp +.SH Handling of Pentium M +The library provides full support for the Pentium M PMU. A Pentium M implements +more events than a generic P6 processor. The library autodetects the host +processor and can distinguish a generic P6 processor from a Pentium M. Thus no +special call is needed. +.sp +.SH ERRORS +Refer to the description of the \fBpfm_dispatch_events()\fR function for errors. +.SH SEE ALSO +pfm_dispatch_events(3) and set of examples shipped with the library +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/libpfm_powerpc.3 b/src/libpfm-3.y/docs/man3/libpfm_powerpc.3 new file mode 100644 index 0000000..a5abb0c --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_powerpc.3 @@ -0,0 +1,38 @@ +.TH LIBPFM 3 "October, 2007" "" "Linux Programmer's Manual" +.SH NAME +libpfm_powerpc - support for IBM PowerPC and POWER processor families +.SH SYNOPSIS +.nf +.B #include +.B #include +.sp +.SH DESCRIPTION +The libpfm library provides support for the IBM PowerPC and POWER processor +families. Specifically, it currently provides support for the following +processors: PPC970(FX,GX), PPC970MP, POWER4, POWER4+, POWER5, POWER5+, and POWER6. +.sp +.SH MODEL-SPECIFIC PARAMETERS +At present, the model_in and model_out model-specific input and output +parameters are not used by the \fBpfm_dispatch_events()\fR function. For future +compatibility, NULLs must be passed for these arguments. +.sp +.SH COMBINING EVENTS IN A SET +As with many architectures' PMU hardware designs, events can not be combined +together arbitrarily in the same event set, even if there are a sufficient +number of counters available.
This implementation for IBM PowerPC/POWER +bases the event compatibility on a set of previously-defined compatible +event groups. If the events placed in an event set are all members of one +of the predefined event groups, a call to the \fBpfm_dispatch_events()\fR function will be +successful. With the current interface, there is no way to discover +apriori which events are compatible, so application software that wishes to +combine events must do so by trial and error, possibly using multiplexed +event sets to count events that cannot otherwise be combined in the same +set. +.sp +.SH ERRORS +Refer to the description of the \fBpfm_dispatch_events()\fR function for errors. +.SH SEE ALSO +pfm_dispatch_events(3) and set of examples shipped with the library +.SH AUTHOR +Corey Ashford +.PP diff --git a/src/libpfm-3.y/docs/man3/libpfm_westmere.3 b/src/libpfm-3.y/docs/man3/libpfm_westmere.3 new file mode 100644 index 0000000..59a3ce6 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/libpfm_westmere.3 @@ -0,0 +1,146 @@ +.TH LIBPFM 3 "January, 2009" "" "Linux Programmer's Manual" +.SH NAME +libpfm_nehalem - support for Intel Nehalem processor family +.SH SYNOPSIS +.nf +.B #include +.B #include +.sp +.SH DESCRIPTION +The libpfm library provides full support for the Intel Nehalem processor family, such as +Intel Core i7. The interface is defined in \fBpfmlib_intel_nhm.h\fR. It consists of a set +of functions and structures describing the Intel Nehalem processor specific PMU features. +The Intel Nehalem processor is a quad core, dual thread processor. It includes two types +of PMU: core and uncore. The latter measures events at the socket level and is therefore +disconnected from any of the four cores. The core PMU implements Intel architectural +perfmon version 3 with four generic counters and three fixed counters. The uncore has +eight generic counters and one fixed counter. 
Each Intel Nehalem core also implement +a 16-deep branch trace buffer, called Last Branch Record (LBR), which can be used in +combination with the core PMU. Intel Nehalem implements a newer version of the +Precise Event-Based Sampling (PEBS) mechanism which has the ability to capture +where cache misses occur. + +.sp +When Intel Nehalem processor specific features are needed to support a measurement, their +descriptions must be passed as model-specific input arguments to the +\fBpfm_dispatch_events()\fR function. The Intel Nehalem processors specific input +arguments are described in the \fBpfmlib_nhm_input_param_t\fR structure. No +output parameters are currently defined. The input parameters are defined as follows: +.sp +.nf +typedef struct { + unsigned long cnt_mask; + unsigned int flags; +} pfmlib_nhm_counter_t; + +typedef struct { + unsigned int lbr_used; + unsigned int lbr_plm; + unsigned int lbr_filter; +} pfmlib_nhm_lbr_t; + +typedef struct { + unsigned int pebs_used; + unsigned int ld_lat_thres; +} pfmlib_nhm_pebs_t; + +typedef struct { + pfmlib_nhm_counter_t pfp_nhm_counters[PMU_NHM_NUM_COUNTERS]; + pfmlib_nhm_pebs_t pfp_nhm_pebs; + pfmlib_nhm_lbr_t pfm_nhm_lbr; + uint64_t reserved[4]; +} pfmlib_nhm_input_param_t; +.fi +.sp +.sp +The Intel Nehalem processor provides a few additional per-event features for +counters: thresholding, inversion, edge detection, monitoring of both +threads, occupancy. They can be set using the \fBpfp_nhm_counters\fR data +structure for each event. The \fBflags\fR field can be initialized with +the following values, depending on the event: +.TP +.B PFMLIB_NHM_SEL_INV +Inverse the results of the \fBcnt_mask\fR comparison when set. This +flag is supported for core and uncore PMU events. +.TP +.B PFMLIB_NHM_SEL_EDGE +Enables edge detection of events. This +flag is supported for core and uncore PMU events. 
+.TP +.B PFMLIB_NHM_SEL_ANYTHR +Enable measuring the event in any of the two processor threads assuming hyper-threading +is enabled. By default, only the current thread is measured. This flag is restricted +to core PMU events. +.TP +.B PFMLIB_NHM_SEL_OCC_RST +When set, the queue occupancy counter associated with the event is cleared. This flag +is only available to uncore PMU events. +.LP +The \fBcnt_mask\fR field is used to set the event threshold. +The value of the counter is incremented for each cycle in which the +number of occurrences of the event is greater or equal to the value of +the field. Thus, the event is modified to actually measure the number +of qualifying cycles. When zero all occurrences are counted (this is the default). +This flag is supported for core and uncore PMU events. +.sp +.SH Support for Precise-Event Based Sampling (PEBS) +The library can be used to setup the PMC registers associated with PEBS. In this case, +the \fBpfp_nhm_pebs_t\fR structure must be used and the \fBpebs_used\fR field must +be set to 1. +.sp +To enable the PEBS load latency filtering capability, it is necessary to program the +\fBMEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD\fR event into one generic counter. The +latency threshold must be passed to the library in the \fBld_lat_thres\fR field. +It is expressed in core cycles and \fBmust\fR greater than 3. Note that \fBpebs_used\fR +must be set as well. + +.SH Support for Last Branch Record (LBR) +The library can be used to setup LBR registers. On Intel Nehalem processors, the +LBR is 16-entry deep and it is possible to filter branches, based on privilege level +or type. To configure the LBR, the \fBpfm_nhm_lbr_t\fR structure must be used. +.sp +Like core PMU counters, LBR only distinguishes two privilege levels, 0 and the rest (1,2,3). +When running Linux natively, the kernel is at privilege level 0, applications at level 3. +It is possible to specify the privilege level of LBR using the \fBlbr_plm\fR. 
Any attempt +to pass \fBPFM_PLM1\fB or \fBPFM_PLM2\fR will be rejected. If \fB\lbr_plm\fR is 0, then the global +value in \fBpfmlib_input_param_t\fR and the \fBpfp_dfl_plm\fR is used. +.sp +By default, LBR captures all branches. It is possible to filter out branches by passing +a set of flags in \fBlbr_select\fR. The flags are as follows: +.TP +.B PFMLIB_NHM_LBR_JCC +When set, LBR does not capture conditional branches. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_REL_CALL +When set, LBR does not capture near calls. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_IND_CALL +When set, LBR does not capture indirect calls. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_RET +When set, LBR does not capture return branches. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_IND_JMP +When set, LBR does not capture indirect branches. Default: off. +.TP +.B PFM_NHM_LBR_NEAR_REL_JMP +When set, LBR does not capture relative branches. Default: off. +.TP +.B PFM_NHM_LBR_FAR_BRANCH +When set, LBR does not capture far branches. Default: off. + +.SH Support for uncore PMU + +By nature, the uncore PMU does not distinguish privilege levels, therefore +it captures events at all privilege levels. To avoid any misinterpretation, +the library enforces that uncore events be measured with both \fBPFM_PLM0\fR +and \fBPFM_PLM3\fR set. + +Tools and operating system kernel interfaces may impose further restrictions +on how the uncore PMU can be accessed. 
+ +.SH SEE ALSO +pfm_dispatch_events(3) and set of examples shipped with the library +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_dispatch_events.3 b/src/libpfm-3.y/docs/man3/pfm_dispatch_events.3 new file mode 100644 index 0000000..0717509 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_dispatch_events.3 @@ -0,0 +1,296 @@ +.TH LIBPFM 3 "July, 2003" "" "Linux Programmer's Manual" +.SH NAME +pfm_dispatch_events \- determine PMC registers values for a set of events to measure +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.BI "int pfm_dispatch_events(pfmlib_input_param_t *"p ", void *" mod_in ", pfmlib_output_param_t *" q, "void *" mod_out ");" +.sp +.SH DESCRIPTION +This function is the central piece of the library. It is important to understand +that the library does not effectively program the PMU, i.e., it does not make +the operating system calls. The PMU is never actually accessed by the +library. Instead, the library helps applications prepare the arguments to pass to +the kernel. In particular, it sets up the values to program into the PMU +configuration registers (PMC). The list of used data registers (PMD) is also +returned. +.sp +The input arguments are divided into two categories: the generic arguments in \fBp\fR +and the optional PMU model specific arguments in \fBmod_in\fR. +The same applies for the output arguments: \fBq\fR contains the generic +output arguments and \fBmod_out\fR the optional PMU model specific arguments. +.sp +An application describes what it wants to measure in \fBp\fR and if it uses some model +specific features, such as opcode matching on Itanium 2 processors, it must pass a pointer to the +relevant model-specific input parameters in \fBmod_in\fR. The generic output parameters +contain the register index and values for the PMC and PMD registers needed to +make the measurement. The index mapping is guaranteed to match the mapping used +by the Linux perfmon2 interface. 
In case the library is not used on this system, +the hardware register addresses or indexes can also be retrieved from the output +structure. +.sp +The \fBpfmlib_input_param_t\fR structure is defined as follows: +.sp +.nf +typedef struct { + int event; + unsigned int plm; + unsigned long flags; + unsigned int unit_masks[PFMLIB_MAX_MASKS_PER_EVENT]; + unsigned int num_masks; +} pfmlib_event_t; + +typedef struct { + unsigned int pfp_event_count; + unsigned int pfp_dfl_plm; + unsigned int pfp_flags; + pfmlib_event_t pfp_events[PFMLIB_MAX_PMCS]; + pfmlib_regmask_t pfp_unavail_pmcs; +} pfmlib_input_param_t; +.fi +.sp +The structure mostly contains one table, called \fBpfp_events\fR which describes +the events to be measured. The number of submitted events is indicated by +\fBpfp_event_count\fR. + +Each event is described in the \fBpfp_events\fR table by an opaque descriptor stored in +the \fBevent\fR field. This descriptor is obtained with the \fBpfm_find_full_event()\fR +or derivative functions. For some events, it may be necessary to specify at least one +unit mask in the \fBunit_masks\fR table. A unit mask is yet another opaque descriptor +obtained via the \fBpfm_find_event_mask()\fR or \fBpfm_find_full_event()\fR functions. Typically, if +an event supports multiple unit masks, they can be combined in which case more than one +entry in \fBunit_masks\fR must be specified. The actual number of unit mask descriptors +passed must be indicated in \fBnum_masks\fR. When no unit mask is used, this +field must be set to 0. + +A privilege level mask for the event can be provided in \fBplm\fR. This is a bitmask where +each bit indicates a privilege level at which to monitor, more than one bit can be set. +The library supports up to four levels, but depending on the PMU model, some levels may not be +available. The levels are as follows: +.TP +.B PFM_PLM0 +monitor at the privilege level 0. 
For many architectures, this means kernel level +.TP +.B PFM_PLM1 +monitor at privilege level 1 +.TP +.B PFM_PLM2 +monitor at privilege level 2 +.TP +.B PFM_PLM3 +monitor at the privilege level 3. For many architectures, this means user level +.LP +.sp +.sp +Events with a \fBplm\fR value of 0 will use the default privilege level mask +as indicated by \fBpfp_dfl_plm\fR which must be set to any combination of the +values described above. It is illegal to have a value of 0 for this field. +.sp +The \fBpfp_flags\fR field contains a set of flags that affect the whole +set of events to be monitored. The currently defined flags are: +.TP +.B PFMLIB_PFP_SYSTEMWIDE +indicates that the monitors are to be used in a system-wide monitoring session. +This could influence the way the library sets up some register values. +.sp +.LP +The \fBpfp_unavail_pmcs\fR bitmask can be used by applications to communicate +to the library the list of PMC registers which are not available on the system. +Some kernels may allocate certain PMC registers (and associated data registers) +for other purposes. Those registers must not be used by the library +otherwise the assignment of events to PMC registers may be rejected by the +kernel. Applications must figure out which registers are available using +a kernel interface at their disposal, the library does not provide this +service. The library expects the restrictions to be expressed using the Linux +perfmon2 PMC register mapping. +.LP +Refer to the PMU specific manual for a description of the model-specific +input parameters to be passed in \fBmod_in\fR. 
+ +The generic output parameters are contained in the \fBpfmlib_output_param_t\fR +structure which is defined as: +.sp +.nf +typedef struct { + unsigned long long reg_value; + unsigned int reg_num; + unsigned long reg_addr; +} pfmlib_reg_t; + +typedef struct { + unsigned int pfp_pmc_count; + unsigned int pfp_pmd_count; + pfmlib_reg_t pfp_pmcs[PFMLIB_MAX_PMCS]; + pfmlib_reg_t pfp_pmds[PFMLIB_MAX_PMDS]; +} pfmlib_output_param_t; +.fi +.sp +The number of valid entries in the \fBpfp_pmcs\fR table is indicated by \fBpfp_pmc_count\fR. +The number of valid entries in the \fBpfp_pmds\fR table is indicated by \fBpfp_pmd_count\fR. +Each entry in both tables is of type \fBpfmlib_reg_t\fR. +.sp +In the \fBpfp_pmcs\fR table, the \fBreg_num\fR contains the PMC register index (perfmon2 mapping), +and the \fBreg_value\fR contains a 64-bit value to be used to program the PMC register. +The \fBreg_addr\fR indicates the hardware address or index for the PMC register. +.sp +In the \fBpfp_pmds\fR table, the \fBreg_num\fR contains the PMD register index +(perfmon2 mapping). The \fBreg_value\fR is ignored. The \fBreg_addr\fR indicates the hardware +address or index for the PMD register. +.sp +Refer to the PMU specific manual for a description of the model-specific +output parameters to be returned in \fBmod_out\fR. +.sp +The current implementation of the \fBpfm_dispatch_events()\fR function completely overwrites +the \fBpfmlib_output_param_t\fR structure. In other words, results do not accumulate +into the \fBpfp_pmcs\fR table across multiple calls. Unused fields are +guaranteed to be zeroed upon successful return. +.sp +Depending on the PMU model, there may not always be a one to one mapping between +a PMC register and a data register. Register dependencies may be more intricate. +However the \fBpfm_dispatch_events()\fR function guarantees certain ordering between the +\fBpfp_pmcs\fR and \fBpfp_pmds\fR tables. 
In particular, it guarantees that +the \fBpfp_pmds\fR table always starts with the counters corresponding, in +the same order, to the events as provided in the \fBpfp_events\fR table on input. +There is always one counter per event. Additional PMD registers, if any, come +after. +.SH EXAMPLE +Here is a typical sequence using the perfmon2 interface: +.nf + #include <perfmon/pfmlib.h> + ... + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_ctx_t ctx; + pfarg_pmd_t pd[1]; + pfarg_pmc_t pc[1]; + pfarg_load_t load_arg; + int fd, i; + int ret; + + if (pfm_initialize() != PFMLIB_SUCCESS) { + fprintf(stderr, "can't initialize library\\n"); + exit(1); + } + memset(&ctx,0, sizeof(ctx)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(&load_arg, 0, sizeof(load_arg)); + + ret = pfm_get_cycle_event(&inp.pfp_events[0]); + if (ret != PFMLIB_SUCCESS) { + fprintf(stderr, "cannot find cycle event\\n"); + exit(1); + } + inp.pfp_dfl_plm = PFM_PLM3; + inp.pfp_event_count = 1; + + ret = pfm_dispatch_events(&inp, NULL, &outp, NULL); + if (ret != PFMLIB_SUCCESS) { + fprintf(stderr, "cannot dispatch events: %s\\n", pfm_strerror(ret)); + exit(1); + } + /* propagate pmc value to perfmon2 structures */ + for(i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + for(i=0; i < outp.pfp_pmd_count; i++) { + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + pd[i].reg_value = 0; + } + ... + if (pfm_create_context(&ctx, NULL, 0) == -1 ) { + ... + } + fd = ctx.ctx_fd; + + if (pfm_write_pmcs(fd, pc, outp.pfp_pmc_count) == -1) { + ... + } + if (pfm_write_pmds(fd, pd, outp.pfp_pmd_count) == -1) { + ... + } + + load_arg.load_pid = getpid(); + if (pfm_load_context(fd, &load_arg) == -1) { + ... + } + + pfm_start(fd, NULL); + /* code to monitor */ + pfm_stop(fd); + + if (pfm_read_pmds(fd, pd, inp.pfp_event_count) == -1) { + ... 
+ } + printf("results: %llu\\n", pd[0].reg_value); + ... + close(fd); + ... +.fi + +.SH RETURN +The function returns whether or not the call was successful. +A return value of \fBPFMLIB_SUCCESS\fR indicates success, +otherwise the value is the error code. +.SH ERRORS +.B PFMLIB_ERR_NOINIT +The library has not been initialized properly. +.TP +.B PFMLIB_ERR_INVAL +Some arguments were invalid. For instance the value of *count is zero. +This can also be due to the content of the \fBpfmlib_input_param_t\fR structure. +.TP +.B PFMLIB_ERR_NOTFOUND +No matching event was found. +.TP +.B PFMLIB_ERR_TOOMANY +The number of events to monitor exceeds the number of implemented counters. +.TP +.B PFMLIB_ERR_NOASSIGN +The events cannot be dispatched to the PMC because events have conflicting constraints. +.TP +.B PFMLIB_ERR_MAGIC +The model specific extension does not have the right magic number. +.TP +.B PFMLIB_ERR_FEATCOMB +The set of events and features cannot be combined. +.TP +.B PFMLIB_ERR_EVTMANY +An event has been supplied more than once and is causing resource (PMC) conflicts. +.TP +.B PFMLIB_ERR_IRRINVAL +Invalid code range restriction (Itanium, Itanium 2). +.TP +.B PFMLIB_ERR_IRRALIGN +Code range has invalid alignment (Itanium, Itanium 2). +.TP +.B PFMLIB_ERR_IRRTOOMANY +Cannot satisfy all the code ranges (Itanium, Itanium 2). +.TP +.B PFMLIB_ERR_DRRTOOMANY +Cannot satisfy all the data ranges (Itanium, Itanium 2). +.TP +.B PFMLIB_ERR_DRRINVAL +Invalid data range restriction (Itanium, Itanium 2). +.TP +.B PFMLIB_ERR_EVTSET +Some events belong to incompatible sets (Itanium 2). +.TP +.B PFMLIB_ERR_EVTINCOMP +Some events cannot be measured at the same time (Itanium 2). +.TP +.B PFMLIB_ERR_IRRTOOBIG +Code range is too big (Itanium 2). +.TP +.B PFMLIB_ERR_UMASK +Invalid or missing unit mask. 
+.SH SEE ALSO +libpfm_itanium(3), libpfm_itanium2(3), pfm_regmask_set(3), pfm_regmask_clr(3), +pfm_find_event_code_mask(3) +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_find_event.3 b/src/libpfm-3.y/docs/man3/pfm_find_event.3 new file mode 100644 index 0000000..2842c1c --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_find_event.3 @@ -0,0 +1,84 @@ +.TH LIBPFM 3 "August, 2006" "" "Linux Programmer's Manual" +.SH NAME +pfm_find_event, pfm_find_full_event, pfm_find_event_bycode, +pfm_find_event_bycode_next, pfm_find_event_mask \- search for events and unit +masks +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.BI "int pfm_find_event(const char *"str ", unsigned int *"desc ");" +.BI "int pfm_find_full_event(const char *"str ", pfmlib_event_t *"ev ");" +.BI "int pfm_find_event_bycode(int "code ", unsigned int *"desc ");" +.BI "int pfm_find_event_bycode_next(unsigned int "desc1 ", int "code ", unsigned int *"desc ");" +.BI "int pfm_find_event_mask(unsigned int "idx ", const char *"str ", unsigned int *"mask_idx ");" + +.sp +.SH DESCRIPTION +The PMU counters can be programmed to count the number of occurrences +of certain events. The number of events varies from one PMU model +to the other. Each event has a name and a code which is used to program +the actual PMU register. Some events may need to be further qualified +with unit masks. +.sp +The library does not directly expose the event code, nor unit mask code, +to user applications because it is not necessary. Instead applications +use names to query the library for particular information about events. +Given an event name, the library returns an opaque descriptor. +Each descriptor is unique and has no relationship to the event code. +.sp +The set of functions described here can be used to get an event descriptor +given either the name of the event or its code. Several events may +share the same code. An event name is a string structured as: event_name[:unit_mask1[:unit_mask2]]. 
+.sp +The \fBpfm_find_event()\fR function is a general purpose search routine. +Given an event name in \fBstr\fR, it returns the descriptor for the +corresponding event. If unit masks are provided, they are not taken +into account. This function is being \fBdeprecated\fR in favor of +the \fBpfm_find_full_event()\fR function. +.sp +The \fBpfm_find_full_event()\fR function is the general purpose search routine. +Given an event name in \fBstr\fR, it returns in \fBev\fR, the full event descriptor that +includes the event descriptor in \fBev->event\fR and the unit mask descriptors +in \fBev->unit_masks\fR. The number of unit masks descriptors returned is +indicated in \fBev->num_masks\fR. Unit masks are specified as a colon +separated list of unit mask names, exact values or value combinations. +For instance, if event A supports unit masks M1 (0x1) and M2 (0x40), and +both unit masks are to be measured, then the following values for +\fBstr\fR are valid: "A:M1:M2", "A:M1:0x40", "A:M2:0x1", "A:0x1:0x40", "A:0x41". +.sp +The \fBpfm_find_event_bycode()\fR function searches for an event given +its \fBcode\fR represented as an integer. It returns in \fBdesc\fR, +the event descriptor. Unit masks are ignored. + +.sp +Because there can be several events with the same code, the library +provides the \fBpfm_find_event_bycode_next()\fR function to search for other +events with the same code. Given an event \fBdesc1\fR and a \fBcode\fR, +this function will look for the next event with the same code. If +such an event exists, its descriptor will be stored into \fBdesc\fR. +It is not necessary to have called the \fBpfm_find_event_bycode()\fR function prior +to calling this function. This function is fully threadsafe as it does +not maintain any state between calls. +.sp +The \fBpfm_find_event_mask()\fR function is used to find the unit mask descriptor +based on its name or numerical value passed in \fBstr\fR for the event specified +in \fBidx\fR. 
The numeric value must be an exact match of an existing unit mask value, +i.e., all bits must match. Some events do not have unit masks, in which case this function +returns an error. +.SH RETURN +All functions return whether or not the call was successful. +A return value of \fBPFMLIB_SUCCESS\fR indicates success, +otherwise the value is the error code. +.SH ERRORS +.B PFMLIB_ERR_NOINIT +the library has not been initialized properly. +.TP +.B PFMLIB_ERR_INVAL +the event descriptor is invalid, or the pointer argument is NULL. +.TP +.B PFMLIB_ERR_NOTFOUND +no matching event or unit mask was found. +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_find_event_bycode.3 b/src/libpfm-3.y/docs/man3/pfm_find_event_bycode.3 new file mode 100644 index 0000000..1acf3ef --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_find_event_bycode.3 @@ -0,0 +1 @@ +.so man3/pfm_find_event.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_find_event_bycode_next.3 b/src/libpfm-3.y/docs/man3/pfm_find_event_bycode_next.3 new file mode 100644 index 0000000..1acf3ef --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_find_event_bycode_next.3 @@ -0,0 +1 @@ +.so man3/pfm_find_event.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_find_event_mask.3 b/src/libpfm-3.y/docs/man3/pfm_find_event_mask.3 new file mode 100644 index 0000000..1acf3ef --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_find_event_mask.3 @@ -0,0 +1 @@ +.so man3/pfm_find_event.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_find_full_event.3 b/src/libpfm-3.y/docs/man3/pfm_find_full_event.3 new file mode 100644 index 0000000..1acf3ef --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_find_full_event.3 @@ -0,0 +1 @@ +.so man3/pfm_find_event.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_force_pmu.3 b/src/libpfm-3.y/docs/man3/pfm_force_pmu.3 new file mode 100644 index 0000000..8cb351c --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_force_pmu.3 @@ -0,0 +1 @@ +.so man3/pfm_get_pmu_name.3 diff --git 
a/src/libpfm-3.y/docs/man3/pfm_get_cycle_event.3 b/src/libpfm-3.y/docs/man3/pfm_get_cycle_event.3 new file mode 100644 index 0000000..e2623fe --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_cycle_event.3 @@ -0,0 +1,54 @@ +.TH LIBPFM 3 "September, 2006" "" "Linux Programmer's Manual" +.SH NAME +pfm_get_cycle_event, pfm_get_inst_retired_event - get basic event +descriptors + +.SH SYNOPSIS +.nf +.B #include <perfmon/pfmlib.h> +.sp +.BI "int pfm_get_cycle_event(pfmlib_event_t *"ev ");" +.BI "int pfm_get_inst_retired_event(pfmlib_event_t *"ev ");" +.sp +.SH DESCRIPTION +In order to build very simple generic examples that work across +all PMU models, the library provides a way to retrieve information +about two basic events that are present in most PMU models: cycles +and instructions retired. The first event, cycles, counts the number +of elapsed cycles. The second event, instructions retired, counts the +number of instructions that have executed and retired from the processor +pipeline. Depending on the PMU model, there may be variations in the +exact definition of those events. The library provides this information +on a best effort basis. User must refer to PMU model specific documentation +to validate the event definition. +.sp +The \fBpfm_get_cycle_event()\fR function returns in \fBev\fR the event +and optional unit mask descriptors for the event that counts elapsed +cycles. Depending on the PMU model, there may be unit mask(s) necessary +to count cycles. Application must check the value returned in +\fBev->num_masks\fR. + +.sp +The \fBpfm_get_inst_retired_event()\fR function returns in \fBev\fR the event +and optional unit mask descriptors for the event that counts the number +of retired instructions. Depending on the PMU model, there may be unit +mask(s) necessary to count retired instructions. Application must check +the value returned in \fBev->num_masks\fR. + +.SH RETURN +All functions return whether or not the call was successful. 
+A return value of \fBPFMLIB_SUCCESS\fR indicates success, +otherwise the value is the error code. +.SH ERRORS +.B PFMLIB_ERR_NOINIT +the library has not been initialized properly. +.TP +.B PFMLIB_ERR_INVAL +the \fBev\fR parameter is NULL. +.TP +.B PFMLIB_ERR_NOTSUPP +the host PMU does not define an event to count cycles or instructions retired. +.TP +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_get_event_code.3 b/src/libpfm-3.y/docs/man3/pfm_get_event_code.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_event_code.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_event_code_counter.3 b/src/libpfm-3.y/docs/man3/pfm_get_event_code_counter.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_event_code_counter.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_event_counters.3 b/src/libpfm-3.y/docs/man3/pfm_get_event_counters.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_event_counters.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_event_description.3 b/src/libpfm-3.y/docs/man3/pfm_get_event_description.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_event_description.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_event_mask_code.3 b/src/libpfm-3.y/docs/man3/pfm_get_event_mask_code.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_event_mask_code.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_event_mask_description.3 b/src/libpfm-3.y/docs/man3/pfm_get_event_mask_description.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ 
b/src/libpfm-3.y/docs/man3/pfm_get_event_mask_description.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_event_mask_name.3 b/src/libpfm-3.y/docs/man3/pfm_get_event_mask_name.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_event_mask_name.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_event_name.3 b/src/libpfm-3.y/docs/man3/pfm_get_event_name.3 new file mode 100644 index 0000000..becbfb2 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_event_name.3 @@ -0,0 +1,133 @@ +.TH LIBPFM 3 "August, 2006" "" "Linux Programmer's Manual" +.SH NAME +pfm_get_event_name, pfm_get_full_event_name, pfm_get_event_mask_name, pfm_get_event_code, +pfm_get_event_mask_code, pfm_get_event_counters, pfm_get_num_events, pfm_get_max_event_name_len, +pfm_get_event_description, pfm_get_event_mask_description \- get event information +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "int pfm_get_event_name(unsigned int " e ", char *"name ", size_t " maxlen ");" +.BI "int pfm_get_full_event_name(pfmlib_event_t *" ev ", char *"name ", size_t " maxlen ");" +.BI "int pfm_get_event_mask_name(unsigned int " e ", unsigned int "mask ", char *"name ", size_t " maxlen ");" +.BI "int pfm_get_event_code(unsigned int " e ", int *"code ");" +.BI "int pfm_get_event_mask_code(unsigned int " e ", unsigned int "mask ", int *"code ");" +.BI "int pfm_get_event_code_counter(unsigned int " e ", unsigned int " cnt ", int *"code ");" +.BI "int pfm_get_event_counters(int " e ", pfmlib_regmask_t "counters ");" +.BI "int pfm_get_num_events(unsigned int *" count ");" +.BI "int pfm_get_max_event_name_len(size_t *" len ");" +.BI "int pfm_get_event_description(unsigned int " ev ", char **" str ");" +.BI "int pfm_get_event_mask_description(unsigned int " ev ", unsigned int "mask ", char **" str ");" +.sp +.SH DESCRIPTION +The \fBpfm_get_event_name()\fR function returns in \fBname\fR the event 
+name given its opaque descriptor in \fBe\fR. The \fBmaxlen\fR argument +indicates the maximum length of the buffer provided for \fBname\fR. Up +to \fBmaxlen\fR-1 characters are stored in the buffer. +The buffer size must be large enough to store the event name, otherwise +an error is returned. This behavior is required to avoid returning partial +names with no way for the caller to verify this is not the full name, except +by failing other calls. The buffer can be appropriately sized using the +\fBpfm_get_max_event_name_len()\fR function. The returned name is a +null terminated string with all upper-case characters and no spaces. +.sp +The \fBpfm_get_full_event_name()\fR function returns in \fBname\fR the event +name given the full event description in \fBev\fR. The description contains +the event code in \fBev->event\fR and optional unit masks descriptors in +\fBev->unit_masks\fR. The \fBmaxlen\fR argument indicates the maximum length +of the buffer provided for \fBname\fR. If more than \fBmaxlen\fR-1 characters +are needed to represent the event, an error is returned. Applications may use +the \fBpfm_get_max_event_name_len()\fR function to size the buffer correctly. +In case unit masks are provided, the final event name string is structured as: +event_name:unit_masks1[:unit_masks2]. Event names and unit masks names are +returned in all upper case. +.sp +The \fBpfm_get_event_code()\fR function returns the event code in \fBcode\fR +given its opaque descriptor \fBe\fR. +.sp +On some PMU models, the code associated with an event is different based +on the counter it is programmed into. The \fBpfm_get_event_code_counter()\fR +function is used to retrieve the event code in \fBcode\fR when the event \fBe\fR +is programmed into counter \fBcnt\fR. The counter index \fBcnt\fR must correspond +to a counting PMD register. 
+.sp +Given an opaque event descriptor \fBe\fR, the \fBpfm_get_event_counters()\fR +function returns in \fBcounters\fR a bitmask of type \fBpfmlib_regmask_t\fR where +each bit set represents a PMU config register which can be used to program this +event. The bitmask must be accessed using accessor macros defined by the library. +.sp +The \fBpfm_get_num_events()\fR function returns in \fBcount\fR the +total number of events available for the PMU model. On some PMU +models, however, not all events in the table may be useable due +to processor stepping changes. However, the library guarantees that +no more than \fBcount\fR events are available. +.sp +It is possible to list all existing events for the detected host PMU +using accessor functions as the full table of events is not accessible +to the applications. The index of the first event is always zero, +then using the \fBpfm_get_num_events()\fR function you get the total number of events. +On some PMU models, e.g., AMD64, not all events are necessarily supported by the host +PMU, therefore the count returned by this call may not be the actual number of available +events. Event descriptors are contiguous therefore a simple loop will allow +complete scanning. The typical scan loop is constructed as +follows: +.sp +.nf +unsigned int i, count; +char name[256]; +int ret; +pfm_get_num_events(&count); +for(i=0;i < count; i++) +{ + ret = pfm_get_event_name(i, name, 256); + if (ret != PFMLIB_SUCCESS) + continue; + printf("%s\\n", name); +} +.fi + +.sp +The \fBpfm_get_max_event_name_len()\fR function returns in \fBlen\fR +the maximum length in bytes for the name of the events or its unit masks, if any, +available on one PMU implementation. The value excludes the string termination +character ('\\0'). +.sp +The \fBpfm_get_event_description()\fR function returns in \fBstr\fR the +description string associated with the event specified in \fBev\fR. 
+The description is returned into a buffer that is allocated to hold the entire +description text. It is the responsibility of the caller to free the buffer when +it becomes useless by calling the \fBfree(3)\fR function. +.sp +The \fBpfm_get_event_mask_code()\fR function must be used to retrieve the actual +unit mask value given an event descriptor in \fBe\fR and a unit mask descriptor +in \fBmask\fR. The value is returned in \fBcode\fR. +.sp +The \fBpfm_get_event_mask_name()\fR function must be used to retrieve the name +associated with a unit mask specified in \fBmask\fR for event \fBe\fR. The +name is returned in the buffer specified in \fBname\fR. The maximum size +of the buffer must be specified in \fBmaxlen\fR. +.sp +The \fBpfm_get_event_mask_description()\fR function returns in \fBstr\fR the +description string associated with the unit mask specified in \fBmask\fR for +the event specified in \fBev\fR. The description is returned into a buffer that +is allocated to hold the entire description text. It is the responsibility of +the caller to free the buffer when it becomes useless by calling the \fBfree(3)\fR +function. + +.SH RETURN +All functions return whether or not the call was successful. A return value of +\fBPFMLIB_SUCCESS\fR indicates success, otherwise the value is the error code. +.SH ERRORS +.B PFMLIB_ERR_NOINIT +the library has not been initialized properly. +.TP +.B PFMLIB_ERR_FULL +the string buffer provided is too small +.TP +.B PFMLIB_ERR_INVAL +the event or unit mask descriptor, or the \fBcnt\fR argument is invalid, or a pointer argument is NULL. 
+.SH SEE ALSO +pfm_get_impl_counters(3), pfm_get_max_event_name_len(3), free(3) +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_get_full_event_name.3 b/src/libpfm-3.y/docs/man3/pfm_get_full_event_name.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_full_event_name.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_hw_counter_width.3 b/src/libpfm-3.y/docs/man3/pfm_get_hw_counter_width.3 new file mode 100644 index 0000000..43ef257 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_hw_counter_width.3 @@ -0,0 +1 @@ +.so man3/pfm_get_impl_pmcs.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_impl_counters.3 b/src/libpfm-3.y/docs/man3/pfm_get_impl_counters.3 new file mode 100644 index 0000000..43ef257 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_impl_counters.3 @@ -0,0 +1 @@ +.so man3/pfm_get_impl_pmcs.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_impl_pmcs.3 b/src/libpfm-3.y/docs/man3/pfm_get_impl_pmcs.3 new file mode 100644 index 0000000..f1bb21b --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_impl_pmcs.3 @@ -0,0 +1,69 @@ +.TH LIBPFM 3 "July, 2003" "" "Linux Programmer's Manual" +.SH NAME +pfm_get_impl_pmcs, pfm_get_impl_pmds, pfm_get_impl_counters, +pfm_get_num_counters, pfm_get_num_pmcs, pfm_get_num_pmds, +pfm_get_hw_counter_width \- return +bitmask of implemented PMU registers or number of PMU registers +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "int pfm_get_impl_pmcs(pfmlib_regmask_t *" impl_pmcs ");" +.BI "int pfm_get_impl_pmds(pfmlib_regmask_t *" impl_pmds ");" +.BI "int pfm_get_impl_counters(pfmlib_regmask_t *" impl_counters ");" +.BI "int pfm_get_num_counters(unsigned int *"num ");" +.BI "int pfm_get_num_pmcs(unsigned int *"num ");" +.BI "int pfm_get_num_pmds(unsigned int *"num ");" +.BI "int pfm_get_num_counters(unsigned int *"num ");" +.BI "int pfm_get_hw_counter_width(unsigned int *"width ");" +.sp +.SH DESCRIPTION +The 
\fBpfm_get_impl_*()\fR functions can be used to figure out which +PMU registers are implemented on the host CPU. All implemented registers +may not necessarily be available to applications. Programs need to query +the operating system kernel monitoring interface to figure out the list +of available registers. +.sp +The \fBpfm_get_impl_*()\fR functions all return a bitmask of registers corresponding to +the query. The bitmask pointer passed as argument is reset to zero by each function. +The returned bitmask must be accessed using the set of functions provided by the +library to ensure portability. See related man pages below. +.sp +The \fBpfm_get_num_*()\fR functions return the number of implemented PMC or PMD +registers. Those numbers may be different from the actual number of registers +available to applications. +.sp +The \fBpfm_get_impl_pmcs()\fR function returns in \fBimpl_pmcs\fR the bitmask of implemented PMCS. + +The \fBpfm_get_impl_pmds()\fR function returns in \fBimpl_pmds\fR the bitmask of implemented PMDS. + +The \fBpfm_get_impl_counters()\fR function returns in \fBimpl_counters\fR a bitmask of the PMD +registers used as counters. Depending on the PMU model, not all PMD registers are +necessarily used as counters. +.sp +The \fBpfm_get_num_counters()\fR function returns in \fBnum\fR the number of PMD +used as counters. A counter is a PMD which is used to accumulate the +number of occurrences of an event. + +The \fBpfm_get_num_pmcs()\fR function returns in \fBnum\fR the number of +implemented PMCs by the host PMU. + +The \fBpfm_get_num_pmds()\fR function returns in \fBnum\fR the number of +implemented PMDs by the host PMU. + +The \fBpfm_get_hw_counter_width()\fR function returns the width in bits of the +counters in \fBwidth\fR. PMU implementations can have different number of +bits implemented. For instance, Itanium has 32-bit counters, while Itanium +2 has 47-bits. +.SH RETURN +The function returns whether or not it was successful. 
+A return value of \fBPFMLIB_SUCCESS\fR indicates success, +otherwise the value is the error code. +.SH ERRORS +.B PFMLIB_ERR_NOINIT +the library has not been initialized properly. +.SH SEE ALSO +pfm_regmask_set(3), pfm_regmask_isset(3) +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_get_impl_pmds.3 b/src/libpfm-3.y/docs/man3/pfm_get_impl_pmds.3 new file mode 100644 index 0000000..43ef257 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_impl_pmds.3 @@ -0,0 +1 @@ +.so man3/pfm_get_impl_pmcs.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_inst_retired.3 b/src/libpfm-3.y/docs/man3/pfm_get_inst_retired.3 new file mode 100644 index 0000000..ff95958 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_inst_retired.3 @@ -0,0 +1 @@ +.so man3/pfm_get_cycle_event.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_max_event_name_len.3 b/src/libpfm-3.y/docs/man3/pfm_get_max_event_name_len.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_max_event_name_len.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_num_counters.3 b/src/libpfm-3.y/docs/man3/pfm_get_num_counters.3 new file mode 100644 index 0000000..43ef257 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_num_counters.3 @@ -0,0 +1 @@ +.so man3/pfm_get_impl_pmcs.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_num_events.3 b/src/libpfm-3.y/docs/man3/pfm_get_num_events.3 new file mode 100644 index 0000000..2c9dbcc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_num_events.3 @@ -0,0 +1 @@ +.so man3/pfm_get_event_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_num_pmcs.3 b/src/libpfm-3.y/docs/man3/pfm_get_num_pmcs.3 new file mode 100644 index 0000000..43ef257 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_num_pmcs.3 @@ -0,0 +1 @@ +.so man3/pfm_get_impl_pmcs.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_num_pmds.3 b/src/libpfm-3.y/docs/man3/pfm_get_num_pmds.3 new file mode 100644 index 
0000000..43ef257 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_num_pmds.3 @@ -0,0 +1 @@ +.so man3/pfm_get_impl_pmcs.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_pmu_name.3 b/src/libpfm-3.y/docs/man3/pfm_get_pmu_name.3 new file mode 100644 index 0000000..5b123c6 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_pmu_name.3 @@ -0,0 +1,193 @@ +.TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" +.SH NAME +pfm_get_pmu_name, pfm_get_pmu_type, pfm_get_pmu_name_bytype, pfm_pmu_is_supported, pfm_force_pmu,pfm_list_supported_pmu \- query library about supported PMU models +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "int pfm_get_pmu_name(char *"name ", int " maxlen); +.BI "int pfm_get_pmu_type(int *" type); +.BI "int pfm_get_pmu_name_bytype(int " type ", char *" name ", int " maxlen); +.BI "int pfm_pmu_is_supported(int " type); +.BI "int pfm_force_pmu(int " type); +.BI "int pfm_list_supported_pmus(int (*" pf ")(const char *"fmt ",...));" +.sp +.SH DESCRIPTION +These functions retrieve information about the detected host PMU +and the PMU models supported by the library. More than one model +can be supported by the same library. + +Each PMU model is assigned a type and a name. The latter is just +a string and the former is a unique identifier. The currently +supported types are: +.TP +.B PFMLIB_GENERIC_PMU +Intel Itanium default architected PMU model, i.e., the basic model. +.TP +.B PFMLIB_ITANIUM_PMU +Intel Itanium processor PMU model. The model is found in the first +implementation of the IA-64 architecture, code name Merced. +.TP +.B PFMLIB_ITANIUM2_PMU +Intel Itanium 2 processor PMU model. This is the model provided +by McKinley, Madison, and Deerfield processors. +.TP +.B PFMLIB_MONTECITO_PMU +Intel Dual-core Itanium 2 processor PMU model. This is the model provided +by Montecito, Montvale processors. 
+.TP +.B PFMLIB_AMD64_PMU +AMD AMD64 processors (family 15 and 16) +.TP +.B PFMLIB_GEN_IA32_PMU + Intel X86 architectural PMU v1, v2, v3 +.TP +.B PFMLIB_I386_P6_PMU +Intel P6 processors. That includes Pentium Pro, Pentium II, Pentium III, but excludes Pentium M +.TP +.B PFMLIB_I386_PM_PMU +Intel Pentium M processors. +.TP +.B PFMLIB_INTEL_PII_PMU +Intel Pentium II processors. +.TP +.B PFMLIB_PENTIUM4_PMU +Intel processors based on Netburst micro-architecture. That includes Pentium 4. +.TP +.B PFMLIB_COREDUO_PMU +Intel processors based on Yonah micro-architecture. That includes Intel Core Duo/Core Solo processors +.TP +.B PFMLIB_I386_PM_PMU +Intel Pentium M processors +.TP +.B PFMLIB_INTEL_CORE_PMU +Intel processors based on the Core micro-architecture. That includes Intel Core 2 Duo/Quad processors +.TP +.B PFMLIB_INTEL_ATOM_PMU +Intel processors based on the Atom micro-architecture. +.TP +.B PFMLIB_INTEL_NHM_PMU +Intel processors based on the Nehalem micro-architectures. That includes Intel Core i7 processors. 
+.TP +.B PFMLIB_MIPS_20KC_PMU +MIPS 20KC processors +.TP +.B PFMLIB_MIPS_24K_PMU +MIPS 24K processors +.TP +.B PFMLIB_MIPS_25KF_PMU +MIPS 25KF processors +.TP +.B PFMLIB_MIPS_34K_PMU +MIPS 34K processors +.TP +.B PFMLIB_MIPS_5KC_PMU +MIPS 5KC processors +.TP +.B PFMLIB_MIPS_74K_PMU +MIPS 74K processors +.TP +.B PFMLIB_MIPS_R10000_PMU +MIPS R10000 processors +.TP +.B PFMLIB_MIPS_R12000_PMU +MIPS R12000 processors +.TP +.B PFMLIB_MIPS_RM7000_PMU +MIPS RM7000 processors +.TP +.B PFMLIB_MIPS_RM9000_PMU +MIPS RM9000 processors +.TP +.B PFMLIB_MIPS_SB1_PMU +MIPS SB1/SB1A processors +.TP +.B PFMLIB_MIPS_VR5432_PMU +MIPS VR5432 processors +.TP +.B PFMLIB_MIPS_VR5500_PMU +MIPS VR5500 processors +.TP +.B PFMLIB_MIPS_ICE9A_PMU +SiCortex ICE9A +.TP +.B PFMLIB_MIPS_ICE9B_PMU +SiCortex ICE9B +.TP +.B PFMLIB_POWERPC_PMU +IBM POWERPC processors +.TP +.B PFMLIB_CRAYX2_PMU +Cray X2 processors +.TP +.B PFMLIB_CELL_PMU +IBM Cell processors +.TP +.B PFMLIB_PPC970_PMU +IBM PowerPC 970(FX,GX) processors +.TP +.B PFMLIB_PPC970MP_PMU +IBM PowerPC 970MP processors +.TP +.B PFMLIB_POWER3_PMU +IBM POWER3 processors +.TP +.B PFMLIB_POWER4_PMU +IBM POWER4 processors +.TP +.B PFMLIB_POWER5_PMU +IBM POWER5 processors +.TP +.B PFMLIB_POWER5p_PMU +IBM POWER5+ processors +.TP +.B PFMLIB_POWER6_PMU +IBM POWER6 processors +.LP +The \fBpfm_get_pmu_name()\fR function returns the name of the detected +host PMU. The library must have been initialized properly before making this +call. The name is returned in the \fBname\fR argument. The \fBmaxlen\fR argument +indicates the maximum length of the buffer provided for \fBname\fR. +Up to \fBmaxlen-1\fR characters will be returned, not including the +termination character. + +.sp +The \fBpfm_get_pmu_type()\fR function returns the type of the detected host +PMU. The library must have been initialized properly before making this +call. The type returned in \fBtype\fR can be any one of the types listed above. 
+.sp +The \fBpfm_get_pmu_name_bytype()\fR function returns the name of a PMU model in +\fBname\fR given a type in the \fBtype\fR argument. The \fBmaxlen\fR argument +indicates the maximum length of the buffer provided for \fBname\fR. +Up to \fBmaxlen-1\fR characters will be returned, not including the +termination character. + +.sp +The \fBpfm_pmu_is_supported()\fR function returns \fBPFMLIB_SUCCESS\fR if the given +PMU type is supported by the library independently of what the host PMU model is. +.sp +The \fBpfm_force_pmu()\fR function is used to force the library to use a particular +PMU model compared to what it has detected. The library checks that the selected +type can be supported by the host PMU. This is mostly useful to force the +library to use the generic PMU model \fBPFMLIB_GENERIC_PMU\fR. This function can +be called at any time and upon return the library is considered initialized. +.sp +The \fBpfm_list_supported_pmus()\fR function is used to print the list of PMU types +that the library supports. The result is printed using the function provided +in the \fBpf\fR argument, which must be a printf-style function. +.SH RETURN +The function returns whether or not it was successful. +A return value of \fBPFMLIB_SUCCESS\fR indicates success, +otherwise the value is the error code. +.SH ERRORS +.B PFMLIB_ERR_NOINIT +the library has not been initialized properly. +.TP +.B PFMLIB_ERR_INVAL +invalid argument was given, most likely invalid pointer or invalid PMU type. +.TP +.B PFMLIB_ERR_NOTSUPP +the selected PMU type cannot be used on the host CPU. 
+.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_get_pmu_name_bytype.3 b/src/libpfm-3.y/docs/man3/pfm_get_pmu_name_bytype.3 new file mode 100644 index 0000000..8cb351c --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_pmu_name_bytype.3 @@ -0,0 +1 @@ +.so man3/pfm_get_pmu_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_pmu_type.3 b/src/libpfm-3.y/docs/man3/pfm_get_pmu_type.3 new file mode 100644 index 0000000..8cb351c --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_pmu_type.3 @@ -0,0 +1 @@ +.so man3/pfm_get_pmu_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_get_version.3 b/src/libpfm-3.y/docs/man3/pfm_get_version.3 new file mode 100644 index 0000000..5d166bc --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_get_version.3 @@ -0,0 +1,33 @@ +.TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" +.SH NAME +pfm_get_version \- get performance monitoring library version +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "int pfm_get_version(unsigned int *"version); +.sp +.SH DESCRIPTION +This function can be called at any time to get the revision +level of the library. The version is encoded into an +unsigned integer and returned in the \fBversion\fR argument. +A revision number is composed of two fields: a major number +and a minor number. Both can be extracted from the returned +argument using macros provided in the header file: +.TP +.B PFMLIB_MAJ_VERSION(v) +returns the major number encoded in v. +.TP +.B PFMLIB_MIN_VERSION(v) +returns the minor number encoded in v. +.SH RETURN +The function returns whether or not it was successful. +A return value of \fBPFMLIB_SUCCESS\fR indicates success, +otherwise the value is the error code. +.SH ERRORS +.TP +.B PFMLIB_ERR_INVAL +the argument is invalid, most likely a NULL pointer. 
+.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_initialize.3 b/src/libpfm-3.y/docs/man3/pfm_initialize.3 new file mode 100644 index 0000000..e52b200 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_initialize.3 @@ -0,0 +1,30 @@ +.TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" +.SH NAME +pfm_initialize \- initialize performance monitoring library +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "int pfm_initialize(void);" +.sp +.SH DESCRIPTION +This is the first function that a program using the library +\fBmust\fR call, otherwise the library will not function at all. +This function probes the host PMU and initializes the internal +state of the library. In the case of a multi-threaded application, +this function needs to be called only once, most likely by the initial +thread. + +.SH RETURN +The function returns whether or not it was successful, i.e., the +host PMU has been correctly identified and is supported. A return +value of \fBPFMLIB_SUCCESS\fR indicates success, otherwise the value is +an error code. +.SH ERRORS +.TP +.B PFMLIB_ERR_NOTSUPP +the host +PMU is not supported. 
+.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_list_supported_pmus.3 b/src/libpfm-3.y/docs/man3/pfm_list_supported_pmus.3 new file mode 100644 index 0000000..8cb351c --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_list_supported_pmus.3 @@ -0,0 +1 @@ +.so man3/pfm_get_pmu_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_pmu_is_supported.3 b/src/libpfm-3.y/docs/man3/pfm_pmu_is_supported.3 new file mode 100644 index 0000000..8cb351c --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_pmu_is_supported.3 @@ -0,0 +1 @@ +.so man3/pfm_get_pmu_name.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_regmask_and.3 b/src/libpfm-3.y/docs/man3/pfm_regmask_and.3 new file mode 100644 index 0000000..967f4eb --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_regmask_and.3 @@ -0,0 +1 @@ +.so man3/pfm_regmask_set.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_regmask_clr.3 b/src/libpfm-3.y/docs/man3/pfm_regmask_clr.3 new file mode 100644 index 0000000..967f4eb --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_regmask_clr.3 @@ -0,0 +1 @@ +.so man3/pfm_regmask_set.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_regmask_copy.3 b/src/libpfm-3.y/docs/man3/pfm_regmask_copy.3 new file mode 100644 index 0000000..967f4eb --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_regmask_copy.3 @@ -0,0 +1 @@ +.so man3/pfm_regmask_set.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_regmask_eq.3 b/src/libpfm-3.y/docs/man3/pfm_regmask_eq.3 new file mode 100644 index 0000000..967f4eb --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_regmask_eq.3 @@ -0,0 +1 @@ +.so man3/pfm_regmask_set.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_regmask_isset.3 b/src/libpfm-3.y/docs/man3/pfm_regmask_isset.3 new file mode 100644 index 0000000..967f4eb --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_regmask_isset.3 @@ -0,0 +1 @@ +.so man3/pfm_regmask_set.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_regmask_or.3 b/src/libpfm-3.y/docs/man3/pfm_regmask_or.3 new file mode 100644 index 0000000..967f4eb --- /dev/null +++ 
b/src/libpfm-3.y/docs/man3/pfm_regmask_or.3 @@ -0,0 +1 @@ +.so man3/pfm_regmask_set.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_regmask_set.3 b/src/libpfm-3.y/docs/man3/pfm_regmask_set.3 new file mode 100644 index 0000000..add5a60 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_regmask_set.3 @@ -0,0 +1,60 @@ +.TH LIBPFM 3 "Apr, 2006" "" "Linux Programmer's Manual" +.SH NAME +pfm_regmask_set, pfm_regmask_isset, pfm_regmask_clr, pfm_regmask_weight, +pfm_regmask_eq, pfm_regmask_and, pfm_regmask_or, pfm_regmask_copy +\- operations on pfmlib_regmask_t bitmasks +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "int pfm_regmask_isset(pfmlib_regmask_t *"mask ", unsigned int "b ");" +.BI "int pfm_regmask_set(pfmlib_regmask_t *"mask ", unsigned int "b ");" +.BI "int pfm_regmask_clr(pfmlib_regmask_t *"mask ", unsigned int "b ");" +.BI "int pfm_regmask_weight(pfmlib_regmask_t *"mask ", unsigned int *"w ");" +.BI "int pfm_regmask_eq(pfmlib_regmask_t *"mask1 ", pfmlib_regmask_t *"mask2 ");" +.BI "int pfm_regmask_and(pfmlib_regmask_t *"dest ", pfmlib_regmask_t *"m1 ", pfmlib_regmask_t *"m2 ");" +.BI "int pfm_regmask_or(pfmlib_regmask_t *"dest ", pfmlib_regmask_t *"m1 ", pfmlib_regmask_t *"m2 ");" +.BI "int pfm_regmask_copy(pfmlib_regmask_t *"dest ", pfmlib_regmask_t *"src ");" +.sp +.SH DESCRIPTION +This set of functions is used to operate on the \fBpfmlib_regmask_t\fR bitmasks +that are returned by certain functions or passed to the \fBpfm_dispatch_events()\fR function. + +To ensure portability, it is important that applications use \fBonly\fR the +functions specified here to access the bitmasks. It is strongly discouraged +to access the internal fields of the \fBpfmlib_regmask_t\fR structure. + +The \fBpfm_regmask_set()\fR function is used to set bit \fBb\fR in the bitmask +\fBmask\fR. + +The \fBpfm_regmask_clr()\fR function is used to clear bit \fBb\fR in the bitmask +\fBmask\fR. 
+ +The \fBpfm_regmask_isset()\fR function returns a non-zero value if \fBb\fR is set +in the bitmask \fBmask\fR. + +The \fBpfm_regmask_weight()\fR function returns in \fBw\fR the number of bits set +in the bitmask \fBmask\fR. + +The \fBpfm_regmask_eq()\fR function returns a non-zero value if the bitmasks +\fBmask1\fR and \fBmask2\fR are identical. + +The \fBpfm_regmask_and()\fR function returns in bitmask \fBdest\fR the result of +the logical AND operation between bitmask \fBm1\fR and bitmask \fBm2\fR. + +The \fBpfm_regmask_or()\fR function returns in bitmask \fBdest\fR the result of +the logical OR operation between bitmask \fBm1\fR and bitmask \fBm2\fR. + +The \fBpfm_regmask_copy()\fR function copies bitmask \fBsrc\fR into bitmask +\fBdest\fR. + +.SH RETURN +The function returns whether or not it was successful. +A return value of \fBPFMLIB_SUCCESS\fR indicates success, +otherwise the value is the error code. +.SH ERRORS +.B PFMLIB_ERR_INVAL +the bit \fBb\fR exceeds the limit supported by the library +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_regmask_weight.3 b/src/libpfm-3.y/docs/man3/pfm_regmask_weight.3 new file mode 100644 index 0000000..967f4eb --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_regmask_weight.3 @@ -0,0 +1 @@ +.so man3/pfm_regmask_set.3 diff --git a/src/libpfm-3.y/docs/man3/pfm_set_options.3 b/src/libpfm-3.y/docs/man3/pfm_set_options.3 new file mode 100644 index 0000000..845e9bb --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_set_options.3 @@ -0,0 +1,59 @@ +.TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" +.SH NAME +pfm_set_options \- set performance monitoring library debug options +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "int pfm_set_options(pfmlib_options_t *"opt); +.sp +.SH DESCRIPTION +This function can be called at any time to adjust the level +of debug of the library. In both cases, extra output will be +generated on standard error when the library gets +called. 
This can be useful to figure out how the PMC +registers are initialized for instance. +.sp +The opt argument to this function is a pointer to a +.B pfmlib_options_t +structure which is defined as follows: +.sp +.nf +typedef struct { + unsigned int pfm_debug:1; + unsigned int pfm_verbose:1; +} pfmlib_options_t; +.fi +.sp +.sp +Setting \fBpfm_debug\fR to 1 will enable debug messages whereas setting +\fBpfm_verbose\fR will enable verbose messages. + +.SH ENVIRONMENT VARIABLES +Setting library options with this function has lower priority than +with environment variables. As such, the call to this function may +not have any actual effects. A user can set the following environment +variables to control verbosity and debug output: +.TP +.B LIBPFM_VERBOSE +Enable verbose output. Value must be 0 or 1. When not set, verbosity level +can be controlled with this function. +.TP +.B LIBPFM_DEBUG +Enable debug output. Value must be 0 or 1. When not set, debug level +can be controlled with this function. +.LP +.SH RETURN +The function returns whether or not it was successful. A return +value of \fBPFMLIB_SUCCESS\fR indicates success, otherwise the +value is the error code. +.sp +When environment variables exist, they take precedence and this +function returns \fBPFMLIB_SUCCESS\fR. +.SH ERRORS +.TP +.B PFMLIB_ERR_INVAL +the argument is invalid, most likely a NULL pointer. +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/docs/man3/pfm_strerror.3 b/src/libpfm-3.y/docs/man3/pfm_strerror.3 new file mode 100644 index 0000000..dcd2234 --- /dev/null +++ b/src/libpfm-3.y/docs/man3/pfm_strerror.3 @@ -0,0 +1,27 @@ +.TH LIBPFM 3 "November, 2003" "" "Linux Programmer's Manual" +.SH NAME +pfm_strerror \- return string describing error code +.SH SYNOPSIS +.nf +.B #include +.sp +.BI "char *pfm_strerror(int "code); +.sp +.SH DESCRIPTION +This function returns a string which describes the libpfm error +value in \fBcode\fR. 
The string returned by the call must +be considered as read only. + +The function must \fBonly\fR be used on libpfm calls. It is not +designed to handle OS system call errors. + +.SH RETURN +The function returns a pointer to the string describing +the error code. If code is invalid then the default +error message is returned. +.SH ERRORS +If the error code is invalid, then the function returns +a pointer to a string which says "unknown error code". +.SH AUTHOR +Stephane Eranian +.PP diff --git a/src/libpfm-3.y/examples_ia64_v2.0/Makefile b/src/libpfm-3.y/examples_ia64_v2.0/Makefile new file mode 100644 index 0000000..d10a49e --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/Makefile @@ -0,0 +1,73 @@ +# +# Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. +# Contributed by Stephane Eranian +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# This file is part of libpfm, a performance monitoring support library for +# applications on Linux. 
+# +# +# IMPORTANT: compatibility examples are meant FOR IA-64 host ONLY! +# +TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi)/.. + +include ../config.mk +include ../rules.mk + +CFLAGS += -pthread +LDFLAGS+=-static + +LIBS += -lm + +INCDIR=-I$(PFMINCDIR) + +TARGETS=self task task_attach task_attach_timeout syst \ + notify_self notify_self2 notify_self3 \ + whichpmu showreset multiplex\ + task_smpl notify_self_fork + +SRCS +=ita_rr.c ita_irr.c ita_opcode.c ita_btb.c ita_dear.c +TARGETS +=ita_rr ita_irr ita_opcode ita_btb ita_dear + +SRCS +=ita2_opcode.c ita2_rr.c ita2_irr.c ita2_dear.c +TARGETS +=ita2_dear ita2_btb ita2_opcode ita2_rr ita2_irr + +SRCS +=mont_opcode.c mont_rr.c mont_irr.c mont_dear.c mont_etb.c +TARGETS +=mont_dear mont_opcode mont_rr mont_irr mont_etb + +PFMLIB=$(PFMLIBDIR)/libpfm.a + +all: $(TARGETS) + +notify_self2.o: notify_self2.c + $(CC) $(INCDIR) $(CFLAGS) -D_GNU_SOURCE -c $*.c + +$(TARGETS): %:%.o $(PFMLIB) + $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $^ $(LIBS) +clean: + $(RM) -f *.o $(TARGETS) *~ + +distclean: clean + +depend: + $(MKDEP) $(INCDIR) $(CFLAGS) $(SRCS) + +# +# examples are installed as part of the RPM install, typically in /usr/share/doc/libpfm-X.Y/ +# diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita2_btb.c b/src/libpfm-3.y/examples_ia64_v2.0/ita2_btb.c new file mode 100644 index 0000000..1ca0aaf --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita2_btb.c @@ -0,0 +1,542 @@ +/* + * ita2_btb.c - example of how use the BTB with the Itanium 2 PMU + * + * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +typedef pfm_default_smpl_hdr_t btb_hdr_t; +typedef pfm_default_smpl_entry_t btb_entry_t; +typedef pfm_default_smpl_ctx_arg_t btb_ctx_arg_t; +#define BTB_FMT_UUID PFM_DEFAULT_SMPL_UUID + +static pfm_uuid_t buf_fmt_id = BTB_FMT_UUID; + + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * The BRANCH_EVENT is increment by 1 for each branch event. Such event is composed of + * two entries in the BTB: a source and a target entry. The BTB is full after 4 branch + * events. 
+ */ +#define SMPL_PERIOD (4UL*256) + +/* + * We use a small buffer size to exercise the overflow handler + */ +#define SMPL_BUF_NENTRIES 64 + +#define M_PMD(x) (1UL<<(x)) +#define BTB_REGS_MASK (M_PMD(8)|M_PMD(9)|M_PMD(10)|M_PMD(11)|M_PMD(12)|M_PMD(13)|M_PMD(14)|M_PMD(15)|M_PMD(16)) + +static void *smpl_vaddr; +static unsigned int entry_size; +static int id; + +#if defined(__ECC) && defined(__INTEL_COMPILER) + +/* if you do not have this file, your compiler is too old */ +#include + +#define hweight64(x) _m64_popcnt(x) + +#elif defined(__GNUC__) + +static __inline__ int +hweight64 (unsigned long x) +{ + unsigned long result; + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (x)); + return (int)result; +} + +#else +#error "you need to provide inline assembly from your compiler" +#endif + + + +/* + * we don't use static to make sure the compiler does not inline the function + */ +long func1(void) { return 0;} + +long +do_test(unsigned long loop) +{ + long sum = 0; + + while(loop--) { + if (loop & 0x1) + sum += func1(); + else + sum += loop; + } + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * print content of sampling buffer + * + * XXX: using stdio to print from a signal handler is not safe with multi-threaded + * applications + */ +#define safe_printf printf +static void +show_btb_reg(int j, pfm_ita2_pmd_reg_t reg, pfm_ita2_pmd_reg_t pmd16) +{ + unsigned long bruflush, b1; + int is_valid = reg.pmd8_15_ita2_reg.btb_b == 0 && reg.pmd8_15_ita2_reg.btb_mp == 0 ? 0 :1; + + b1 = (pmd16.pmd_val >> (4 + 4*(j-8))) & 0x1; + bruflush = (pmd16.pmd_val >> (5 + 4*(j-8))) & 0x1; + + safe_printf("\tPMD%-2d: 0x%016lx b=%d mp=%d bru=%ld b1=%ld valid=%c\n", + j, + reg.pmd_val, + reg.pmd8_15_ita2_reg.btb_b, + reg.pmd8_15_ita2_reg.btb_mp, + bruflush, b1, + is_valid ? 
'Y' : 'N'); + + if (!is_valid) return; + + if (reg.pmd8_15_ita2_reg.btb_b) { + unsigned long addr; + + + addr = (reg.pmd8_15_ita2_reg.btb_addr+b1)<<4; + + addr |= reg.pmd8_15_ita2_reg.btb_slot < 3 ? reg.pmd8_15_ita2_reg.btb_slot : 0; + + safe_printf("\t Source Address: 0x%016lx\n" + "\t Taken=%c Prediction: %s\n\n", + addr, + reg.pmd8_15_ita2_reg.btb_slot < 3 ? 'Y' : 'N', + reg.pmd8_15_ita2_reg.btb_mp ? "FE Failure" : + bruflush ? "BE Failure" : "Success"); + } else { + safe_printf("\t Target Address: 0x%016lx\n\n", + ((unsigned long)reg.pmd8_15_ita2_reg.btb_addr<<4)); + } +} + + +static void +show_btb(pfm_ita2_pmd_reg_t *btb, pfm_ita2_pmd_reg_t *pmd16) +{ + int i, last; + + + i = (pmd16->pmd16_ita2_reg.btbi_full) ? pmd16->pmd16_ita2_reg.btbi_bbi : 0; + last = pmd16->pmd16_ita2_reg.btbi_bbi; + + safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", i, last,pmd16->pmd16_ita2_reg.btbi_bbi, pmd16->pmd16_ita2_reg.btbi_full); + do { + show_btb_reg(i+8, btb[i], *pmd16); + i = (i+1) % 8; + } while (i != last); +} + + +void +process_smpl_buffer(void) +{ + btb_hdr_t *hdr; + btb_entry_t *ent; + unsigned long pos; + unsigned long smpl_entry = 0; + pfm_ita2_pmd_reg_t *reg, *pmd16; + unsigned long i; + int ret; + static unsigned long last_ovfl = ~0UL; + + + hdr = (btb_hdr_t *)smpl_vaddr; + + /* + * check that we are not diplaying the previous set of samples again. + * Required to take care of the last batch of samples. 
+ */ + if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + /* + * walk through all the entries recored in the buffer + */ + for(i=0; i < hdr->hdr_count; i++) { + + ret = 0; + + ent = (btb_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry header) + */ + reg = (pfm_ita2_pmd_reg_t*)(ent+1); + + /* + * in this particular example, we have pmd8-pmd15 has the BTB. We have also + * included pmd16 (BTB index) has part of the registers to record. This trick + * allows us to get the index to decode the sequential order of the BTB. + * + * Recorded registers are always recorded in increasing order. So we know + * that pmd16 is at a fixed offset (+8*sizeof(unsigned long)) from pmd8. 
+ */ + pmd16 = reg+8; + show_btb(reg, pmd16); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + /* dangerous: printf() is not async-signal-safe */ + printf("Notification received\n"); + + process_smpl_buffer(); + + /* + * And resume monitoring + */ + if (perfmonctl(id, PFM_RESTART,NULL, 0) == -1) { + perror("PFM_RESTART"); + exit(1); + } +} + + +int +main(void) +{ + int ret; + int type = 0; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita2_input_param_t ita2_inp; + btb_ctx_arg_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); /* reg_flags is OR-ed into below, so it must start from 0 */ + memset(ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); /* only load_pid is assigned explicitly */ + + /* + * prepare parameters to library. The BTB is an Itanium 2 specific + * feature, so a model-specific input block (ita2_inp) is passed as well.
+ */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita2_inp,0, sizeof(ita2_inp)); + + /* + * Before calling pfm_dispatch_events(), we must specify what kind + * of branches we want to capture. We are interested in all the mispredicted branches, + * therefore we set the various fields of the BTB config to: + */ + ita2_inp.pfp_ita2_btb.btb_used = 1; + + ita2_inp.pfp_ita2_btb.btb_ds = 0; + ita2_inp.pfp_ita2_btb.btb_tm = 0x3; + ita2_inp.pfp_ita2_btb.btb_ptm = 0x3; + ita2_inp.pfp_ita2_btb.btb_ppm = 0x3; + ita2_inp.pfp_ita2_btb.btb_brt = 0x0; + ita2_inp.pfp_ita2_btb.btb_plm = PFM_PLM3; + + + /* + * The sampling period is counted in occurrences of BRANCH_EVENT, + * so we program a counting monitor with that + * event. + */ + if (pfm_find_full_event("BRANCH_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) { + fatal_error("cannot find event BRANCH_EVENT\n"); + } + + /* + * set the (global) privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + /* + * We initialize the format specific information. + * The format is identified by its UUID which must be copied + * into the ctx_smpl_buf_id field. + */ + memcpy(ctx[0].ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded.
+ */ + ctx[0].buf_arg.buf_size = 8192; + + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + printf("Sampling buffer mapped at %p\n", ctx[0].ctx_arg.ctx_smpl_vaddr); + + smpl_vaddr = ctx[0].ctx_arg.ctx_smpl_vaddr; + + /* + * extract our file descriptor + */ + id = ctx[0].ctx_arg.ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS come first, that's why this loop + * is safe even when extra PMC are needed to support a particular event. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * indicate we want notification when buffer is full + */ + pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * Now prepare the argument to initialize the PMD and the sampling period + * We know we use only one PMD in this case, therefore pd[0] corresponds + * to our first event which is our sampling period.
+ */ + pd[0].reg_value = (~0UL) - SMPL_PERIOD +1; + pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD +1; + pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD +1; + + /* + * indicate PMD to collect in each sample + */ + pc[0].reg_smpl_pmds[0] = BTB_REGS_MASK; + + /* + * compute size of each sample: fixed-size header + all our BTB regs + */ + entry_size = sizeof(btb_entry_t)+(hweight64(BTB_REGS_MASK)<<3); + + /* + * When our counter overflows, we want the BTB index to be reset, so that we keep + * in sync. This is required to make it possible to interpret pmd16 on overflow + * to avoid repeating the same branch several times. + */ + pc[0].reg_reset_pmds[0] = M_PMD(16); + + /* + * reset pmd16 (BTB index), short and long reset value are zero as well (pd[] was cleared above) + * + * We use slot 1 of our pd[] array for this. + */ + pd[1].reg_num = 16; + pd[1].reg_value = 0UL; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + /* + * we use 2 = 1 for the branch_event + 1 for the reset of PMD16.
+ */ + if (perfmonctl(id, PFM_WRITE_PMDS, pd, 2) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) { + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + } + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) { + fatal_error("cannot setown: %s\n", strerror(errno)); + } + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(100000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer. Note that the buffer may not be full at this point. + * + */ + + process_smpl_buffer(); + + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita2_dear.c b/src/libpfm-3.y/examples_ia64_v2.0/ita2_dear.c new file mode 100644 index 0000000..ee1dbea --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita2_dear.c @@ -0,0 +1,458 @@ +/* + * ita2_dear.c - example of how use the D-EAR with the Itanium 2 PMU + * + * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define SMPL_PERIOD (40) + +#define EVENT_NAME "data_ear_cache_lat4" + +#define M_PMD(x) (1UL<<(x)) +#define DEAR_REGS_MASK (M_PMD(2)|M_PMD(3)|M_PMD(17)) + +typedef pfm_default_smpl_hdr_t dear_hdr_t; +typedef pfm_default_smpl_entry_t dear_entry_t; +typedef pfm_default_smpl_ctx_arg_t dear_ctx_t; +#define DEAR_FMT_UUID PFM_DEFAULT_SMPL_UUID + +static pfm_uuid_t buf_fmt_id = DEAR_FMT_UUID; + + +static void *smpl_vaddr; +static unsigned long entry_size; +static int id; + +#if defined(__ECC) && defined(__INTEL_COMPILER) + +/* if you do not have this file, your compiler is too old */ +#include + +#define hweight64(x) _m64_popcnt(x) + +#elif defined(__GNUC__) + +static __inline__ int +hweight64 (unsigned long x) +{ + unsigned long result; + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (x)); + return (int)result; +} + +#else +#error "you need to provide inline assembly from your compiler" +#endif + + + +long +do_test(unsigned long size) +{ + unsigned long i, sum = 0; + int *array; + + printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); + array = (int *)malloc(size * sizeof(int)); + if (array == NULL ) { + printf("line = %d No memory available!\n", __LINE__); + exit(1); + } + for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + + /* + * walk through all the entries recored in the buffer + */ + for(i=0; i < hdr->hdr_count; i++) { + + ret = 0; + + ent = (dear_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first 
recorded register (always contiguous with entry header) + */ + reg = (pfm_ita2_pmd_reg_t*)(ent+1); + + safe_printf("PMD2 : 0x%016lx\n", reg->pmd_val); + + reg++; + + safe_printf("PMD3 : 0x%016lx, latency %u\n", + reg->pmd_val, + reg->pmd3_ita2_reg.dear_latency); + + reg++; + + safe_printf("PMD17: 0x%016lx, valid %c, address 0x%016lx\n", + reg->pmd_val, + reg->pmd17_ita2_reg.dear_vl ? 'Y': 'N', + (reg->pmd17_ita2_reg.dear_iaddr << 4) | + (unsigned long)reg->pmd17_ita2_reg.dear_slot); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + /* dangerous */ + printf("Notification received\n"); + + process_smpl_buffer(); + /* + * And resume monitoring + */ + if (perfmonctl(id, PFM_RESTART,NULL, 0) == -1) { + perror("PFM_RESTART"); + exit(1); + } +} + +int +main(void) +{ + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_event_t ev; + dear_ctx_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + int ret, type = 0; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ + 
pfm_set_options(&pfmlib_options); + + + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + /* + * To count the number of occurence of this instruction, we must + * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 + * event. + */ + if (pfm_find_full_event(EVENT_NAME, &ev) != PFMLIB_SUCCESS) { + fatal_error("cannot find event %s\n", EVENT_NAME); + } + + /* + * set the (global) privilege mode: + * PFM_PLM0 : kernel level only + */ + inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * propagate the event descriptor + */ + inp.pfp_events[0] = ev; + + /* + * let the library figure out the values for the PMCS + * + * We use all global settings for this EAR. + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * prepare context structure. + * + * format specific parameters MUST be concatenated to the regular + * pfarg_context_t structure. For convenience, the default sampling + * format provides a data structure that already combines the pfarg_context_t + * with what is needed fot this format. + */ + + /* + * We initialize the format specific information. + * The format is identified by its UUID which must be copied + * into the ctx_buf_fmt_id field. + */ + memcpy(ctx[0].ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. 
+ */ + ctx[0].buf_arg.buf_size = 4096; + + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract the file descriptor we will use to + * identify this newly created context + */ + id = ctx[0].ctx_arg.ctx_fd; + + printf("Sampling buffer mapped at %p\n", ctx[0].ctx_arg.ctx_smpl_vaddr); + + smpl_vaddr = ctx[0].ctx_arg.ctx_smpl_vaddr; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS come first, that's why this loop + * is safe even when extra PMC are needed to support a particular event. 
+ */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * indicate we want notification when buffer is full + */ + pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * indicate which PMD to include in the sample + */ + pc[0].reg_smpl_pmds[0] = DEAR_REGS_MASK; + + /* + * compute size of each sample: fixed-size header + all our DEAR regs + */ + entry_size = sizeof(dear_entry_t)+(hweight64(DEAR_REGS_MASK)<<3); + + /* + * initialize the PMD and the sampling period + */ + pd[0].reg_value = (~0UL) - SMPL_PERIOD +1; + pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD +1; + pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD +1; + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more thann coutning monitors. + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + /* + * attach context to stopped task + */ + load_args.load_pid = getpid(); + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) { + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + } + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) { + fatal_error("cannot setown: %s\n", strerror(errno)); + } + + /* + * Let's roll now. 
+ */ + pfm_self_start(id); + + do_test(10000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer, i.e. which may not be full + */ + process_smpl_buffer(); + + /* + * let's stop this now + */ + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita2_irr.c b/src/libpfm-3.y/examples_ia64_v2.0/ita2_irr.c new file mode 100644 index 0000000..85dfa3c --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita2_irr.c @@ -0,0 +1,383 @@ +/* + * ita2_irr.c - example of how to use code range restriction with the Itanium2 PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define VECTOR_SIZE 1000000UL + +typedef struct { + char *event_name; + unsigned long expected_value; +} event_desc_t; + +static event_desc_t event_list[]={ + { "fp_ops_retired", VECTOR_SIZE<<1 }, + { NULL, 0UL } +}; + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + + +void +saxpy(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } +} + +void +saxpy2(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } +} + + + +static int +do_test(void) +{ + unsigned long size; + double *a, *b, *c; + + size = VECTOR_SIZE; + + a = malloc(size*sizeof(double)); + b = malloc(size*sizeof(double)); + c = malloc(size*sizeof(double)); + + if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); + + memset(a, 0, size*sizeof(double)); + memset(b, 0, size*sizeof(double)); + memset(c, 0, size*sizeof(double)); + + saxpy(a,b,c, size); + saxpy2(a,b,c, size); + + return 0; +} + +int +main(int argc, char **argv) +{ + event_desc_t *p; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita2_input_param_t ita2_inp; + pfmlib_ita2_output_param_t ita2_outp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_dbreg_t ibrs[8]; + pfarg_context_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct fd { /* function descriptor */ + unsigned long addr; + unsigned long gp; + } *fd; + unsigned int i; + int id; + char
name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 1; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Compute the range we are interested in + * + * On IA-64, the function pointer does not point directly + * to the function but to a descriptor which contains two + * unsigned longs: the first one is the actual start address + * of the function, the second is the gp (global pointer) + * to load into r1 before jumping into the function. Unless + * we're jumping into a shared library the gp is the same as + * the current gp. + * + * In this artificial example, we also rely on the compiler/linker + * NOT reordering code layout. We depend on saxpy2() being just + * after saxpy().
+ * + */ + fd = (struct fd *)saxpy; + range_start = fd->addr; + + fd = (struct fd *)saxpy2; + range_end = fd->addr; + + memset(pc, 0, sizeof(pc)); + memset(pd, 0, sizeof(pd)); + memset(ctx, 0, sizeof(ctx)); + memset(ibrs,0, sizeof(ibrs)); + memset(&load_args,0, sizeof(load_args)); + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita2_inp,0, sizeof(ita2_inp)); + memset(&ita2_outp,0, sizeof(ita2_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; p->event_name ; i++, p++) { + if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("cannot find %s event\n", p->event_name); + } + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the code range we are interested in. The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields on return from the library call. + * + * Upon return, the rr_br array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used. + * + * In the case of code range restriction on Itanium 2, the library will try to use the fine + * mode first and then it will default to using multiple pairs to cover the range.
+ */ + + ita2_inp.pfp_ita2_irange.rr_used = 1; /* indicate we use code range restriction */ + ita2_inp.pfp_ita2_irange.rr_limits[0].rr_start = range_start; + ita2_inp.pfp_ita2_irange.rr_limits[0].rr_end = range_end; + + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, &ita2_outp)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * print offsets + */ + printf("code range : [0x%016lx-0x%016lx)\n" + "start_offset:-0x%lx end_offset:+0x%lx\n" + "%d pairs of debug registers used\n", + range_start, + range_end, + ita2_outp.pfp_ita2_irange.rr_infos[0].rr_soff, + ita2_outp.pfp_ita2_irange.rr_infos[0].rr_eoff, + ita2_outp.pfp_ita2_irange.rr_nbr_used >> 1); + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract our file descriptor + */ + id = ctx[0].ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS come first, that's why this loop + * is safe even when extra PMC are needed to support a particular event.
+ */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * propagate the setup for the debug registers from the library to the arguments + * to the perfmonctl() syscall. The library does not know the type of the syscall + * anymore. + */ + for (i=0; i < ita2_outp.pfp_ita2_irange.rr_nbr_used; i++) { + ibrs[i].dbreg_num = ita2_outp.pfp_ita2_irange.rr_br[i].reg_num; + ibrs[i].dbreg_value = ita2_outp.pfp_ita2_irange.rr_br[i].reg_value; + } + /* + * Program the code debug registers. + * + * IMPORTANT: programming the debug register MUST always be done before the PMCs + * otherwise the kernel will fail on PFM_WRITE_PMCS. This is for security reasons. + */ + if (perfmonctl(id, PFM_WRITE_IBRS, ibrs, ita2_outp.pfp_ita2_irange.rr_nbr_used) == -1) { + fatal_error("child: perfmonctl error PFM_WRITE_IBRS errno %d\n",errno); + } + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("child: perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + + if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("child: perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + + /* + * Let's roll now. + * + * We run two distinct copies of the same function but we restrict measurement + * to the first one (saxpy). Therefore the expected count is half what you would + * get if code range restriction was not used.
The core loop in both cases uses + * two floating point operations per iteration. + */ + pfm_self_start(id); + + do_test(); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno); + } + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we print pd[i].reg_num, copied from pc[] earlier, to show where event i was allocated. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, event_list[i].expected_value); + } + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita2_opcode.c b/src/libpfm-3.y/examples_ia64_v2.0/ita2_opcode.c new file mode 100644 index 0000000..aa94689 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita2_opcode.c @@ -0,0 +1,286 @@ +/* + * ita2_opcode.c - example of how to use the opcode matcher with the Itanium2 PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * we don't use static to make sure the compiler does not inline the function + */ +int +do_test(unsigned long loop) +{ + unsigned long sum = 0; + while(loop--) sum += loop; + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...)
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(void) +{ + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita2_input_param_t ita2_inp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_context_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + int ret; + int type = 0; + int id; + unsigned int i; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with the %s PMU\n", model); + } + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita2_inp,0, sizeof(ita2_inp)); + + /* + * We indicate that we are using the PMC8 opcode matcher. This is required + * otherwise the library add PMC8 to the list of PMC to program during + * pfm_dispatch_events(). + */ + ita2_inp.pfp_ita2_pmc8.opcm_used = 1; + + /* + * We want to match all the br.cloop in our test function. + * This branch is an IP-relative branch for which the major + * opcode (bits [40-37]=4) and the btype field is 5 (which represents + * bits[6-8]) so it is included in the match/mask fields of PMC8. + * It is necessarily in a B slot.
+ * + * We don't care which operands are used with br.cloop therefore + * the mask field of pmc8 is set such that only the 4 bits of the + * opcode and 3 bits of btype must match exactly. This is accomplished by + * clearing the top 4 bits and bits [6-8] of the mask field and setting the + * remaining bits. Similarly, the match field only has the opcode value and btype + * set according to the encoding of br.cloop, the + * remaining bits are zero. Bit 60 of PMC8 is set to indicate + * that we look only in B slots (this is the only possibility for + * this instruction anyway). + * + * So the binary representation of the value for PMC8 is as follows: + * + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * ---------------------------------------------------------------- + * 0001010000000000000000101000000000000011111111111111000111111000 + * + * which yields a value of 0x1400028003fff1f8. + * + * Depending on the level of optimization to compile this code, it may + * be that the count reported could be zero, if the compiler uses a br.cond + * instead of br.cloop. + * + * NOTE(review): the constant programmed below ends in 0xfa, i.e. the value above with 0x2 OR-ed in; presumably that is the always-set bit mentioned next -- confirm against the PMU manual. + * The 0x1 sets the ig_ad field to make sure we ignore any range restriction. + * Also bit 2 must always be set + */ + ita2_inp.pfp_ita2_pmc8.pmc_val = 0x1400028003fff1fa; + + /* + * To count the number of occurrences of this instruction, we must + * program a counting monitor with the IA64_TAGGED_INST_RETIRED_IBRP0_PMC8 + * event.
+ */ + if (pfm_find_full_event("IA64_TAGGED_INST_RETIRED_IBRP0_PMC8", &inp.pfp_events[0]) != PFMLIB_SUCCESS) { + fatal_error("cannot find event IA64_TAGGED_INST_RETIRED_IBRP0_PMC8\n"); + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract the unique identifier for our context, a regular file descriptor + */ + id = ctx[0].ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS come first, that's why this loop + * is safe even when extra PMC are needed to support a particular event.
+ */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + printf("event_count=%d id=%d\n", inp.pfp_event_count, id); + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(100UL); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error READ_PMDS errno %d\n",errno); + } + + /* + * print the results + */ + pfm_get_full_event_name(&inp.pfp_events[0], name, MAX_EVT_NAME_LEN); + printf("PMD%u %20lu %s\n", + pd[0].reg_num, + pd[0].reg_value, + name); + + if (pd[0].reg_value != 0) + printf("compiler used br.cloop\n"); + else + printf("compiler did not use br.cloop\n"); + + /* + * let's stop this now + */ + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita2_rr.c b/src/libpfm-3.y/examples_ia64_v2.0/ita2_rr.c new file mode 100644 index 0000000..84455df --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita2_rr.c @@ -0,0 +1,402 @@ +/* + * ita2_rr.c - example of how to use data range restriction with the Itanium2 PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if defined(__ECC) && defined(__INTEL_COMPILER) + +/* if you do not have this file, your compiler is too old */ +#include + +#define clear_psr_ac() __rum(1UL<<3) + +#elif defined(__GNUC__) + +static inline void +clear_psr_ac(void) +{ + __asm__ __volatile__("rum psr.ac;;" ::: "memory" ); +} +#else +#error "You need to define clear_psr_ac() for your compiler" +#endif + + + +#define TEST_DATA_COUNT 16 +#define N_LOOP 100000000UL + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * here we capture only misaligned_loads because it cannot + * be measured with misaligned_stores_retired at the same time + */ +static char *event_list[]={ + "misaligned_loads_retired", + NULL +}; + + +typedef union { + unsigned long l_tab[2]; + unsigned int i_tab[4]; + unsigned short s_tab[8]; + unsigned char c_tab[16]; +} test_data_t; + +static int +do_test(test_data_t *data) +{ + unsigned int *l, v; + + l = (unsigned int *)(data->c_tab+1); + + if (((unsigned long)l & 0x1) == 0) { + printf("Data is not unaligned, can't run test\n"); + return -1; + } + + v = *l; + v++; + *l = v; + + return 0; +} + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(int argc, char **argv) +{ + char **p; + test_data_t *test_data, *test_data_fake; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita2_input_param_t ita2_inp; + pfmlib_ita2_output_param_t ita2_outp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_dbreg_t dbrs[8]; + pfarg_context_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + unsigned int i; + int id; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + /* + * now let's allocate the data structure we will be monitoring + */ + test_data = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data == NULL) { + fatal_error("cannot allocate test data structure"); + } + test_data_fake = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data_fake == NULL) { + fatal_error("cannot allocate test data structure"); + } + /* + * Compute the range we are interested in + */ + range_start = (unsigned long)test_data; + range_end = range_start + sizeof(test_data_t)*TEST_DATA_COUNT; + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(dbrs, 0, 
sizeof(dbrs)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita2_inp,0, sizeof(ita2_inp)); + memset(&ita2_outp,0, sizeof(ita2_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; *p ; i++, p++) { + if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event\n", *p); + } + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the data range we are interested in. The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields in rr_infos on return from the library call. + * + * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used. 
+ */ + + ita2_inp.pfp_ita2_drange.rr_used = 1; + ita2_inp.pfp_ita2_drange.rr_limits[0].rr_start = range_start; + ita2_inp.pfp_ita2_drange.rr_limits[0].rr_end = range_end; + + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, &ita2_outp)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + printf("data range : [0x%016lx-0x%016lx): %d pair of debug registers used\n" + "start_offset:-0x%lx end_offset:+0x%lx\n", + range_start, + range_end, + ita2_outp.pfp_ita2_drange.rr_nbr_used >> 1, + ita2_outp.pfp_ita2_drange.rr_infos[0].rr_soff, + ita2_outp.pfp_ita2_drange.rr_infos[0].rr_eoff); + + printf("fake data range: [0x%016lx-0x%016lx)\n", + (unsigned long)test_data_fake, + (unsigned long)test_data_fake+sizeof(test_data_t)*TEST_DATA_COUNT); + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract the unique identifier for our context, a regular file descriptor + */ + id = ctx[0].ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. 
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS come first, that's why this loop + * is safe even when extra PMC are needed to support a particular event. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * propagate the setup for the debug registers from the library to the arguments + * to the perfmonctl() syscall. The library does not know the type of the syscall + * anymore. + */ + for (i=0; i < ita2_outp.pfp_ita2_drange.rr_nbr_used; i++) { + dbrs[i].dbreg_num = ita2_outp.pfp_ita2_drange.rr_br[i].reg_num; + dbrs[i].dbreg_value = ita2_outp.pfp_ita2_drange.rr_br[i].reg_value; + } + + /* + * Program the data debug registers. + * + * IMPORTANT: programming the debug register MUST always be done before the PMCs + * otherwise the kernel will fail on PFM_WRITE_PMCS. This is for security reasons. + */ + if (perfmonctl(id, PFM_WRITE_DBRS, dbrs, ita2_outp.pfp_ita2_drange.rr_nbr_used) == -1) { + fatal_error( "child: perfmonctl error PFM_WRITE_DBRS errno %d\n",errno); + } + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than coutning monitors. 
+ */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("child: perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + + if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error( "child: perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + + /* + * Let's make sure that the hardware does the unaligned accesses (do not use the + * kernel software handler otherwise the PMU won't see the unaligned fault). + */ + clear_psr_ac(); + + /* + * Let's roll now. + * + * The idea behind this test is to have two dynamically allocated data structures + * which are access in a unaligned fashion. But we want to capture only the unaligned + * accesses on one of the two. So the debug registers are programmed to cover the + * first one ONLY. Then we activate monotoring and access the two data structures. + * This is an artificial example just to demonstrate how to use data address range + * restrictions. + */ + pfm_self_start(id); + + for(i=0; i < N_LOOP; i++) { + do_test(test_data); + do_test(test_data_fake); + } + + pfm_self_stop(id); + + /* + * now read the results + */ + if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno); + } + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + * + * For this example, we expect to see a value of 1 for misaligned loads. 
+ * But it can be two when the test_data and test_data_fake + * are allocated very close from each other and the range created with the debug + * registers is larger then test_data. + * + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, N_LOOP); + + if (pd[i].reg_value != N_LOOP) { + printf("error: Result should be 1 for %s\n", name); + break; + } + } + /* + * let's stop this now + */ + close(id); + free(test_data); + free(test_data_fake); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita_btb.c b/src/libpfm-3.y/examples_ia64_v2.0/ita_btb.c new file mode 100644 index 0000000..406c56a --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita_btb.c @@ -0,0 +1,529 @@ +/* + * ita_btb.c - example of how use the BTB with the Itanium PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +typedef pfm_default_smpl_hdr_t btb_hdr_t; +typedef pfm_default_smpl_entry_t btb_entry_t; +typedef pfm_default_smpl_ctx_arg_t btb_ctx_arg_t; +#define BTB_FMT_UUID PFM_DEFAULT_SMPL_UUID + +static pfm_uuid_t buf_fmt_id = BTB_FMT_UUID; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * The BRANCH_EVENT is increment by 1 for each branch event. Such event is composed of + * two entries in the BTB: a source and a target entry. The BTB is full after 4 branch + * events. 
+ */ +#define SMPL_PERIOD (4UL*256) + +/* + * We use a small buffer size to exercise the overflow handler + */ +#define SMPL_BUF_NENTRIES 64 + +#define M_PMD(x) (1UL<<(x)) +#define BTB_REGS_MASK (M_PMD(8)|M_PMD(9)|M_PMD(10)|M_PMD(11)|M_PMD(12)|M_PMD(13)|M_PMD(14)|M_PMD(15)|M_PMD(16)) + +static void *smpl_vaddr; +static unsigned int entry_size; +static int id; + +#if defined(__ECC) && defined(__INTEL_COMPILER) + +/* if you do not have this file, your compiler is too old */ +#include + +#define hweight64(x) _m64_popcnt(x) + +#elif defined(__GNUC__) + +static __inline__ int +hweight64 (unsigned long x) +{ + unsigned long result; + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (x)); + return (int)result; +} + +#else +#error "you need to provide inline assembly from your compiler" +#endif + +/* + * we don't use static to make sure the compiler does not inline the function + */ +long func1(void) { return 0;} + +long +do_test(unsigned long loop) +{ + long sum = 0; + + while(loop--) { + if (loop & 0x1) + sum += func1(); + else + sum += loop; + } + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * print content of sampling buffer + * + * XXX: using stdio to print from a signal handler is not safe with multi-threaded + * applications + */ +#define safe_printf printf + +static int +show_btb_reg(int j, pfm_ita_pmd_reg_t reg) +{ + int ret; + int is_valid = reg.pmd8_15_ita_reg.btb_b == 0 && reg.pmd8_15_ita_reg.btb_mp == 0 ? 0 :1; + + ret = safe_printf("\tPMD%-2d: 0x%016lx b=%d mp=%d valid=%c\n", + j, + reg.pmd_val, + reg.pmd8_15_ita_reg.btb_b, + reg.pmd8_15_ita_reg.btb_mp, + is_valid ? 'Y' : 'N'); + + if (!is_valid) return ret; + + if (reg.pmd8_15_ita_reg.btb_b) { + unsigned long addr; + + addr = reg.pmd8_15_ita_reg.btb_addr<<4; + addr |= reg.pmd8_15_ita_reg.btb_slot < 3 ? 
reg.pmd8_15_ita_reg.btb_slot : 0; + + ret = safe_printf("\t Source Address: 0x%016lx\n" + "\t Taken=%c Prediction: %s\n\n", + addr, + reg.pmd8_15_ita_reg.btb_slot < 3 ? 'Y' : 'N', + reg.pmd8_15_ita_reg.btb_mp ? "Failure" : "Success"); + } else { + ret = safe_printf("\t Target Address: 0x%016lx\n\n", + (unsigned long)reg.pmd8_15_ita_reg.btb_addr<<4); + } + return ret; +} + +static void +show_btb(pfm_ita_pmd_reg_t *btb, pfm_ita_pmd_reg_t *pmd16) +{ + int i, last; + + + i = (pmd16->pmd16_ita_reg.btbi_full) ? pmd16->pmd16_ita_reg.btbi_bbi : 0; + last = pmd16->pmd16_ita_reg.btbi_bbi; + + safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", i, last,pmd16->pmd16_ita_reg.btbi_bbi, pmd16->pmd16_ita_reg.btbi_full); + do { + show_btb_reg(i+8, btb[i]); + i = (i+1) % 8; + } while (i != last); +} + + +static void +process_smpl_buffer(void) +{ + btb_hdr_t *hdr; + btb_entry_t *ent; + unsigned long pos; + unsigned long smpl_entry = 0; + pfm_ita_pmd_reg_t *reg, *pmd16; + unsigned long i; + int ret; + static unsigned long last_ovfl = ~0UL; + + + hdr = (btb_hdr_t *)smpl_vaddr; + + /* + * check that we are not diplaying the previous set of samples again. + * Required to take care of the last batch of samples. + */ + if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + /* + * walk through all the entries recored in the buffer + */ + for(i=0; i < hdr->hdr_count; i++) { + + ret = 0; + + ent = (btb_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry header) + */ + reg = (pfm_ita_pmd_reg_t*)(ent+1); + + /* + * in this particular example, we have pmd8-pmd15 has the BTB. 
We have also + * included pmd16 (BTB index) as part of the registers to record. This trick + * allows us to get the index to decode the sequential order of the BTB. + * + * Recorded registers are always recorded in increasing order. So we know + * that pmd16 is at a fixed offset (+8*sizeof(unsigned long)) from pmd8. + */ + pmd16 = reg+8; + show_btb(reg, pmd16); + + /* + * move to next entry + */ + pos += entry_size; + } +} +/* NOTE(review): process_smpl_buffer() never assigns last_ovfl, so its duplicate-sample check cannot trigger -- confirm intent. */ +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + /* dangerous: stdio is not async-signal-safe */ + printf("Notification received\n"); + + process_smpl_buffer(); + + /* + * And resume monitoring + */ + if (perfmonctl(id, PFM_RESTART,NULL, 0) == -1) { + perror("PFM_RESTART"); + exit(1); + } +} + + +int +main(void) +{ + int ret; + int type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita_input_param_t ita_inp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + btb_ctx_arg_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + memset(pc, 0, sizeof(pc)); /* pc[0].reg_flags is OR-ed into later, so pc must start zeroed */ + memset(&load_args, 0, sizeof(load_args)); /* only load_pid is assigned explicitly before PFM_LOAD_CONTEXT */ + memset(pd, 0, sizeof(pd)); + memset(ctx, 0, sizeof(ctx)); + memset(&inp, 0, 
sizeof(inp)); + memset(&outp, 0, sizeof(outp)); + memset(&ita_inp,0, sizeof(ita_inp)); + + + /* + * Before calling pfm_dispatch_events(), we must specify what kind + * of branches we want to capture. We are interested in all the mispredicted branches, + * therefore we set the various fields of the BTB config to: + */ + ita_inp.pfp_ita_btb.btb_used = 1; + + ita_inp.pfp_ita_btb.btb_tar = 0x1; + ita_inp.pfp_ita_btb.btb_tm = 0x2; + ita_inp.pfp_ita_btb.btb_ptm = 0x3; + ita_inp.pfp_ita_btb.btb_tac = 0x1; + ita_inp.pfp_ita_btb.btb_bac = 0x1; + ita_inp.pfp_ita_btb.btb_ppm = 0x3; + ita_inp.pfp_ita_btb.btb_plm = PFM_PLM3; + + /* + * We sample on branch events, so we must + * program a counting monitor with the BRANCH_EVENT + * event. + */ + if (pfm_find_full_event("BRANCH_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) { + fatal_error("cannot find event BRANCH_EVENT\n"); + } + + /* + * set the (global) privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + /* + * We initialize the format specific information. + * The format is identified by its UUID which must be copied + * into the ctx_smpl_buf_id field. + */ + memcpy(ctx[0].ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. 
+ */ + ctx[0].buf_arg.buf_size = 8192; + + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + printf("Sampling buffer mapped at %p\n", ctx[0].ctx_arg.ctx_smpl_vaddr); + + smpl_vaddr = ctx[0].ctx_arg.ctx_smpl_vaddr; + + /* + * extract our file descriptor + */ + id = ctx[0].ctx_arg.ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS come first, that's why this loop + * is safe even when extra PMC are needed to support a particular event. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * indicate we want notification when buffer is full + */ + pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * Now prepare the argument to initialize the PMD and the sampling period + * We know we use only one PMD in this case, therefore pmd[0] corresponds + * to our first event which is our sampling period. 
+ */ + pd[0].reg_value = (~0UL) - SMPL_PERIOD +1; + pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD +1; + pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD +1; + + /* + * indicate PMD to collect in each sample + */ + pc[0].reg_smpl_pmds[0] = BTB_REGS_MASK; + + /* + * compute size of each sample: fixed-size header + all our BTB regs + */ + entry_size = sizeof(btb_entry_t)+(hweight64(BTB_REGS_MASK)<<3); + + /* + * When our counter overflows, we want to BTB index to be reset, so that we keep + * in sync. This is required to make it possible to interpret pmd16 on overflow + * to avoid repeating the same branch several times. + */ + pc[0].reg_reset_pmds[0] = M_PMD(16); + + /* + * reset pmd16 (BTB index), short and long reset value are set to zero as well + * + * We use slot 1 of our pd[] array for this. + */ + pd[1].reg_num = 16; + pd[1].reg_value = 0UL; + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more thann coutning monitors. + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + /* + * we use 2 = 1 for the branch_event + 1 for the reset of PMD16. 
+ */ + if (perfmonctl(id, PFM_WRITE_PMDS, pd, 2) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) { + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + } + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) { + fatal_error("cannot setown: %s\n", strerror(errno)); + } + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(100000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer. Note that the buffer may not be full at this point. + * + */ + + process_smpl_buffer(); + + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita_dear.c b/src/libpfm-3.y/examples_ia64_v2.0/ita_dear.c new file mode 100644 index 0000000..c17fe26 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita_dear.c @@ -0,0 +1,454 @@ +/* + * ita_dear.c - example of how use the D-EAR with the Itanium PMU + * + * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define EVENT_NAME "DATA_EAR_CACHE_LAT4" +#define SMPL_PERIOD (40) + +#define M_PMD(x) (1UL<<(x)) +#define DEAR_REGS_MASK (M_PMD(2)|M_PMD(3)|M_PMD(17)) + +typedef pfm_default_smpl_hdr_t dear_hdr_t; +typedef pfm_default_smpl_entry_t dear_entry_t; +typedef pfm_default_smpl_ctx_arg_t dear_ctx_t; +#define DEAR_FMT_UUID PFM_DEFAULT_SMPL_UUID + +static pfm_uuid_t buf_fmt_id = DEAR_FMT_UUID; + + +static void *smpl_vaddr; +static unsigned long entry_size; +static int id; + +#if defined(__ECC) && defined(__INTEL_COMPILER) + +/* if you do not have this file, your compiler is too old */ +#include + +#define hweight64(x) _m64_popcnt(x) + +#elif defined(__GNUC__) + +static __inline__ int +hweight64 (unsigned long x) +{ + unsigned long result; + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (x)); + return (int)result; +} + +#else +#error "you need to provide inline assembly from your compiler" +#endif + +long +do_test(unsigned long size) +{ + unsigned long i, sum = 0; + int *array; + + printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); + array = (int *)malloc(size * sizeof(int)); + if (array == NULL ) { + printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); + exit(1); + } + for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + + /* + * walk through all the entries recored in the buffer + */ + for(i=0; i < hdr->hdr_count; i++) { + + ret = 0; + + ent = (dear_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first 
recorded register (always contiguous with entry header) + */ + reg = (pfm_ita_pmd_reg_t*)(ent+1); + + safe_printf("PMD2 : 0x%016lx\n", reg->pmd_val); + + reg++; + + safe_printf("PMD3 : 0x%016lx, latency %u\n", + reg->pmd_val, + reg->pmd3_ita_reg.dear_latency); + + reg++; + + safe_printf("PMD17: 0x%016lx, valid %c, address 0x%016lx\n", + reg->pmd_val, + reg->pmd17_ita_reg.dear_vl ? 'Y': 'N', + (reg->pmd17_ita_reg.dear_iaddr << 4) | + (unsigned long)reg->pmd17_ita_reg.dear_slot); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + /* dangerous */ + printf("Notification received\n"); + + process_smpl_buffer(); + /* + * And resume monitoring + */ + if (perfmonctl(id, PFM_RESTART,NULL, 0) == -1) { + perror("PFM_RESTART"); + exit(1); + } +} + +int +main(void) +{ + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_event_t ev; + dear_ctx_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + int ret, type = 0; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ + 
pfm_set_options(&pfmlib_options); + + + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + /* + * Look up the Data EAR event we want to sample (EVENT_NAME) and + * retrieve its event descriptor from the library. + */ + if (pfm_find_full_event(EVENT_NAME, &ev) != PFMLIB_SUCCESS) { + fatal_error("cannot find event %s\n", EVENT_NAME); + } + + /* + * set the (global) privilege mode: + * PFM_PLM0|PFM_PLM3 : kernel level and user level + */ + inp.pfp_dfl_plm = PFM_PLM0|PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * propagate the event descriptor + */ + inp.pfp_events[0] = ev; + + /* + * let the library figure out the values for the PMCS + * + * We use all global settings for this EAR. + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * prepare context structure. + * + * format specific parameters MUST be concatenated to the regular + * pfarg_context_t structure. For convenience, the default sampling + * format provides a data structure that already combines the pfarg_context_t + * with what is needed for this format. + */ + + /* + * We initialize the format specific information. + * The format is identified by its UUID which must be copied + * into the ctx_smpl_buf_id field. + */ + memcpy(ctx[0].ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. 
+ */ + ctx[0].buf_arg.buf_size = 4096; + + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract the file descriptor we will use to + * identify this newly created context + */ + id = ctx[0].ctx_arg.ctx_fd; + + printf("Sampling buffer mapped at %p\n", ctx[0].ctx_arg.ctx_smpl_vaddr); + + smpl_vaddr = ctx[0].ctx_arg.ctx_smpl_vaddr; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS comes first, that's why this loop + * is safe even when extra PMC are needed to support a particular event. 
+ */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * indicate we want notification when buffer is full + */ + pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * indicate which PMD to include in the sample + */ + pc[0].reg_smpl_pmds[0] = DEAR_REGS_MASK; + + /* + * compute size of each sample: fixed-size header + all our DEAR regs + */ + entry_size = sizeof(dear_entry_t)+(hweight64(DEAR_REGS_MASK)<<3); + + /* + * initialize the PMD and the sampling period + */ + pd[0].reg_value = (~0UL) - SMPL_PERIOD +1; + pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD +1; + pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD +1; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) { + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + } + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) { + fatal_error("cannot setown: %s\n", strerror(errno)); + } + + /* + * Let's roll now. 
+ */ + pfm_self_start(id); + + do_test(10000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer, i.e. which may not be full + */ + process_smpl_buffer(); + + /* + * let's stop this now + */ + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita_irr.c b/src/libpfm-3.y/examples_ia64_v2.0/ita_irr.c new file mode 100644 index 0000000..b671e41 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita_irr.c @@ -0,0 +1,386 @@ +/* + * ita_irr.c - example of how to use code range restriction with the Itanium PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define VECTOR_SIZE 1000000UL + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + + +typedef struct { + char *event_name; + unsigned long expected_value; +} event_desc_t; + +static event_desc_t event_list[]={ + { "fp_ops_retired_hi", 0UL} , + { "fp_ops_retired_lo", VECTOR_SIZE<<1 }, + { NULL, 0UL } +}; + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + + +void +saxpy(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } +} + +void +saxpy2(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } +} + + + +static int +do_test(void) +{ + unsigned long size; + double *a, *b, *c; + + size = VECTOR_SIZE; + + a = malloc(size*sizeof(double)); + b = malloc(size*sizeof(double)); + c = malloc(size*sizeof(double)); + + if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); + + memset(a, 0, size*sizeof(double)); + memset(b, 0, size*sizeof(double)); + memset(c, 0, size*sizeof(double)); + + saxpy(a,b,c, size); + saxpy2(a,b,c, size); + + return 0; +} + +int +main(int argc, char **argv) +{ + event_desc_t *p; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita_input_param_t ita_inp; + pfmlib_ita_output_param_t ita_outp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_dbreg_t ibrs[8]; + pfarg_context_t ctx[1]; + pfarg_load_t load_args; + unsigned long range_start, range_end; + pfmlib_options_t pfmlib_options; + struct fd { /* function descriptor */ + unsigned long addr; + unsigned long gp; + } *fd; + int ret, type = 0; + 
unsigned int i; + int id; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Compute the range we are interested in + * + * On IA-64, the function pointer does not point directly + * to the function but to a descriptor which contains two + * unsigned long: the first one is the actual start address + * of the function, the second is the gp (global pointer) + * to load into r1 before jumping into the function. Unlesss + * we're jumping into a shared library the gp is the same as + * the current gp. + * + * In the artificial example, we also rely on the compiler/linker + * NOT reordering code layout. We depend on saxpy2() being just + * after saxpy(). 
+ * + */ + fd = (struct fd *)saxpy; + range_start = fd->addr; + + fd = (struct fd *)saxpy2; + range_end = fd->addr; + + /* + * Clear every argument structure before use. pc[] must be zeroed as well: + * only reg_num and reg_value are assigned before PFM_WRITE_PMCS, so the other + * pfarg_reg_t fields would otherwise carry uninitialized stack contents. + */ + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(ibrs,0, sizeof(ibrs)); + memset(&load_args,0, sizeof(load_args)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita_inp,0, sizeof(ita_inp)); + memset(&ita_outp,0, sizeof(ita_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; p->event_name ; i++, p++) { + if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event\n", p->event_name); + } + } + + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the code range we are interested in. The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields of rr_infos on return from the library call. + * + * Upon return, the rr_br array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used. 
+ * + */ + + ita_inp.pfp_ita_irange.rr_used = 1; /* indicate we use code range restriction */ + ita_inp.pfp_ita_irange.rr_limits[0].rr_start = range_start; + ita_inp.pfp_ita_irange.rr_limits[0].rr_end = range_end; + + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, &ita_outp)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * print offsets + */ + printf("code range : [0x%016lx-0x%016lx)\n" + "start_offset:-0x%lx end_offset:+0x%lx\n" + "%d pairs of debug registers used\n", + range_start, + range_end, + ita_outp.pfp_ita_irange.rr_infos[0].rr_soff, + ita_outp.pfp_ita_irange.rr_infos[0].rr_eoff, + ita_outp.pfp_ita_irange.rr_nbr_used >> 1); + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("cannot create PFM context %s\n", strerror(errno)); + } + /* + * extract our file descriptor + */ + id = ctx[0].ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS comes first, that's why this loop + * is safe even when extra PMC are needed to support a particular event. 
+ */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * propagate the setup for the debug registers from the library to the arguments + * to the perfmonctl() syscall. The library does not know the type of the syscall + * anymore. + * + * The number of entries to copy is pfp_ita_irange.rr_nbr_used (code range), + * i.e. the same count that is passed to PFM_WRITE_IBRS below. + */ + for (i=0; i < ita_outp.pfp_ita_irange.rr_nbr_used; i++) { + ibrs[i].dbreg_num = ita_outp.pfp_ita_irange.rr_br[i].reg_num; + ibrs[i].dbreg_value = ita_outp.pfp_ita_irange.rr_br[i].reg_value; + } + + /* + * Program the code debug registers. + * + * IMPORTANT: programming the debug register MUST always be done before the PMCs + * otherwise the kernel will fail on PFM_WRITE_PMCS. This is for security reasons. + */ + if (perfmonctl(id, PFM_WRITE_IBRS, ibrs, ita_outp.pfp_ita_irange.rr_nbr_used) == -1) { + fatal_error("perfmonctl error PFM_WRITE_IBRS errno %d\n",errno); + } + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + + if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + + /* + * Let's roll now. + * + * We run two distinct copies of the same function but we restrict measurement + * to the first one (saxpy). Therefore the expected count is half what you would + * get if code range restriction was not used. The core loop in both case uses + * two floating point operation per iteration. 
+ */ + pfm_self_start(id); + + do_test(); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error READ_PMDS errno %d\n",errno); + } + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, + event_list[i].expected_value); + } + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita_opcode.c b/src/libpfm-3.y/examples_ia64_v2.0/ita_opcode.c new file mode 100644 index 0000000..8bac461 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita_opcode.c @@ -0,0 +1,282 @@ +/* + * ita_opcode.c - example of how to use the opcode matcher with the Itanium PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * we don't use static to make sure the compiler does not inline the function + */ +int +do_test(unsigned long loop) +{ + unsigned long sum = 0; + while(loop--) sum += loop; + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(void) +{ + int ret; + int type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita_input_param_t ita_inp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_context_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + unsigned int i; + int id; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with the %s PMU\n", model); + } + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + + + /* + * Clear every argument structure before use. pc[] must be zeroed as well: + * only reg_num and reg_value are assigned before PFM_WRITE_PMCS, so the other + * pfarg_reg_t fields would otherwise carry uninitialized stack contents. + */ + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita_inp,0, sizeof(ita_inp)); + memset(&load_args,0, sizeof(load_args)); + + /* + * We indicate that we are using the PMC8 opcode matcher. This is required + * otherwise the library add PMC8 to the list of PMC to program during + * pfm_dispatch_events(). + */ + ita_inp.pfp_ita_pmc8.opcm_used = 1; + + /* + * We want to match all the br.cloop in our test function. + * This branch is an IP-relative branch for which the major + * opcode (bits [40-37]=4) and the btype field is 5 (which represents + * bits[6-8]) so it is included in the match/mask fields of PMC8. + * It is necessarily in a B slot. 
+ * + * We don't care which operands are used with br.cloop therefore + * the mask field of pmc8 is set such that only the 4 bits of the + * opcode and 3 bits of btype must match exactly. This is accomplished by + * clearing the top 4 bits and bits [6-8] of the mask field and setting the + * remaining bits. Similarly, the match field only has the opcode value and btype + * set according to the encoding of br.cloop, the + * remaining bits are zero. Bit 60 of PMC8 is set to indicate + * that we look only in B slots (this is the only possibility for + * this instruction anyway). + * + * So the binary representation of the value for PMC8 is as follows: + * + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * ---------------------------------------------------------------- + * 0001010000000000000000101000000000000011111111111111000111111000 + * + * which yields a value of 0x1400028003fff1f8. + * + * Depending on the level of optimization to compile this code, it may + * be that the count reported could be zero, if the compiler uses a br.cond + * instead of br.cloop. + */ + ita_inp.pfp_ita_pmc8.pmc_val = 0x1400028003fff1f8; + + /* + * To count the number of occurence of this instruction, we must + * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 + * event. 
+ */ + if (pfm_find_full_event("IA64_TAGGED_INST_RETIRED_PMC8", &inp.pfp_events[0]) != PFMLIB_SUCCESS) { + fatal_error("Cannot find event IA64_TAGGED_INST_RETIRED_PMC8\n"); + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract our file descriptor + */ + id = ctx[0].ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS comes first, that's why this loop + * is safe even when extra PMC are needed to support a particular event. 
+ */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more thann coutning monitors. + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(100UL); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno); + } + + /* + * print the results + */ + pfm_get_full_event_name(&inp.pfp_events[0], name, MAX_EVT_NAME_LEN); + printf("PMD%u %20lu %s\n", + pd[0].reg_num, + pd[0].reg_value, + name); + + if (pd[0].reg_value != 0) + printf("compiler used br.cloop\n"); + else + printf("compiler did not use br.cloop\n"); + + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/ita_rr.c b/src/libpfm-3.y/examples_ia64_v2.0/ita_rr.c new file mode 100644 index 0000000..1147279 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/ita_rr.c @@ -0,0 +1,414 @@ +/* + * ita_rr.c - example of how to use data range restriction with the Itanium PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define N_LOOP 100000000U + +#if defined(__ECC) && defined(__INTEL_COMPILER) + +/* if you do not have this file, your compiler is too old */ +#include + +#define clear_psr_ac() __rum(1UL<<3) + +#elif defined(__GNUC__) + +static inline void +clear_psr_ac(void) +{ + __asm__ __volatile__("rum psr.ac;;" ::: "memory" ); +} +#else +#error "You need to define clear_psr_ac() for your compiler" +#endif + +#define TEST_DATA_COUNT 16 + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_PMU_NAME_LEN 32 +#define MAX_EVT_NAME_LEN 128 + +typedef struct { + char *event_name; + unsigned long expected_value; +} event_desc_t; + + +static event_desc_t event_list[]={ + { "misaligned_loads_retired", N_LOOP }, + { "misaligned_stores_retired", N_LOOP }, + { NULL, 0UL} +}; + + +typedef union { + unsigned long l_tab[2]; + unsigned int i_tab[4]; + unsigned short s_tab[8]; + unsigned char c_tab[16]; +} test_data_t; + +static int +do_test(test_data_t *data) +{ + unsigned int *l, v; + + l = (unsigned int *)(data->c_tab+1); + + if (((unsigned long)l & 0x1) == 0) { + printf("Data is not unaligned, can't run test\n"); + return -1; + } + + v = *l; + v++; + *l = v; + + return 0; +} + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(int argc, char **argv) +{ + event_desc_t *p; + test_data_t *test_data, *test_data_fake; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita_input_param_t ita_inp; + pfmlib_ita_output_param_t ita_outp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_dbreg_t dbrs[8]; + pfarg_context_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + unsigned int i; + int id; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + /* + * now let's allocate the data structure we will be monitoring + */ + test_data = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data == NULL) { + fatal_error("cannot allocate test data structure"); + } + test_data_fake = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data_fake == NULL) { + fatal_error("cannot allocate test data structure"); + } + /* + * Compute the range we are interested in + */ + range_start = (unsigned long)test_data; + range_end = range_start + sizeof(test_data_t)*TEST_DATA_COUNT; + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(dbrs,0, 
sizeof(dbrs)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita_inp,0, sizeof(ita_inp)); + memset(&ita_outp,0, sizeof(ita_outp)); + + + /* + * find requested event + */ + p = event_list; + for (i=0; p->event_name ; i++, p++) { + if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event\n", p->event_name); + } + } + + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the data range we are interested in. The rr_end parameter + * must point to the byte after the last of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out, the actual + * start and end offsets of the generated range by checking the rr_soff and rr_eoff fields + * in the pfmlib_ita_output_param_t structure when coming back from the library call. + * + * Upon return, the pfmlib_ita_output_param_t.pfp_ita_drange.rr_dbr array is programmed and + * the number of entries used to cover the range is in rr_nbr_used. + */ + + /* + * We indicate that we are using a Data Range Restriction feature. 
+ * In this particular case this will cause pfm_dispatch_events() to + * add pmc13 to the list of PMC registers to initialize. + */ + + ita_inp.pfp_ita_drange.rr_used = 1; + ita_inp.pfp_ita_drange.rr_limits[0].rr_start = range_start; + ita_inp.pfp_ita_drange.rr_limits[0].rr_end = range_end; + + + /* + * use the library to find the monitors to use + * + * upon return, outp.pfp_pmc_count contains the number of entries + * used in outp.pfp_pmcs[]. + */ + if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, &ita_outp)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + printf("data range : [0x%016lx-0x%016lx): %d pair of debug registers used\n" + "start_offset:-0x%lx end_offset:+0x%lx\n", + range_start, + range_end, + ita_outp.pfp_ita_drange.rr_nbr_used >> 1, + ita_outp.pfp_ita_drange.rr_infos[0].rr_soff, + ita_outp.pfp_ita_drange.rr_infos[0].rr_eoff); + + printf("fake data range: [0x%016lx-0x%016lx)\n", + (unsigned long)test_data_fake, + (unsigned long)test_data_fake+sizeof(test_data_t)*TEST_DATA_COUNT); + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("cannot create PFM context %s\n", strerror(errno)); + } + /* + * extract our file descriptor + */ + id = ctx[0].ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures.
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMCs controlling the events ALWAYS come first, that's why this loop + * is safe even when extra PMCs are needed to support a particular event. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * propagate the setup for the debug registers from the library to the arguments + * to the perfmonctl() syscall. The library does not know the type of the syscall + * anymore. + */ + for (i=0; i < ita_outp.pfp_ita_drange.rr_nbr_used; i++) { + dbrs[i].dbreg_num = ita_outp.pfp_ita_drange.rr_br[i].reg_num; + dbrs[i].dbreg_value = ita_outp.pfp_ita_drange.rr_br[i].reg_value; + } + + /* + * Program the data debug registers. + * + * IMPORTANT: programming the debug register MUST always be done before the PMCs + * otherwise the kernel will fail on PFM_WRITE_PMCS. This is for security reasons. + */ + if (perfmonctl(id, PFM_WRITE_DBRS, dbrs, ita_outp.pfp_ita_drange.rr_nbr_used) == -1) { + fatal_error("perfmonctl error PFM_WRITE_DBRS errno %d\n",errno); + } + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors.
+ */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + + if (perfmonctl(id, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + + /* + * Let's make sure that the hardware does the unaligned accesses (do not use the + * kernel software handler otherwise the PMU won't see the unaligned fault). + */ + clear_psr_ac(); + + /* + * Let's roll now. + * + * The idea behind this test is to have two dynamically allocated data structures + * which are access in a unaligned fashion. But we want to capture only the unaligned + * accesses on one of the two. So the debug registers are programmed to cover the + * first one ONLY. Then we activate monotoring and access the two data structures. + * This is an artificial example just to demonstrate how to use data address range + * restrictions. + */ + pfm_self_start(id); + for (i=0; i < N_LOOP; i++) { + do_test(test_data); + do_test(test_data_fake); + } + + pfm_self_stop(id); + + /* + * now read the results + */ + if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno); + } + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + * + * For this example, we expect to see a value of 1 for both misaligned loads + * and misaligned stores. 
But it can be two when the test_data and test_data_fake + * are allocate very close from each other and the range created with the debug + * registers is larger then test_data. + * + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, event_list[i].expected_value); + + if (pd[i].reg_value != event_list[i].expected_value) { + printf("error: Result should be %lu for %s\n", event_list[i].expected_value, name); + break; + } + } + /* + * let's stop this now + */ + close(id); + + free(test_data); + free(test_data_fake); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/mont_dear.c b/src/libpfm-3.y/examples_ia64_v2.0/mont_dear.c new file mode 100644 index 0000000..d197999 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/mont_dear.c @@ -0,0 +1,446 @@ +/* + * mont_dear.c - example of how use the D-EAR with the Dual-Core Itanium 2 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define SMPL_PERIOD (40) + +#define EVENT_NAME "data_ear_cache_lat4" + +#define M_PMD(x) (1UL<<(x)) +#define DEAR_REGS_MASK (M_PMD(32)|M_PMD(33)|M_PMD(36)) + +typedef pfm_default_smpl_hdr_t dear_hdr_t; +typedef pfm_default_smpl_entry_t dear_entry_t; +typedef pfm_default_smpl_ctx_arg_t dear_ctx_t; +#define DEAR_FMT_UUID PFM_DEFAULT_SMPL_UUID + +static pfm_uuid_t buf_fmt_id = DEAR_FMT_UUID; + + +static void *smpl_vaddr; +static unsigned long entry_size; +static int id; + +#if defined(__ECC) && defined(__INTEL_COMPILER) +/* if you do not have this file, your compiler is too old */ +#include + +#define hweight64(x) _m64_popcnt(x) + +#elif defined(__GNUC__) + +static __inline__ int +hweight64 (unsigned long x) +{ + unsigned long result; + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (x)); + return (int)result; +} + +#else +#error "you need to provide inline assembly from your compiler" +#endif + +long +do_test(unsigned long size) +{ + unsigned long i, sum = 0; + int *array; + + printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); + array = (int *)malloc(size * sizeof(int)); + if (array == NULL ) { + printf("line = %d No memory available!\n", __LINE__); + exit(1); + } + for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + 
return; + } + + pos = (unsigned long)(hdr+1); + count = hdr->hdr_count; + + /* + * walk through all the entries recored in the buffer + */ + while(count--) { + ret = 0; + + ent = (dear_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->tgid, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry header) + */ + reg = (pfm_mont_pmd_reg_t*)(ent+1); + + safe_printf("PMD32: 0x%016lx\n", reg->pmd32_mont_reg.dear_daddr); + + reg++; + + safe_printf("PMD33: 0x%016lx, latency %u\n", + reg->pmd_val, + reg->pmd33_mont_reg.dear_latency); + + reg++; + + safe_printf("PMD36: 0x%016lx, valid %c, address 0x%016lx\n", + reg->pmd_val, + reg->pmd36_mont_reg.dear_vl ? 'Y': 'N', + (reg->pmd36_mont_reg.dear_iaddr << 4) | + (unsigned long)reg->pmd36_mont_reg.dear_slot); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + process_smpl_buffer(); + /* + * And resume monitoring + */ + if (perfmonctl(id, PFM_RESTART, NULL, 0)) + fatal_error("pfm_restart"); +} + +int +main(void) +{ + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_event_t ev; + dear_ctx_t ctx; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + int ret, type = 0; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + 
memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + /* + * To count the number of occurence of this instruction, we must + * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 + * event. + */ + if (pfm_find_full_event(EVENT_NAME, &ev) != PFMLIB_SUCCESS) + fatal_error("cannot find event %s\n", EVENT_NAME); + + /* + * set the (global) privilege mode: + * PFM_PLM0 : kernel level only + */ + inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * propagate the event descriptor + */ + inp.pfp_events[0] = ev; + + /* + * let the library figure out the values for the PMCS + * + * We use all global settings for this EAR. + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * prepare context structure. + * + * format specific parameters MUST be concatenated to the regular + * pfarg_ctx_t structure. For convenience, the default sampling + * format provides a data structure that already combines the pfarg_ctx_t + * with what is needed fot this format. + */ + + /* + * We initialize the format specific information. + * The format is identified by its UUID which must be copied + * into the ctx_buf_fmt_id field. 
+ */ + memcpy(ctx.ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. + */ + ctx.buf_arg.buf_size = getpagesize(); + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract the file descriptor we will use to + * identify this newly created context + */ + id = ctx.ctx_arg.ctx_fd; + /* + * retrieve the virtual address at which the sampling + * buffer has been mapped + */ + smpl_vaddr =ctx.ctx_arg.ctx_smpl_vaddr; + printf("Sampling buffer mapped at %p\n", smpl_vaddr); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. 
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from the pmds returned by the library + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * indicate we want notification when buffer is full + */ + pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * indicate which PMD to include in the sample + */ + pc[0].reg_smpl_pmds[0] = DEAR_REGS_MASK; + + /* + * compute size of each sample: fixed-size header + all our DEAR regs + */ + entry_size = sizeof(dear_entry_t)+(hweight64(DEAR_REGS_MASK)<<3); + + /* + * initialize the PMD and the sampling period + */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (perfmonctl(id, PFM_WRITE_PMDS, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * attach context to stopped task + */ + load_args.load_pid = getpid(); + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now.
+ */ + pfm_self_start(id); + + do_test(100000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer, i.e. which may not be full + */ + process_smpl_buffer(); + + /* + * let's stop this now + */ + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/mont_etb.c b/src/libpfm-3.y/examples_ia64_v2.0/mont_etb.c new file mode 100644 index 0000000..bb692f8 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/mont_etb.c @@ -0,0 +1,528 @@ +/* + * mont_btb.c - example of how use the BTB with the Dual-Core Itanium 2 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +typedef pfm_default_smpl_hdr_t etb_hdr_t; +typedef pfm_default_smpl_entry_t etb_entry_t; +typedef pfm_default_smpl_ctx_arg_t etb_ctx_arg_t; +#define BTB_FMT_UUID PFM_DEFAULT_SMPL_UUID + +static pfm_uuid_t buf_fmt_id = BTB_FMT_UUID; + + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * The BRANCH_EVENT is increment by 1 for each branch event. Such event is composed of + * two entries in the BTB: a source and a target entry. The BTB is full after 4 branch + * events. + */ +#define SMPL_PERIOD (4UL*256) + +/* + * We use a small buffer size to exercise the overflow handler + */ +#define SMPL_BUF_NENTRIES 64 + +#define M_PMD(x) (1UL<<(x)) + +#define ETB_REGS_MASK (M_PMD(38)| M_PMD(39)| \ + M_PMD(48)|M_PMD(49)|M_PMD(50)|M_PMD(51)|M_PMD(52)|M_PMD(53)|M_PMD(54)|M_PMD(55)|\ + M_PMD(56)|M_PMD(57)|M_PMD(58)|M_PMD(59)|M_PMD(60)|M_PMD(61)|M_PMD(62)|M_PMD(63)) +static void *smpl_vaddr; +static size_t entry_size; +static int id; + +#if defined(__ECC) && defined(__INTEL_COMPILER) +/* if you do not have this file, your compiler is too old */ +#include + +#define hweight64(x) _m64_popcnt(x) + +#elif defined(__GNUC__) + +static __inline__ int +hweight64 (unsigned long x) +{ + unsigned long result; + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (x)); + return (int)result; +} + +#else +#error "you need to provide inline assembly from your compiler" +#endif + +/* + * we don't use static to make sure the compiler does not inline the function + */ +long func1(void) { return random();} +long func2(void) { return random();} + +long +do_test(unsigned long loop) +{ + long sum = 0; + + while(loop--) { + if (loop & 0x1) + sum += func1(); + else + sum += loop + func2(); + } + return sum; +} + + +static void fatal_error(char *fmt,...) 
__attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * print content of sampling buffer + * + * XXX: using stdio to print from a signal handler is not safe with multi-threaded + * applications + */ +#define safe_printf printf + +static void +show_etb_reg(int j, pfm_mont_pmd_reg_t reg, pfm_mont_pmd_reg_t pmd39) +{ + unsigned long bruflush, b1, etb_ext; + unsigned long addr; + int is_valid; + + is_valid = reg.pmd48_63_etb_mont_reg.etb_s == 0 && reg.pmd48_63_etb_mont_reg.etb_mp == 0 ? 0 : 1; + + /* + * the joy of the ETB extension register layout! + */ + if (j < 8) + etb_ext = (pmd39.pmd_val>>(8*j)) & 0xf; + else + etb_ext = (pmd39.pmd_val>>(4+8*(j-1))) & 0xf; + + b1 = etb_ext & 0x1; + bruflush = (etb_ext >> 1) & 0x1; + + safe_printf("\tPMD%-2d: 0x%016lx s=%d mp=%d bru=%ld b1=%ld valid=%c\n", + j+48, + reg.pmd_val, + reg.pmd48_63_etb_mont_reg.etb_s, + reg.pmd48_63_etb_mont_reg.etb_mp, + bruflush, b1, + is_valid ? 'Y' : 'N'); + + + if (!is_valid) return; + + if (reg.pmd48_63_etb_mont_reg.etb_s) { + addr = (reg.pmd48_63_etb_mont_reg.etb_addr+b1)<<4; + addr |= reg.pmd48_63_etb_mont_reg.etb_slot < 3 ? reg.pmd48_63_etb_mont_reg.etb_slot : 0; + + safe_printf("\t Source Address: 0x%016lx\n" + "\t Taken=%c Prediction:%s\n\n", + addr, + reg.pmd48_63_etb_mont_reg.etb_slot < 3 ? 'Y' : 'N', + reg.pmd48_63_etb_mont_reg.etb_mp ? "FE Failure" : + bruflush ? "BE Failure" : "Success"); + } else { + safe_printf("\t Target Address:0x%016lx\n\n", + (unsigned long)(reg.pmd48_63_etb_mont_reg.etb_addr<<4)); + } +} + +static void +show_etb(pfm_mont_pmd_reg_t *etb) +{ + int i, last; + pfm_mont_pmd_reg_t pmd38, pmd39; + + pmd38.pmd_val = etb[0].pmd_val; + pmd39.pmd_val = etb[1].pmd_val; + + i = pmd38.pmd38_mont_reg.etbi_full ? 
pmd38.pmd38_mont_reg.etbi_ebi : 0; + last = pmd38.pmd38_mont_reg.etbi_ebi; + + safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", + i, + last, + pmd38.pmd38_mont_reg.etbi_ebi, + pmd38.pmd38_mont_reg.etbi_full); + + do { + show_etb_reg(i, etb[i], pmd39); + i = (i+1) % 16; + } while (i != last); +} + +void +process_smpl_buffer(void) +{ + etb_hdr_t *hdr; + etb_entry_t *ent; + unsigned long pos; + unsigned long smpl_entry = 0; + pfm_mont_pmd_reg_t *reg; + size_t count; + static unsigned long last_ovfl = ~0UL; + + + hdr = (etb_hdr_t *)smpl_vaddr; + + /* + * check that we are not diplaying the previous set of samples again. + * Required to take care of the last batch of samples. + */ + if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + count = hdr->hdr_count; + /* + * walk through all the entries recored in the buffer + */ + while(count--) { + + ent = (etb_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->tgid, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry header) + */ + reg = (pfm_mont_pmd_reg_t*)(ent+1); + + /* + * in this particular example, we have pmd48-pmd63 has the ETB. We have also + * included pmd38/pmd39 (ETB index and extenseion) has part of the registers + * to record. This trick allows us to get the index to decode the sequential + * order of the BTB. + * + * Recorded registers are always recorded in increasing index order. So we know + * that where to find pmd38/pmd39. 
+ */ + show_etb(reg); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + process_smpl_buffer(); + + /* + * And resume monitoring + */ + if (perfmonctl(id, PFM_RESTART, NULL, 0)) + fatal_error("pfm_restart errno %d\n", errno); +} + + +int +main(void) +{ + int ret; + int type = 0; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_mont_input_param_t mont_inp; + etb_ctx_arg_t ctx; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + memset(pd, 0, sizeof(pd)); + memset(&ctx, 0, sizeof(ctx)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&mont_inp,0, sizeof(mont_inp)); + + /* + * Before calling pfm_find_dispatch(), we must specify what kind + * of branches we want to capture. 
We are interested in all taken + * branches, therefore we set the various fields to: + */ + mont_inp.pfp_mont_etb.etb_used = 1; + + mont_inp.pfp_mont_etb.etb_tm = 0x2; + mont_inp.pfp_mont_etb.etb_ptm = 0x3; + mont_inp.pfp_mont_etb.etb_ppm = 0x3; + mont_inp.pfp_mont_etb.etb_brt = 0x0; + mont_inp.pfp_mont_etb.etb_plm = PFM_PLM3; + + /* + * To count the number of branch events, we must + * program a counting monitor with the BRANCH_EVENT + * event. + */ + if (pfm_find_full_event("BRANCH_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find event BRANCH_EVENT\n"); + + /* + * set the (global) privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * We initialize the format specific information. + * The format is identified by its UUID which must be copied + * into the ctx_smpl_buf_id field. + */ + memcpy(ctx.ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded.
+ */ + ctx.buf_arg.buf_size = getpagesize(); + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract our file descriptor + */ + id = ctx.ctx_arg.ctx_fd; + + /* + * retrieve the virtual address at which the sampling + * buffer has been mapped + */ + smpl_vaddr = ctx.ctx_arg.ctx_smpl_vaddr; + if (smpl_vaddr == MAP_FAILED) + fatal_error("cannot mmap sampling buffer errno %d\n", errno); + + printf("Sampling buffer mapped at %p\n", smpl_vaddr); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + * PMD38 returned as used PMD by libpfm, will be reset + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * indicate we want notification when buffer is full and randomization + */ + pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM; + + /* + * Now prepare the argument to initialize the PMD and the sampling period + * We know we use only one PMD in this case, therefore pmd[0] corresponds + * to our first event which is our sampling period. 
+ */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * indicate PMD to collect in each sample (good up to PMD63) + */ + pc[0].reg_smpl_pmds[0] = ETB_REGS_MASK; + + /* + * compute size of each sample: fixed-size header + all our BTB regs + */ + entry_size = sizeof(etb_entry_t)+(hweight64(ETB_REGS_MASK)<<3); + + /* + * When our counter overflows, we want to ETB index to be reset, so that we keep + * in sync. + */ + pc[0].reg_reset_pmds[0] = M_PMD(38); + + /* + * Now program the registers + */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + /* + * we use 2 registers = 1 for the branch_event + 1 to reset PMD38 + */ + if (perfmonctl(id, PFM_WRITE_PMDS, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(1000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer. Note that the buffer may not be full at this point. 
+ * + */ + process_smpl_buffer(); + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/mont_irr.c b/src/libpfm-3.y/examples_ia64_v2.0/mont_irr.c new file mode 100644 index 0000000..01d197c --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/mont_irr.c @@ -0,0 +1,362 @@ +/* + * mont_irr.c - example of how to use code range restriction with the Dual-Core Itanium 2 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define VECTOR_SIZE 1000000UL + +typedef struct { + char *event_name; + unsigned long expected_value; +} event_desc_t; + +static event_desc_t event_list[]={ + { "fp_ops_retired", VECTOR_SIZE<<1 }, + { NULL, 0UL } +}; + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + + +void +saxpy(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } +} + +void +saxpy2(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } +} + + + +static int +do_test(void) +{ + unsigned long size; + double *a, *b, *c; + + size = VECTOR_SIZE; + + a = malloc(size*sizeof(double)); + b = malloc(size*sizeof(double)); + c = malloc(size*sizeof(double)); + + if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); + + memset(a, 0, size*sizeof(double)); + memset(b, 0, size*sizeof(double)); + memset(c, 0, size*sizeof(double)); + + saxpy(a,b,c, size); + saxpy2(a,b,c, size); + + return 0; +} + +int +main(int argc, char **argv) +{ + event_desc_t *p; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_mont_input_param_t mont_inp; + pfmlib_mont_output_param_t mont_outp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_dbreg_t ibrs[8]; + pfarg_context_t ctx; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct fd { /* function descriptor */ + unsigned long addr; + unsigned long gp; + } *fd; + unsigned int i; + int id; + char 
name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 1; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Compute the range we are interested in + * + * On IA-64, the function pointer does not point directly + * to the function but to a descriptor which contains two + * unsigned long: the first one is the actual start address + * of the function, the second is the gp (global pointer) + * to load into r1 before jumping into the function. Unless + * we're jumping into a shared library, the gp is the same as + * the current gp. + * + * In this artificial example, we also rely on the compiler/linker + * NOT reordering code layout. We depend on saxpy2() being just + * after saxpy().
+ * + */ + fd = (struct fd *)saxpy; + range_start = fd->addr; + + fd = (struct fd *)saxpy2; + range_end = fd->addr; + + memset(pc, 0, sizeof(pc)); + memset(pd, 0, sizeof(pd)); + memset(&ctx, 0, sizeof(ctx)); + memset(ibrs,0, sizeof(ibrs)); + memset(&load_args,0, sizeof(load_args)); + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&mont_inp,0, sizeof(mont_inp)); + memset(&mont_outp,0, sizeof(mont_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; p->event_name ; i++, p++) { + if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("cannot find %s event\n", p->event_name); + } + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the code range we are interested in. The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields in rr_infos on return from the library call. + * + * Upon return, the rr_br array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used. + * + * In the case of code range restriction on Itanium 2, the library will try to use the fine + * mode first and then it will default to using multiple pairs to cover the range.
+ */ + + mont_inp.pfp_mont_irange.rr_used = 1; /* indicate we use code range restriction */ + mont_inp.pfp_mont_irange.rr_limits[0].rr_start = range_start; + mont_inp.pfp_mont_irange.rr_limits[0].rr_end = range_end; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, &mont_outp)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * print offsets + */ + printf("code range : [0x%016lx-0x%016lx)\n" + "start_offset:-0x%lx end_offset:+0x%lx\n" + "%d pairs of debug registers used\n", + range_start, + range_end, + mont_outp.pfp_mont_irange.rr_infos[0].rr_soff, + mont_outp.pfp_mont_irange.rr_infos[0].rr_eoff, + mont_outp.pfp_mont_irange.rr_nbr_used >> 1); + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract our file descriptor + */ + id = ctx.ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMCs to initialize. + * We must use pfp_pmd_count to determine the number of PMDs to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * propagate IBR settings.
IBRS are mapped to PMC256-PMC263 + */ + for (i=0; i < mont_outp.pfp_mont_irange.rr_nbr_used; i++) { + ibrs[i].dbreg_num = mont_outp.pfp_mont_irange.rr_br[i].reg_num; + ibrs[i].dbreg_value = mont_outp.pfp_mont_irange.rr_br[i].reg_value; + } + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (perfmonctl(id, PFM_WRITE_IBRS, ibrs, mont_outp.pfp_mont_irange.rr_nbr_used) == -1) + fatal_error("child: perfmonctl error PFM_WRITE_IBRS errno %d\n",errno); + + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count)) + fatal_error("child: pfm_write_pmcs error errno %d\n",errno); + + if (perfmonctl(id, PFM_WRITE_PMDS, pd, outp.pfp_pmd_count) == -1) + fatal_error("child: pfm_write_pmds error errno %d\n",errno); + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * Let's roll now. + * + * We run two distinct copies of the same function but we restrict measurement + * to the first one (saxpy). Therefore the expected count is half what you would + * get if code range restriction was not used. The core loop in both cases uses + * two floating point operations per iteration. + */ + pfm_self_start(id); + + do_test(); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error( "pfm_read_pmds error errno %d\n",errno); + } + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pd[] array to figure out where event i was allocated.
+ */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%-3u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, event_list[i].expected_value); + } + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/mont_opcode.c b/src/libpfm-3.y/examples_ia64_v2.0/mont_opcode.c new file mode 100644 index 0000000..f9fff3a --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/mont_opcode.c @@ -0,0 +1,268 @@ +/* + * mont_opcode.c - example of how to use the opcode matcher with the Dual-Core Itanium 2 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define OPCM_EVENT "IA64_TAGGED_INST_RETIRED_IBRP0_PMC32_33" + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define NLOOP 200UL + +/* + * we don't use static to make sure the compiler does not inline the function + */ +int +do_test(unsigned long loop) +{ + unsigned long sum = 0; + while(loop--) sum += loop; + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(void) +{ + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_mont_input_param_t mont_inp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_context_t ctx; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + int ret; + int type = 0; + int id; + unsigned int i; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with the %s PMU\n", model); + } + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&mont_inp,0, 
sizeof(mont_inp)); + + /* + * We indicate that we are using the first opcode matcher (PMC32/PMC33). + */ + mont_inp.pfp_mont_opcm1.opcm_used = 1; + + /* + * We want to match all the br.cloop in our test function. + * This branch is an IP-relative branch for which the major + * opcode (bits [40-37]) is 4 and the btype field (bits[6-8]) is 5. + * We ignore all the other fields in the opcode. + * + * On Montecito, the opcode matcher covers the full 41 bits of each + * instruction but we'll ignore them in this example. Hence the + * match value is: + * + * match = (4<<37)| (5<<6) = 0x8000000140 + * + * On Montecito, the match field covers the full 41 bits of each instruction. + * But for this example, we only care about the major and btype field, + * and we ignore all other bits. When a bit is set in the mask it means + * that the corresponding match bit value is a "don't care". A bit + * with value of zero indicates that the corresponding match bit + * must match. Hence we build the following mask: + * + * mask = ~((0xf<<37) | (0x3<<6)) = 0x1fffffff3f; + * + * The 0xf comes from the fact that major opcode is 4-bit wide. + * The 0x3 covers bits 6-7 of btype only, so bit 8 stays a "don't care" (NOTE(review): btype is 3-bit wide, 0x7 may have been intended). + */ + mont_inp.pfp_mont_opcm1.opcm_b = 1; + mont_inp.pfp_mont_opcm1.opcm_match = 0x8000000140; + mont_inp.pfp_mont_opcm1.opcm_mask = 0x1fffffff3f; + + /* + * To count the number of occurrences of this instruction, we must + * program a counting monitor with the IA64_TAGGED_INST_RETIRED_IBRP0_PMC32_33 + * event.
+ */ + if (pfm_find_full_event(OPCM_EVENT, &inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find event %s\n", OPCM_EVENT); + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract the unique identifier for our context, a regular file descriptor + */ + id = ctx.ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors.
+ */ + if (perfmonctl(id, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (perfmonctl(id, PFM_WRITE_PMDS, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(NLOOP); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count)) + fatal_error("pfm_read_pmds error errno %d\n",errno); + + /* + * print the results + */ + pfm_get_full_event_name(&inp.pfp_events[0], name, MAX_EVT_NAME_LEN); + + printf("PMD%-3u %20lu %s (expected %lu)\n", + pd[0].reg_num, + pd[0].reg_value, + name, NLOOP); + + if (pd[0].reg_value != 0) + printf("compiler used br.cloop\n"); + else + printf("compiler did not use br.cloop\n"); + + /* + * let's stop this now + */ + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/mont_rr.c b/src/libpfm-3.y/examples_ia64_v2.0/mont_rr.c new file mode 100644 index 0000000..fdf0ce0 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/mont_rr.c @@ -0,0 +1,387 @@ +/* + * mont_rr.c - example of how to use data range restriction with the Dual-Core Itanium 2 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if defined(__ECC) && defined(__INTEL_COMPILER) +/* if you do not have this file, your compiler is too old */ +#include + +#define clear_psr_ac() __rum(1UL<<3) + +#elif defined(__GNUC__) + +static inline void +clear_psr_ac(void) +{ + __asm__ __volatile__("rum psr.ac;;" ::: "memory" ); +} +#else +#error "You need to define clear_psr_ac() for your compiler" +#endif + + + +#define TEST_DATA_COUNT 16 +#define N_LOOP 100000000UL + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * here we capture only misaligned_loads because it cannot + * be measured with misaligned_stores_retired at the same time + */ +static char *event_list[]={ + "misaligned_loads_retired", + NULL +}; + + +typedef union { + unsigned long l_tab[2]; + unsigned int i_tab[4]; + unsigned short s_tab[8]; + unsigned char c_tab[16]; +} test_data_t; + +static int +do_test(test_data_t *data) +{ + unsigned int *l, v; + + l = (unsigned int *)(data->c_tab+1); + + if (((unsigned long)l & 0x1) == 0) { + printf("Data is not unaligned, can't run test\n"); + return -1; + } + + v = *l; + v++; + *l = v; + + return 0; +} + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(int argc, char **argv) +{ + char **p; + test_data_t *test_data, *test_data_fake; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_mont_input_param_t mont_inp; + pfmlib_mont_output_param_t mont_outp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_dbreg_t dbrs[8]; + pfarg_context_t ctx; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + unsigned int i; + int id, num_pmcs = 0; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * now let's allocate the data structure we will be monitoring + */ + test_data = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data == NULL) + fatal_error("cannot allocate test data structure"); + + test_data_fake = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data_fake == NULL) + fatal_error("cannot allocate test data structure"); + + /* + * Compute the range we are interested in + */ + range_start = (unsigned long)test_data; + range_end = range_start + sizeof(test_data_t)*TEST_DATA_COUNT; + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&load_args, 0,
sizeof(load_args)); + + /* + * prepare parameters to library. The Montecito specific + * parameters (mont_inp, mont_outp) are cleared as well. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&mont_inp,0, sizeof(mont_inp)); + memset(&mont_outp,0, sizeof(mont_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; *p ; i++, p++) { + if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event\n", *p); + } + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the data range we are interested in. The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields in rr_infos on return from the library call. + * + * Upon return, the rr_br array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used.
+ */ + + mont_inp.pfp_mont_drange.rr_used = 1; + mont_inp.pfp_mont_drange.rr_limits[0].rr_start = range_start; + mont_inp.pfp_mont_drange.rr_limits[0].rr_end = range_end; + + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, &mont_outp)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + printf("data range : [0x%016lx-0x%016lx): %d pair of debug registers used\n" + "start_offset:-0x%lx end_offset:+0x%lx\n", + range_start, + range_end, + mont_outp.pfp_mont_drange.rr_nbr_used >> 1, + mont_outp.pfp_mont_drange.rr_infos[0].rr_soff, + mont_outp.pfp_mont_drange.rr_infos[0].rr_eoff); + + printf("fake data range: [0x%016lx-0x%016lx)\n", + (unsigned long)test_data_fake, + (unsigned long)test_data_fake+sizeof(test_data_t)*TEST_DATA_COUNT); + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * extract the unique identifier for our context, a regular file descriptor + */ + id = ctx.ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMCs to initialize. + * We must use pfp_pmd_count to determine the number of PMDs to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++, num_pmcs++) { + pc[num_pmcs].reg_num = outp.pfp_pmcs[i].reg_num; + pc[num_pmcs].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * propagate the setup for the data debug registers.
DBRS are mapped + * at PMC264-PMC271 + */ + memset(dbrs, 0, sizeof(dbrs)); + for (i=0; i < mont_outp.pfp_mont_drange.rr_nbr_used; i++) { + dbrs[i].dbreg_num = mont_outp.pfp_mont_drange.rr_br[i].reg_num; + dbrs[i].dbreg_value = mont_outp.pfp_mont_drange.rr_br[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + + if (perfmonctl(id, PFM_WRITE_DBRS, dbrs, mont_outp.pfp_mont_drange.rr_nbr_used) == -1) { + fatal_error( "child: perfmonctl error PFM_WRITE_DBRS errno %d\n",errno); + } + + if (perfmonctl(id, PFM_WRITE_PMCS, pc, num_pmcs)) + fatal_error("child: pfm_write_pmcs error errno %d\n",errno); + + if (perfmonctl(id, PFM_WRITE_PMDS, pd, outp.pfp_pmd_count)) + fatal_error( "child: pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(id, PFM_LOAD_CONTEXT, &load_args, 1)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * Let's make sure that the hardware does the unaligned accesses (do not use the + * kernel software handler otherwise the PMU won't see the unaligned fault). + */ + clear_psr_ac(); + + /* + * Let's roll now. + * + * The idea behind this test is to have two dynamically allocated data structures + * which are accessed in an unaligned fashion. But we want to capture only the unaligned + * accesses on one of the two. So the debug registers are programmed to cover the + * first one ONLY. Then we activate monitoring and access the two data structures. + * This is an artificial example just to demonstrate how to use data address range + * restrictions.
+ */ + pfm_self_start(id); + + for(i=0; i < N_LOOP; i++) { + do_test(test_data); + do_test(test_data_fake); + } + + pfm_self_stop(id); + + /* + * now read the results + */ + if (perfmonctl(id, PFM_READ_PMDS, pd, inp.pfp_event_count)) + fatal_error("pfm_read_pmds error errno %d\n",errno); + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + * + * For this example, we expect to see a value of 1 for misaligned loads. + * But it can be two when the test_data and test_data_fake + * are allocated very close from each other and the range created with the debug + * registers is larger then test_data. + * + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%-3u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, N_LOOP); + + if (pd[i].reg_value != N_LOOP) { + printf("error: Result should be 1 for %s\n", name); + break; + } + } + /* + * let's stop this now + */ + close(id); + free(test_data); + free(test_data_fake); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/multiplex.c b/src/libpfm-3.y/examples_ia64_v2.0/multiplex.c new file mode 100644 index 0000000..9198423 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/multiplex.c @@ -0,0 +1,711 @@ +/* + * multiplex.c - example of user-level event multiplexing + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This file is part of pfmon, a sample tool to measure performance + * of applications on Linux/ia64. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ + +#ifndef _GNU_SOURCE + #define _GNU_SOURCE /* for getline */ +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define MULTIPLEX_VERSION "0.1" + +#define MIN_FULL_PERIODS 100 + +#define SMPL_FREQ_IN_HZ 100 + +#define NUM_PMCS PMU_MAX_PMCS +#define NUM_PMDS PMU_MAX_PMDS + +#define MAX_NUM_COUNTERS 32 +#define MAX_PMU_NAME_LEN 32 +typedef struct { + struct { + int opt_plm; /* which privilege level to monitor (more than one possible) */ + int opt_debug; /* print debug information */ + int opt_verbose; /* verbose output */ + int opt_us_format; /* print large numbers with comma for thousands */ + } program_opt_flags; + + unsigned long max_counters; /* maximum number of counter for the platform */ + unsigned long smpl_freq; + unsigned long smpl_period; + + unsigned long cpu_mhz; + unsigned long full_periods; +} program_options_t; + +#define opt_plm program_opt_flags.opt_plm +#define opt_debug program_opt_flags.opt_debug +#define opt_verbose program_opt_flags.opt_verbose +#define opt_us_format program_opt_flags.opt_us_format + +typedef struct { + char *event_names[MAX_NUM_COUNTERS]; + pfmlib_input_param_t 
pfm_inp; + pfmlib_output_param_t pfm_outp; + pfarg_reg_t pmcs[MAX_NUM_COUNTERS]; + pfarg_reg_t pmds[MAX_NUM_COUNTERS]; + unsigned long values[MAX_NUM_COUNTERS]; + unsigned long n_runs; + unsigned int n_counters; + unsigned int n_pmcs; +} event_set_t; + +typedef int pfm_ctxid_t; + +static pfm_ctxid_t ctxid; +static int current_set; +static program_options_t options; + +/* + * NO MORE THAN MAX_COUNTERS-1 (3) EVENTS PER SET + */ +static event_set_t events[]={ + { {"BACK_END_BUBBLE_ALL","BACK_END_BUBBLE_L1D_FPU_RSE","BE_EXE_BUBBLE_ALL", },}, + { {"BACK_END_BUBBLE_FE", "BACK_END_BUBBLE_L1D_FPU_RSE", "BE_RSE_BUBBLE_ALL",},}, + { {"BE_L1D_FPU_BUBBLE_ALL", "BE_L1D_FPU_BUBBLE_L1D", "BE_EXE_BUBBLE_FRALL",},}, + { {"BE_EXE_BUBBLE_GRALL", "BE_EXE_BUBBLE_GRGR", },}, + { {"NOPS_RETIRED", "CPU_CYCLES", },} +}; +#define N_SETS (sizeof(events)/sizeof(event_set_t)) + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +vbprintf(char *fmt, ...) +{ + va_list ap; + + if (options.opt_verbose == 0) return; + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* get_cpu_speed: return the integer part of the first "cpu MHz" entry in /proc/cpuinfo, or 0 when the file cannot be opened or no such entry is found */ +static unsigned long +get_cpu_speed(void) +{ + FILE *fp1; + unsigned long f = 0; + char buffer[128], *p, *value; + + memset(buffer, 0, sizeof(buffer)); + + fp1 = fopen("/proc/cpuinfo", "r"); + if (fp1 == NULL) return 0; + + for (;;) { + buffer[0] = '\0'; + + p = fgets(buffer, 127, fp1); + if (p == NULL) goto end; + + /* skip blank lines */ + if (*p == '\n') continue; + + p = strchr(buffer, ':'); + if (p == NULL) continue; /* no "key : value" separator (e.g. tail of an over-long line): skip it instead of giving up */ + + /* + * p+1 is the space after ':', p+2 the first character of the value + * (p+1 when nothing follows the ':'); the trailing \n, if any, is cut off + */ + *p = '\0'; + value = (p[1] != '\0') ? p+2 : p+1; + + value[strcspn(value, "\n")] = '\0'; + + if (!strncmp("cpu MHz", buffer, 7)) { + sscanf(value, "%lu", &f); + goto end; + } + } +end: + fclose(fp1); + return f; +} + + +static void +update_set(pfm_ctxid_t ctxid, int set_idx) +{ + event_set_t *cset = events + set_idx; + int count; + int ret; + int i; + + + /* + * we do not read the last counter (cpu_cycles) to avoid overwriting + * the reg_value field which will be used for next round + * + * We need to retry the read in case we get EBUSY because it means that + * the child task context is not yet available from inspection by PFM_READ_PMDS.
+ * + */ + count = cset->n_counters - 1; + + ret = perfmonctl(ctxid, PFM_READ_PMDS, cset->pmds, count); + if (ret == -1) { + fatal_error("update_set reading set %d: %s\n", set_idx, strerror(errno)); + } + + /* update counts for this set */ + for (i=0; i < count; i++) { + cset->values[i] += cset->pmds[i].reg_value; + cset->pmds[i].reg_value = 0UL; /* reset for next round */ + } +} + + +#if 0 +static void +update_last_set(pfm_ctxid_t ctxid, int set_idx) +{ + event_set_t *cset = events + set_idx; + unsigned long cycles; + int i; + + /* + * this time we read ALL the counters (including CPU_CYCLES) because we + * need it to scale the last period + */ + if (perfmonctl(ctxid, PFM_READ_PMDS, cset->pmds, cset->n_counters) == -1) { + fatal_error("update_last_set reading set %d\n", set_idx); + } + + cycles = ~0UL - cset->pmds[cset->n_counters-1].reg_value; + + printf("last period = %4.1f%% of full period\n", (cycles*100.0)/options.smpl_period); + + /* this time we scale the value to the length of this last period */ + for (i=0; i < cset->n_counters-1; i++) { + cset->values[i] += (cset->pmds[i].reg_value*cycles)/options.smpl_period; + } +} +#endif + +int +child(char **arg) +{ + ptrace(PTRACE_TRACEME, 0, NULL, NULL); + + execvp(arg[0], arg); + /* not reached */ + + exit(1); +} + +static void +dec2sep(char *str2, char *str, char sep) +{ + int i, l, b, j, c=0; + /* copy the digit string str2 into str right to left, putting sep between groups of three digits; str needs l + (l-1)/3 + 1 bytes */ + l = strlen(str2); + if (l <= 3) { + strcpy(str, str2); + return; + } + b = l + l /3 - (l%3 == 0); /* index of the terminating NUL in str; the l%3 correction accounts for the absent leading separator */ + for(i=l, j=0; i >= 0; i--, j++) { + if (j) c++; + str[b-j] = str2[i]; + if (c == 3 && i > 0) { /* i > 0: no separator in front of the leading digit, which would land on str[-1] when l%3 == 0 */ + str[b-++j] = sep; + c = 0; + } + } +} + +static void +print_results(void) +{ + unsigned int i, j; + event_set_t *e; + char tmp1[32], tmp2[32]; + char mtotal_str[32], *mtotal; + char stotal_str[32], *stotal; + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor.
That's why + * we need to use the pc[] array to figure out where event i was allocated. + * + */ + printf("%lu Hz period = %lu cycles @ %lu Mhz\n", options.smpl_freq, options.smpl_period, options.cpu_mhz); + printf("%lu full periods\n", options.full_periods); + printf("%lu event sets\n", N_SETS); + printf("set measured total #runs scaled total event name\n"); + printf("-------------------------------------------------------------------\n"); + + for (i=0; i < N_SETS; i++) { + e = events + i; + for(j=0; j < e->n_counters-1; j++) { + + sprintf(tmp1, "%"PRIu64, e->values[j]); + + if (options.opt_us_format) { + dec2sep(tmp1, mtotal_str, ','); + mtotal = mtotal_str; + } else { + mtotal = tmp1; + } + sprintf(tmp2, "%"PRIu64, (e->values[j]*options.full_periods)/e->n_runs); /* stupid scaling */ + + if (options.opt_us_format) { + dec2sep(tmp2, stotal_str, ','); + stotal = stotal_str; + } else { + stotal = tmp2; + } + + printf("%03d: %20s %8"PRIu64" %20s %s\n", + i, + mtotal, + e->n_runs, + stotal, + e->event_names[j]); + } + } +} + +static void +switch_sets(void) +{ + event_set_t *cset; + + update_set(ctxid, current_set); + current_set = (current_set+1) % N_SETS; + + + cset = events+current_set; + cset->n_runs++; + + vbprintf("starting run %lu for set %d n_pmcs=%d pmd=%"PRIu64"\n", + cset->n_runs, current_set, cset->n_pmcs, + cset->pmds[cset->n_counters-1].reg_value); + + /* + * if one set as less events than another one, the left-over events will continue + * to count for nothing. 
That's fine because we will restore their values when + * the correspinding set is reloaded + */ + if (perfmonctl(ctxid, PFM_WRITE_PMCS, cset->pmcs, cset->n_pmcs) == -1) { + fatal_error("overflow handler writing pmcs set %d : %d\n", current_set, errno); + } + + if (perfmonctl(ctxid, PFM_WRITE_PMDS, cset->pmds, cset->n_counters) == -1) { + fatal_error("overflow handler writing pmds set %d\n", current_set); + } + + options.full_periods++; + + if (perfmonctl(ctxid, PFM_RESTART,NULL, 0) == -1) { + perror("PFM_RESTART"); + exit(1); + } + +} + +int +parent(char **arg) +{ + event_set_t *e; + pfarg_context_t ctx[1]; + pfarg_load_t load_arg; + event_set_t *cset; + pfm_msg_t msg; + struct pollfd ctx_pollfd; + pfmlib_regmask_t impl_counters, used_pmcs; + pfmlib_event_t cycle_event; + unsigned int i, j, k, l,idx; + int r, status, ret; + unsigned int max_counters, allowed_counters; + pid_t pid; + + pfm_get_num_counters(&max_counters); + if (max_counters < 2) + fatal_error("not enough counter to do anything meaningful\n"); + + allowed_counters = max_counters-1; /* reserve one slot for our sampling period */ + + memset(&used_pmcs, 0, sizeof(used_pmcs)); + memset(&impl_counters, 0, sizeof(impl_counters)); + + pfm_get_impl_counters(&impl_counters); + + memset(ctx, 0, sizeof(ctx)); + memset(&load_arg, 0, sizeof(load_arg)); + + if (pfm_get_cycle_event(&cycle_event) != PFMLIB_SUCCESS) { + fatal_error("Cannot find cycle event\n"); + } + + options.smpl_period = (options.cpu_mhz*1000000)/options.smpl_freq; + + vbprintf("%lu Hz period = %lu cycles @ %lu Mhz\n", options.smpl_freq, options.smpl_period, options.cpu_mhz); + + for (i=0; i < N_SETS; i++) { + + e = events+i; + + memset(&e->pfm_inp,0, sizeof(pfmlib_input_param_t)); + memset(&e->pfm_outp,0, sizeof(pfmlib_output_param_t)); + + for(j=0; e->event_names[j] && j < allowed_counters; j++) { + + if (pfm_find_event(e->event_names[j], &idx) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event\n", e->event_names[j]); + } + 
e->pfm_inp.pfp_events[j].event = idx; + } + + if (e->event_names[j]) { + fatal_error("cannot have more than %d events per set (CPU_CYCLES uses 1 slot)\n", allowed_counters); + } + e->pfm_inp.pfp_events[j] = cycle_event; + e->pfm_inp.pfp_event_count = j+1; + e->pfm_inp.pfp_dfl_plm = options.opt_plm; + + e->n_pmcs = j+1; /* used pmcs +1=sampling period */ + e->n_counters = j+1; /* used pmd/pmc counter pairs +1=sampling period */ + + vbprintf("PMU programming for set %d\n", i); + + if ((ret=pfm_dispatch_events(&e->pfm_inp, NULL, &e->pfm_outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events for set %d: %s\n", i, pfm_strerror(ret)); + } + /* + * propagate from libpfm to kernel data structures + */ + for (j=0; j < e->n_counters; j++) { + e->pmcs[j].reg_num = e->pfm_outp.pfp_pmcs[j].reg_num; + e->pmcs[j].reg_value = e->pfm_outp.pfp_pmcs[j].reg_value; + + e->pmds[j].reg_num = e->pmcs[j].reg_num; + + pfm_regmask_set(&used_pmcs, e->pmcs[j].reg_num); + } + + /* last counter contains our sampling counter */ + e->pmcs[j-1].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + e->pmds[j-1].reg_value = (~0) - options.smpl_period + 1; + e->pmds[j-1].reg_short_reset = (~0) - options.smpl_period + 1; + e->pmds[j-1].reg_long_reset = (~0) - options.smpl_period + 1; + + for (j=0; j < e->n_counters-1; j++) { + vbprintf("[pmd[%u]=0x%"PRIx64"/0x%"PRIx64"/0x%"PRIx64"]\n", + e->pmds[j].reg_num, + e->pmds[j].reg_value, + e->pmds[j].reg_short_reset, + e->pmds[j].reg_long_reset); + } + vbprintf("[pmd[%u]=0x%"PRIx64"/0x%"PRIx64"/0x%"PRIx64"]\n", + e->pmds[j].reg_num, + e->pmds[j].reg_value, + e->pmds[j].reg_short_reset, + e->pmds[j].reg_long_reset); + + /* + * we blank the unused pmcs to make sure every set uses all the counters, i.e., + * cannot overflow due to some previous sampling periods that uses a counter + * beyond the number used by the current set + */ + for(j=0, k=e->n_pmcs, l=0; l < max_counters; j++) { + if (pfm_regmask_isset(&impl_counters, j) == 0) continue; + l++; + if 
(pfm_regmask_isset(&used_pmcs, j)) continue; + e->pmcs[k].reg_num = j; + e->pmcs[k].reg_value = 0UL; + k++; + } + e->n_pmcs= k; + } + /* + * point to first set of counters + */ + current_set = 0; + + /* + * we block on counter overflow + */ + ctx[0].ctx_flags = PFM_FL_NOTIFY_BLOCK; + + /* + * attach the context to the task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + /* + * extract context id + */ + ctxid = ctx[0].ctx_fd; + + /* + * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., + * fd not visible to child. + */ + if (fcntl(ctxid, F_SETFD, FD_CLOEXEC)) + fatal_error("cannot set CLOEXEC: %s\n", strerror(errno)); + + ctx_pollfd.fd = ctxid; + ctx_pollfd.events = POLLIN; + + cset = events + current_set; + cset->n_runs++; + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more thann coutning monitors. 
+ */ + if (perfmonctl(ctxid, PFM_WRITE_PMCS, cset->pmcs, cset->n_pmcs) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + + /* + * initialize the PMDs + */ + if (perfmonctl(ctxid, PFM_WRITE_PMDS, cset->pmds, cset->n_counters) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * now launch the child code + */ + if ((pid= fork()) == -1) fatal_error("Cannot fork process\n"); + if (pid == 0) exit(child(arg)); + + /* + * wait for the child to exec + */ + r = waitpid(pid, &status, WUNTRACED); + + if (r < 0 || WIFEXITED(status)) + fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status)); + + vbprintf("child created and stopped\n"); + + /* + * the child is stopped, load context + */ + load_arg.load_pid = pid; + if (perfmonctl(ctxid, PFM_LOAD_CONTEXT, &load_arg, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + + /* + * make sure monitoring will be activated when the execution is resumed + */ + if (perfmonctl(ctxid, PFM_START, NULL, 0) == -1) { + fatal_error("perfmonctl error PFM_START errno %d\n",errno); + } + + /* + * resume execution + */ + ptrace(PTRACE_DETACH, pid, NULL, 0); + + /* + * mainloop + */ + for(;;) { + ret = read(ctxid, &msg, sizeof(msg)); + if (ret < 0) break; + switch(msg.type) { + case PFM_MSG_OVFL: + switch_sets(); + break; + case PFM_MSG_END: + goto finish_line; + default: printf("unknown message type %d\n", msg.type); + } + } +finish_line: + + if (options.full_periods < MIN_FULL_PERIODS) { + fatal_error("Not enough periods (%lu) to print results\n", options.full_periods); + } + + //update_last_set(pid, current_set); + + waitpid(pid, &status, 0); + + print_results(); + + if (ctxid) close(ctxid); + + return 0; +} + + + +static struct option multiplex_options[]={ + { "help", 0, 0, 1}, + { "freq", 1, 0, 2 }, + { "kernel-level", 0, 0, 3 }, + { "user-level", 0, 0, 4 }, + { "version", 0, 0, 5 }, + + { "verbose", 0, 
&options.opt_verbose, 1 }, + { "debug", 0, &options.opt_debug, 1 }, + { "us-counter-format", 0, &options.opt_us_format, 1}, + { 0, 0, 0, 0} +}; + +static void +print_usage(char **argv) +{ + printf("usage: %s [OPTIONS]... COMMAND\n", argv[0]); + + printf( "-h, --help\t\t\t\tdisplay this help and exit\n" + "-V, --version\t\t\t\toutput version information and exit\n" + "-u, --user-level\t\t\tmonitor at the user level for all events\n" + "-k, --kernel-level\t\t\tmonitor at the kernel level for all events\n" + "-c, --us-counter-format\tprint large counts with comma for thousands\n" + "--freq=number\t\t\t\tset sampling frequency in Hz\n" + "--verbose\t\t\t\tprint more information during execution\n" + ); +} + +/* main: parse the options, verify the PMU type and CPU speed, then run the command under parent() */ +int +main(int argc, char **argv) +{ + char *endptr = NULL; + pfmlib_options_t pfmlib_options; + int c, type; + + + while ((c=getopt_long(argc, argv,"+vhkuVc", multiplex_options, 0)) != -1) { + switch(c) { + case 0: continue; /* fast path for options */ + case 'h': + case 1: /* -h, --help */ + print_usage(argv); + exit(0); + + case 'v': options.opt_verbose = 1; + break; + case 'c': + options.opt_us_format = 1; + break; + case 2: /* --freq=number: long option only, always carries an argument */ + if (options.smpl_freq) fatal_error("sampling frequency set twice\n"); + options.smpl_freq = strtoul(optarg, &endptr, 10); + if (*endptr != '\0') + fatal_error("invalid frequency: %s\n", optarg); + break; + case 3: + case 'k': + options.opt_plm |= PFM_PLM0; + break; + case 4: + case 'u': + options.opt_plm |= PFM_PLM3; + break; + case 5: + case 'V': /* -V, --version: takes no argument, optarg is NULL here */ + printf("multiplex version " MULTIPLEX_VERSION " Date: " __DATE__ "\n" + "Copyright (C) 2002 Hewlett-Packard Company\n"); + exit(0); + default: + fatal_error(""); /* just quit silently now */ + } + } + + if (optind == argc) fatal_error("you need to specify a command to measure\n"); + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("can't initialize library\n"); + } + /* + * Let's make sure we run this on the right CPU family + */ +
pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + if ((options.cpu_mhz = get_cpu_speed()) == 0) { + fatal_error("can't get CPU speed\n"); + } + if (options.smpl_freq == 0UL) options.smpl_freq = SMPL_FREQ_IN_HZ; + if (options.opt_plm == 0) options.opt_plm = PFM_PLM3; + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = options.opt_verbose; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + return parent(argv+optind); +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/notify_self.c b/src/libpfm-3.y/examples_ia64_v2.0/notify_self.c new file mode 100644 index 0000000..790de12 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/notify_self.c @@ -0,0 +1,308 @@ +/* + * notify_self.c - example of how you can use overflow notifications + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include + +#define SMPL_PERIOD 1000000000UL + +static volatile unsigned long notification_received; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +static pfarg_reg_t pd[NUM_PMDS]; +static int ctx_fd; +static char *event1_name; + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +static void +sigio_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + pfm_msg_t msg; + int fd = ctx_fd; + int r; + + if (fd != ctx_fd) { + fatal_error("handler does not get valid file descriptor\n"); + } + + if (event1_name && perfmonctl(fd, PFM_READ_PMDS, pd+1, 1) == -1) { + fatal_error("PFM_READ_PMDS: %s", strerror(errno)); + } + + r = read(fd, &msg, sizeof(msg)); + if (r != sizeof(msg)) { + fatal_error("cannot read overflow message: %s\n", strerror(errno)); + } + + if (msg.type != PFM_MSG_OVFL) { + fatal_error("unexpected msg type: %d\n",msg.type); + } + + /* + * XXX: risky to do printf() in signal handler! + */ + if (event1_name) + printf("Notification %lu: %"PRIu64" %s\n", notification_received, pd[1].reg_value, event1_name); + else + printf("Notification %lu\n", notification_received); + + /* + * At this point, the counter used for the sampling period has already + * be reset by the kernel because we are in non-blocking mode, self-monitoring. 
+ */ + + /* + * increment our notification counter + */ + notification_received++; + + /* + * And resume monitoring + */ + if (perfmonctl(fd, PFM_RESTART,NULL, 0) == -1) { + fatal_error("PFM_RESTART: %s", strerror(errno)); + } +} + +/* + * infinite loop waiting for notification to get out + */ +void +busyloop(void) +{ + /* + * busy loop to burn CPU cycles + */ + for(;notification_received < 3;) ; +} + + +int +main(int argc, char **argv) +{ + pfarg_context_t ctx[1]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i, num_counters; + size_t len; + int ret; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + printf("Can't initialize library\n"); + exit(1); + } + + /* + * Install the signal handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)sigio_handler; + sigaction (SIGIO, &act, 0); + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + pfm_get_num_counters(&num_counters); + + if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) + fatal_error("cannot find inst retired event\n"); + + i = 2; + + /* + * set the default privilege mode for all counters: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + if (i > num_counters) { + i = num_counters; + printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); + } + + /* + * how many counters we use + */ + 
inp.pfp_event_count = i; + + pfm_get_max_event_name_len(&len); + + event1_name = malloc(len+1); + if (event1_name == NULL) + fatal_error("cannot allocate event name\n"); + + pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + ctx_fd = ctx->ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We use pfp_pmc_count to determine the number of registers to + * setup. Note that this field can be >= pfp_event_count. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + /* + * We want to get notified when the counter used for our first + * event overflows + */ + pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + pc[0].reg_reset_pmds[0] |= 1UL << outp.pfp_pmcs[1].reg_num; + + /* + * we arm the first counter, such that it will overflow + * after SMPL_PERIOD events have been observed + */ + pd[0].reg_value = (~0UL) - SMPL_PERIOD + 1; + pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD + 1; + pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD + 1; + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. 
+ */ + if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + + if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * we want to monitor ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); + if (ret == -1) { + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + } + + /* + * get ownership of the descriptor + */ + ret = fcntl(ctx_fd, F_SETOWN, getpid()); + if (ret == -1) { + fatal_error("cannot setown: %s\n", strerror(errno)); + } + + /* + * Let's roll now + */ + pfm_self_start(ctx_fd); + + busyloop(); + + pfm_self_stop(ctx_fd); + + /* + * free our context + */ + close(ctx_fd); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/notify_self2.c b/src/libpfm-3.y/examples_ia64_v2.0/notify_self2.c new file mode 100644 index 0000000..a95b19f --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/notify_self2.c @@ -0,0 +1,326 @@ +/* + * notify_self2.c - example of how you can use overflow notifications with F_SETSIG + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#define SMPL_PERIOD 1000000000UL + +static volatile unsigned long notification_received; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +static pfarg_reg_t pd[NUM_PMDS]; +static int ctx_fd; +static char *event1_name; + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + + +static void +sigio_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + pfm_msg_t msg; + int fd = info->si_fd; + int r; + + if (fd != ctx_fd) { + fatal_error("handler does not get valid file descriptor\n"); + } + + if (perfmonctl(fd, PFM_READ_PMDS, pd+1, 1) == -1) { + fatal_error("PFM_READ_PMDS: %s", strerror(errno)); + } + + r = read(fd, &msg, sizeof(msg)); + if (r != sizeof(msg)) { + fatal_error("cannot read overflow message: %s\n", strerror(errno)); + } + + if (msg.type != PFM_MSG_OVFL) { + fatal_error("unexpected msg type: %d\n",msg.type); + } + + /* + * XXX: risky to do printf() in signal handler! + */ + if (event1_name) + printf("Notification %lu: %"PRIu64" %s\n", notification_received, pd[1].reg_value, event1_name); + else + printf("Notification %lu\n", notification_received); + + /* + * At this point, the counter used for the sampling period has already + * be reset by the kernel because we are in non-blocking mode, self-monitoring. 
+ */ + + /* + * increment our notification counter + */ + notification_received++; + + /* + * And resume monitoring + */ + if (perfmonctl(fd, PFM_RESTART,NULL, 0) == -1) { + fatal_error("PFM_RESTART: %s", strerror(errno)); + } +} + +/* + * infinite loop waiting for notification to get out + */ +void +busyloop(void) +{ + /* + * busy loop to burn CPU cycles + */ + for(;notification_received < 3;) ; +} + + +int +main(int argc, char **argv) +{ + pfarg_context_t ctx[1]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i, num_counters; + size_t len; + int ret; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + printf("Can't initialize library\n"); + exit(1); + } + + /* + * Install the signal handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)sigio_handler; + sigaction (SIGIO, &act, 0); + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + pfm_get_num_counters(&num_counters); + + if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) + fatal_error("cannot find inst retired event\n"); + + i = 2; + + /* + * set the default privilege mode for all counters: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + if (i > num_counters) { + i = num_counters; + printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); + } + + /* + * how many counters we use + */ + 
inp.pfp_event_count = i; + + if (i > 1) { + pfm_get_max_event_name_len(&len); + event1_name = malloc(len+1); + if (event1_name == NULL) + fatal_error("cannot allocate event name\n"); + + pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); + } + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * now create the context for self monitoring/per-task + */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + ctx_fd = ctx->ctx_fd; + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We use pfp_pmc_count to determine the number of registers to + * setup. Note that this field can be >= pfp_event_count. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + /* + * We want to get notified when the counter used for our first + * event overflows + */ + pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + pc[0].reg_reset_pmds[0] |= 1UL << outp.pfp_pmcs[1].reg_num; + + /* + * we arm the first counter, such that it will overflow + * after SMPL_PERIOD events have been observed + */ + pd[0].reg_value = (~0UL) - SMPL_PERIOD + 1; + pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD + 1; + pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD + 1; + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. 
+ */ + if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + + if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * we want to monitor ourself + */ + load_args.load_pid = getpid(); + + if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); + if (ret == -1) { + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + } + + /* + * get ownership of the descriptor + */ + ret = fcntl(ctx_fd, F_SETOWN, getpid()); + if (ret == -1) { + fatal_error("cannot setown: %s\n", strerror(errno)); + } +#ifndef _GNU_SOURCE +#error "this program must be compiled with -D_GNU_SOURCE" +#else + /* + * when you explicitely declare that you want a particular signal, + * even with you use the default signal, the kernel will send more + * information concerning the event to the signal handler. + * + * In particular, it will send the file descriptor from which the + * event is originating which can be quite useful when monitoring + * multiple tasks from a single thread. 
+ */ + ret = fcntl(ctx_fd, F_SETSIG, SIGIO); + if (ret == -1) { + fatal_error("cannot setsig: %s\n", strerror(errno)); + } +#endif + /* + * Let's roll now + */ + pfm_self_start(ctx_fd); + + busyloop(); + + pfm_self_stop(ctx_fd); + + /* + * free our context + */ + close(ctx_fd); + + return 0; +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/notify_self3.c b/src/libpfm-3.y/examples_ia64_v2.0/notify_self3.c new file mode 100644 index 0000000..364e774 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/notify_self3.c @@ -0,0 +1,316 @@ +/* + * notify_self3.c - example of how you can use overflow notifications with no messages + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#include
+#include
+
+#define SMPL_PERIOD 1000000000UL
+
+static volatile unsigned long notification_received;
+
+#define NUM_PMCS PFMLIB_MAX_PMCS
+#define NUM_PMDS PFMLIB_MAX_PMDS
+
+static pfarg_reg_t pd[NUM_PMDS];
+static int ctx_fd;
+static char *event1_name;
+
+static void fatal_error(char *fmt,...) __attribute__((noreturn));
+
+/*
+ * print a printf-style message on stderr and terminate with exit status 1
+ */
+static void
+fatal_error(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+
+/*
+ * SIGIO handler: when a second event is monitored (event1_name is set),
+ * read its PMD into pd[1] and print it with the running notification
+ * count; then bump the count and issue PFM_RESTART on the context.
+ */
+static void
+sigio_handler(int n, struct siginfo *info, struct sigcontext *sc)
+{
+	/*
+	 * pd[1] is only programmed when a second event exists, which is
+	 * exactly when main() has set event1_name
+	 */
+	if (event1_name && perfmonctl(ctx_fd, PFM_READ_PMDS, pd+1, 1) == -1) {
+		fatal_error("PFM_READ_PMDS: %s", strerror(errno));
+	}
+
+	/*
+	 * we do not need to extract the overflow message, we know
+	 * where it is coming from.
+	 */
+
+	/*
+	 * XXX: risky to do printf() in signal handler!
+	 */
+	if (event1_name)
+		printf("Notification %02lu: %"PRIu64" %s\n", notification_received, pd[1].reg_value, event1_name);
+	else
+		printf("Notification %02lu:\n", notification_received);
+
+	/*
+	 * At this point, the counter used for the sampling period has already
+	 * been reset by the kernel because we are in non-blocking mode, self-monitoring.
+	 */
+
+	/*
+	 * increment our notification counter
+	 */
+	notification_received++;
+
+	/*
+	 * And resume monitoring
+	 */
+	if (perfmonctl(ctx_fd, PFM_RESTART,NULL, 0) == -1) {
+		fatal_error("PFM_RESTART: %s", strerror(errno));
+	}
+}
+
+/*
+ * spin until the signal handler has counted 40 notifications
+ */
+void
+busyloop(void)
+{
+	/*
+	 * busy loop to burn CPU cycles
+	 */
+	for(;notification_received < 40;) ;
+}
+
+
+/*
+ * Programs a cycle event (plus an instructions-retired event when a second
+ * counter is available), arms the first counter to overflow after
+ * SMPL_PERIOD events, and spins in busyloop() while SIGIO notifications
+ * are delivered. Any failure is fatal.
+ */
+int
+main(int argc, char **argv)
+{
+	pfarg_context_t ctx[1];
+	pfmlib_input_param_t inp;
+	pfmlib_output_param_t outp;
+	pfarg_reg_t pc[NUM_PMCS];
+	pfarg_load_t load_args;
+	pfmlib_options_t pfmlib_options;
+	struct sigaction act;
+	size_t len;
+	unsigned int i, num_counters;
+	int ret;
+
+	/*
+	 * Initialize pfm library (required before we can use it)
+	 */
+	if (pfm_initialize() != PFMLIB_SUCCESS) {
+		printf("Can't initialize library\n");
+		exit(1);
+	}
+
+	/*
+	 * Install the signal handler (SIGIO)
+	 */
+	memset(&act, 0, sizeof(act));
+	act.sa_handler = (sig_t)sigio_handler;
+	if (sigaction(SIGIO, &act, 0) == -1) {
+		fatal_error("cannot install SIGIO handler: %s\n", strerror(errno));
+	}
+
+	/*
+	 * pass options to library (optional)
+	 */
+	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
+	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
+	pfm_set_options(&pfmlib_options);
+
+	memset(pc, 0, sizeof(pc));
+	memset(ctx, 0, sizeof(ctx));
+	memset(&load_args, 0, sizeof(load_args));
+	memset(&inp,0, sizeof(inp));
+	memset(&outp,0, sizeof(outp));
+
+	pfm_get_num_counters(&num_counters);
+
+	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find cycle event\n");
+
+	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find inst retired event\n");
+
+	i = 2;
+
+	if (i > num_counters) {
+		i = num_counters;
+		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
+	}
+
+	/*
+	 * set the default privilege mode for all counters:
+	 * PFM_PLM3 : user level only
+	 */
+	inp.pfp_dfl_plm = PFM_PLM3;
+
+	/*
+	 * how many counters we use
+	 */
+	inp.pfp_event_count = i;
+
+	/*
+	 * remember the name of the second event (when we have one) so that
+	 * the signal handler can print it
+	 */
+	if (i > 1) {
+		pfm_get_max_event_name_len(&len);
+
+		event1_name = malloc(len+1);
+		if (event1_name == NULL)
+			fatal_error("cannot allocate event name\n");
+
+		pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1);
+	}
+
+	/*
+	 * let the library figure out the values for the PMCS
+	 */
+	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
+		fatal_error("Cannot configure events: %s\n", pfm_strerror(ret));
+	}
+	/*
+	 * when we know we are self-monitoring and we have only one context, then
+	 * when we get an overflow we know where it is coming from. Therefore we can
+	 * save the call to the kernel to extract the notification message. By default,
+	 * a message is generated. The queue of messages has a limited size, therefore
+	 * it is important to clear the queue by reading the message on overflow. Failure
+	 * to do so may result in a queue full and you will lose notification messages.
+	 *
+	 * With the PFM_FL_OVFL_NO_MSG, no message will be queued, but you will still get
+	 * the signal. Similarly, the PFM_MSG_END will be generated.
+	 */
+	ctx[0].ctx_flags = PFM_FL_OVFL_NO_MSG;
+
+	/*
+	 * now create the context for self monitoring/per-task
+	 */
+	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
+		if (errno == ENOSYS) {
+			fatal_error("Your kernel does not have performance monitoring support!\n");
+		}
+		fatal_error("Can't create PFM context %s\n", strerror(errno));
+	}
+	ctx_fd = ctx->ctx_fd;
+
+	/*
+	 * Now prepare the argument to initialize the PMDs and PMCS.
+	 * We use pfp_pmc_count to determine the number of registers to
+	 * setup. Note that this field can be >= pfp_event_count.
+	 */
+
+	for (i=0; i < outp.pfp_pmc_count; i++) {
+		pc[i].reg_num = outp.pfp_pmcs[i].reg_num;
+		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
+	}
+
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pd[i].reg_num = pc[i].reg_num;
+	}
+	/*
+	 * We want to get notified when the counter used for our first
+	 * event overflows
+	 */
+	pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY;
+
+	/*
+	 * only put the second counter in the reset mask when a second
+	 * event was actually programmed; otherwise pfp_pmcs[1] does not
+	 * describe one of our counters
+	 */
+	if (inp.pfp_event_count > 1)
+		pc[0].reg_reset_pmds[0] |= 1UL << outp.pfp_pmcs[1].reg_num;
+
+	/*
+	 * we arm the first counter, such that it will overflow
+	 * after SMPL_PERIOD events have been observed
+	 */
+	pd[0].reg_value = (~0UL) - SMPL_PERIOD + 1;
+	pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD + 1;
+	pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD + 1;
+
+	/*
+	 * Now program the registers
+	 *
+	 * We don't use the same variable to indicate the number of elements passed to
+	 * the kernel because, as we said earlier, pc may contain more elements than
+	 * the number of events we specified, i.e., contains more than counting monitors.
+	 */
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
+	}
+
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
+	}
+
+	/*
+	 * we want to monitor ourself
+	 */
+	load_args.load_pid = getpid();
+
+	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
+		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
+	}
+
+	/*
+	 * setup asynchronous notification on the file descriptor
+	 */
+	ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC);
+	if (ret == -1) {
+		fatal_error("cannot set ASYNC: %s\n", strerror(errno));
+	}
+
+	/*
+	 * get ownership of the descriptor
+	 */
+	ret = fcntl(ctx_fd, F_SETOWN, getpid());
+	if (ret == -1) {
+		fatal_error("cannot setown: %s\n", strerror(errno));
+	}
+
+	/*
+	 * Let's roll now
+	 */
+	pfm_self_start(ctx_fd);
+
+	busyloop();
+
+	pfm_self_stop(ctx_fd);
+
+	/*
+	 * free our context
+	 */
+	close(ctx_fd);
+
+	return 0;
+}
diff --git a/src/libpfm-3.y/examples_ia64_v2.0/notify_self_fork.c b/src/libpfm-3.y/examples_ia64_v2.0/notify_self_fork.c
new file mode 100755
index 0000000..42fe614
--- /dev/null
+++ b/src/libpfm-3.y/examples_ia64_v2.0/notify_self_fork.c
@@ -0,0 +1,323 @@
+/*
+ * notify_self.c - example of how you can use overflow notifications
+ *
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian
+ * Modified by Phil Mucci to add the fork()
+ * Adapted to v2.0 interface by Stephane Eranian
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of libpfm, a performance monitoring support library for
+ * applications on Linux/ia64.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#include
+#include
+
+#define SMPL_PERIOD 1000000000UL
+
+static volatile unsigned long notification_received;
+
+#define NUM_PMCS PFMLIB_MAX_PMCS
+#define NUM_PMDS PFMLIB_MAX_PMDS
+
+static pfarg_reg_t pd[NUM_PMDS];
+static int ctx_fd;
+static char *event1_name;
+
+static void fatal_error(char *fmt,...) __attribute__((noreturn));
+
+/*
+ * print a printf-style message on stderr and terminate with exit status 1
+ */
+static void
+fatal_error(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+/*
+ * SIGIO handler: reads the second event's PMD into pd[1] (when one is
+ * monitored), consumes the overflow message from the context descriptor,
+ * prints the notification, bumps the count and issues PFM_RESTART.
+ */
+static void
+sigio_handler(int n, struct siginfo *info, struct sigcontext *sc)
+{
+	pfm_msg_t msg;
+	int fd = ctx_fd;
+	int r;
+
+	/*
+	 * fd is taken from ctx_fd just above, so this check cannot fail as
+	 * written; it is kept as in the original example
+	 */
+	if (fd != ctx_fd) {
+		fatal_error("handler does not get valid file descriptor\n");
+	}
+
+	if (event1_name && perfmonctl(fd, PFM_READ_PMDS, pd+1, 1) == -1) {
+		fatal_error("PFM_READ_PMDS: %s", strerror(errno));
+	}
+
+	r = read(fd, &msg, sizeof(msg));
+	if (r != sizeof(msg)) {
+		fatal_error("cannot read overflow message: %s\n", strerror(errno));
+	}
+
+	if (msg.type != PFM_MSG_OVFL) {
+		fatal_error("unexpected msg type: %d\n",msg.type);
+	}
+
+	/*
+	 * XXX: risky to do printf() in signal handler!
+	 */
+	if (event1_name)
+		printf("Notification %lu: %"PRIu64" %s\n", notification_received, pd[1].reg_value, event1_name);
+	else
+		printf("Notification %lu\n", notification_received);
+
+	/*
+	 * At this point, the counter used for the sampling period has already
+	 * been reset by the kernel because we are in non-blocking mode, self-monitoring.
+	 */
+
+	/*
+	 * increment our notification counter
+	 */
+	notification_received++;
+
+	/*
+	 * And resume monitoring
+	 */
+	if (perfmonctl(fd, PFM_RESTART,NULL, 0) == -1) {
+		fatal_error("PFM_RESTART: %s", strerror(errno));
+	}
+}
+
+/*
+ * spin until 3 notifications were counted, fork a child that exits
+ * immediately, then spin until 6 notifications were counted
+ */
+void
+busyloop(void)
+{
+	pid_t pid;
+
+	/*
+	 * busy loop to burn CPU cycles
+	 */
+	for(;notification_received < 3;) ;
+
+	/*
+	 * forking causes the context to be shared with the child
+	 * When the child terminates, it closes its descriptor.
+	 * The parent's remains and notification keep on coming.
+	 */
+	pid = fork();
+	if (pid == -1) {
+		fatal_error("cannot fork: %s\n", strerror(errno));
+	}
+	if (pid == 0) {
+		printf("child terminates\n");
+		fflush(stdout);
+		exit(0);
+	}
+	printf("after fork\n");
+	fflush(stdout);
+	for(;notification_received < 6;) ;
+}
+
+/*
+ * Programs a cycle event (plus an instructions-retired event when a second
+ * counter is available), arms the first counter to overflow after
+ * SMPL_PERIOD events, and runs busyloop() while SIGIO notifications are
+ * delivered. Any failure is fatal.
+ */
+int
+main(int argc, char **argv)
+{
+	pfarg_context_t ctx[1];
+	pfmlib_input_param_t inp;
+	pfmlib_output_param_t outp;
+	pfarg_reg_t pc[NUM_PMCS];
+	pfarg_load_t load_args;
+	pfmlib_options_t pfmlib_options;
+	struct sigaction act;
+	unsigned int i, num_counters;
+	size_t len;
+	int ret;
+
+	/*
+	 * Initialize pfm library (required before we can use it)
+	 */
+	if (pfm_initialize() != PFMLIB_SUCCESS) {
+		printf("Can't initialize library\n");
+		exit(1);
+	}
+
+	/*
+	 * Install the signal handler (SIGIO)
+	 */
+	memset(&act, 0, sizeof(act));
+	act.sa_handler = (sig_t)sigio_handler;
+	if (sigaction(SIGIO, &act, 0) == -1) {
+		fatal_error("cannot install SIGIO handler: %s\n", strerror(errno));
+	}
+
+	/*
+	 * pass options to library (optional)
+	 */
+	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
+	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
+	pfm_set_options(&pfmlib_options);
+
+	memset(pc, 0, sizeof(pc));
+	memset(ctx, 0, sizeof(ctx));
+	memset(&load_args, 0, sizeof(load_args));
+	memset(&inp,0, sizeof(inp));
+	memset(&outp,0, sizeof(outp));
+
+	pfm_get_num_counters(&num_counters);
+
+	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find cycle event\n");
+
+	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find inst retired event\n");
+
+	i = 2;
+
+	/*
+	 * set the default privilege mode for all counters:
+	 * PFM_PLM3 : user level only
+	 */
+	inp.pfp_dfl_plm = PFM_PLM3;
+
+	if (i > num_counters) {
+		i = num_counters;
+		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
+	}
+
+	/*
+	 * how many counters we use
+	 */
+	inp.pfp_event_count = i;
+
+	/*
+	 * remember the name of the second event only when it is really
+	 * monitored: the signal handler uses event1_name to decide whether
+	 * pd[1] may be read
+	 */
+	if (i > 1) {
+		pfm_get_max_event_name_len(&len);
+
+		event1_name = malloc(len+1);
+		if (event1_name == NULL)
+			fatal_error("cannot allocate event name\n");
+
+		pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1);
+	}
+
+	/*
+	 * let the library figure out the values for the PMCS
+	 */
+	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
+		fatal_error("Cannot configure events: %s\n", pfm_strerror(ret));
+	}
+
+	/*
+	 * now create the context for self monitoring/per-task
+	 */
+	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
+		if (errno == ENOSYS) {
+			fatal_error("Your kernel does not have performance monitoring support!\n");
+		}
+		fatal_error("Can't create PFM context %s\n", strerror(errno));
+	}
+	ctx_fd = ctx->ctx_fd;
+
+	/*
+	 * Now prepare the argument to initialize the PMDs and PMCS.
+	 * We use pfp_pmc_count to determine the number of registers to
+	 * setup. Note that this field can be >= pfp_event_count.
+	 */
+
+	for (i=0; i < outp.pfp_pmc_count; i++) {
+		pc[i].reg_num = outp.pfp_pmcs[i].reg_num;
+		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
+	}
+
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pd[i].reg_num = pc[i].reg_num;
+	}
+	/*
+	 * We want to get notified when the counter used for our first
+	 * event overflows
+	 */
+	pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY;
+
+	/*
+	 * only put the second counter in the reset mask when a second
+	 * event was actually programmed; otherwise pfp_pmcs[1] does not
+	 * describe one of our counters
+	 */
+	if (inp.pfp_event_count > 1)
+		pc[0].reg_reset_pmds[0] |= 1UL << outp.pfp_pmcs[1].reg_num;
+
+	/*
+	 * we arm the first counter, such that it will overflow
+	 * after SMPL_PERIOD events have been observed
+	 */
+	pd[0].reg_value = (~0UL) - SMPL_PERIOD + 1;
+	pd[0].reg_long_reset = (~0UL) - SMPL_PERIOD + 1;
+	pd[0].reg_short_reset = (~0UL) - SMPL_PERIOD + 1;
+
+	/*
+	 * Now program the registers
+	 *
+	 * We don't use the same variable to indicate the number of elements passed to
+	 * the kernel because, as we said earlier, pc may contain more elements than
+	 * the number of events we specified, i.e., contains more than counting monitors.
+	 */
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
+	}
+
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
+	}
+
+	/*
+	 * we want to monitor ourself
+	 */
+	load_args.load_pid = getpid();
+
+	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
+		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
+	}
+
+	/*
+	 * setup asynchronous notification on the file descriptor
+	 */
+	ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC);
+	if (ret == -1) {
+		fatal_error("cannot set ASYNC: %s\n", strerror(errno));
+	}
+
+	/*
+	 * get ownership of the descriptor
+	 */
+	ret = fcntl(ctx_fd, F_SETOWN, getpid());
+	if (ret == -1) {
+		fatal_error("cannot setown: %s\n", strerror(errno));
+	}
+
+	/*
+	 * Let's roll now
+	 */
+	pfm_self_start(ctx_fd);
+
+	busyloop();
+
+	pfm_self_stop(ctx_fd);
+
+	/*
+	 * free our context
+	 */
+	close(ctx_fd);
+
+	return 0;
+}
diff --git a/src/libpfm-3.y/examples_ia64_v2.0/self.c b/src/libpfm-3.y/examples_ia64_v2.0/self.c
new file mode 100644
index 0000000..a370f19
--- /dev/null
+++ b/src/libpfm-3.y/examples_ia64_v2.0/self.c
@@ -0,0 +1,271 @@
+/*
+ * self.c - example of a simple self monitoring task
+ *
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of libpfm, a performance monitoring support library for
+ * applications on Linux/ia64.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define NUM_PMCS PFMLIB_MAX_PMCS
+#define NUM_PMDS PFMLIB_MAX_PMDS
+
+#define MAX_EVT_NAME_LEN 128
+
+static volatile int quit;
+
+/*
+ * signal handler (installed for SIGALRM in main): asks noploop() to stop
+ */
+void sig_handler(int n)
+{
+	quit = 1;
+}
+
+/*
+ * our test code (function cannot be made static otherwise it is optimized away)
+ */
+void
+noploop(void)
+{
+	for(;quit == 0;);
+}
+
+static void fatal_error(char *fmt,...) __attribute__((noreturn));
+
+/*
+ * print a printf-style message on stderr and terminate with exit status 1
+ */
+static void
+fatal_error(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+
+/*
+ * Counts the events named on the command line (or cycles and instructions
+ * retired when none are given) on the calling task while it spins in
+ * noploop() until the 10 second alarm fires, then prints one line per
+ * event. Any failure is fatal.
+ */
+int
+main(int argc, char **argv)
+{
+	char **p;
+	unsigned int i;
+	int ret, ctx_fd;
+	pfmlib_input_param_t inp;
+	pfmlib_output_param_t outp;
+	pfarg_reg_t pd[NUM_PMDS];
+	pfarg_reg_t pc[NUM_PMCS];
+	pfarg_context_t ctx[1];
+	pfarg_load_t load_args;
+	pfmlib_options_t pfmlib_options;
+	unsigned int num_counters;
+	char name[MAX_EVT_NAME_LEN];
+
+	/*
+	 * Initialize pfm library (required before we can use it)
+	 */
+	if (pfm_initialize() != PFMLIB_SUCCESS) {
+		printf("Can't initialize library\n");
+		exit(1);
+	}
+	pfm_get_num_counters(&num_counters);
+
+	/*
+	 * check that the user did not specify too many events
+	 */
+	if ((unsigned int)(argc-1) > num_counters) {
+		printf("Too many events specified\n");
+		exit(1);
+	}
+
+	/*
+	 * pass options to library (optional)
+	 */
+	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
+	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
+	pfm_set_options(&pfmlib_options);
+
+	memset(pd, 0, sizeof(pd));
+	memset(pc, 0, sizeof(pc));
+	memset(ctx, 0, sizeof(ctx));
+	memset(&load_args, 0, sizeof(load_args));
+
+	/*
+	 * prepare parameters to library. we don't use any Itanium
+	 * specific features here. so the pfp_model is NULL.
+	 */
+	memset(&inp,0, sizeof(inp));
+	memset(&outp,0, sizeof(outp));
+
+	/*
+	 * be nice to user!
+	 */
+	if (argc > 1) {
+		p = argv+1;
+		/*
+		 * argc-1 <= num_counters was checked above; the explicit bound
+		 * keeps the index into pfp_events[] in range by construction
+		 */
+		for (i=0; *p && i < num_counters; i++, p++) {
+			if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) {
+				fatal_error("Cannot find %s event\n", *p);
+			}
+		}
+	} else {
+		if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
+			fatal_error("cannot find cycle event\n");
+
+		if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
+			fatal_error("cannot find inst retired event\n");
+		i = 2;
+	}
+
+	/*
+	 * set the default privilege mode for all counters:
+	 * PFM_PLM3 : user level only
+	 */
+	inp.pfp_dfl_plm = PFM_PLM3;
+
+	if (i > num_counters) {
+		i = num_counters;
+		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
+	}
+
+	/*
+	 * how many counters we use
+	 */
+	inp.pfp_event_count = i;
+
+	/*
+	 * let the library figure out the values for the PMCS
+	 */
+	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
+		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
+	}
+	/*
+	 * now create a new context, per process context.
+	 * This just creates a new context with some initial state, it is not
+	 * active nor attached to any process.
+	 */
+	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
+		if (errno == ENOSYS) {
+			fatal_error("Your kernel does not have performance monitoring support!\n");
+		}
+		fatal_error("Can't create PFM context %s\n", strerror(errno));
+	}
+
+	/*
+	 * extract the unique identifier for our context, a regular file descriptor
+	 */
+	ctx_fd = ctx[0].ctx_fd;
+
+	/*
+	 * Now prepare the argument to initialize the PMDs and PMCS.
+	 * We must use pfp_pmc_count to determine the number of PMC to initialize.
+	 * We must use pfp_event_count to determine the number of PMD to initialize.
+	 * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
+	 *
+	 * This step is new compared to libpfm-2.x. It is necessary because the library no
+	 * longer knows about the kernel data structures.
+	 */
+
+	for (i=0; i < outp.pfp_pmc_count; i++) {
+		pc[i].reg_num = outp.pfp_pmcs[i].reg_num;
+		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
+	}
+
+	/*
+	 * the PMC controlling the event ALWAYS come first, that's why this loop
+	 * is safe even when extra PMC are needed to support a particular event.
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pd[i].reg_num = pc[i].reg_num;
+	}
+
+	/*
+	 * Now program the registers
+	 *
+	 * We don't use the same variable to indicate the number of elements passed to
+	 * the kernel because, as we said earlier, pc may contain more elements than
+	 * the number of events (pmd) we specified, i.e., contains more than counting
+	 * monitors.
+	 */
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
+	}
+
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
+	}
+
+	/*
+	 * now we load (i.e., attach) the context to ourself
+	 */
+	load_args.load_pid = getpid();
+
+	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
+		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
+	}
+
+	/*
+	 * Let's roll now
+	 */
+	signal(SIGALRM, sig_handler);
+	pfm_self_start(ctx_fd);
+	alarm(10);
+	noploop();
+	pfm_self_stop(ctx_fd);
+
+	/*
+	 * now read the results (fatal_error() does not return)
+	 */
+	if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno);
+	}
+	/*
+	 * print the results
+	 *
+	 * It is important to realize, that the first event we specified may not
+	 * be in PMD4. Not all events can be measured by any monitor. That's why
+	 * we need to use the pc[] array to figure out where event i was allocated.
+	 *
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
+		printf("PMD%u %20"PRIu64" %s\n",
+			pd[i].reg_num,
+			pd[i].reg_value,
+			name);
+	}
+	/*
+	 * and destroy our context
+	 */
+	close(ctx_fd);
+
+	return 0;
+}
diff --git a/src/libpfm-3.y/examples_ia64_v2.0/showreset.c b/src/libpfm-3.y/examples_ia64_v2.0/showreset.c
new file mode 100644
index 0000000..320bd46
--- /dev/null
+++ b/src/libpfm-3.y/examples_ia64_v2.0/showreset.c
@@ -0,0 +1,95 @@
+/*
+ * showreset.c - getting the PAL reset values for the PMCs
+ *
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of libpfm, a performance monitoring support library for
+ * applications on Linux/ia64.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define NUM_PMCS PFMLIB_MAX_PMCS
+#define NUM_PMDS PFMLIB_MAX_PMDS
+
+static void fatal_error(char *fmt,...) __attribute__((noreturn));
+
+/*
+ * print a printf-style message on stderr and terminate with exit status 1
+ */
+static void
+fatal_error(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+/*
+ * Collects the register numbers of the implemented PMCs, asks the kernel
+ * for their reset values with PFM_GET_PMC_RESET_VAL and prints one
+ * "PMC<n> 0x<value>" line per register. Any failure is fatal.
+ */
+int
+main(int argc, char **argv)
+{
+	unsigned int i, cnum = 0;
+	pfarg_reg_t pc[NUM_PMCS];
+	pfmlib_regmask_t impl_pmcs;
+	unsigned int num_pmcs;
+
+	/*
+	 * Initialize pfm library (required before we can use it)
+	 */
+	if (pfm_initialize() != PFMLIB_SUCCESS) {
+		printf("Can't initialize library\n");
+		exit(1);
+	}
+
+	memset(&impl_pmcs, 0, sizeof(impl_pmcs));
+	memset(pc, 0, sizeof(pc));
+
+	pfm_get_impl_pmcs(&impl_pmcs);
+	pfm_get_num_pmcs(&num_pmcs);
+
+	/*
+	 * walk the register numbers until every implemented PMC was seen.
+	 * cnum is also bounded by the size of pc[] so that a mismatch
+	 * between the bitmask and num_pmcs cannot write past the array.
+	 */
+	for(i=0; num_pmcs && cnum < NUM_PMCS; i++) {
+		if (pfm_regmask_isset(&impl_pmcs, i) == 0) continue;
+		pc[cnum++].reg_num = i;
+		num_pmcs--;
+	}
+
+	if (perfmonctl(0, PFM_GET_PMC_RESET_VAL, pc, cnum) == -1 ) {
+		if (errno == ENOSYS) {
+			fatal_error("Your kernel does not have performance monitoring support!\n");
+		}
+		fatal_error("cannot get reset values: %s\n", strerror(errno));
+	}
+
+	for (i=0; i < cnum; i++) {
+		printf("PMC%u 0x%lx\n", pc[i].reg_num, pc[i].reg_value);
+
+	}
+	return 0;
+}
diff --git a/src/libpfm-3.y/examples_ia64_v2.0/syst.c b/src/libpfm-3.y/examples_ia64_v2.0/syst.c
new file mode 100644
index 0000000..9d268ec
--- /dev/null
+++ b/src/libpfm-3.y/examples_ia64_v2.0/syst.c
@@ -0,0 +1,308 @@
+/*
+ * syst.c - example of a simple system wide monitoring program
+ *
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+#ifndef __NR_sched_setaffinity
+#ifdef __ia64__
+#define __NR_sched_setaffinity	1231
+#endif
+#endif
+/*
+ * Hack to get this to work without libc support: issue the raw system call
+ */
+int
+my_setaffinity(pid_t pid, unsigned int len, unsigned long *mask)
+{
+	return syscall(__NR_sched_setaffinity, pid, len, mask);
+}
+
+
+int
+main(int argc, char **argv)
+{
+	char **p;
+	unsigned long my_mask;
+	pfarg_reg_t pc[NUM_PMCS];
+	pfarg_reg_t pd[NUM_PMDS];
+	pfarg_context_t ctx[1];
+	pfarg_load_t load_args;
+	pfmlib_input_param_t inp;
+	pfmlib_output_param_t outp;
+	pfmlib_options_t pfmlib_options;
+	unsigned int which_cpu;
+	int ret, ctx_fd;
+	unsigned int i;
+	unsigned int num_counters;
+	char name[MAX_EVT_NAME_LEN];
+
+	/*
+	 * Initialize pfm library (required before we can use it)
+	 */
+	if (pfm_initialize() != PFMLIB_SUCCESS) {
+		printf("Can't initialize library\n");
+		exit(1);
+	}
+	pfm_get_num_counters(&num_counters);
+
+	/*
+	 * pass options to library (optional)
+	 */
+	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
+	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
+	pfm_set_options(&pfmlib_options);
+
+	memset(&load_args, 0, sizeof(load_args));
+	memset(pc, 0, sizeof(pc));
+	memset(pd, 0, sizeof(pd));
+	memset(ctx, 0, sizeof(ctx));
+	memset(&inp,0, sizeof(inp));
+	memset(&outp,0, sizeof(outp));
+
+	/*
+	 * be nice to user! (never store more events than inp.pfp_events[] can hold)
+	 */
+	if (argc > 1) {
+		p = argv+1;
+		for (i=0; *p && i < sizeof(inp.pfp_events)/sizeof(inp.pfp_events[0]); i++, p++) {
+			if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) {
+				fatal_error("Cannot find %s event\n", *p);
+			}
+		}
+	} else {
+		if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) {
+			fatal_error("cannot find cycle event\n");
+		}
+		if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) {
+			fatal_error("cannot find inst retired event\n");
+		}
+		i = 2;
+	}
+	/*
+	 * set the privilege mode:
+	 * 	PFM_PLM3 : user level
+	 * 	PFM_PLM0 : kernel level
+	 */
+	inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0;
+
+	if (i > num_counters) {
+		i = num_counters;
+		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
+	}
+
+	/*
+	 * how many counters we use
+	 */
+	inp.pfp_event_count = i;
+
+	/*
+	 * indicate we are using the monitors for a system-wide session.
+	 * This may impact the way the library sets up the PMC values.
+	 */
+	inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE;
+
+	/*
+	 * let the library figure out the values for the PMCS
+	 */
+	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
+		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
+	}
+	/*
+	 * In system wide mode, the perfmon context cannot be inherited.
+	 * Also in this mode, we cannot use the blocking form of user level notification.
+	 */
+	ctx[0].ctx_flags = PFM_FL_SYSTEM_WIDE;
+
+	/*
+	 * pick a random CPU. Assumes CPUs are numbered with no holes; the index is kept within the one-word mask
+	 */
+	srandom(getpid());
+
+	which_cpu = (random() % sysconf(_SC_NPROCESSORS_ONLN)) % (8 * sizeof(my_mask));
+
+	/*
+	 * perfmon relies on the application to have the task pinned
+	 * on one CPU by the time the PFM_CONTEXT_LOAD command is issued.
+	 * The perfmon context will record the active CPU at the time of PFM_CONTEXT_LOAD
+	 * and will reject any access coming from another CPU. Therefore it
+	 * is advisable to pin the task ASAP before doing any perfmon calls.
+	 *
+	 * On RHAS and 2.5/2.6, this can be easily achieved using the
+	 * sched_setaffinity() system call.
+	 */
+	my_mask = 1UL << which_cpu;
+
+	ret = my_setaffinity(getpid(), sizeof(unsigned long), &my_mask);
+	if (ret == -1) {
+		fatal_error("cannot set affinity to 0x%lx: %s\n", my_mask, strerror(errno));
+	}
+	/*
+	 * after the call the task is pinned to which_cpu
+	 */
+
+	/*
+	 * now create the context for the system-wide session
+	 */
+	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1 ) {
+		if (errno == ENOSYS) {
+			fatal_error("Your kernel does not have performance monitoring support!\n");
+		}
+		fatal_error("Can't create PFM context %s\n", strerror(errno));
+	}
+	/*
+	 * extract our file descriptor
+	 */
+	ctx_fd = ctx->ctx_fd;
+
+	/*
+	 * Now prepare the argument to initialize the PMDs and PMCS.
+	 * We must use pfp_pmc_count to determine the number of PMC to initialize.
+	 * We must use pfp_event_count to determine the number of PMD to initialize.
+	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
+	 *
+	 * This step is new compared to libpfm-2.x. It is necessary because the library no
+	 * longer knows about the kernel data structures.
+	 */
+	for (i=0; i < outp.pfp_pmc_count; i++) {
+		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
+		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
+	}
+
+	/*
+	 * the PMC controlling the event ALWAYS come first, that's why this loop
+	 * is safe even when extra PMC are needed to support a particular event.
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pd[i].reg_num   = outp.pfp_pmcs[i].reg_num;
+	}
+
+	/*
+	 * Now program the registers
+	 *
+	 * We don't use the same variable to indicate the number of elements passed to
+	 * the kernel because, as we said earlier, pc may contain more elements than
+	 * the number of events we specified, i.e., contains more than counting monitors.
+	 */
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
+	}
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
+	}
+
+	/*
+	 * for system wide session, we can only attach to ourselves
+	 */
+	load_args.load_pid = getpid();
+
+	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
+		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
+	}
+
+	/*
+	 * start monitoring. We must go to the kernel because psr.pp cannot be
+	 * changed at the user level.
+	 */
+	if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
+		fatal_error("perfmonctl error PFM_START errno %d\n",errno);
+	}
+	printf("monitoring started on CPU%u\n", which_cpu);
+
+	printf("\n");
+	getchar();
+
+	/*
+	 * stop monitoring. We must go to the kernel because psr.pp cannot be
+	 * changed at the user level.
+	 */
+	if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
+		fatal_error("perfmonctl error PFM_STOP errno %d\n",errno);
+	}
+
+	printf("monitoring stopped on CPU%u\n\n", which_cpu);
+
+	/*
+	 * now read the results
+	 */
+	if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error( "perfmonctl error READ_PMDS errno %d\n",errno);
+		return -1;
+	}
+
+	/*
+	 * print the results
+	 *
+	 * It is important to realize, that the first event we specified may not
+	 * be in PMD4. Not all events can be measured by any monitor. That's why
+	 * we need to use the pc[] array to figure out where event i was allocated.
+	 *
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
+		printf("CPU%-2u PMD%u %20"PRIu64" %s\n",
+			which_cpu,
+			pd[i].reg_num,
+			pd[i].reg_value,
+			name);
+	}
+
+	/*
+	 * let's stop this now
+	 */
+	close(ctx_fd);
+
+	return 0;
+}
diff --git a/src/libpfm-3.y/examples_ia64_v2.0/task.c b/src/libpfm-3.y/examples_ia64_v2.0/task.c
new file mode 100644
index 0000000..c5af075
--- /dev/null
+++ b/src/libpfm-3.y/examples_ia64_v2.0/task.c
@@ -0,0 +1,303 @@
+/*
+ * task.c - example of a task monitoring another one
+ *
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of libpfm, a performance monitoring support library for
+ * applications on Linux/ia64.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define NUM_PMCS PFMLIB_MAX_PMCS
+#define NUM_PMDS PFMLIB_MAX_PMDS
+
+#define MAX_EVT_NAME_LEN	128
+
+static void fatal_error(char *fmt,...) __attribute__((noreturn));
+
+static void
+fatal_error(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+int
+child(char **arg)
+{
+	/*
+	 * will cause the program to stop before executing the first
+	 * user level instruction. We can only attach (load) a context
+	 * if the task is in the STOPPED state.
+	 */
+	ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+
+	/*
+	 * execute the requested command
+	 */
+	execvp(arg[0], arg);
+
+	fatal_error("cannot exec: %s\n", arg[0]);
+	/* not reached */
+}
+
+int
+parent(char **arg)
+{
+	pfmlib_input_param_t inp;
+	pfmlib_output_param_t outp;
+	pfarg_context_t ctx[1];
+	pfarg_reg_t pc[NUM_PMCS];
+	pfarg_reg_t pd[NUM_PMDS];
+	pfarg_load_t load_args;
+	unsigned int i, num_counters;
+	int status, ret;
+	int ctx_fd;
+	pid_t pid;
+	char name[MAX_EVT_NAME_LEN];
+
+	memset(pc, 0, sizeof(pc));
+	memset(pd, 0, sizeof(pd));
+	memset(ctx, 0, sizeof(ctx));
+	memset(&inp,0, sizeof(inp));
+	memset(&outp,0, sizeof(outp));
+	memset(&load_args,0, sizeof(load_args));
+
+	pfm_get_num_counters(&num_counters);
+
+	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find cycle event\n");
+
+	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find inst retired event\n");
+	i = 2;
+
+	if (num_counters < i) {
+		i = num_counters;
+		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
+	}
+
+	/*
+	 * set the privilege mode:
+	 * 	PFM_PLM3 : user level
+	 * 	PFM_PLM0 : kernel level
+	 */
+	inp.pfp_dfl_plm = PFM_PLM3;
+
+	/*
+	 * how many counters we use
+	 */
+	inp.pfp_event_count = i;
+
+	/*
+	 * let the library figure out the values for the PMCS
+	 */
+	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
+		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
+	}
+	/*
+	 * now create a context. we will later attach it to the task we are creating.
+	 */
+	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) {
+		if (errno == ENOSYS) {
+			fatal_error("Your kernel does not have performance monitoring support!\n");
+		}
+		fatal_error("Can't create PFM context %s\n", strerror(errno));
+	}
+	/*
+	 * extract the identifier for our context
+	 */
+	ctx_fd = ctx[0].ctx_fd;
+
+	/*
+	 * Now prepare the argument to initialize the PMDs and PMCS.
+	 * We must use pfp_pmc_count to determine the number of PMC to initialize.
+	 * We must use pfp_event_count to determine the number of PMD to initialize.
+	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
+	 *
+	 * This step is new compared to libpfm-2.x. It is necessary because the library no
+	 * longer knows about the kernel data structures.
+	 */
+	for (i=0; i < outp.pfp_pmc_count; i++) {
+		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
+		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
+	}
+
+	/*
+	 * the PMC controlling the event ALWAYS come first, that's why this loop
+	 * is safe even when extra PMC are needed to support a particular event.
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pd[i].reg_num   = pc[i].reg_num;
+	}
+
+	/*
+	 * Now program the registers
+	 *
+	 * We don't use the same variable to indicate the number of elements passed to
+	 * the kernel because, as we said earlier, pc may contain more elements than
+	 * the number of events we specified, i.e., contains more than counting monitors.
+	 */
+
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
+	}
+
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
+	}
+
+	/*
+	 * Create the child task
+	 */
+	if ((pid=fork()) == -1) fatal_error("Cannot fork process\n");
+
+	/*
+	 * and launch the child code
+	 */
+	if (pid == 0) exit(child(arg));
+
+	/*
+	 * wait for the child to exec
+	 */
+	waitpid(pid, &status, WUNTRACED);
+
+	/*
+	 * check if process exited early
+	 */
+	if (WIFEXITED(status)) {
+		fatal_error("command %s exited too early with status %d\n", arg[0], WEXITSTATUS(status));
+	}
+	/*
+	 * the task is stopped at this point
+	 */
+
+
+	/*
+	 * now we load (i.e., attach) the context to the stopped child task
+	 */
+	load_args.load_pid = pid;
+
+	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
+		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
+	}
+
+	/*
+	 * activate monitoring. The task is still STOPPED at this point. Monitoring
+	 * will not take effect until the execution of the task is resumed.
+	 */
+	if (perfmonctl(ctx_fd, PFM_START, NULL, 0) == -1) {
+		fatal_error("perfmonctl error PFM_START errno %d\n",errno);
+	}
+
+	/*
+	 * now resume execution of the task, effectively activating
+	 * monitoring.
+	 */
+	ptrace(PTRACE_DETACH, pid, NULL, 0);
+
+	/*
+	 * now the task is running
+	 */
+
+	/*
+	 * simply wait for completion
+	 */
+	waitpid(pid, &status, 0);
+
+	/*
+	 * the task has disappeared at this point but our context is still
+	 * present and contains all the latest counts.
+	 */
+
+	/*
+	 * now simply read the results.
+	 */
+	if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error READ_PMDS errno %d\n",errno);
+		return -1;
+	}
+	/*
+	 * print the results
+	 *
+	 * It is important to realize, that the first event we specified may not
+	 * be in PMD4. Not all events can be measured by any monitor. That's why
+	 * we need to use the pc[] array to figure out where event i was allocated.
+	 *
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
+		printf("PMD%u %20"PRIu64" %s\n",
+			pd[i].reg_num,
+			pd[i].reg_value,
+			name);
+	}
+	/*
+	 * free the context
+	 */
+	close(ctx_fd);
+
+	return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+	pfmlib_options_t pfmlib_options;
+
+	if (argc < 2) {
+		fatal_error("You must specify a command to execute\n");
+	}
+	/*
+	 * Initialize pfm library (required before we can use it)
+	 */
+	if (pfm_initialize() != PFMLIB_SUCCESS) {
+		printf("Can't initialize library\n");
+		exit(1);
+	}
+
+	/*
+	 * pass options to library (optional)
+	 */
+	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
+	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
+	pfm_set_options(&pfmlib_options);
+
+	return parent(argv+1);
+}
diff --git a/src/libpfm-3.y/examples_ia64_v2.0/task_attach.c b/src/libpfm-3.y/examples_ia64_v2.0/task_attach.c
new file mode 100644
index 0000000..3015297
--- /dev/null
+++ b/src/libpfm-3.y/examples_ia64_v2.0/task_attach.c
@@ -0,0 +1,303 @@
+/*
+ * task_attach.c - example of how to attach to another task for monitoring
+ *
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of libpfm, a performance monitoring support library for
+ * applications on Linux/ia64.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+
+#define NUM_PMCS PFMLIB_MAX_PMCS
+#define NUM_PMDS PFMLIB_MAX_PMDS
+
+#define MAX_EVT_NAME_LEN	128
+
+static void fatal_error(char *fmt,...) __attribute__((noreturn));
+
+static void
+fatal_error(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+int
+parent(pid_t pid)
+{
+	pfmlib_input_param_t inp;
+	pfmlib_output_param_t outp;
+	pfarg_context_t ctx[1];
+	pfarg_reg_t pc[NUM_PMCS];
+	pfarg_reg_t pd[NUM_PMDS];
+	pfarg_load_t load_args;
+	pfm_msg_t msg;
+	unsigned int i, num_counters;
+	int status, ret;
+	int ctx_fd;
+	char name[MAX_EVT_NAME_LEN];
+
+
+	memset(pc, 0, sizeof(pc));
+	memset(pd, 0, sizeof(pd));
+	memset(ctx, 0, sizeof(ctx));
+	memset(&inp,0, sizeof(inp));
+	memset(&outp,0, sizeof(outp));
+	memset(&load_args,0, sizeof(load_args));
+
+	pfm_get_num_counters(&num_counters);
+
+	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find cycle event\n");
+
+	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find inst retired event\n");
+	i = 2;
+
+	/*
+	 * set the privilege mode:
+	 * 	PFM_PLM3 : user level
+	 * 	PFM_PLM0 : kernel level
+	 */
+	inp.pfp_dfl_plm = PFM_PLM3;
+
+	if (num_counters < i) {
+		i = num_counters;
+		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
+	}
+
+	/*
+	 * how many counters we use
+	 */
+	inp.pfp_event_count = i;
+
+	/*
+	 * let the library figure out the values for the PMCS
+	 */
+	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
+		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
+	}
+	/*
+	 * now create a context. we will later attach it to the task we are monitoring.
+	 */
+	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) {
+		if (errno == ENOSYS) {
+			fatal_error("Your kernel does not have performance monitoring support!\n");
+		}
+		fatal_error("Can't create PFM context %s\n", strerror(errno));
+	}
+	/*
+	 * extract the identifier for our context
+	 */
+	ctx_fd = ctx[0].ctx_fd;
+
+	/*
+	 * Now prepare the argument to initialize the PMDs and PMCS.
+	 * We must use pfp_pmc_count to determine the number of PMC to initialize.
+	 * We must use pfp_event_count to determine the number of PMD to initialize.
+	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
+	 *
+	 * This step is new compared to libpfm-2.x. It is necessary because the library no
+	 * longer knows about the kernel data structures.
+	 */
+	for (i=0; i < outp.pfp_pmc_count; i++) {
+		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
+		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
+	}
+
+	/*
+	 * the PMC controlling the event ALWAYS come first, that's why this loop
+	 * is safe even when extra PMC are needed to support a particular event.
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pd[i].reg_num   = pc[i].reg_num;
+	}
+
+	/*
+	 * Now program the registers
+	 *
+	 * We don't use the same variable to indicate the number of elements passed to
+	 * the kernel because, as we said earlier, pc may contain more elements than
+	 * the number of events we specified, i.e., contains more than counting monitors.
+	 */
+
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
+	}
+
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
+	}
+
+	ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
+	if (ret == -1) {
+		fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));
+	}
+
+	/*
+	 * wait for the task to be actually stopped
+	 */
+	waitpid(pid, &status, WUNTRACED);
+
+	/*
+	 * check if process exited early
+	 */
+	if (WIFEXITED(status)) {
+		fatal_error("command process %d exited too early with status %d\n", pid, WEXITSTATUS(status));
+	}
+
+	/*
+	 * the task is stopped at this point
+	 */
+
+	/*
+	 * now we load (i.e., attach) the context to the stopped target task
+	 */
+	load_args.load_pid = pid;
+
+	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
+		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
+	}
+
+	/*
+	 * activate monitoring. The task is still STOPPED at this point. Monitoring
+	 * will not take effect until the execution of the task is resumed.
+	 */
+	if (perfmonctl(ctx_fd, PFM_START, NULL, 0) == -1) {
+		fatal_error("perfmonctl error PFM_START errno %d\n",errno);
+	}
+
+	/*
+	 * now resume execution of the task, effectively activating
+	 * monitoring.
+	 */
+	ptrace(PTRACE_DETACH, pid, NULL, 0);
+
+	/*
+	 * now the task is running
+	 */
+
+	/*
+	 * We cannot simply do a waitpid() because we may be attaching to a process
+	 * totally unrelated to our program. Instead we use a perfmon facility that
+	 * notifies us when the monitoring task is exiting.
+	 *
+	 * When a task with a monitoring context attached to it exits, a PFM_MSG_END
+	 * is generated. It can be retrieved with a simple read() on the context's descriptor.
+	 *
+	 * Another reason why you might return from the read is if there was a counter
+	 * overflow, unlikely in this example.
+	 *
+	 * To measure only for short period of time, use select or poll with a timeout,
+	 * see task_attach_timeout.c
+	 *
+	 */
+	ret = read(ctx_fd, &msg, sizeof(msg));
+	if (ret == -1) {
+		fatal_error("cannot read from descriptor: %s\n", strerror(errno));
+	}
+
+	if (msg.type != PFM_MSG_END) {
+		fatal_error("unexpected msg type : %d\n", msg.type);
+	}
+
+	/*
+	 * the task has exited, we can simply read the results
+	 */
+
+	/*
+	 * now simply read the results.
+	 */
+	if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error READ_PMDS errno %d\n",errno);
+		return -1;
+	}
+	/*
+	 * print the results
+	 *
+	 * It is important to realize, that the first event we specified may not
+	 * be in PMD4. Not all events can be measured by any monitor. That's why
+	 * we need to use the pc[] array to figure out where event i was allocated.
+	 *
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
+		printf("PMD%u %20"PRIu64" %s\n",
+			pd[i].reg_num,
+			pd[i].reg_value,
+			name);
+	}
+	/*
+	 * free the context
+	 */
+	close(ctx_fd);
+
+	return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+	pfmlib_options_t pfmlib_options;
+	pid_t pid;
+
+	if (argc < 2) {
+		fatal_error("usage: %s pid\n", argv[0]);
+	}
+
+	pid = atoi(argv[1]);
+
+	/*
+	 * Initialize pfm library (required before we can use it)
+	 */
+	if (pfm_initialize() != PFMLIB_SUCCESS) {
+		printf("Can't initialize library\n");
+		exit(1);
+	}
+
+	/*
+	 * pass options to library (optional)
+	 */
+	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
+	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
+	pfm_set_options(&pfmlib_options);
+
+	return parent(pid);
+}
diff --git a/src/libpfm-3.y/examples_ia64_v2.0/task_attach_timeout.c b/src/libpfm-3.y/examples_ia64_v2.0/task_attach_timeout.c
new file mode 100644
index 0000000..e350f21
--- /dev/null
+++ b/src/libpfm-3.y/examples_ia64_v2.0/task_attach_timeout.c
@@ -0,0 +1,356 @@
+/*
+ * task_attach_timeout.c - attach to another task for monitoring for a short while
+ *
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * This file is part of libpfm, a performance monitoring support library for
+ * applications on Linux/ia64.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+
+#define NUM_PMCS PFMLIB_MAX_PMCS
+#define NUM_PMDS PFMLIB_MAX_PMDS
+
+#define MAX_EVT_NAME_LEN	128
+
+static void fatal_error(char *fmt,...) __attribute__((noreturn));
+
+static void
+fatal_error(char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+int
+parent(pid_t pid, unsigned long delay)
+{
+	pfmlib_input_param_t inp;
+	pfmlib_output_param_t outp;
+	pfarg_context_t ctx[1];
+	pfarg_reg_t pc[NUM_PMCS];
+	pfarg_reg_t pd[NUM_PMDS];
+	pfarg_load_t load_args;
+	struct pollfd pollfd;
+	pfm_msg_t msg;
+	unsigned int i, num_counters;
+	int status, ret;
+	int ctx_fd;
+	char name[MAX_EVT_NAME_LEN];
+
+
+	memset(pc, 0, sizeof(pc));
+	memset(pd, 0, sizeof(pd));
+	memset(ctx, 0, sizeof(ctx));
+	memset(&inp,0, sizeof(inp));
+	memset(&outp,0, sizeof(outp));
+	memset(&load_args,0, sizeof(load_args));
+
+	pfm_get_num_counters(&num_counters);
+
+	if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find cycle event\n");
+
+	if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS)
+		fatal_error("cannot find inst retired event\n");
+	i = 2;
+
+	if (num_counters < i) {
+		i = num_counters;
+		printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i);
+	}
+
+	/*
+	 * set the privilege mode:
+	 * 	PFM_PLM3 : user level
+	 * 	PFM_PLM0 : kernel level
+	 */
+	inp.pfp_dfl_plm = PFM_PLM3;
+
+	/*
+	 * how many counters we use
+	 */
+	inp.pfp_event_count = i;
+
+	/*
+	 * let the library figure out the values for the PMCS
+	 */
+	if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) {
+		fatal_error("cannot configure events: %s\n", pfm_strerror(ret));
+	}
+	/*
+	 * now create a context. we will later attach it to the task we are monitoring.
+	 */
+	if (perfmonctl(0, PFM_CREATE_CONTEXT, ctx, 1) == -1) {
+		if (errno == ENOSYS) {
+			fatal_error("Your kernel does not have performance monitoring support!\n");
+		}
+		fatal_error("Can't create PFM context %s\n", strerror(errno));
+	}
+	/*
+	 * extract the identifier for our context
+	 */
+	ctx_fd = ctx[0].ctx_fd;
+
+	/*
+	 * use our file descriptor for the poll.
+	 * we are interested in read events only.
+	 */
+	pollfd.fd     = ctx_fd;
+	pollfd.events = POLLIN;
+
+
+	/*
+	 * Now prepare the argument to initialize the PMDs and PMCS.
+	 * We must use pfp_pmc_count to determine the number of PMC to initialize.
+	 * We must use pfp_event_count to determine the number of PMD to initialize.
+	 * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count.
+	 *
+	 * This step is new compared to libpfm-2.x. It is necessary because the library no
+	 * longer knows about the kernel data structures.
+	 */
+	for (i=0; i < outp.pfp_pmc_count; i++) {
+		pc[i].reg_num   = outp.pfp_pmcs[i].reg_num;
+		pc[i].reg_value = outp.pfp_pmcs[i].reg_value;
+	}
+
+	/*
+	 * the PMC controlling the event ALWAYS come first, that's why this loop
+	 * is safe even when extra PMC are needed to support a particular event.
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pd[i].reg_num   = pc[i].reg_num;
+	}
+
+	/*
+	 * Now program the registers
+	 *
+	 * We don't use the same variable to indicate the number of elements passed to
+	 * the kernel because, as we said earlier, pc may contain more elements than
+	 * the number of events we specified, i.e., contains more than counting monitors.
+	 */
+
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno);
+	}
+
+	if (perfmonctl(ctx_fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno);
+	}
+
+	ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
+	if (ret == -1) {
+		fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));
+	}
+
+	/*
+	 * wait for the task to be actually stopped
+	 */
+	waitpid(pid, &status, WUNTRACED);
+
+	/*
+	 * check if process exited early
+	 */
+	if (WIFEXITED(status)) {
+		fatal_error("command process %d exited too early with status %d\n", pid, WEXITSTATUS(status));
+	}
+
+	/*
+	 * the task is stopped at this point
+	 */
+
+	/*
+	 * now we load (i.e., attach) the context to the stopped target task
+	 */
+	load_args.load_pid = pid;
+
+	if (perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) {
+		fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno);
+	}
+
+	/*
+	 * activate monitoring. The task is still STOPPED at this point. Monitoring
+	 * will not take effect until the execution of the task is resumed.
+	 */
+	if (perfmonctl(ctx_fd, PFM_START, NULL, 0) == -1) {
+		fatal_error("perfmonctl error PFM_START errno %d\n",errno);
+	}
+
+	/*
+	 * now resume execution of the task, effectively activating
+	 * monitoring.
+	 */
+	ptrace(PTRACE_DETACH, pid, NULL, 0);
+	printf("attached to [%d], timeout set to %lu seconds\n", pid, delay);
+
+	/*
+	 * now the task is running
+	 */
+
+	/*
+	 * We cannot simply do a waitpid() because we may be attaching to a process
+	 * totally unrelated to our program. Instead we use a perfmon facility that
+	 * notifies us when the monitoring task is exiting.
+	 *
+	 * When a task with a monitoring context attached to it exits, a PFM_MSG_END
+	 * is generated. It can be retrieved with a simple read() on the context's descriptor.
+	 *
+	 * Another reason why you might return from the read is if there was a counter
+	 * overflow, unlikely in this example.
+	 *
+	 * To measure only for a short period of time, we poll() the descriptor with a
+	 * timeout instead of issuing a blocking read() right away.
+	 *
+	 */
+	ret = poll(&pollfd, 1, delay*1000);
+	switch( ret ) {
+		case -1:
+			fatal_error("cannot read from descriptor: %s\n", strerror(errno));
+			/* no return */
+		case  1:
+			/*
+			 * there is a message, i.e., the program exited before our timeout
+			 */
+			if (ret == 1) {
+				/*
+				 * extract message, msg is only meaningful if the read succeeded
+				 */
+				ret = read(ctx_fd, &msg, sizeof(msg));
+				if (ret == -1) fatal_error("cannot read from descriptor: %s\n", strerror(errno));
+				if (msg.type != PFM_MSG_END) {
+					fatal_error("unexpected msg type : %d\n", msg.type);
+				}
+			}
+			break;
+		case  0:
+			/*
+			 * we timed out, we need to stop the task to unload
+			 */
+			ret = ptrace(PTRACE_ATTACH, pid, NULL, 0);
+			if (ret == -1) {
+				fatal_error("cannot attach to %d: %s\n", pid, strerror(errno));
+			}
+			/*
+			 * wait for task to be actually stopped
+			 */
+			waitpid(pid, &status, WUNTRACED);
+
+			/*
+			 * check if process exited, then no need to unload
+			 */
+			if (WIFEXITED(status)) goto read_results;
+
+			if (perfmonctl(ctx_fd, PFM_UNLOAD_CONTEXT, NULL, 0) == -1) {
+				fatal_error("perfmonctl error PFM_UNLOAD_CONTEXT errno %d\n",errno);
+			}
+
+			/*
+			 * let it run free again
+			 */
+			ptrace(PTRACE_DETACH, pid, NULL, 0);
+			break;
+		default:
+			fatal_error("unexpected return from poll: %d\n", ret);
+	}
+
+read_results:
+	/*
+	 * now simply read the results.
+	 */
+	if (perfmonctl(ctx_fd, PFM_READ_PMDS, pd, inp.pfp_event_count) == -1) {
+		fatal_error("perfmonctl error READ_PMDS errno %d\n",errno);
+		return -1;
+	}
+
+	/*
+	 * print the results
+	 *
+	 * It is important to realize, that the first event we specified may not
+	 * be in PMD4. Not all events can be measured by any monitor. That's why
+	 * we need to use the pc[] array to figure out where event i was allocated.
+	 *
+	 */
+	for (i=0; i < inp.pfp_event_count; i++) {
+		pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN);
+		printf("PMD%u %20"PRIu64" %s\n",
+			pd[i].reg_num,
+			pd[i].reg_value,
+			name);
+	}
+	/*
+	 * free the context
+	 */
+	close(ctx_fd);
+
+	return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+	pfmlib_options_t pfmlib_options;
+	unsigned long delay;
+	pid_t pid;
+
+	if (argc < 2) {
+		fatal_error("usage: %s pid [timeout]\n", argv[0]);
+	}
+
+	pid   = atoi(argv[1]);
+	delay = argc > 2 ? strtoul(argv[2], NULL, 10) : 10;
+
+	/*
+	 * Initialize pfm library (required before we can use it)
+	 */
+	if (pfm_initialize() != PFMLIB_SUCCESS) {
+		printf("Can't initialize library\n");
+		exit(1);
+	}
+
+	/*
+	 * pass options to library (optional)
+	 */
+	memset(&pfmlib_options, 0, sizeof(pfmlib_options));
+	pfmlib_options.pfm_debug = 0; /* set to 1 for debug */
+	pfm_set_options(&pfmlib_options);
+
+	return parent(pid, delay);
+}
diff --git a/src/libpfm-3.y/examples_ia64_v2.0/task_smpl.c b/src/libpfm-3.y/examples_ia64_v2.0/task_smpl.c
new file mode 100644
index 0000000..3d9493a
--- /dev/null
+++ b/src/libpfm-3.y/examples_ia64_v2.0/task_smpl.c
@@ -0,0 +1,498 @@
+/*
+ * task_smpl.c - example of a task sampling another one using a randomized sampling period
+ *
+ * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +typedef pfm_default_smpl_arg_t smpl_fmt_arg_t; +typedef pfm_default_smpl_hdr_t smpl_hdr_t; +typedef pfm_default_smpl_entry_t smpl_entry_t; +typedef pfm_default_smpl_ctx_arg_t ctx_arg_t; +typedef int ctxid_t; +#define FMT_UUID PFM_DEFAULT_SMPL_UUID + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define FIRST_COUNTER 4 + +static unsigned long collect_samples; +static void *buf_addr; +static pfm_uuid_t buf_fmt_id = FMT_UUID; + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +/* + * print a printf-style message on stderr and continue + */ +static void +warning(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +/* + * print a printf-style message on stderr and exit with status 1 + */ +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * runs in the forked child: request tracing, then exec the command in arg. + * Exits with status 1 if execvp() returns. + */ +int +child(char **arg) +{ + /* + * force the task to stop before executing the first + * user level instruction + */ + ptrace(PTRACE_TRACEME, 0, NULL, NULL); + + execvp(arg[0], arg); + /* only reached if execvp() failed */ + exit(1); +} + +/* + * return the number of bits set in x + */ +static __inline__ int +bit_weight(unsigned long x) +{ + int sum = 0; + for (; x ; x>>=1) { + if (x & 0x1UL) sum++; + } + return sum; + +} + +/* + * print every sample found in the buffer at buf_addr. + * id : context file descriptor used for PFM_RESTART + * smpl_pmd_mask: bitmask of the PMDs recorded in the body of each entry + * need_restart : when non-zero, issue PFM_RESTART once the samples are printed + */ +static void +process_smpl_buf(int id, unsigned long smpl_pmd_mask, int need_restart) +{ + static unsigned long last_overflow = ~0UL; /* initialize to biggest value possible */ + smpl_hdr_t *hdr = (smpl_hdr_t *)buf_addr; + smpl_entry_t *ent; + unsigned long count, entry, *reg, pos, msk; + unsigned long entry_size; + int j; + + + + printf("processing %s buffer at %p\n", need_restart==0 ? "leftover" : "", hdr); + if (hdr->hdr_overflows <= last_overflow && last_overflow != ~0UL) { + warning("skipping identical set of samples %lu <= %lu\n", + hdr->hdr_overflows, last_overflow); + return; + } + last_overflow = hdr->hdr_overflows; + + count = hdr->hdr_count; + + ent = (smpl_entry_t *)(hdr+1); + pos = (unsigned long)ent; + entry = collect_samples; + + /* + * in this example program, we use fixed-size entries, therefore we + * can compute the entry size in advance. Perfmon-2 supports variable + * size entries. 
+ */ + entry_size = sizeof(smpl_entry_t)+(bit_weight(smpl_pmd_mask)<<3); + + while(count--) { + printf("entry %ld PID:%d CPU:%d IIP:0x%016lx\n", + entry, + ent->pid, + ent->cpu, + ent->ip); + + printf("\tOVFL: %d LAST_VAL: %lu\n", ent->ovfl_pmd, -ent->last_reset_val); + + /* + * print body: additional PMDs recorded + * PMDs are recorded in increasing index order + */ + reg = (unsigned long *)(ent+1); + + for(j=0, msk = smpl_pmd_mask; msk; msk >>=1, j++) { + if ((msk & 0x1) == 0) continue; + printf("PMD%-2d = 0x%016lx\n", j, *reg); + reg++; + } + /* + * we could have removed this and used: + * ent = (smpl_entry_t *)reg + * instead. + */ + pos += entry_size; + ent = (smpl_entry_t *)pos; + entry++; + } + collect_samples = entry; + + /* + * reactivate monitoring once we are done with the samples + * + * Note that this call can fail with EBUSY in non-blocking mode + * as the task may have disappeared while we were processing + * the samples. + */ + if (need_restart && perfmonctl(id, PFM_RESTART, 0, 0) == -1) { + if (errno != EBUSY) + fatal_error("perfmonctl error PFM_RESTART errno %d\n",errno); + else + warning("PFM_RESTART: task has probably terminated \n"); + } +} + +/* + * create a sampling context, fork and exec the command in arg under ptrace, + * attach the context to it and print the sampling buffer on every overflow + * notification until the task terminates. Returns 0; every error is fatal. + */ +int +mainloop(char **arg) +{ + ctx_arg_t ctx; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_reg_t pd[NUM_PMDS]; + pfarg_reg_t pc[NUM_PMCS]; + pfarg_load_t load_args; + pfm_msg_t msg; + unsigned long ovfl_count = 0UL; + unsigned long sample_period; + unsigned long smpl_pmd_mask = 0UL; + pid_t pid; + int status, ret, fd; + unsigned int i, num_counters; + + /* + * initialize all locals. load_args is handed to PFM_LOAD_CONTEXT with only + * load_pid assigned, so it must start out zeroed as well. 
+ */ + memset(&ctx, 0, sizeof(ctx)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * locate events + */ + pfm_get_num_counters(&num_counters); + + if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) 
+ fatal_error("cannot find inst retired event\n"); + + i = 2; + + if (i > num_counters) { + i = num_counters; + printf("too many events provided (max=%u events), using first %u event(s)\n", num_counters, i); + } + /* + * set the privilege mode: + * PFM_PLM3 : user level + * PFM_PLM0 : kernel level + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMCs to initialize. + * We must use pfp_event_count to determine the number of PMDs to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS comes first, that's why this loop + * is safe even when extra PMCs are needed to support a particular event. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + /* build sampling mask */ + smpl_pmd_mask |= 1UL << pc[i].reg_num; + } + + printf("smpl_pmd_mask=0x%lx\n", smpl_pmd_mask); + + /* + * now we indicate what to record when each counter overflows. + * In our case, we only have one sampling period and it is set for the + * first event. Here we indicate that when the sampling period expires + * then we want to record the value of all the other counters. + * + * We exclude the first counter in this case. 
+ */ + smpl_pmd_mask &= ~(1UL << pc[0].reg_num); + + pc[0].reg_smpl_pmds[0] = smpl_pmd_mask; + + /* + * when our sampling counter overflows, we want to be notified. + * The notification will come ONLY when the sampling buffer + * becomes full. + * + * We also activate randomization of the sampling period. + */ + pc[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM; + + /* + * we also want to reset the other PMDs on + * every overflow. If we do not set + * this, the non-overflowed counters + * will be untouched. + */ + pc[0].reg_reset_pmds[0] |= smpl_pmd_mask; + + sample_period = 1000000UL; + + pd[0].reg_value = (~0) - sample_period + 1; + pd[0].reg_short_reset = (~0) - sample_period + 1; + pd[0].reg_long_reset = (~0) - sample_period + 1; + /* + * setup randomization parameters, we allow a range of up to +256 here. + */ + pd[0].reg_random_seed = 5; + pd[0].reg_random_mask = 0xff; + + + printf("programming %u PMCS and %u PMDS\n", outp.pfp_pmc_count, inp.pfp_event_count); + + /* + * prepare context structure. + * + * format specific parameters MUST be concatenated to the regular + * pfarg_context_t structure. For convenience, the default sampling + * format provides a data structure that already combines the pfarg_context_t + * with what is needed for this format. + */ + + /* + * We initialize the format specific information. + * The format is identified by its UUID which must be copied + * into the ctx_smpl_buf_id field. + */ + memcpy(ctx.ctx_arg.ctx_smpl_buf_id, buf_fmt_id, sizeof(pfm_uuid_t)); + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. + */ + ctx.buf_arg.buf_size = 8192; + + /* + * now create our perfmon context. 
+ */ + if (perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1) == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + /* + * extract the file descriptor we will use to + * identify this newly created context + */ + fd = ctx.ctx_arg.ctx_fd; + + /* + * retrieve the virtual address at which the sampling + * buffer has been mapped + */ + buf_addr = ctx.ctx_arg.ctx_smpl_vaddr; + + printf("context [%d] buffer mapped @%p\n", fd, buf_addr); + + /* + * Now program the registers + */ + if (perfmonctl(fd, PFM_WRITE_PMCS, pc, outp.pfp_pmc_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMCS errno %d\n",errno); + } + /* + * initialize the PMDs + */ + if (perfmonctl(fd, PFM_WRITE_PMDS, pd, inp.pfp_event_count) == -1) { + fatal_error("perfmonctl error PFM_WRITE_PMDS errno %d\n",errno); + } + + /* + * Create the child task + */ + if ((pid=fork()) == -1) fatal_error("Cannot fork process\n"); + + /* + * In order to get the PFM_MSG_END message, it is important + * to ensure that the child task does not inherit the file + * descriptor of the context. By default, file descriptors + * are inherited during exec(). We explicitly close it + * here. We could have set it up through fcntl(FD_CLOEXEC) + * to achieve the same thing. + */ + if (pid == 0) { + close(fd); + child(arg); + } + + /* + * wait for the child to exec + */ + waitpid(pid, &status, WUNTRACED); + + /* + * process is stopped at this point + */ + if (WIFEXITED(status)) { + warning("task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status)); + goto terminate_session; + } + + /* + * attach context to stopped task + */ + load_args.load_pid = pid; + if (perfmonctl(fd, PFM_LOAD_CONTEXT, &load_args, 1) == -1) { + fatal_error("perfmonctl error PFM_LOAD_CONTEXT errno %d\n",errno); + } + /* + * activate monitoring for stopped task. 
+ * (nothing will be measured at this point) + */ + if (perfmonctl(fd, PFM_START, NULL, 0) == -1) { + fatal_error(" perfmonctl error PFM_START errno %d\n",errno); + } + /* + * detach child. Side effect includes + * activation of monitoring. + */ + ptrace(PTRACE_DETACH, pid, NULL, 0); + + /* + * core loop + */ + for(;;) { + /* + * wait for overflow/end notification messages + */ + ret = read(fd, &msg, sizeof(msg)); + if (ret == -1) { + fatal_error("cannot read perfmon msg: %s\n", strerror(errno)); + } + switch(msg.type) { + case PFM_MSG_OVFL: /* the sampling buffer is full */ + process_smpl_buf(fd, smpl_pmd_mask, 1); + ovfl_count++; + break; + case PFM_MSG_END: /* monitored task terminated */ + printf("task terminated\n"); + goto terminate_session; + default: fatal_error("unknown message type %d\n", msg.type); + } + } +terminate_session: + /* + * cleanup child + */ + waitpid(pid, &status, 0); + + /* + * check for any leftover samples + */ + process_smpl_buf(fd, smpl_pmd_mask, 0); + + /* + * destroy perfmon context + */ + close(fd); + + printf("%lu samples collected in %lu buffer overflows\n", collect_samples, ovfl_count); + + return 0; +} + +int +main(int argc, char **argv) +{ + pfmlib_options_t pfmlib_options; + + if (argc < 2) + fatal_error("You must specify a command to execute\n"); + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + fatal_error("Can't initialize library\n"); + } + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + return mainloop(argv+1); +} diff --git a/src/libpfm-3.y/examples_ia64_v2.0/whichpmu.c b/src/libpfm-3.y/examples_ia64_v2.0/whichpmu.c new file mode 100644 index 0000000..0014ad5 --- /dev/null +++ b/src/libpfm-3.y/examples_ia64_v2.0/whichpmu.c @@ -0,0 +1,106 @@ +/* + * 
whichpmu.c - example of how to figure out the host PMU model detected by pfmlib + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include + +#include + +#define MAX_PMU_NAME_LEN 32 +/* + * print the PMU model detected by pfmlib together with its implemented + * PMD, PMC and counter registers. Returns 1 when the library cannot be + * initialized, 0 otherwise. + */ +int +main(void) +{ + pfmlib_regmask_t impl_pmds; + pfmlib_regmask_t impl_pmcs; + pfmlib_regmask_t impl_counters; + /* zeroed up front: the pfm_get_*() results below are not checked */ + unsigned int num_pmds = 0, num_pmcs = 0, num_counters = 0, num_events = 0; + unsigned int width = 0; + unsigned int i; + char model[MAX_PMU_NAME_LEN] = ""; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) { + printf("Can't initialize library\n"); + return 1; + } + memset(&impl_pmcs, 0, sizeof(impl_pmcs)); + memset(&impl_pmds, 0, sizeof(impl_pmds)); + memset(&impl_counters, 0, sizeof(impl_counters)); + + /* + * Now simply print the CPU model detected by pfmlib + * + * When the CPU model is not directly supported AND the generic support + * is compiled into the library, the detection will yield "Generic", which + * means that only the architected features will be supported. + * + * This call can be used to tune applications based on the detected host + * CPU model. This is useful because some features are CPU model specific, + * such as address range restriction which is an Itanium feature. 
+ * + */ + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + pfm_get_hw_counter_width(&width); + pfm_get_impl_pmds(&impl_pmds); + pfm_get_impl_pmcs(&impl_pmcs); + pfm_get_impl_counters(&impl_counters); + pfm_get_num_events(&num_events); + pfm_get_num_pmds(&num_pmds); + pfm_get_num_pmcs(&num_pmcs); + pfm_get_num_counters(&num_counters); + + printf("PMU model detected by pfmlib: %s\n", model); + + /* no leading "]" here: no register list has been opened yet */ + printf("number of PMD registers : %u\n", num_pmds); + printf("implemented PMD registers : [ "); + for (i=0; num_pmds; i++) { + if (pfm_regmask_isset(&impl_pmds, i) == 0) continue; + printf("%-3u", i); + num_pmds--; + } + + printf("]\nnumber of PMC registers : %u\n", num_pmcs); + printf("implemented PMC registers : [ "); + for (i=0; num_pmcs; i++) { + if (pfm_regmask_isset(&impl_pmcs, i) == 0) continue; + printf("%-3u", i); + num_pmcs--; + } + + printf("]\nnumber of counters : %u\n", num_counters); + printf("implemented counters : [ "); + for (i=0; num_counters; i++) { + if (pfm_regmask_isset(&impl_counters, i) == 0) continue; + printf("%-3u", i); + num_counters--; + } + printf("]\nhardware counter width : %u\n", width); + printf("number of events supported : %u\n", num_events); + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/Makefile b/src/libpfm-3.y/examples_v2.x/Makefile new file mode 100644 index 0000000..3bfb9e0 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/Makefile @@ -0,0 +1,94 @@ +# +# Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
+# Contributed by Stephane Eranian +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + +TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi)/.. + +include $(TOPDIR)/config.mk +include $(TOPDIR)/rules.mk + +DIRS= + +ifeq ($(ARCH),ia64) +DIRS +=ia64 +endif + +ifeq ($(ARCH),ia32) +DIRS +=x86 +endif + +ifeq ($(ARCH),x86_64) +DIRS +=x86 +endif + +ifeq ($(CONFIG_PFMLIB_ARCH_CRAYXT),y) +CFLAGS += -DCONFIG_PFMLIB_ARCH_CRAYXT +endif + +CFLAGS+= -I. 
-D_GNU_SOURCE +LIBS += -lm + +ifeq ($(SYS),Linux) +CFLAGS+= -pthread +LIBS += -lrt +endif + +TARGET_GEN=showevtinfo check_events + +ifeq ($(SYS),Linux) +TARGET_LINUX +=self task task_attach task_attach_timeout syst \ + notify_self notify_self2 notify_self3 \ + multiplex multiplex2 set_notify whichpmu \ + showreginfo task_smpl task_smpl_user \ + pfmsetup self_smpl_multi self_pipe \ + notify_self_fork self_smpl \ + task_attach_timeout_np syst_np syst_multi_np + +XTRA += rtop +endif + +all: $(TARGET_GEN) $(TARGET_LINUX) $(XTRA) + @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done +# Many systems don't have ncurses installed +rtop: rtop.o detect_pmcs.o $(PFMLIB) + -$(CC) $(CFLAGS) $(LDFLAGS) -D_GNU_SOURCE -o $@ $^ $(LIBS) -lpthread -lncurses + +$(TARGET_LINUX): %:%.o detect_pmcs.o $(PFMLIB) + $(CC) $(CFLAGS) -o $@ $(LDFLAGS) $^ $(LIBS) + +$(TARGET_GEN): %:%.o $(PFMLIB) + $(CC) $(CFLAGS) -o $@ $(LDFLAGS) $^ $(LIBS) +clean: + @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done + $(RM) -f *.o $(TARGET_LINUX) $(TARGET_GEN) $(XTRA) *~ + +distclean: clean + +install_examples: $(TARGET_LINUX) $(TARGET_GEN) + +install_examples: + @echo installing: $(TARGET_LINUX) $(TARGET_GEN) + -mkdir -p $(DESTDIR)$(EXAMPLESDIR)/v2 + $(INSTALL) -m 755 $(TARGET_LINUX) $(TARGET_GEN) $(DESTDIR)$(EXAMPLESDIR)/v2 + @set -e ; for d in $(DIRS) ; do $(MAKE) -C $$d $@ ; done +# +# examples are installed as part of the RPM install, typically in /usr/share/doc/libpfm-X.Y/ +# diff --git a/src/libpfm-3.y/examples_v2.x/check_events.c b/src/libpfm-3.y/examples_v2.x/check_events.c new file mode 100644 index 0000000..b0f8193 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/check_events.c @@ -0,0 +1,150 @@ +/* + * check_events.c - check if event assignment is possible + * + * Copyright (c) 2008 Google, Inc + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), 
to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_PMU_NAME_LEN 32 + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +/* + * report a printf-style message on stderr, then terminate with exit status 1 + */ +static void +fatal_error(char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + exit(1); +} + +/* + * The goal of this program is to exercise the event assignment + * code for a specific PMU model. This program is independent of + * the kernel API. 
+ */ +int +main(int argc, char **argv) +{ + char **p; + unsigned int i; + int ret; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_options_t pfmlib_options; + char model[MAX_PMU_NAME_LEN]; + unsigned int num_counters; + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + printf("PMU model: %s\n", model); + + pfm_get_num_counters(&num_counters); + printf("%u counters available\n", num_counters); + + /* + * prepare parameters to library. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + /* + * be nice to user! 
+ */ + if (argc > 1) { + p = argv+1; + for (i=0; *p ; i++, p++) { + ret = pfm_find_full_event(*p, &inp.pfp_events[i]); + if (ret != PFMLIB_SUCCESS) + fatal_error("event %s: %s\n", *p, pfm_strerror(ret)); + } + } else { + if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) + fatal_error("cannot find inst retired event\n"); + i = 2; + } + + /* + * set the default privilege mode for all counters: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + if (i > num_counters) { + i = num_counters; + printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); + } + + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + for (i=0; i < outp.pfp_pmc_count; i++) + printf("PMC%u=0x%llx\n", + outp.pfp_pmcs[i].reg_num, + outp.pfp_pmcs[i].reg_value); + + for (i=0; i < outp.pfp_pmd_count; i++) + printf("PMD%u\n", outp.pfp_pmds[i].reg_num); + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/detect_pmcs.c b/src/libpfm-3.y/examples_v2.x/detect_pmcs.c new file mode 100644 index 0000000..128536f --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/detect_pmcs.c @@ -0,0 +1,114 @@ +/* + * detect_pmu_regs.c - detect unavailable PMD/PMC registers based on perfmon2 information + * + * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Helper for pfm_dispatch_events() in situations where not all + * PMC/PMD registers are available. + * + * It fills in bitmasks of the *unavailable* PMC/PMD registers, using + * the perfmon context file descriptor it is given or, if none is + * passed, a temporary context created just to retrieve the + * information. + * + * Note that there is no guarantee that the registers reported + * as available will still be available by the time the perfmon + * context is loaded. 
+ * + * arguments: + * fd : a perfmon context file descriptor, or -1 + * r_pmcs: a bitmask for PMC availability, NULL if not needed + * r_pmds: a bitmask for PMD availability, NULL if not needed + * + * return: + * -1: invalid file descriptor passed or cannot retrieve information + * 0: success + */ +int +detect_unavail_pmu_regs(int fd, pfmlib_regmask_t *r_pmcs, pfmlib_regmask_t *r_pmds) +{ + pfarg_ctx_t tmp_ctx; + pfarg_setinfo_t set0; + int rc, word, bit, ctx_fd, nwords; + int own_ctx = (fd == -1); + + memset(&tmp_ctx, 0, sizeof(tmp_ctx)); + memset(&set0, 0, sizeof(set0)); + if (r_pmcs != NULL) + memset(r_pmcs, 0, sizeof(*r_pmcs)); + if (r_pmds != NULL) + memset(r_pmds, 0, sizeof(*r_pmds)); + + /* + * no descriptor from the caller: work on a temporary context + */ + ctx_fd = fd; + if (own_ctx) { + ctx_fd = pfm_create_context(&tmp_ctx, NULL, NULL, 0); + if (ctx_fd == -1) + return -1; + } + + /* + * set0 exists for every context, so query it for the available + * registers; a bogus caller-supplied descriptor makes this call + * fail and the error is returned as is. + */ + rc = pfm_getinfo_evtsets(ctx_fd, &set0, 1); + 
+ /* + * a register is flagged unavailable when its bit is clear in set0 + */ + if (rc == 0 && r_pmcs != NULL) { + nwords = PFMLIB_REG_BV < PFM_PMC_BV ? PFMLIB_REG_BV : PFM_PMC_BV; + for (word = 0; word < nwords; word++) + for (bit = 0; bit < 64; bit++) + if (!(set0.set_avail_pmcs[word] & (1ULL << bit))) + pfm_regmask_set(r_pmcs, (word<<6)+bit); + } + if (rc == 0 && r_pmds != NULL) { + nwords = PFMLIB_REG_BV < PFM_PMD_BV ? PFMLIB_REG_BV : PFM_PMD_BV; + for (word = 0; word < nwords; word++) + for (bit = 0; bit < 64; bit++) + if (!(set0.set_avail_pmds[word] & (1ULL << bit))) + pfm_regmask_set(r_pmds, (word<<6)+bit); + } + + /* only a context created here is closed here */ + if (own_ctx) + close(ctx_fd); + return rc; +} diff --git a/src/libpfm-3.y/examples_v2.x/detect_pmcs.h b/src/libpfm-3.y/examples_v2.x/detect_pmcs.h new file mode 100644 index 0000000..e932ff4 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/detect_pmcs.h @@ -0,0 +1,44 @@ +/* + * detect_pmcs.h - detect unavailable PMD/PMC registers based on perfmon2 information + * + * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ */ +#ifndef __DETECT_PMCS_H__ +#define __DETECT_PMCS_H__ + +#include + +/* + * if no context exists, pass -1 for fd + * if you do not care about PMCs, pass r_pmcs as NULL + * if you do not care about PMDs, pass r_pmds as NULL + */ +extern int detect_unavail_pmu_regs(int fd, pfmlib_regmask_t *r_pmcs, pfmlib_regmask_t *r_pmds); + +/* + * convenience wrapper: same call with r_pmds passed as NULL, for callers + * that only need the PMC bitmask + */ +static inline int detect_unavail_pmcs(int fd, pfmlib_regmask_t *r_pmcs) +{ + return detect_unavail_pmu_regs(fd, r_pmcs, NULL); +} + +#endif /* __DETECT_PMCS_H__ */ diff --git a/src/libpfm-3.y/examples_v2.x/ia64/Makefile b/src/libpfm-3.y/examples_v2.x/ia64/Makefile new file mode 100644 index 0000000..860b5c9 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/Makefile @@ -0,0 +1,65 @@ +# +# Copyright (c) 2002-2005 Hewlett-Packard Development Company, L.P. +# Contributed by Stephane Eranian +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +# This file is part of libpfm, a performance monitoring support library for +# applications on Linux/ia64. +# + +TOPDIR := $(shell if [ "$$PWD" != "" ]; then echo $$PWD; else pwd; fi)/../.. + +include $(TOPDIR)/config.mk +include $(TOPDIR)/rules.mk + +LDFLAGS+=-static + +LIBS += -lm + +SRCS +=ita_rr.c ita_irr.c ita_opcode.c ita_btb.c ita_dear.c +SRCS +=ita2_opcode.c ita2_rr.c ita2_irr.c ita2_dear.c ita2_btb.c +SRCS +=mont_opcode.c mont_rr.c mont_irr.c mont_dear.c mont_etb.c + +TARGETS = $(SRCS:.c=) + +PFMLIB=$(PFMLIBDIR)/libpfm.a + +all: $(TARGETS) + +$(TARGETS): %:%.o $(PFMLIB) + $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $^ $(LIBS) + +clean: + $(RM) -f *.o $(TARGETS) + +distclean: clean + +depend: + $(MKDEP) $(CFLAGS) $(SRCS) + +install_examples: $(TARGETS) + +install_examples: + @echo installing: $(TARGETS) + -mkdir -p $(DESTDIR)$(EXAMPLESDIR)/ia64 + $(INSTALL) -m 755 $(TARGETS) $(DESTDIR)$(EXAMPLESDIR)/ia64 + +# +# examples are installed as part of the RPM install, typically in /usr/share/doc/libpfm-X.Y/ +# diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita2_btb.c b/src/libpfm-3.y/examples_v2.x/ia64/ita2_btb.c new file mode 100644 index 0000000..f802b53 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita2_btb.c @@ -0,0 +1,488 @@ +/* + * ita2_btb.c - example of how to use the BTB with the Itanium 2 PMU + * + * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +typedef pfm_dfl_smpl_hdr_t btb_hdr_t; +typedef pfm_dfl_smpl_entry_t btb_entry_t; +typedef pfm_dfl_smpl_arg_t smpl_arg_t; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * The BRANCH_EVENT is incremented by 1 for each branch event. Such an event is composed of + * two entries in the BTB: a source and a target entry. The BTB is full after 4 branch + * events. 
+ */ +#define SMPL_PERIOD (4UL*256) + +static void *smpl_vaddr; +static unsigned int entry_size; +static int id; + +#define BPL (sizeof(uint64_t)<<3) +#define LBPL 6 + +static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) +{ + bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); +} + +/* + * we don't use static to make sure the compiler does not inline the function + */ +long func1(void) { return 0;} + +long +do_test(unsigned long loop) +{ + long sum = 0; + + while(loop--) { + if (loop & 0x1) + sum += func1(); + else + sum += loop; + } + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * print content of sampling buffer + * + * XXX: using stdio to print from a signal handler is not safe with multi-threaded + * applications + */ +#define safe_printf printf +static void +show_btb_reg(int j, pfm_ita2_pmd_reg_t reg, pfm_ita2_pmd_reg_t pmd16) +{ + unsigned long bruflush, b1; + int is_valid = reg.pmd8_15_ita2_reg.btb_b == 0 && reg.pmd8_15_ita2_reg.btb_mp == 0 ? 0 :1; + + b1 = (pmd16.pmd_val >> (4 + 4*(j-8))) & 0x1; + bruflush = (pmd16.pmd_val >> (5 + 4*(j-8))) & 0x1; + + safe_printf("\tPMD%-2d: 0x%016lx b=%d mp=%d bru=%ld b1=%ld valid=%c\n", + j, + reg.pmd_val, + reg.pmd8_15_ita2_reg.btb_b, + reg.pmd8_15_ita2_reg.btb_mp, + bruflush, b1, + is_valid ? 'Y' : 'N'); + + if (!is_valid) return; + + if (reg.pmd8_15_ita2_reg.btb_b) { + unsigned long addr; + + + addr = (reg.pmd8_15_ita2_reg.btb_addr+b1)<<4; + + addr |= reg.pmd8_15_ita2_reg.btb_slot < 3 ? reg.pmd8_15_ita2_reg.btb_slot : 0; + + safe_printf("\t Source Address: 0x%016lx\n" + "\t Taken=%c Prediction: %s\n\n", + addr, + reg.pmd8_15_ita2_reg.btb_slot < 3 ? 'Y' : 'N', + reg.pmd8_15_ita2_reg.btb_mp ? "FE Failure" : + bruflush ? 
"BE Failure" : "Success"); + } else { + safe_printf("\t Target Address: 0x%016lx\n\n", + (unsigned long)(reg.pmd8_15_ita2_reg.btb_addr<<4)); + } +} + + +static void +show_btb(pfm_ita2_pmd_reg_t *btb, pfm_ita2_pmd_reg_t *pmd16) +{ + int i, last; + + + i = (pmd16->pmd16_ita2_reg.btbi_full) ? pmd16->pmd16_ita2_reg.btbi_bbi : 0; + last = pmd16->pmd16_ita2_reg.btbi_bbi; + + safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", i, last,pmd16->pmd16_ita2_reg.btbi_bbi, pmd16->pmd16_ita2_reg.btbi_full); + do { + show_btb_reg(i+8, btb[i], *pmd16); + i = (i+1) % 8; + } while (i != last); +} + + +void +process_smpl_buffer(void) +{ + btb_hdr_t *hdr; + btb_entry_t *ent; + unsigned long pos; + unsigned long smpl_entry = 0; + pfm_ita2_pmd_reg_t *reg, *pmd16; + unsigned long i; + int ret; + static unsigned long last_ovfl = ~0UL; + + + hdr = (btb_hdr_t *)smpl_vaddr; + + /* + * check that we are not diplaying the previous set of samples again. + * Required to take care of the last batch of samples. + */ + if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + /* + * walk through all the entries recored in the buffer + */ + for(i=0; i < hdr->hdr_count; i++) { + + ret = 0; + + ent = (btb_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->tgid, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry header) + */ + reg = (pfm_ita2_pmd_reg_t*)(ent+1); + + /* + * in this particular example, we have pmd8-pmd15 has the BTB. We have also + * included pmd16 (BTB index) has part of the registers to record. This trick + * allows us to get the index to decode the sequential order of the BTB. + * + * Recorded registers are always recorded in increasing order. 
So we know + * that pmd16 is at a fixed offset (+8*sizeof(unsigned long)) from pmd8. + */ + pmd16 = reg+8; + show_btb(reg, pmd16); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + /* dangerous: stdio is not async-signal-safe */ + printf("Notification received\n"); + + process_smpl_buffer(); + + /* + * And resume monitoring + */ + if (pfm_restart(id) == -1) { + perror("pfm_restart"); + exit(1); + } +} + + +int +main(void) +{ + int ret; + int type = 0; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita2_input_param_t ita2_inp; + pfarg_ctx_t ctx; + smpl_arg_t buf_arg; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + memset(pd, 0, sizeof(pd)); memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&buf_arg, 0, sizeof(buf_arg)); memset(&load_args, 0, sizeof(load_args)); /* pc[] and load_args are handed whole to pfm_write_pmcs()/pfm_load_context(): leave no field uninitialized */ + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL.
+ */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita2_inp,0, sizeof(ita2_inp)); + + /* + * Before calling pfm_dispatch_events(), we must specify what kind + * of branches we want to capture. We are interested in all the + * mispredicted (target, taken/not taken) branches, therefore we + * program the various fields of the BTB config to: + */ + ita2_inp.pfp_ita2_btb.btb_used = 1; + + ita2_inp.pfp_ita2_btb.btb_ds = 0; /* capture target */ + ita2_inp.pfp_ita2_btb.btb_tm = 0x3; /* all branches */ + ita2_inp.pfp_ita2_btb.btb_ptm = 0x1; /* target mispredicted */ + ita2_inp.pfp_ita2_btb.btb_ppm = 0x1; /* mispredicted path */ + ita2_inp.pfp_ita2_btb.btb_brt = 0x0; /* all types captured */ + ita2_inp.pfp_ita2_btb.btb_plm = PFM_PLM3; + + if (pfm_find_full_event("BRANCH_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) { + fatal_error("cannot find event BRANCH_EVENT\n"); + } + + /* + * set the (global) privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded.
+ */ + buf_arg.buf_size = getpagesize(); + + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(&ctx, "default", &buf_arg, sizeof(buf_arg)); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + /* + * map the sampling buffer read-only through the context descriptor + * and remember the address at which it has been mapped + */ + smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); + if (smpl_vaddr == MAP_FAILED) + fatal_error("cannot mmap sampling buffer errno %d\n", errno); + + printf("Sampling buffer mapped at %p\n", smpl_vaddr); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_pmd_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * copy the pmd mapping chosen by the library (outp.pfp_pmds) + * PMD16 is part of the set of used PMD returned by libpfm. + * It will be reset automatically + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * indicate we want notification when buffer is full + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * Now prepare the argument to initialize the PMD and the sampling period + * We know we use only one PMD in this case, therefore pmd[0] corresponds + * to our first event which is our sampling period.
+ */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + pfm_bv_set(pd[0].reg_smpl_pmds, 16); /* record pmd16 (BTB index) in every sample */ + + entry_size = sizeof(btb_entry_t) + 1 * 8; /* entry header + 8 bytes for pmd16 */ + + for(i=8; i < 16; i++) { + pfm_bv_set(pd[0].reg_smpl_pmds, i); + entry_size += 8; + } + + /* + * When our counter overflows, we want the BTB index to be reset, so that we keep + * in sync. This is required to make it possible to interpret pmd16 on overflow + * to avoid repeating the same branch several times. + */ + pfm_bv_set(pd[0].reg_reset_pmds, 16); + + /* + * Now program the registers + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (pfm_load_context(id, &load_args) == -1) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(100000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer. Note that the buffer may not be full at this point.
+ * + */ + + process_smpl_buffer(); + + /* + * let's stop this now + */ + munmap(smpl_vaddr, (size_t)buf_arg.buf_size); + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita2_dear.c b/src/libpfm-3.y/examples_v2.x/ia64/ita2_dear.c new file mode 100644 index 0000000..ca97f49 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita2_dear.c @@ -0,0 +1,416 @@ +/* + * ita2_dear.c - example of how use the D-EAR with the Itanium 2 PMU + * + * Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define SMPL_PERIOD (40) + +#define EVENT_NAME "data_ear_cache_lat4" + +typedef pfm_dfl_smpl_hdr_t dear_hdr_t; +typedef pfm_dfl_smpl_entry_t dear_entry_t; +typedef pfm_dfl_smpl_arg_t smpl_arg_t; + +static void *smpl_vaddr; +static unsigned long entry_size; +static int id; + +#define BPL (sizeof(uint64_t)<<3) +#define LBPL 6 + +static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) +{ + bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); +} + +long +do_test(unsigned long size) +{ + unsigned long i, sum = 0; + int *array; + + printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); + array = (int *)malloc(size * sizeof(int)); + if (array == NULL ) { + printf("line = %d No memory available!\n", __LINE__); + exit(1); + } + for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + + /* + * walk through all the entries recored in the buffer + */ + for(i=0; i < hdr->hdr_count; i++) { + + ret = 0; + + ent = (dear_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->tgid, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry header) + */ + reg = (pfm_ita2_pmd_reg_t*)(ent+1); + + safe_printf("PMD2 : 0x%016lx\n", reg->pmd_val); + + reg++; + + safe_printf("PMD3 : 0x%016lx, latency %u\n", + reg->pmd_val, + reg->pmd3_ita2_reg.dear_latency); + + reg++; + + safe_printf("PMD17: 0x%016lx, valid %c, address 0x%016lx\n", + reg->pmd_val, + reg->pmd17_ita2_reg.dear_vl ? 
'Y': 'N', + (reg->pmd17_ita2_reg.dear_iaddr << 4) | + (unsigned long)reg->pmd17_ita2_reg.dear_slot); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + /* dangerous */ + printf("Notification received\n"); + + process_smpl_buffer(); + /* + * And resume monitoring + */ + if (pfm_restart(id) == -1) { + perror("pfm_restart"); + exit(1); + } +} + +int +main(void) +{ + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_ctx_t ctx; + smpl_arg_t buf_arg; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + int ret, type = 0; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&buf_arg, 0, sizeof(buf_arg)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. 
+ */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + /* + * Look up the D-EAR event we sample on (EVENT_NAME). It is stored + * in the first event slot, inp.pfp_events[0], for the library to + * dispatch. + */ + if (pfm_find_full_event(EVENT_NAME, &inp.pfp_events[0]) != PFMLIB_SUCCESS) { + fatal_error("cannot find event %s\n", EVENT_NAME); + } + + /* + * set the (global) privilege mode: + * PFM_PLM3|PFM_PLM0 : user and kernel levels + */ + inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + * + * We use all global settings for this EAR. + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * prepare context structure. + * + * format specific parameters MUST be concatenated to the regular + * pfarg_ctx_t structure. For convenience, the default sampling + * format provides a data structure that already combines the pfarg_ctx_t + * with what is needed for this format. + */ + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded.
+ */ + buf_arg.buf_size = getpagesize(); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(&ctx, "default", &buf_arg, sizeof(buf_arg)); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * retrieve the virtual address at which the sampling + * buffer has been mapped + */ + smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); + if (smpl_vaddr == MAP_FAILED) + fatal_error("cannot mmap sampling buffer errno %d\n", errno); + + printf("Sampling buffer mapped at %p\n", smpl_vaddr); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. 
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * copy the pmd mapping chosen by the library (outp.pfp_pmds) + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * indicate we want notification when buffer is full + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + pfm_bv_set(pd[0].reg_smpl_pmds, 2); + pfm_bv_set(pd[0].reg_smpl_pmds, 3); + pfm_bv_set(pd[0].reg_smpl_pmds, 17); + entry_size = sizeof(dear_entry_t) + 3 * 8; /* entry header + 8 bytes each for pmd2, pmd3, pmd17 */ + + /* + * initialize the PMD and the sampling period + */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error("pfm_write_pmds error errno %d\n",errno); + /* + * now we load (i.e., attach) the context to ourself (load_pid is our own pid) + */ + load_args.load_pid = getpid(); + if (pfm_load_context(id, &load_args) == -1) + fatal_error("pfm_load_context error errno %d\n",errno); + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now.
+ */ + pfm_self_start(id); + + do_test(100000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer, i.e. which may not be full + */ + process_smpl_buffer(); + + /* + * let's stop this now + */ + munmap(smpl_vaddr, (size_t)buf_arg.buf_size); + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita2_irr.c b/src/libpfm-3.y/examples_v2.x/ia64/ita2_irr.c new file mode 100644 index 0000000..ac9ba7c --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita2_irr.c @@ -0,0 +1,382 @@ +/* + * ita2_irr.c - example of how to use code range restriction with the Itanium2 PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define VECTOR_SIZE 1000000UL + +typedef struct { + char *event_name; + unsigned long expected_value; +} event_desc_t; + +static event_desc_t event_list[]={ + { "fp_ops_retired", VECTOR_SIZE<<1 }, + { NULL, 0UL } +}; + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + + +void +saxpy(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } + printf("done saxpy\n"); +} + +void +saxpy2(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } + printf("done saxpy2\n"); +} + + + +static int +do_test(void) +{ + unsigned long size; + double *a, *b, *c; + + size = VECTOR_SIZE; + + a = malloc(size*sizeof(double)); + b = malloc(size*sizeof(double)); + c = malloc(size*sizeof(double)); + + if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); + + memset(a, 0, size*sizeof(double)); + memset(b, 0, size*sizeof(double)); + memset(c, 0, size*sizeof(double)); + + saxpy(a,b,c, size); + saxpy2(a,b,c, size); + + return 0; +} + +int +main(int argc, char **argv) +{ + event_desc_t *p; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita2_input_param_t ita2_inp; + pfmlib_ita2_output_param_t ita2_outp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_pmc_t ibrs[8]; + pfarg_ctx_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct fd { /* function descriptor */ + unsigned long addr; + unsigned long 
gp; + } *fd; + unsigned int i; + int id; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 1; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Compute the range we are interested in + * + * On IA-64, the function pointer does not point directly + * to the function but to a descriptor which contains two + * unsigned long: the first one is the actual start address + * of the function, the second is the gp (global pointer) + * to load into r1 before jumping into the function. Unlesss + * we're jumping into a shared library the gp is the same as + * the current gp. + * + * In the artificial example, we also rely on the compiler/linker + * NOT reordering code layout. We depend on saxpy2() being just + * after saxpy(). 
+ * + */ + fd = (struct fd *)saxpy; + range_start = fd->addr; + + fd = (struct fd *)saxpy2; + range_end = fd->addr; + + /* + * linker may reorder saxpy() and saxpy2() + */ + if (range_end < range_start) { + unsigned long tmp; + tmp = range_start; + range_start = range_end; + range_end = tmp; + } + + memset(pc, 0, sizeof(pc)); + memset(pd, 0, sizeof(pd)); + memset(ctx, 0, sizeof(ctx)); + memset(ibrs,0, sizeof(ibrs)); + memset(&load_args,0, sizeof(load_args)); + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita2_inp,0, sizeof(ita2_inp)); + memset(&ita2_outp,0, sizeof(ita2_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; p->event_name ; i++, p++) { + if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("cannot find %s event\n", p->event_name); + } + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the data range we are interested in. The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields on return from the library call. + * + * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used. 
+ * + * In the case of code range restriction on Itanium 2, the library will try to use the fine + * mode first and then it will default to using multiple pairs to cover the range. + */ + + ita2_inp.pfp_ita2_irange.rr_used = 1; /* indicate we use code range restriction */ + ita2_inp.pfp_ita2_irange.rr_limits[0].rr_start = range_start; + ita2_inp.pfp_ita2_irange.rr_limits[0].rr_end = range_end; + + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, &ita2_outp)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * print offsets + */ + printf("code range : [0x%016lx-0x%016lx)\n" + "start_offset:-0x%lx end_offset:+0x%lx\n" + "%d pairs of debug registers used\n", + range_start, + range_end, + ita2_outp.pfp_ita2_irange.rr_infos[0].rr_soff, + ita2_outp.pfp_ita2_irange.rr_infos[0].rr_eoff, + ita2_outp.pfp_ita2_irange.rr_nbr_used >> 1); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(ctx, NULL, NULL, 0); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. 
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * propagate the setup for the debug registers from the library to the arguments + * to the syscall. The library does not know the type of the syscall + * anymore. The code debug registers start at PMC256 on all Itanium processors. + */ + for (i=0; i < ita2_outp.pfp_ita2_irange.rr_nbr_used; i++) { + ibrs[i].reg_num = 256+ita2_outp.pfp_ita2_irange.rr_br[i].reg_num; + ibrs[i].reg_value = ita2_outp.pfp_ita2_irange.rr_br[i].reg_value; + } + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than coutning monitors. + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) + fatal_error("child: pfm_write_pmcs error errno %d\n",errno); + + /* + * Program the code debug registers. + */ + if (pfm_write_pmcs(id, ibrs, ita2_outp.pfp_ita2_irange.rr_nbr_used) == -1) + fatal_error("child: pfm_write_pmcs error errno %d\n",errno); + + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error("child: pfm_write_pmds error errno %d\n",errno); + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(id, &load_args) == -1) { + fatal_error("pfm_load_context error errno %d\n",errno); + } + + /* + * Let's roll now. + * + * We run two distinct copies of the same function but we restrict measurement + * to the first one (saxpy). Therefore the expected count is half what you would + * get if code range restriction was not used. The core loop in both case uses + * two floating point operation per iteration. 
+ */ + pfm_self_start(id); + + do_test(); + + pfm_self_stop(id); + /* + * now read the results + */ + if (pfm_read_pmds(id, pd, inp.pfp_event_count) == -1) + fatal_error("pfm_read_pmds error errno %d\n",errno); + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%-3u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, event_list[i].expected_value); + } + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita2_opcode.c b/src/libpfm-3.y/examples_v2.x/ia64/ita2_opcode.c new file mode 100644 index 0000000..e47f789 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita2_opcode.c @@ -0,0 +1,277 @@ +/* + * ita2_opcode.c - example of how to use the opcode matcher with the Itanium2 PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * we don't use static to make sure the compiler does not inline the function + */ +int +do_test(unsigned long loop) +{ + unsigned long sum = 0; + while(loop--) sum += loop; + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(void) +{ + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita2_input_param_t ita2_inp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_ctx_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + int ret; + int type = 0; + int id; + unsigned int i; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with the %s PMU\n", model); + } + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita2_inp,0, sizeof(ita2_inp)); + + /* + * We indicate that we are using the PMC8 opcode matcher. This is required + * otherwise the library add PMC8 to the list of PMC to pogram during + * pfm_dispatch_events(). + */ + ita2_inp.pfp_ita2_pmc8.opcm_used = 1; + + /* + * We want to match all the br.cloop in our test function. + * This branch is an IP-relative branch for which the major + * opcode (bits [40-37]=4) and the btype field is 5 (which represents + * bits[6-8]) so it is included in the match/mask fields of PMC8. + * It is necessarily in a B slot. 
+ * + * We don't care which operands are used with br.cloop therefore + * the mask field of pmc8 is set such that only the 4 bits of the + * opcode and 3 bits of btype must match exactly. This is accomplished by + * clearing the top 4 bits and bits [6-8] of the mask field and setting the + * remaining bits. Similarly, the match field only has the opcode value and btype + * set according to the encoding of br.cloop, the + * remaining bits are zero. Bit 60 of PMC8 is set to indicate + * that we look only in B slots (this is the only possibility for + * this instruction anyway). + * + * So the binary representation of the value for PMC8 is as follows: + * + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * ---------------------------------------------------------------- + * 0001010000000000000000101000000000000011111111111111000111111000 + * + * which yields a value of 0x1400028003fff1f8. + * + * Depending on the level of optimization to compile this code, it may + * be that the count reported could be zero, if the compiler uses a br.cond + * instead of br.cloop. + * + * + * The 0x1 sets the ig_ad field to make sure we ignore any range restriction. + * Also bit 2 must always be set + */ + ita2_inp.pfp_ita2_pmc8.pmc_val = 0x1400028003fff1fa; + + /* + * To count the number of occurence of this instruction, we must + * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 + * event. 
+ */ + if (pfm_find_full_event("IA64_TAGGED_INST_RETIRED_IBRP0_PMC8", &inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find event IA64_TAGGED_INST_RETIRED_IBRP0_PMC8\n"); + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many events we monitor (a single one here) + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCs; ita2_inp carries the PMC8 opcode matcher setup done above + */ + if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(ctx, NULL, NULL, 0); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * Now prepare the arguments to initialize the PMDs and PMCs. + * We must use pfp_pmc_count to determine the number of PMCs to initialize. + * We must use pfp_pmd_count to determine the number of PMDs to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * copy the PMD register numbers selected by the library into the PMD arguments + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more thann coutning monitors.
+ */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error("pfm_write_pmds error errno %d\n",errno); + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(id, &load_args) == -1) { + fatal_error("pfm_load_context error errno %d\n",errno); + } + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(100UL); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (pfm_read_pmds(id, pd, inp.pfp_event_count) == -1) { + fatal_error("pfm_read_pmds error errno %d\n",errno); + } + + /* + * print the results + */ + pfm_get_full_event_name(&inp.pfp_events[0], name, MAX_EVT_NAME_LEN); + printf("PMD%-3u %20lu %s\n", + pd[0].reg_num, + pd[0].reg_value, + name); + + if (pd[0].reg_value != 0) + printf("compiler used br.cloop\n"); + else + printf("compiler did not use br.cloop\n"); + + /* + * let's stop this now + */ + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita2_rr.c b/src/libpfm-3.y/examples_v2.x/ia64/ita2_rr.c new file mode 100644 index 0000000..367c21e --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita2_rr.c @@ -0,0 +1,380 @@ +/* + * ita2_rr.c - example of how to use data range restriction with the Itanium2 PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if defined(__ECC) && defined(__INTEL_COMPILER) + +/* if you do not have this file, your compiler is too old */ +#include + +#define clear_psr_ac() __rum(1UL<<3) + +#elif defined(__GNUC__) + +static inline void +clear_psr_ac(void) +{ + __asm__ __volatile__("rum psr.ac;;" ::: "memory" ); +} +#else +#error "You need to define clear_psr_ac() for your compiler" +#endif + + + +#define TEST_DATA_COUNT 16 +#define N_LOOP 100000000UL + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * here we capture only misaligned_loads because it cannot + * be measured with misaligned_stores_retired at the same time + */ +static char *event_list[]={ + "misaligned_loads_retired", + NULL +}; + + +typedef union { + unsigned long l_tab[2]; + unsigned int i_tab[4]; + unsigned short s_tab[8]; + unsigned char c_tab[16]; +} test_data_t; + +static int +do_test(test_data_t *data) +{ + unsigned int *l, v; + + l = (unsigned int *)(data->c_tab+1); + + if (((unsigned long)l & 0x1) == 0) { + printf("Data is not unaligned, can't run test\n"); + return -1; + } + + v = *l; + v++; + *l = v; + + return 0; +} + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(int argc, char **argv) +{ + char **p; + test_data_t *test_data, *test_data_fake; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita2_input_param_t ita2_inp; + pfmlib_ita2_output_param_t ita2_outp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_ctx_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + unsigned int i; + int id, num_pmcs = 0; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM2_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* verbose output enabled; set to 0 to silence */ + pfm_set_options(&pfmlib_options); + + /* + * now let's allocate the data structure we will be monitoring (test_data) + * and a second one of the same size that must NOT be counted (test_data_fake) + */ + test_data = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data == NULL) { + fatal_error("cannot allocate test data structure\n"); /* newline added: fatal_error prints fmt verbatim */ + } + test_data_fake = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data_fake == NULL) { + fatal_error("cannot allocate test data structure\n"); + } + /* + * Compute the range we are interested in: [range_start, range_end) covers test_data only + */ + range_start = (unsigned long)test_data; + range_end = range_start + sizeof(test_data_t)*TEST_DATA_COUNT; + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + 
memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita2_inp,0, sizeof(ita2_inp)); + memset(&ita2_outp,0, sizeof(ita2_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; *p ; i++, p++) { + if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event\n", *p); + } + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the data range we are interested in. The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields in rr_infos on return from the library call. + * + * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used. 
+ */ + + ita2_inp.pfp_ita2_drange.rr_used = 1; + ita2_inp.pfp_ita2_drange.rr_limits[0].rr_start = range_start; + ita2_inp.pfp_ita2_drange.rr_limits[0].rr_end = range_end; + + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita2_inp, &outp, &ita2_outp)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + printf("data range : [0x%016lx-0x%016lx): %d pair of debug registers used\n" + "start_offset:-0x%lx end_offset:+0x%lx\n", + range_start, + range_end, + ita2_outp.pfp_ita2_drange.rr_nbr_used >> 1, + ita2_outp.pfp_ita2_drange.rr_infos[0].rr_soff, + ita2_outp.pfp_ita2_drange.rr_infos[0].rr_eoff); + + printf("fake data range: [0x%016lx-0x%016lx)\n", + (unsigned long)test_data_fake, + (unsigned long)test_data_fake+sizeof(test_data_t)*TEST_DATA_COUNT); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(ctx, NULL, NULL, 0); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++, num_pmcs++) { + pc[num_pmcs].reg_num = outp.pfp_pmcs[i].reg_num; + pc[num_pmcs].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * propagate the setup for the debug registers from the library to the arguments + * to the syscall. 
+ */ + for (i=0; i < ita2_outp.pfp_ita2_drange.rr_nbr_used; i++, num_pmcs++) { + pc[num_pmcs].reg_num = 264+ita2_outp.pfp_ita2_drange.rr_br[i].reg_num; + pc[num_pmcs].reg_value = ita2_outp.pfp_ita2_drange.rr_br[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than coutning monitors. + */ + if (pfm_write_pmcs(id, pc, num_pmcs) == -1) + fatal_error("child: pfm_write_pmc error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error( "child: pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(id, &load_args) == -1) { + fatal_error("pfm_load_context error errno %d\n",errno); + } + + /* + * Let's make sure that the hardware does the unaligned accesses (do not use the + * kernel software handler otherwise the PMU won't see the unaligned fault). + */ + clear_psr_ac(); + + /* + * Let's roll now. + * + * The idea behind this test is to have two dynamically allocated data structures + * which are access in a unaligned fashion. But we want to capture only the unaligned + * accesses on one of the two. So the debug registers are programmed to cover the + * first one ONLY. Then we activate monotoring and access the two data structures. + * This is an artificial example just to demonstrate how to use data address range + * restrictions. 
+ */ + pfm_self_start(id); + + for(i=0; i < N_LOOP; i++) { + do_test(test_data); + do_test(test_data_fake); + } + + pfm_self_stop(id); + + /* + * now read the results + */ + if (pfm_read_pmds(id, pd, inp.pfp_event_count) == -1) + fatal_error( "pfm_read_pmds error errno %d\n",errno); + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + * + * For this example, we expect to see a value of 1 for misaligned loads. + * But it can be two when the test_data and test_data_fake + * are allocated very close from each other and the range created with the debug + * registers is larger then test_data. + * + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%-3u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, N_LOOP); + + if (pd[i].reg_value != N_LOOP) { + printf("error: Result should be 1 for %s\n", name); + break; + } + } + /* + * let's stop this now + */ + close(id); + free(test_data); + free(test_data_fake); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita_btb.c b/src/libpfm-3.y/examples_v2.x/ia64/ita_btb.c new file mode 100644 index 0000000..ee080fc --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita_btb.c @@ -0,0 +1,493 @@ +/* + * ita_btb.c - example of how use the BTB with the Itanium PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include + +typedef pfm_dfl_smpl_hdr_t btb_hdr_t; +typedef pfm_dfl_smpl_entry_t btb_entry_t; +typedef pfm_dfl_smpl_arg_t smpl_arg_t; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * The BRANCH_EVENT is increment by 1 for each branch event. Such event is composed of + * two entries in the BTB: a source and a target entry. The BTB is full after 4 branch + * events. 
+ */ +#define SMPL_PERIOD (4UL*256) + +/* + * We use a small buffer size to exercise the overflow handler. NOTE(review): SMPL_BUF_NENTRIES is not referenced anywhere in this file; main() sizes the buffer with getpagesize(). + */ +#define SMPL_BUF_NENTRIES 64 + +static void *smpl_vaddr; +static unsigned int entry_size; +static int id; + +#define BPL (sizeof(uint64_t)<<3) +#define LBPL 6 + +static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) +{ + bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); /* set bit rnum in bitvector bv: word index is rnum/64, bit index is rnum%64 */ +} + +/* + * we don't use static to make sure the compiler does not inline the function + */ +long func1(void) { return 0;} + +long +do_test(unsigned long loop) +{ + long sum = 0; + + while(loop--) { + if (loop & 0x1) + sum += func1(); + else + sum += loop; + } + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * print content of sampling buffer + * + * XXX: using stdio to print from a signal handler is not safe with multi-threaded + * applications + */ +#define safe_printf printf + +static int +show_btb_reg(int j, pfm_ita_pmd_reg_t reg) +{ + int ret; + int is_valid = reg.pmd8_15_ita_reg.btb_b == 0 && reg.pmd8_15_ita_reg.btb_mp == 0 ? 0 :1; /* entry is treated as invalid only when both b and mp are 0 */ + + ret = safe_printf("\tPMD%-2d: 0x%016lx b=%d mp=%d valid=%c\n", + j, + reg.pmd_val, + reg.pmd8_15_ita_reg.btb_b, + reg.pmd8_15_ita_reg.btb_mp, + is_valid ? 'Y' : 'N'); + + if (!is_valid) return ret; + + if (reg.pmd8_15_ita_reg.btb_b) { + unsigned long addr; + + addr = reg.pmd8_15_ita_reg.btb_addr<<4; + addr |= reg.pmd8_15_ita_reg.btb_slot < 3 ? reg.pmd8_15_ita_reg.btb_slot : 0; + + ret = safe_printf("\t Source Address: 0x%016lx\n" + "\t Taken=%c Prediction: %s\n\n", + addr, + reg.pmd8_15_ita_reg.btb_slot < 3 ? 'Y' : 'N', + reg.pmd8_15_ita_reg.btb_mp ? 
"Failure" : "Success"); + } else { + ret = safe_printf("\t Target Address: 0x%016lx\n\n", + (unsigned long)(reg.pmd8_15_ita_reg.btb_addr<<4)); + } + return ret; +} + +static void +show_btb(pfm_ita_pmd_reg_t *btb, pfm_ita_pmd_reg_t *pmd16) +{ + int i, last; + + + i = (pmd16->pmd16_ita_reg.btbi_full) ? pmd16->pmd16_ita_reg.btbi_bbi : 0; /* start at bbi when the full flag is set, else at entry 0 */ + last = pmd16->pmd16_ita_reg.btbi_bbi; + + safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", i, last,pmd16->pmd16_ita_reg.btbi_bbi, pmd16->pmd16_ita_reg.btbi_full); + do { + show_btb_reg(i+8, btb[i]); /* btb[i] is printed as PMD(i+8) */ + i = (i+1) % 8; + } while (i != last); +} + + +static void +process_smpl_buffer(void) +{ + btb_hdr_t *hdr; + btb_entry_t *ent; + unsigned long pos; + unsigned long smpl_entry = 0; + pfm_ita_pmd_reg_t *reg, *pmd16; + unsigned long i; + int ret; + static unsigned long last_ovfl = ~0UL; + + + hdr = (btb_hdr_t *)smpl_vaddr; + + /* + * check that we are not displaying the previous set of samples again. + * Required to take care of the last batch of samples. NOTE(review): last_ovfl is never assigned in this function, so it stays ~0UL and this test can never be true. + */ + if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + /* + * walk through all the entries recorded in the buffer; the first entry starts right after the header + */ + for(i=0; i < hdr->hdr_count; i++) { + + ret = 0; + + ent = (btb_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->tgid, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry header) + */ + reg = (pfm_ita_pmd_reg_t*)(ent+1); + + /* + * in this particular example, we have pmd8-pmd15 has the BTB. We have also + * included pmd16 (BTB index) has part of the registers to record. This trick + * allows us to get the index to decode the sequential order of the BTB. + * + * Recorded registers are always recorded in increasing order. 
So we know + * that pmd16 is at a fixed offset (+8*sizeof(unsigned long)) from pmd8. + */ + pmd16 = reg+8; + show_btb(reg, pmd16); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + /* dangerous */ + printf("Notification received\n"); + + process_smpl_buffer(); + + /* + * And resume monitoring + */ + if (pfm_restart(id) == -1) { + perror("pfm_restart"); + exit(1); + } +} + + +int +main(void) +{ + int ret; + int type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita_input_param_t ita_inp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_ctx_t ctx; + smpl_arg_t buf_arg; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); /* zero all argument structures handed to the kernel, as the sibling examples do */ + memset(&ctx, 0, sizeof(ctx)); + memset(&buf_arg, 0, sizeof(buf_arg)); + memset(&load_args, 0, sizeof(load_args)); /* only load_pid is assigned later; the other fields must not be stack garbage */ + memset(&inp, 0, sizeof(inp)); + memset(&outp, 0, sizeof(outp)); + memset(&ita_inp,0, sizeof(ita_inp)); + + + /* + * Before calling pfm_find_dispatch(), we must specify what kind + * of 
branches we want to capture. We are interesteed in all the mispredicted branches, + * therefore we program we set the various fields of the BTB config to: + */ + ita_inp.pfp_ita_btb.btb_used = 1; + + ita_inp.pfp_ita_btb.btb_tar = 0x1; + ita_inp.pfp_ita_btb.btb_tm = 0x2; + ita_inp.pfp_ita_btb.btb_ptm = 0x3; + ita_inp.pfp_ita_btb.btb_tac = 0x1; + ita_inp.pfp_ita_btb.btb_bac = 0x1; + ita_inp.pfp_ita_btb.btb_ppm = 0x3; + ita_inp.pfp_ita_btb.btb_plm = PFM_PLM3; + + /* + * To drive the sampling, we program a counting monitor + * with the BRANCH_EVENT event (its PMD is given the sampling + * period further down in this function). + */ + if (pfm_find_full_event("BRANCH_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find event BRANCH_EVENT\n"); + + /* + * set the (global) privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many events we monitor (a single one here) + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCs; ita_inp carries the BTB configuration set above + */ + if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. 
+ */ + buf_arg.buf_size = getpagesize(); + + + /* + * now create the context for self monitoring/per-task, passing the sampling buffer argument + */ + id = pfm_create_context(&ctx, "default", &buf_arg, sizeof(buf_arg)); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * map the sampling buffer read-only through the context file descriptor + * and remember the address for process_smpl_buffer() + */ + smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); + if (smpl_vaddr == MAP_FAILED) + fatal_error("cannot mmap sampling buffer errno %d\n", errno); + + printf("Sampling buffer mapped at %p\n", smpl_vaddr); + + + /* + * Now prepare the arguments to initialize the PMDs and PMCs. + * We must use pfp_pmc_count to determine the number of PMCs to initialize. + * We must use pfp_pmd_count to determine the number of PMDs to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * copy the PMD register numbers selected by the library into the PMD arguments. + * PMD16 is part of the set of used PMD returned by libpfm. + * It will be reset automatically + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + /* + * indicate we want notification when buffer is full + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * Now prepare the argument to initialize the PMD and the sampling period + * We know we use only one PMD in this case, therefore pmd[0] corresponds + * to our first event which is our sampling period. 
+ */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + pfm_bv_set(pd[0].reg_smpl_pmds, 16); + + entry_size = sizeof(btb_entry_t) + 1 * 8; /* entry header plus 8 bytes for pmd16 */ + + for(i=8; i < 16; i++) { + pfm_bv_set(pd[0].reg_smpl_pmds, i); + entry_size += 8; /* 8 more bytes for each of pmd8-pmd15 */ + } + + /* + * When our counter overflows, we want the BTB index (pmd16) to be reset, so that we keep + * in sync. This is required to make it possible to interpret pmd16 on overflow + * to avoid repeating the same branch several times. + */ + pfm_bv_set(pd[0].reg_reset_pmds, 16); + + /* + * Now program the registers + * + * We pass pfp_pmc_count (not pfp_event_count) as the number of elements given to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., it contains more than the counting monitors. + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (pfm_load_context(id, &load_args) == -1) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor so that SIGIO is delivered to this process + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(100000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer. Note that the buffer may not be full at this point. 
+ * + */ + + process_smpl_buffer(); + + /* + * let's stop this now + */ + munmap(smpl_vaddr, (size_t)buf_arg.buf_size); + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita_dear.c b/src/libpfm-3.y/examples_v2.x/ia64/ita_dear.c new file mode 100644 index 0000000..2f9d6fe --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita_dear.c @@ -0,0 +1,417 @@ +/* + * ita_dear.c - example of how use the D-EAR with the Itanium PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define EVENT_NAME "DATA_EAR_CACHE_LAT4" +#define SMPL_PERIOD (40) + +#define M_PMD(x) (1UL<<(x)) +#define DEAR_REGS_MASK (M_PMD(2)|M_PMD(3)|M_PMD(17)) + +typedef pfm_dfl_smpl_hdr_t dear_hdr_t; +typedef pfm_dfl_smpl_entry_t dear_entry_t; +typedef pfm_dfl_smpl_arg_t smpl_arg_t; + +static void *smpl_vaddr; +static unsigned long entry_size; +static int id; + +#define BPL (sizeof(uint64_t)<<3) +#define LBPL 6 + +static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) +{ + bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); +} + +long +do_test(unsigned long size) +{ + unsigned long i, sum = 0; + int *array; + + printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); + array = (int *)malloc(size * sizeof(int)); + if (array == NULL ) { + printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); + exit(1); + } + for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + + /* + * walk through all the entries recored in the buffer + */ + for(i=0; i < hdr->hdr_count; i++) { + + ret = 0; + + ent = (dear_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->tgid, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry header) + */ + reg = (pfm_ita_pmd_reg_t*)(ent+1); + + safe_printf("PMD2 : 0x%016lx\n", reg->pmd_val); + + reg++; + + safe_printf("PMD3 : 0x%016lx, latency %u\n", + reg->pmd_val, + reg->pmd3_ita_reg.dear_latency); + + reg++; + + safe_printf("PMD17: 0x%016lx, valid %c, address 0x%016lx\n", + reg->pmd_val, 
+ reg->pmd17_ita_reg.dear_vl ? 'Y': 'N', + (reg->pmd17_ita_reg.dear_iaddr << 4) | + (unsigned long)reg->pmd17_ita_reg.dear_slot); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + /* dangerous */ + printf("Notification received\n"); + + process_smpl_buffer(); + /* + * And resume monitoring + */ + if (pfm_restart(id) == -1) { + perror("pfm_restart"); + exit(1); + } +} + +int +main(void) +{ + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_ctx_t ctx; + smpl_arg_t buf_arg; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + int ret, type = 0; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&buf_arg, 0, sizeof(buf_arg)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. 
+ */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + /* + * Look up the event named by EVENT_NAME. It is the only event + * this example programs (pfp_event_count is set to 1 below). + */ + if (pfm_find_full_event(EVENT_NAME, &inp.pfp_events[0]) != PFMLIB_SUCCESS) { + fatal_error("cannot find event %s\n", EVENT_NAME); + } + + /* + * set the (global) privilege mode: + * PFM_PLM0|PFM_PLM3 : kernel level and user level + */ + inp.pfp_dfl_plm = PFM_PLM0|PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + * + * We use all global settings for this EAR. + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * prepare context structure. + * + * format specific parameters MUST be concatenated to the regular + * pfarg_ctx_t structure. For convenience, the default sampling + * format provides a data structure that already combines the pfarg_ctx_t + * with what is needed for this format. + */ + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. 
+ */ + buf_arg.buf_size = getpagesize(); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(&ctx, "default", &buf_arg, sizeof(buf_arg)); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * map the sampling buffer read-only through the context file + * descriptor and remember the address at which it was mapped + */ + smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); + if (smpl_vaddr == MAP_FAILED) + fatal_error("cannot mmap sampling buffer errno %d\n", errno); + + printf("Sampling buffer mapped at %p\n", smpl_vaddr); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_pmd_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. 
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + /* + * indicate we want notification when buffer is full + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * record PMD2, PMD3 and PMD17 in every sample; each recorded + * register adds 8 bytes to the size of a sample entry + */ + pfm_bv_set(pd[0].reg_smpl_pmds, 2); + pfm_bv_set(pd[0].reg_smpl_pmds, 3); + pfm_bv_set(pd[0].reg_smpl_pmds, 17); + entry_size = sizeof(dear_entry_t) + 3 * 8; + + /* + * initialize the PMD and the sampling period + */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error("pfm_write_pmds error errno %d\n",errno); + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (pfm_load_context(id, &load_args) == -1) + fatal_error("pfm_load_context error errno %d\n",errno); + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now. 
+ */ + pfm_self_start(id); + + do_test(10000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer, i.e. which may not be full + */ + process_smpl_buffer(); + + /* + * let's stop this now + */ + munmap(smpl_vaddr, (size_t)buf_arg.buf_size); + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita_irr.c b/src/libpfm-3.y/examples_v2.x/ia64/ita_irr.c new file mode 100644 index 0000000..4fbb1bc --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita_irr.c @@ -0,0 +1,384 @@ +/* + * ita_irr.c - example of how to use code range restriction with the Itanium PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define VECTOR_SIZE 1000000UL + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + + +typedef struct { + char *event_name; + unsigned long expected_value; +} event_desc_t; + +static event_desc_t event_list[]={ + { "fp_ops_retired_hi", 0UL} , + { "fp_ops_retired_lo", VECTOR_SIZE<<1 }, + { NULL, 0UL } +}; + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +/* + * print a printf-style message on stderr and terminate with exit status 1 + */ +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + + +/* + * c[i] = 2*a[i] + b[i] for every i in [0, size) + */ +void +saxpy(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } +} + +/* + * second, distinct copy of saxpy(): main() uses the addresses of the two + * functions as the bounds of the monitored code range + */ +void +saxpy2(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } +} + + + +/* + * allocate three zero-filled vectors of VECTOR_SIZE doubles, run saxpy() + * and saxpy2() once each over them, then release the vectors. + * Returns 0; exits through fatal_error() if an allocation fails. + */ +static int +do_test(void) +{ + unsigned long size; + double *a, *b, *c; + + size = VECTOR_SIZE; + + a = malloc(size*sizeof(double)); + b = malloc(size*sizeof(double)); + c = malloc(size*sizeof(double)); + + if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); + + memset(a, 0, size*sizeof(double)); + memset(b, 0, size*sizeof(double)); + memset(c, 0, size*sizeof(double)); + + saxpy(a,b,c, size); + saxpy2(a,b,c, size); + + /* the vectors are not used once both kernels have run: do not leak them */ + free(a); + free(b); + free(c); + + return 0; +} + +int +main(int argc, char **argv) +{ + event_desc_t *p; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita_input_param_t ita_inp; + pfmlib_ita_output_param_t ita_outp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_pmc_t ibrs[8]; + pfarg_ctx_t ctx[1]; + pfarg_load_t load_args; + unsigned long range_start, range_end; + pfmlib_options_t pfmlib_options; + struct fd { /* function descriptor */ + unsigned long addr; + unsigned long gp; + } *fd; + int ret, type = 0; + 
unsigned int i; + int id; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Compute the range we are interested in + * + * On IA-64, the function pointer does not point directly + * to the function but to a descriptor which contains two + * unsigned long: the first one is the actual start address + * of the function, the second is the gp (global pointer) + * to load into r1 before jumping into the function. Unlesss + * we're jumping into a shared library the gp is the same as + * the current gp. + * + * In the artificial example, we also rely on the compiler/linker + * NOT reordering code layout. We depend on saxpy2() being just + * after saxpy(). 
+ * + */ + fd = (struct fd *)saxpy; + range_start = fd->addr; + + fd = (struct fd *)saxpy2; + range_end = fd->addr; + + /* + * linker may reorder saxpy() and saxpy2() + */ + if (range_end < range_start) { + unsigned long tmp; + tmp = range_start; + range_start = range_end; + range_end = tmp; + } + + memset(pd, 0, sizeof(pd)); + memset(ctx, 0, sizeof(ctx)); + memset(ibrs,0, sizeof(ibrs)); + memset(&load_args,0, sizeof(load_args)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita_inp,0, sizeof(ita_inp)); + memset(&ita_outp,0, sizeof(ita_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; p->event_name ; i++, p++) { + if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event\n", p->event_name); + } + } + + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the data range we are interested in. The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields of rr_infos on return from the library call. + * + * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used. 
+ * + */ + + ita_inp.pfp_ita_irange.rr_used = 1; /* indicate we use code range restriction */ + ita_inp.pfp_ita_irange.rr_limits[0].rr_start = range_start; + ita_inp.pfp_ita_irange.rr_limits[0].rr_end = range_end; + + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, &ita_outp)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * print offsets + */ + printf("code range : [0x%016lx-0x%016lx)\n" + "start_offset:-0x%lx end_offset:+0x%lx\n" + "%d pairs of debug registers used\n", + range_start, + range_end, + ita_outp.pfp_ita_irange.rr_infos[0].rr_soff, + ita_outp.pfp_ita_irange.rr_infos[0].rr_eoff, + ita_outp.pfp_ita_irange.rr_nbr_used >> 1); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(ctx, NULL, NULL, 0); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("cannot create PFM context %s\n", strerror(errno)); + } + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + for (i=0; i < outp.pfp_pmd_count; i++) { + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + } + + /* + * propagate the setup for the debug registers from the library to the arguments + * to the syscall. The library does not know the type of the syscall + * anymore. 
IBRs are mapped to PMC256-PMC263 + */ + for (i=0; i < ita_outp.pfp_ita_drange.rr_nbr_used; i++) { + ibrs[i].reg_num = 256+ita_outp.pfp_ita_irange.rr_br[i].reg_num; + ibrs[i].reg_value = ita_outp.pfp_ita_irange.rr_br[i].reg_value; + } + + /* + * Program the code debug registers. + * + * IMPORTANT: programming the debug register MUST always be done before the PMCs + * otherwise the kernel will fail on PFM_WRITE_PMCS. This is for security reasons. + */ + if (pfm_write_pmcs(id, ibrs, ita_outp.pfp_ita_irange.rr_nbr_used) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than coutning monitors. + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(id, &load_args) == -1) { + fatal_error("pfm_load_context error errno %d\n",errno); + } + + /* + * Let's roll now. + * + * We run two distinct copies of the same function but we restrict measurement + * to the first one (saxpy). Therefore the expected count is half what you would + * get if code range restriction was not used. The core loop in both case uses + * two floating point operation per iteration. + */ + pfm_self_start(id); + + do_test(); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (pfm_read_pmds(id, pd, inp.pfp_event_count) == -1) { + fatal_error("pfm_read_pmds error errno %d\n",errno); + } + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. 
Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%-3u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, + event_list[i].expected_value); + } + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita_opcode.c b/src/libpfm-3.y/examples_v2.x/ia64/ita_opcode.c new file mode 100644 index 0000000..84c37c5 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita_opcode.c @@ -0,0 +1,272 @@ +/* + * ita_opcode.c - example of how to use the opcode matcher with the Itanium PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * sum the values loop-1 down to 0 and return the total; this is the + * counted loop the example measures. + * + * we don't use static to make sure the compiler does not inline the function + */ +int +do_test(unsigned long loop) +{ + unsigned long sum = 0; + while(loop--) sum += loop; + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +/* + * print a printf-style message on stderr and terminate with exit status 1 + */ +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(void) +{ + int ret; + int type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita_input_param_t ita_inp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_ctx_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + unsigned int i; + int id; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with the %s PMU\n", model); + } + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + + + /* + * clear every argument structure before use. pc[] must be cleared too: + * only reg_num and reg_value are filled in later, so without this the + * remaining fields would reach pfm_write_pmcs() uninitialized. + */ + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita_inp,0, 
sizeof(ita_inp)); + memset(&load_args,0, sizeof(load_args)); + + /* + * We indicate that we are using the PMC8 opcode matcher. This is required + * otherwise the library add PMC8 to the list of PMC to pogram during + * pfm_dispatch_events(). + */ + ita_inp.pfp_ita_pmc8.opcm_used = 1; + + /* + * We want to match all the br.cloop in our test function. + * This branch is an IP-relative branch for which the major + * opcode (bits [40-37]=4) and the btype field is 5 (which represents + * bits[6-8]) so it is included in the match/mask fields of PMC8. + * It is necessarily in a B slot. + * + * We don't care which operands are used with br.cloop therefore + * the mask field of pmc8 is set such that only the 4 bits of the + * opcode and 3 bits of btype must match exactly. This is accomplished by + * clearing the top 4 bits and bits [6-8] of the mask field and setting the + * remaining bits. Similarly, the match field only has the opcode value and btype + * set according to the encoding of br.cloop, the + * remaining bits are zero. Bit 60 of PMC8 is set to indicate + * that we look only in B slots (this is the only possibility for + * this instruction anyway). + * + * So the binary representation of the value for PMC8 is as follows: + * + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * ---------------------------------------------------------------- + * 0001010000000000000000101000000000000011111111111111000111111000 + * + * which yields a value of 0x1400028003fff1f8. + * + * Depending on the level of optimization to compile this code, it may + * be that the count reported could be zero, if the compiler uses a br.cond + * instead of br.cloop. + */ + ita_inp.pfp_ita_pmc8.pmc_val = 0x1400028003fff1f8; + + /* + * To count the number of occurence of this instruction, we must + * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 + * event. 
+ */ + if (pfm_find_full_event("IA64_TAGGED_INST_RETIRED_PMC8", &inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("Cannot find event IA64_TAGGED_INST_RETIRED_PMC8\n"); + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(ctx, NULL, NULL, 0); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_pmd_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. 
+ */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(id, &load_args) == -1) + fatal_error("pfm_load_context error errno %d\n",errno); + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(100UL); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (pfm_read_pmds(id, pd, inp.pfp_event_count) == -1) + fatal_error( "pfm_read_pmds error errno %d\n",errno); + + /* + * print the results + */ + pfm_get_full_event_name(&inp.pfp_events[0], name, MAX_EVT_NAME_LEN); + printf("PMD%-3u %20lu %s\n", + pd[0].reg_num, + pd[0].reg_value, + name); + + if (pd[0].reg_value != 0) + printf("compiler used br.cloop\n"); + else + printf("compiler did not use br.cloop\n"); + + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/ita_rr.c b/src/libpfm-3.y/examples_v2.x/ia64/ita_rr.c new file mode 100644 index 0000000..c3f67a9 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/ita_rr.c @@ -0,0 +1,407 @@ +/* + * ita_rr.c - example of how to use data range restriction with the Itanium PMU + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux/ia64. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define N_LOOP 100000000U + +#if defined(__ECC) && defined(__INTEL_COMPILER) + +/* if you do not have this file, your compiler is too old */ +#include + +#define clear_psr_ac() __rum(1UL<<3) + +#elif defined(__GNUC__) + +static inline void +clear_psr_ac(void) +{ + __asm__ __volatile__("rum psr.ac;;" ::: "memory" ); +} +#else +#error "You need to define clear_psr_ac() for your compiler" +#endif + +#define TEST_DATA_COUNT 16 + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_PMU_NAME_LEN 32 +#define MAX_EVT_NAME_LEN 128 + +typedef struct { + char *event_name; + unsigned long expected_value; +} event_desc_t; + + +static event_desc_t event_list[]={ + { "misaligned_loads_retired", N_LOOP }, + { "misaligned_stores_retired", N_LOOP }, + { NULL, 0UL} +}; + + +typedef union { + unsigned long l_tab[2]; + unsigned int i_tab[4]; + unsigned short s_tab[8]; + unsigned char c_tab[16]; +} test_data_t; + +static int +do_test(test_data_t *data) +{ + unsigned int *l, v; + + l = (unsigned int *)(data->c_tab+1); + + if (((unsigned long)l & 0x1) == 0) { + printf("Data is not unaligned, can't run test\n"); + return -1; + } + + v = *l; + v++; + *l = v; + + return 0; +} + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(int argc, char **argv) +{ + event_desc_t *p; + test_data_t *test_data, *test_data_fake; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_ita_input_param_t ita_inp; + pfmlib_ita_output_param_t ita_outp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_pmc_t dbrs[8]; + pfarg_ctx_t ctx[1]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + unsigned int i; + int id; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_ITANIUM_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + /* + * now let's allocate the data structure we will be monitoring + */ + test_data = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data == NULL) { + fatal_error("cannot allocate test data structure"); + } + test_data_fake = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data_fake == NULL) { + fatal_error("cannot allocate test data structure"); + } + /* + * Compute the range we are interested in + */ + range_start = (unsigned long)test_data; + range_end = range_start + sizeof(test_data_t)*TEST_DATA_COUNT; + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(ctx, 0, sizeof(ctx)); + 
memset(dbrs,0, sizeof(dbrs)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&ita_inp,0, sizeof(ita_inp)); + memset(&ita_outp,0, sizeof(ita_outp)); + + + /* + * find requested event + */ + p = event_list; + for (i=0; p->event_name ; i++, p++) { + if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event\n", p->event_name); + } + } + + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the data range we are interested in. The rr_end parameter + * must point to the byte after the last of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out, the actual + * start and end offsets of the generated range by checking the rr_soff and rr_eoff fields + * in the pfmlib_ita_output_param_t structure when coming back from the library call. + * + * Upon return, the pfmlib_ita_output_param_t.pfp_ita_drange.rr_dbr array is programmed and + * the number of entries used to cover the range is in rr_nbr_used. + */ + + /* + * We indicate that we are using a Data Range Restriction feature. 
+ * In this particular case this will cause, pfm_dispatch_events() to + * add pmc13 to the list of PMC registers to initialize and the + */ + + ita_inp.pfp_ita_drange.rr_used = 1; + ita_inp.pfp_ita_drange.rr_limits[0].rr_start = range_start; + ita_inp.pfp_ita_drange.rr_limits[0].rr_end = range_end; + + + /* + * use the library to find the monitors to use + * + * upon return, cnt contains the number of entries + * used in pc[]. + */ + if ((ret=pfm_dispatch_events(&inp, &ita_inp, &outp, &ita_outp)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + } + + printf("data range : [0x%016lx-0x%016lx): %d pair of debug registers used\n" + "start_offset:-0x%lx end_offset:+0x%lx\n", + range_start, + range_end, + ita_outp.pfp_ita_drange.rr_nbr_used >> 1, + ita_outp.pfp_ita_drange.rr_infos[0].rr_soff, + ita_outp.pfp_ita_drange.rr_infos[0].rr_eoff); + + printf("fake data range: [0x%016lx-0x%016lx)\n", + (unsigned long)test_data_fake, + (unsigned long)test_data_fake+sizeof(test_data_t)*TEST_DATA_COUNT); + + /* + * now create the context for self monitoring/per-task + */ + id =pfm_create_context(ctx, NULL, NULL, 0); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("cannot create PFM context %s\n", strerror(errno)); + } + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. 
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * the PMC controlling the event ALWAYS come first, that's why this loop + * is safe even when extra PMC are needed to support a particular event. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pd[i].reg_num = pc[i].reg_num; + } + + /* + * propagate the setup for the debug registers from the library to the arguments + * to the syscall. The library does not know the type of the syscall + * anymore. DBRS are ampped at PMC264+PMC271 + */ + for (i=0; i < ita_outp.pfp_ita_drange.rr_nbr_used; i++) { + dbrs[i].reg_num = 264+ita_outp.pfp_ita_drange.rr_br[i].reg_num; + dbrs[i].reg_value = ita_outp.pfp_ita_drange.rr_br[i].reg_value; + } + + /* + * Program the data debug registers. + */ + if (pfm_write_pmcs(id, dbrs, ita_outp.pfp_ita_drange.rr_nbr_used) == -1) { + fatal_error("pfm_write_pmcs error errno %d\n",errno); + } + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than coutning monitors. + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count) == -1) { + fatal_error("pfm_write_pmcs error errno %d\n",errno); + } + + if (pfm_write_pmds(id, pd, inp.pfp_event_count) == -1) { + fatal_error("pfm_write_pmds error errno %d\n",errno); + } + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(id, &load_args) == -1) { + fatal_error("pfm_load_context error errno %d\n",errno); + } + + /* + * Let's make sure that the hardware does the unaligned accesses (do not use the + * kernel software handler otherwise the PMU won't see the unaligned fault). + */ + clear_psr_ac(); + + /* + * Let's roll now. 
+ * + * The idea behind this test is to have two dynamically allocated data structures + * which are access in a unaligned fashion. But we want to capture only the unaligned + * accesses on one of the two. So the debug registers are programmed to cover the + * first one ONLY. Then we activate monotoring and access the two data structures. + * This is an artificial example just to demonstrate how to use data address range + * restrictions. + */ + pfm_self_start(id); + for (i=0; i < N_LOOP; i++) { + do_test(test_data); + do_test(test_data_fake); + } + + pfm_self_stop(id); + + /* + * now read the results + */ + if (pfm_read_pmds(id, pd, inp.pfp_event_count) == -1) { + fatal_error( "pfm_read_pmds error errno %d\n",errno); + } + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + * + * For this example, we expect to see a value of 1 for both misaligned loads + * and misaligned stores. But it can be two when the test_data and test_data_fake + * are allocate very close from each other and the range created with the debug + * registers is larger then test_data. 
+ * + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, event_list[i].expected_value); + + if (pd[i].reg_value != event_list[i].expected_value) { + printf("error: Result should be %lu for %s\n", event_list[i].expected_value, name); + break; + } + } + /* + * let's stop this now + */ + close(id); + + free(test_data); + free(test_data_fake); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/mont_dear.c b/src/libpfm-3.y/examples_v2.x/ia64/mont_dear.c new file mode 100644 index 0000000..5bc039f --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/mont_dear.c @@ -0,0 +1,412 @@ +/* + * mont_dear.c - example of how use the D-EAR with the Dual-core Itanium 2 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define SMPL_PERIOD (40) + +#define EVENT_NAME "data_ear_cache_lat4" + +typedef pfm_dfl_smpl_hdr_t dear_hdr_t; +typedef pfm_dfl_smpl_entry_t dear_entry_t; +typedef pfm_dfl_smpl_arg_t dear_smpl_arg_t; + +static void *smpl_vaddr; +static unsigned long entry_size; +static int id; + +#define BPL (sizeof(uint64_t)<<3) +#define LBPL 6 + +static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) +{ + bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); +} + +long +do_test(unsigned long size) +{ + unsigned long i, sum = 0; + int *array; + + printf("buffer size %.1fMB\n", (size*sizeof(int))/1024.0); + array = (int *)malloc(size * sizeof(int)); + if (array == NULL ) { + printf("line = %d No memory available!\n", __LINE__); + exit(1); + } + for(i=0; ihdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + count = hdr->hdr_count; + + /* + * walk through all the entries recored in the buffer + */ + while(count--) { + ret = 0; + + ent = (dear_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->tgid, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry 
header) + */ + reg = (pfm_mont_pmd_reg_t*)(ent+1); + + safe_printf("PMD32: 0x%016lx\n", reg->pmd32_mont_reg.dear_daddr); + + reg++; + + safe_printf("PMD33: 0x%016lx, latency %u\n", + reg->pmd_val, + reg->pmd33_mont_reg.dear_latency); + + reg++; + + safe_printf("PMD36: 0x%016lx, valid %c, address 0x%016lx\n", + reg->pmd_val, + reg->pmd36_mont_reg.dear_vl ? 'Y': 'N', + (reg->pmd36_mont_reg.dear_iaddr << 4) | + (unsigned long)reg->pmd36_mont_reg.dear_slot); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + process_smpl_buffer(); + /* + * And resume monitoring + */ + if (pfm_restart(id)) + fatal_error("pfm_restart"); +} + +int +main(void) +{ + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_ctx_t ctx; + dear_smpl_arg_t buf_arg; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + int ret, type = 0; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, 
sizeof(ctx)); + memset(&buf_arg, 0, sizeof(buf_arg)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + /* + * To count the number of occurence of this instruction, we must + * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 + * event. + */ + if (pfm_find_full_event(EVENT_NAME, &inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find event %s\n", EVENT_NAME); + + /* + * set the (global) privilege mode: + * PFM_PLM0 : kernel level only + */ + inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + * + * We use all global settings for this EAR. + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * prepare context structure. + * + * format specific parameters MUST be concatenated to the regular + * pfarg_ctx_t structure. For convenience, the default sampling + * format provides a data structure that already combines the pfarg_ctx_t + * with what is needed fot this format. + */ + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. 
+ */ + buf_arg.buf_size = getpagesize(); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(&ctx, "default", &buf_arg, sizeof(buf_arg)); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * retrieve the virtual address at which the sampling + * buffer has been mapped + */ + smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); + if (smpl_vaddr == MAP_FAILED) + fatal_error("cannot mmap sampling buffer errno %d\n", errno); + + printf("Sampling buffer mapped at %p\n", smpl_vaddr); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. 
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * indicate we want notification when buffer is full + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + pfm_bv_set(pd[0].reg_smpl_pmds, 32); + pfm_bv_set(pd[0].reg_smpl_pmds, 33); + pfm_bv_set(pd[0].reg_smpl_pmds, 36); + entry_size = sizeof(dear_entry_t) + 3 * 8; + + /* + * initialize the PMD and the sampling period + */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * Now program the registers + * + * We don't use the save variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more thann coutning monitors. + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * attach context to stopped task + */ + load_args.load_pid = getpid(); + if (pfm_load_context(id, &load_args)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(100000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer, i.e. 
which may not be full + */ + process_smpl_buffer(); + + /* + * let's stop this now + */ + munmap(smpl_vaddr, (size_t)buf_arg.buf_size); + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/mont_etb.c b/src/libpfm-3.y/examples_v2.x/ia64/mont_etb.c new file mode 100644 index 0000000..d8a2a5f --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/mont_etb.c @@ -0,0 +1,498 @@ +/* + * mont_btb.c - example of how use the ETB with the Dual-Core Itanium 2 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +typedef pfm_dfl_smpl_hdr_t etb_hdr_t; +typedef pfm_dfl_smpl_entry_t etb_entry_t; +typedef pfm_dfl_smpl_arg_t smpl_arg_t; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * The ETB_EVENT is increment by 1 for each branch event. Such event is composed of + * two entries in the ETB: a source and a target entry. The ETB is full after 4 branch + * events. + */ +#define SMPL_PERIOD (4UL*256) + +/* + * We use a small buffer size to exercise the overflow handler + */ +#define SMPL_BUF_NENTRIES 64 + +static void *smpl_vaddr; +static size_t entry_size; +static int id; + +#define BPL (sizeof(uint64_t)<<3) +#define LBPL 6 + +static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) +{ + bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); +} + +/* + * we don't use static to make sure the compiler does not inline the function + */ +long func1(void) { return random();} +long func2(void) { return random();} + +long +do_test(unsigned long loop) +{ + long sum = 0; + + while(loop--) { + if (loop & 0x1) + sum += func1(); + else + sum += loop + func2(); + } + return sum; +} + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * print content of sampling buffer + * + * XXX: using stdio to print from a signal handler is not safe with multi-threaded + * applications + */ +#define safe_printf printf + +static void +show_etb_reg(int j, pfm_mont_pmd_reg_t reg, pfm_mont_pmd_reg_t pmd39) +{ + unsigned long bruflush, b1, etb_ext; + unsigned long addr; + int is_valid; + + is_valid = reg.pmd48_63_etb_mont_reg.etb_s == 0 && reg.pmd48_63_etb_mont_reg.etb_mp == 0 ? 
0 : 1; + + /* + * the joy of the ETB extension register layout! + */ + if (j < 8) + etb_ext = (pmd39.pmd_val>>(8*j)) & 0xf; + else + etb_ext = (pmd39.pmd_val>>(4+8*(j-8))) & 0xf; + + b1 = etb_ext & 0x1; + bruflush = (etb_ext >> 1) & 0x1; + + safe_printf("\tPMD%-2d: 0x%016lx s=%d mp=%d bru=%ld b1=%ld valid=%c\n", + j+48, + reg.pmd_val, + reg.pmd48_63_etb_mont_reg.etb_s, + reg.pmd48_63_etb_mont_reg.etb_mp, + bruflush, b1, + is_valid ? 'Y' : 'N'); + + + if (!is_valid) return; + + if (reg.pmd48_63_etb_mont_reg.etb_s) { + addr = (reg.pmd48_63_etb_mont_reg.etb_addr+b1)<<4; + addr |= reg.pmd48_63_etb_mont_reg.etb_slot < 3 ? reg.pmd48_63_etb_mont_reg.etb_slot : 0; + + safe_printf("\t Source Address: 0x%016lx\n" + "\t Taken=%c Prediction:%s\n\n", + addr, + reg.pmd48_63_etb_mont_reg.etb_slot < 3 ? 'Y' : 'N', + reg.pmd48_63_etb_mont_reg.etb_mp ? "FE Failure" : + bruflush ? "BE Failure" : "Success"); + } else { + safe_printf("\t Target Address:0x%016lx\n\n", + (unsigned long)(reg.pmd48_63_etb_mont_reg.etb_addr<<4)); + } +} + +static void +show_etb(pfm_mont_pmd_reg_t *etb) +{ + int i, last; + pfm_mont_pmd_reg_t pmd38, pmd39; + + pmd38.pmd_val = etb[0].pmd_val; + pmd39.pmd_val = etb[1].pmd_val; + + i = pmd38.pmd38_mont_reg.etbi_full ? pmd38.pmd38_mont_reg.etbi_ebi : 0; + last = pmd38.pmd38_mont_reg.etbi_ebi; + + safe_printf("btb_trace: i=%d last=%d bbi=%d full=%d\n", + i, + last, + pmd38.pmd38_mont_reg.etbi_ebi, + pmd38.pmd38_mont_reg.etbi_full); + + /* + * i+2 = skip over PMD38/pmd39 + */ + do { + show_etb_reg(i, etb[i+2], pmd39); + i = (i+1) % 16; + } while (i != last); +} + +void +process_smpl_buffer(void) +{ + etb_hdr_t *hdr; + etb_entry_t *ent; + unsigned long pos; + unsigned long smpl_entry = 0; + pfm_mont_pmd_reg_t *reg; + size_t count; + static unsigned long last_ovfl = ~0UL; + + + hdr = (etb_hdr_t *)smpl_vaddr; + + /* + * check that we are not diplaying the previous set of samples again. + * Required to take care of the last batch of samples. 
+ */ + if (hdr->hdr_overflows <= last_ovfl && last_ovfl != ~0UL) { + printf("skipping identical set of samples %lu <= %lu\n", hdr->hdr_overflows, last_ovfl); + return; + } + + pos = (unsigned long)(hdr+1); + count = hdr->hdr_count; + /* + * walk through all the entries recored in the buffer + */ + while(count--) { + + ent = (etb_entry_t *)pos; + /* + * print entry header + */ + safe_printf("Entry %ld PID:%d TID:%d CPU:%d STAMP:0x%lx IIP:0x%016lx\n", + smpl_entry++, + ent->tgid, + ent->pid, + ent->cpu, + ent->tstamp, + ent->ip); + + /* + * point to first recorded register (always contiguous with entry header) + */ + reg = (pfm_mont_pmd_reg_t*)(ent+1); + + /* + * in this particular example, we have pmd48-pmd63 has the ETB. We have also + * included pmd38/pmd39 (ETB index and extenseion) has part of the registers + * to record. This trick allows us to get the index to decode the sequential + * order of the ETB. + * + * Recorded registers are always recorded in increasing index order. So we know + * that where to find pmd38/pmd39. 
+ */ + show_etb(reg); + + /* + * move to next entry + */ + pos += entry_size; + } +} + +static void +overflow_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + process_smpl_buffer(); + + /* + * And resume monitoring + */ + if (pfm_restart(id)) + fatal_error("pfm_restart errno %d\n", errno); +} + + +int +main(void) +{ + int ret; + int type = 0; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_mont_input_param_t mont_inp; + pfarg_ctx_t ctx; + smpl_arg_t buf_arg; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + + /* + * Install the overflow handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)overflow_handler; + sigaction (SIGIO, &act, 0); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 0; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + memset(pd, 0, sizeof(pd)); + memset(&ctx, 0, sizeof(ctx)); + memset(&buf_arg, 0, sizeof(buf_arg)); + + /* + * prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&mont_inp,0, sizeof(mont_inp)); + + /* + * Before calling pfm_find_dispatch(), we must specify what kind + * of branches we want to capture. 
We are interested in all taken + * branches * therefore we program we set the various fields to: + */ + mont_inp.pfp_mont_etb.etb_used = 1; + + mont_inp.pfp_mont_etb.etb_tm = 0x2; + mont_inp.pfp_mont_etb.etb_ptm = 0x3; + mont_inp.pfp_mont_etb.etb_ppm = 0x3; + mont_inp.pfp_mont_etb.etb_brt = 0x0; + mont_inp.pfp_mont_etb.etb_plm = PFM_PLM3; + + if (pfm_find_full_event("ETB_EVENT", &inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find event ETB_EVENT\n"); + + /* + * set the (global) privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * the size of the buffer is indicated in bytes (not entries). + * + * The kernel will record into the buffer up to a certain point. + * No partial samples are ever recorded. + */ + buf_arg.buf_size = getpagesize(); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(&ctx, "default", &buf_arg, sizeof(buf_arg)); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + /* + * retrieve the virtual address at which the sampling + * buffer has been mapped + */ + smpl_vaddr = mmap(NULL, (size_t)buf_arg.buf_size, PROT_READ, MAP_PRIVATE, id, 0); + if (smpl_vaddr == MAP_FAILED) + fatal_error("cannot mmap sampling buffer errno %d\n", errno); + + printf("Sampling buffer mapped at %p\n", smpl_vaddr); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must pfp_pmc_count to determine the number of PMC to intialize. + * We must use pfp_event_count to determine the number of PMD to initialize. 
+ * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + * PMD38 is part of the set of used PMD returned by libpfm. + * It will be reset automatically + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + /* + * indicate we want notification when buffer is full and randomization + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY | PFM_REGFL_RANDOM; + + /* + * Now prepare the argument to initialize the PMD and the sampling period + * We know we use only one PMD in this case, therefore pmd[0] corresponds + * to our first event which is our sampling period. + */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * populate our smpl_pmds bitmask to include all of the ETB PMDs, + * including index, extensions + */ + pfm_bv_set(pd[0].reg_smpl_pmds, 38); + pfm_bv_set(pd[0].reg_smpl_pmds, 39); + + entry_size = sizeof(etb_entry_t) + 2 * 8; + + for(i=48; i < 64; i++) { + pfm_bv_set(pd[0].reg_smpl_pmds, i); + entry_size += 8; + } + + /* + * When our counter overflows, we want to ETB index to be reset, so that we keep + * in sync. 
+ */ + pfm_bv_set(pd[0].reg_reset_pmds, 38); + + /* + * Now program the registers + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (pfm_load_context(id, &load_args)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(id, F_SETFL, fcntl(id, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(id, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now. + */ + pfm_self_start(id); + + do_test(1000); + + pfm_self_stop(id); + + /* + * We must call the processing routine to cover the last entries recorded + * in the sampling buffer. Note that the buffer may not be full at this point. + * + */ + process_smpl_buffer(); + /* + * let's stop this now + */ + munmap(smpl_vaddr, (size_t)buf_arg.buf_size); + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/mont_irr.c b/src/libpfm-3.y/examples_v2.x/ia64/mont_irr.c new file mode 100644 index 0000000..d27c939 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/mont_irr.c @@ -0,0 +1,374 @@ +/* + * mont_irr.c - example of how to use code range restriction with the Dual-Core Itanium 2 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define VECTOR_SIZE 1000000UL + +typedef struct { + char *event_name; + unsigned long expected_value; +} event_desc_t; + +static event_desc_t event_list[]={ + { "fp_ops_retired", VECTOR_SIZE<<1 }, + { NULL, 0UL } +}; + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + + +void +saxpy(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } + printf("saxpy done\n"); +} + +void +saxpy2(double *a, double *b, double *c, unsigned long size) +{ + unsigned long i; + + for(i=0; i < size; i++) { + c[i] = 2*a[i] + b[i]; + } + printf("saxpy2 done\n"); +} + + + +static int +do_test(void) +{ + unsigned long size; + double *a, *b, *c; + + size = VECTOR_SIZE; + + a = malloc(size*sizeof(double)); + b = malloc(size*sizeof(double)); + c = malloc(size*sizeof(double)); + + if (a == NULL || b == NULL || c == NULL) fatal_error("Cannot allocate vectors\n"); + + memset(a, 0, size*sizeof(double)); + memset(b, 0, size*sizeof(double)); + memset(c, 0, size*sizeof(double)); + + saxpy(a,b,c, size); + saxpy2(a,b,c, size); + + return 0; +} + +int +main(int argc, char **argv) +{ + event_desc_t *p; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_mont_input_param_t mont_inp; + pfmlib_mont_output_param_t mont_outp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_pmc_t ibrs[8]; + pfarg_ctx_t ctx; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct fd { /* function descriptor */ + unsigned long addr; + unsigned long gp; + } *fd; + unsigned int i; + int id; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + 
memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 1; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Compute the range we are interested in + * + * On IA-64, the function pointer does not point directly + * to the function but to a descriptor which contains two + * unsigned long: the first one is the actual start address + * of the function, the second is the gp (global pointer) + * to load into r1 before jumping into the function. Unless + * we're jumping into a shared library the gp is the same as + * the current gp. + * + * In the artificial example, we also rely on the compiler/linker + * NOT reordering code layout. We depend on saxpy2() being just + * after saxpy(). + * + */ + fd = (struct fd *)saxpy; + range_start = fd->addr; + + fd = (struct fd *)saxpy2; + range_end = fd->addr; + + /* + * linker may reorder saxpy() and saxpy2() + */ + if (range_end < range_start) { + unsigned long tmp; + tmp = range_start; + range_start = range_end; + range_end = tmp; + } + + memset(pc, 0, sizeof(pc)); + memset(pd, 0, sizeof(pd)); + memset(&ctx, 0, sizeof(ctx)); + memset(ibrs,0, sizeof(ibrs)); + memset(&load_args,0, sizeof(load_args)); + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&mont_inp,0, sizeof(mont_inp)); + memset(&mont_outp,0, sizeof(mont_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; p->event_name ; i++, p++) { + if (pfm_find_event(p->event_name, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("cannot find %s event\n", p->event_name); + } + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the data range we are interested in.
The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields on return from the library call. + * + * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used. + * + * In the case of code range restriction on Itanium 2, the library will try to use the fine + * mode first and then it will default to using multiple pairs to cover the range. + */ + + mont_inp.pfp_mont_irange.rr_used = 1; /* indicate we use code range restriction */ + mont_inp.pfp_mont_irange.rr_limits[0].rr_start = range_start; + mont_inp.pfp_mont_irange.rr_limits[0].rr_end = range_end; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, &mont_outp)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * print offsets + */ + printf("code range : [0x%016lx-0x%016lx)\n" + "start_offset:-0x%lx end_offset:+0x%lx\n" + "%d pairs of debug registers used\n", + range_start, + range_end, + mont_outp.pfp_mont_irange.rr_infos[0].rr_soff, + mont_outp.pfp_mont_irange.rr_infos[0].rr_eoff, + mont_outp.pfp_mont_irange.rr_nbr_used >> 1); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(&ctx, NULL, NULL, 0); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + 
/* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * propagate IBR settings. IBRS are mapped to PMC256-PMC263 + */ + for (i=0; i < mont_outp.pfp_mont_irange.rr_nbr_used; i++) { + ibrs[i].reg_num = 256+mont_outp.pfp_mont_irange.rr_br[i].reg_num; + ibrs[i].reg_value = mont_outp.pfp_mont_irange.rr_br[i].reg_value; + } + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count)) + fatal_error("child: pfm_write_pmcs error errno %d\n",errno); + + /* + * Program the code debug registers. + */ + if (pfm_write_pmcs (id, ibrs, mont_outp.pfp_mont_irange.rr_nbr_used)) + fatal_error("child: pfm_write_pmcs error for IBRS errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count) == -1) + fatal_error("child: pfm_write_pmds error errno %d\n",errno); + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + if (pfm_load_context(id, &load_args)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * Let's roll now. + * + * We run two distinct copies of the same function but we restrict measurement + * to the first one (saxpy).
Therefore the expected count is half what you would + * get if code range restriction was not used. The core loop in both case uses + * two floating point operation per iteration. + */ + pfm_self_start(id); + + do_test(); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (pfm_read_pmds(id, pd, inp.pfp_event_count) == -1) { + fatal_error( "pfm_read_pmds error errno %d\n",errno); + } + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%-3u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, event_list[i].expected_value); + } + /* + * let's stop this now + */ + close(id); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/mont_opcode.c b/src/libpfm-3.y/examples_v2.x/ia64/mont_opcode.c new file mode 100644 index 0000000..cd7ee7c --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/mont_opcode.c @@ -0,0 +1,264 @@ +/* + * mont_opcode.c - example of how to use the opcode matcher with the Dual-Core Itanium 2 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define OPCM_EVENT "IA64_TAGGED_INST_RETIRED_IBRP0_PMC32_33" + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +#define NLOOP 200UL + +/* + * we don't use static to make sure the compiler does not inline the function + */ +int +do_test(unsigned long loop) +{ + unsigned long sum = 0; + while(loop--) sum += loop; + return sum; +} + + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(void) +{ + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_mont_input_param_t mont_inp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_ctx_t ctx; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + int ret; + int type = 0; + int id; + unsigned int i; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with the %s PMU\n", model); + } + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&mont_inp,0, sizeof(mont_inp)); + + /* + * We indicate that we are using the first opcode matcher (PMC32/PMC33). + */ + mont_inp.pfp_mont_opcm1.opcm_used = 1; + + /* + * We want to match all the br.cloop in our test function. + * This branch is an IP-relative branch for which the major + * opcode (bits [40-37]) is 4 and the btype field (bits[6-8]) is 5. + * We ignore all the other fields in the opcode. + * + * On Montecito, the opcode matcher covers the full 41 bits of each + * instruction but we'll ignore them in this example. 
Hence the + * match value is: + * + * match = (4<<37)| (5<<6) = 0x8000000140 + * + * On Montecito, the match field covers the full 41 bits of each instruction. + * But for this example, we only care about the major and btype field, + * and we ignore all other bits. When a bit is set in the mask it means + * that the corresponding match bit value is a "don't care". A bit + * with value of zero indicates that the corresponding match bit + * must match. Hence we build the following mask: + * + * mask = ~((0xf<<37) | (0x3<<6)) = 0x1fffffff3f; + * + * The 0xf comes from the fact that major opcode is 4-bit wide. + * The 0x3 comes from the fact that btype is 3-bit wide. + */ + mont_inp.pfp_mont_opcm1.opcm_b = 1; + mont_inp.pfp_mont_opcm1.opcm_match = 0x8000000140; + mont_inp.pfp_mont_opcm1.opcm_mask = 0x1fffffff3f; + + /* + * To count the number of occurrences of this instruction, we must + * program a counting monitor with the IA64_TAGGED_INST_RETIRED_PMC8 + * event. + */ + if (pfm_find_full_event(OPCM_EVENT, &inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find event %s\n", OPCM_EVENT); + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = 1; + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(&ctx, NULL, NULL, 0); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * Now prepare the argument to initialize the PMDs and PMCS.
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (pfm_write_pmcs(id, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(id, &load_args)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * Let's roll now.
+ */ + pfm_self_start(id); + + do_test(NLOOP); + + pfm_self_stop(id); + + /* + * now read the results + */ + if (pfm_read_pmds(id, pd, inp.pfp_event_count)) + fatal_error("pfm_read_pmds error errno %d\n",errno); + + /* + * print the results + */ + pfm_get_full_event_name(&inp.pfp_events[0], name, MAX_EVT_NAME_LEN); + + printf("PMD%-3u %20lu %s (expected %lu)\n", + pd[0].reg_num, + pd[0].reg_value, + name, NLOOP); + + if (pd[0].reg_value != 0) + printf("compiler used br.cloop\n"); + else + printf("compiler did not use br.cloop\n"); + + /* + * let's stop this now + */ + close(id); + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/ia64/mont_rr.c b/src/libpfm-3.y/examples_v2.x/ia64/mont_rr.c new file mode 100644 index 0000000..1bf6a43 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/ia64/mont_rr.c @@ -0,0 +1,376 @@ +/* + * mont_rr.c - example of how to use data range restriction with the Itanium 3 PMU + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if defined(__ECC) && defined(__INTEL_COMPILER) +/* if you do not have this file, your compiler is too old */ +#include + +#define clear_psr_ac() __rum(1UL<<3) + +#elif defined(__GNUC__) + +static inline void +clear_psr_ac(void) +{ + __asm__ __volatile__("rum psr.ac;;" ::: "memory" ); +} +#else +#error "You need to define clear_psr_ac() for your compiler" +#endif + + + +#define TEST_DATA_COUNT 16 +#define N_LOOP 100000000UL + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +#define MAX_EVT_NAME_LEN 128 +#define MAX_PMU_NAME_LEN 32 + +/* + * here we capture only misaligned_loads because it cannot + * be measured with misaligned_stores_retired at the same time + */ +static char *event_list[]={ + "misaligned_loads_retired", + NULL +}; + + +typedef union { + unsigned long l_tab[2]; + unsigned int i_tab[4]; + unsigned short s_tab[8]; + unsigned char c_tab[16]; +} test_data_t; + +static int +do_test(test_data_t *data) +{ + unsigned int *l, v; + + l = (unsigned int *)(data->c_tab+1); + + if (((unsigned long)l & 0x1) == 0) { + printf("Data is not unaligned, can't run test\n"); + return -1; + } + + v = *l; + v++; + *l = v; + + return 0; +} + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +int +main(int argc, char **argv) +{ + char **p; + test_data_t *test_data, *test_data_fake; + unsigned long range_start, range_end; + int ret, type = 0; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_mont_input_param_t mont_inp; + pfmlib_mont_output_param_t mont_outp; + pfarg_pmd_t pd[NUM_PMDS]; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_ctx_t ctx; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + unsigned int i; + int id, num_pmcs = 0; + char name[MAX_EVT_NAME_LEN]; + + /* + * Initialize pfm library (required before we can use it) + */ + if (pfm_initialize() != PFMLIB_SUCCESS) + fatal_error("Can't initialize library\n"); + + /* + * Let's make sure we run this on the right CPU family + */ + pfm_get_pmu_type(&type); + if (type != PFMLIB_MONTECITO_PMU) { + char model[MAX_PMU_NAME_LEN]; + pfm_get_pmu_name(model, MAX_PMU_NAME_LEN); + fatal_error("this program does not work with %s PMU\n", model); + } + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for debug */ + pfm_set_options(&pfmlib_options); + + /* + * now let's allocate the data structure we will be monitoring + */ + test_data = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data == NULL) + fatal_error("cannot allocate test data structure"); + + test_data_fake = (test_data_t *)malloc(sizeof(test_data_t)*TEST_DATA_COUNT); + if (test_data_fake == NULL) + fatal_error("cannot allocate test data structure"); + + /* + * Compute the range we are interested in + */ + range_start = (unsigned long)test_data; + range_end = range_start + sizeof(test_data_t)*TEST_DATA_COUNT; + + memset(pd, 0, sizeof(pd)); + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + + /* + * 
prepare parameters to library. we don't use any Itanium + * specific features here. so the pfp_model is NULL. + */ + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + memset(&mont_inp,0, sizeof(mont_inp)); + memset(&mont_outp,0, sizeof(mont_outp)); + + /* + * find requested event + */ + p = event_list; + for (i=0; *p ; i++, p++) { + if (pfm_find_event(*p, &inp.pfp_events[i].event) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event\n", *p); + } + } + + /* + * set the privilege mode: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + /* + * We use the library to figure out how to program the debug registers + * to cover the data range we are interested in. The rr_end parameter + * must point to the byte after the last element of the range (C-style range). + * + * Because of the masking mechanism and therefore alignment constraints used to implement + * this feature, it may not be possible to exactly cover a given range. It may be that + * the coverage exceeds the desired range. So it is possible to capture noise if + * the surrounding addresses are also heavily used. You can figure out by how much the + * actual range is off compared to the requested range by checking the rr_soff and rr_eoff + * fields in rr_infos on return from the library call. + * + * Upon return, the rr_dbr array is programmed and the number of debug registers (not pairs) + * used to cover the range is in rr_nbr_used. 
+ */ + + mont_inp.pfp_mont_drange.rr_used = 1; + mont_inp.pfp_mont_drange.rr_limits[0].rr_start = range_start; + mont_inp.pfp_mont_drange.rr_limits[0].rr_end = range_end; + + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, &mont_inp, &outp, &mont_outp)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); + + printf("data range : [0x%016lx-0x%016lx): %d pair of debug registers used\n" + "start_offset:-0x%lx end_offset:+0x%lx\n", + range_start, + range_end, + mont_outp.pfp_mont_drange.rr_nbr_used >> 1, + mont_outp.pfp_mont_drange.rr_infos[0].rr_soff, + mont_outp.pfp_mont_drange.rr_infos[0].rr_eoff); + + printf("fake data range: [0x%016lx-0x%016lx)\n", + (unsigned long)test_data_fake, + (unsigned long)test_data_fake+sizeof(test_data_t)*TEST_DATA_COUNT); + + /* + * now create the context for self monitoring/per-task + */ + id = pfm_create_context(&ctx, NULL, NULL, 0); + if (id == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * Now prepare the argument to initialize the PMDs and PMCS. + * We must use pfp_pmc_count to determine the number of PMC to initialize. + * We must use pfp_event_count to determine the number of PMD to initialize. + * Some events cause extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. + * + * This step is new compared to libpfm-2.x. It is necessary because the library no + * longer knows about the kernel data structures. + */ + + for (i=0; i < outp.pfp_pmc_count; i++, num_pmcs++) { + pc[num_pmcs].reg_num = outp.pfp_pmcs[i].reg_num; + pc[num_pmcs].reg_value = outp.pfp_pmcs[i].reg_value; + } + + /* + * propagate the setup for the data debug registers.
DBRS are mapped + * at PMC264-PMC271 + */ + for (i=0; i < mont_outp.pfp_mont_drange.rr_nbr_used; i++, num_pmcs++) { + pc[num_pmcs].reg_num = 264+mont_outp.pfp_mont_drange.rr_br[i].reg_num; + pc[num_pmcs].reg_value = mont_outp.pfp_mont_drange.rr_br[i].reg_value; + } + + /* + * figure out pmd mapping from output pmc + */ + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + + /* + * Now program the registers + * + * We don't use the same variable to indicate the number of elements passed to + * the kernel because, as we said earlier, pc may contain more elements than + * the number of events we specified, i.e., contains more than counting monitors. + */ + if (pfm_write_pmcs(id, pc, num_pmcs)) + fatal_error("child: pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(id, pd, outp.pfp_pmd_count)) + fatal_error( "child: pfm_write_pmds error errno %d\n",errno); + + /* + * now we load (i.e., attach) the context to ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(id, &load_args)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * Let's make sure that the hardware does the unaligned accesses (do not use the + * kernel software handler otherwise the PMU won't see the unaligned fault). + */ + clear_psr_ac(); + + /* + * Let's roll now. + * + * The idea behind this test is to have two dynamically allocated data structures + * which are accessed in an unaligned fashion. But we want to capture only the unaligned + * accesses on one of the two. So the debug registers are programmed to cover the + * first one ONLY. Then we activate monitoring and access the two data structures. + * This is an artificial example just to demonstrate how to use data address range + * restrictions.
+ */ + pfm_self_start(id); + + for(i=0; i < N_LOOP; i++) { + do_test(test_data); + do_test(test_data_fake); + } + + pfm_self_stop(id); + + /* + * now read the results + */ + if (pfm_read_pmds(id, pd, inp.pfp_event_count)) + fatal_error("pfm_read_pmds error errno %d\n",errno); + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + * + * For this example, we expect to see a value of 1 for misaligned loads. + * But it can be two when the test_data and test_data_fake + * are allocated very close from each other and the range created with the debug + * registers is larger then test_data. + * + */ + for (i=0; i < inp.pfp_event_count; i++) { + pfm_get_full_event_name(&inp.pfp_events[i], name, MAX_EVT_NAME_LEN); + printf("PMD%-3u %20lu %s (expected %lu)\n", + pd[i].reg_num, + pd[i].reg_value, + name, N_LOOP); + + if (pd[i].reg_value != N_LOOP) { + printf("error: Result should be 1 for %s\n", name); + break; + } + } + /* + * let's stop this now + */ + close(id); + free(test_data); + free(test_data_fake); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/multiplex.c b/src/libpfm-3.y/examples_v2.x/multiplex.c new file mode 100644 index 0000000..6164f27 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/multiplex.c @@ -0,0 +1,1147 @@ +/* + * multiplex2.c - example of kernel-level time-based or overflow-based event multiplexing + * + * Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#ifndef _GNU_SOURCE + #define _GNU_SOURCE /* for getline */ +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "detect_pmcs.h" + +#define MIN_FULL_PERIODS 2 + +#define MAX_EVT_NAME_LEN 128 + +#define MULTIPLEX_VERSION "0.2" + +#define SMPL_FREQ_IN_HZ 300 + +#define NUM_PMCS PMU_MAX_PMCS +#define NUM_PMDS PMU_MAX_PMDS + +#define MAX_NUM_COUNTERS NUM_PMDS +#define MAX_PMU_NAME_LEN 32 + +typedef struct { + struct { + int opt_plm; /* which privilege level to monitor (more than one possible) */ + int opt_debug; /* print debug information */ + int opt_verbose; /* verbose output */ + int opt_us_format; /* print large numbers with comma for thousands */ + int opt_ovfl_switch; /* overflow-based switching */ + int opt_is_system; /* use system-wide */ + int opt_intr_only; /* interrupts only*/ + int opt_no_cmd_out; /* redirect cmd output to /dev/null */ + int opt_no_header; /* no header */ + } program_opt_flags; + + unsigned long max_counters; /* maximum number of counter for the platform */ + unsigned long session_timeout; + uint64_t smpl_period; + uint32_t smpl_freq; + + unsigned long cpu_mhz; + + pid_t attach_pid; + int pin_cmd_cpu; + int pin_cpu; + struct timespec switch_timeout; +} program_options_t; + +#define opt_plm program_opt_flags.opt_plm +#define opt_debug program_opt_flags.opt_debug +#define opt_verbose 
program_opt_flags.opt_verbose +#define opt_us_format program_opt_flags.opt_us_format +#define opt_ovfl_switch program_opt_flags.opt_ovfl_switch +#define opt_is_system program_opt_flags.opt_is_system +#define opt_intr_only program_opt_flags.opt_intr_only +#define opt_no_cmd_out program_opt_flags.opt_no_cmd_out +#define opt_no_header program_opt_flags.opt_no_header + +typedef struct _event_set_t { + struct _event_set_t *next; + unsigned short id; + unsigned int n_events; + unsigned int pmcs_base; + unsigned int pmds_base; + int npmcs; + int npmds; + unsigned long set_runs; + char *event_str; +} event_set_t; + +static program_options_t options; + +static pfarg_pmc_t *all_pmcs; +static pfarg_pmd_t *all_pmds; +static uint64_t *all_values; +static event_set_t *current_set, *all_sets; + +static unsigned int num_pmds, num_pmcs, num_sets, total_events; +static unsigned long full_periods; +static volatile int time_to_quit; +static jmp_buf jbuf; + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +vbprintf(char *fmt, ...) +{ + va_list ap; + + if (options.opt_verbose == 0) return; + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * unreliable for CPU with variable clock speed + */ +static unsigned long +get_cpu_speed(void) +{ + FILE *fp1; + unsigned long f1 = 0, f2 = 0; + char buffer[128], *p, *value; + + memset(buffer, 0, sizeof(buffer)); + + fp1 = fopen("/proc/cpuinfo", "r"); + if (fp1 == NULL) return 0; + + for (;;) { + buffer[0] = '\0'; + + p = fgets(buffer, 127, fp1); + if (p == NULL) + break; + + /* skip blank lines */ + if (*p == '\n') continue; + + p = strchr(buffer, ':'); + if (p == NULL) + break; + + /* + * p+2: +1 = space, +2= firt character + * strlen()-1 gets rid of \n + */ + *p = '\0'; + value = p+2; + + value[strlen(value)-1] = '\0'; + + if (!strncmp("cpu MHz", buffer, 7)) { + float fl; + sscanf(value, "%f", &fl); + f1 = lroundf(fl); + break; + } + if (!strncmp("BogoMIPS", buffer, 8)) { + float fl; + sscanf(value, "%f", &fl); + f2 = lroundf(fl); + } + } + fclose(fp1); + return f1 == 0 ? f2 : f1; +} + +/* + * pin task to CPU + */ +#ifndef __NR_sched_setaffinity +#error "you need to define __NR_sched_setaffinity" +#endif + +#define MAX_CPUS 2048 +#define NR_CPU_BITS (MAX_CPUS>>3) +int +pin_cpu(pid_t pid, unsigned int cpu) +{ + uint64_t my_mask[NR_CPU_BITS]; + + if (cpu >= MAX_CPUS) + fatal_error("this program supports only up to %d CPUs\n", MAX_CPUS); + + my_mask[cpu>>6] = 1ULL << (cpu&63); + + return syscall(__NR_sched_setaffinity, pid, sizeof(my_mask), &my_mask); +} + +int +child(char **arg) +{ + ptrace(PTRACE_TRACEME, 0, NULL, NULL); + + if (options.pin_cmd_cpu != -1) { + pin_cpu(getpid(), options.pin_cmd_cpu); + vbprintf("command running on CPU core %d\n", options.pin_cmd_cpu); + } + + if (options.opt_no_cmd_out) { + close(1); + close(2); + } + execvp(arg[0], arg); + /* not reached */ + + exit(1); +} + +static void +dec2sep(char *str2, char *str, char sep) +{ + int i, l, b, j, c=0; + + l = strlen(str2); + if (l <= 3) { + strcpy(str, str2); + return; + } + b = l + l /3 
- (l%3 == 0); /* l%3=correction to avoid extraneous comma at the end */ + for(i=l, j=0; i >= 0; i--, j++) { + if (j) c++; + str[b-j] = str2[i]; + if (c == 3 && i>0) { + str[b-++j] = sep; + c = 0; + } + } +} + +static void +print_results(void) +{ + unsigned int i, j, cnt; + int ovfl_adj; + uint64_t value, set_runs; + event_set_t *e; + char *p; + char tmp1[32], tmp2[32], *str; + char mtotal_str[32], *mtotal; + char stotal_str[32], *stotal; + + if (full_periods < num_sets) + fatal_error("not all sets have been activated, need to run longer %lu\n", full_periods); + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. + * + */ + if (options.opt_no_header == 0) { + printf("# %u Hz period = %u usecs\n# %"PRIu64" cycles @ %lu MHz\n", + options.smpl_freq, + 1000000 / options.smpl_freq, + options.smpl_period, + options.cpu_mhz); + + if (options.opt_ovfl_switch == 0) + printf("# using time-based multiplexing\n" + "# %uus effective switch timeout\n", + 1000000 / options.smpl_freq); + else + printf("# using overflow-based multiplexing\n"); + + if (options.opt_is_system) + printf("# system-wide mode on CPU core %d\n",options.pin_cpu); + + printf("# %d sets\n", num_sets); + printf("# %.2f average run per set\n", (double)full_periods/num_sets); + printf("# set measured total #runs scaled total event name\n"); + printf("# ------------------------------------------------------------------\n"); + } + ovfl_adj= options.opt_ovfl_switch ? 
1 : 0; + + for (i=0, e = all_sets, cnt = 0; i < num_sets; i++, e = e->next) { + + set_runs = e->set_runs; + + str = e->event_str; + + for(j=0; j < e->npmds-ovfl_adj; j++, cnt++) { + + value = all_values[j+e->pmds_base]; + + sprintf(tmp1, "%"PRIu64, value); + + if (options.opt_us_format) { + dec2sep(tmp1, mtotal_str, ','); + } else { + strcpy(mtotal_str, tmp1); + } + mtotal = mtotal_str; + + /* + * scaling + */ + sprintf(tmp2, "%"PRIu64, ((value*full_periods)/set_runs)); + + if (options.opt_us_format) { + dec2sep(tmp2, stotal_str, ','); + } else { + strcpy(stotal_str, tmp2); + } + stotal = stotal_str; + + printf(" %03d %20s %8"PRIu64" %20s %s\n", + i, + mtotal, + set_runs, + stotal, + str); + p = strchr(str, '\0'); + if (p) + str = p+1; + } + /* + * skip first event + */ + if (options.opt_ovfl_switch) cnt++; + } +} + +static void +update_set(int ctxid) +{ + int count; + int base; + int ret; + int i; + + base = current_set->pmds_base; + + /* + * we do not read the last counter (cpu_cycles) to avoid overwriting + * the reg_value field which will be used for next round + * + * We need to retry the read in case we get EBUSY because it means that + * the child task context is not yet available from inspection by PFM_READ_PMDS2. 
+ * + */ + count = current_set->npmds; + + if (options.opt_ovfl_switch) + count--; + + ret = pfm_read_pmds(ctxid, all_pmds + base, count); + if (ret == -1) + fatal_error("error reading set: %s\n", strerror(errno)); + + /* update counts for this set */ + for (i=0; i < count; i++) { + all_values[base+i] += all_pmds[base+i].reg_value; + /* reset for next round */ + all_pmds[base+i].reg_value = 0UL; + } +} +static void +switch_sets(int ctxid) +{ + update_set(ctxid); + + current_set = current_set->next; + + if (current_set == NULL) + current_set = all_sets; + + current_set->set_runs++; + + vbprintf("starting set %d run %lu\n", + current_set->id, + current_set->set_runs); + + /* + * we must reprogram all avaibale PMCs (or PMDS) to ensure that no + * state is left over from the previous set and which could conflict + * on restart + */ + if (pfm_write_pmcs(ctxid, all_pmcs+current_set->pmcs_base, current_set->npmcs) == -1) { + fatal_error("error writing pmcs: %s\n", strerror(errno)); + } + + if (pfm_write_pmds(ctxid, all_pmds+current_set->pmds_base, current_set->npmds) == -1) { + fatal_error("error writing pmds: %s\n", strerror(errno)); + } + + full_periods++; + + if (options.opt_ovfl_switch && pfm_restart(ctxid) == -1) { + if (errno != EBUSY) + fatal_error("error pfm_restart: %s\n", strerror(errno)); + /* + * in case of EBUSY, it probably means the task has exited now + */ + } +} + +static void +sigintr_handler(int sig) +{ + if (sig == SIGALRM) + time_to_quit = 1; + else + time_to_quit = 2; + longjmp(jbuf, 1); +} + +static void +sigchld_handler(int sig) +{ + time_to_quit = 1; +} + +static int +measure_one_task(char **argv) +{ + int ctxid; + pfarg_ctx_t ctx[1]; + pfarg_load_t load_arg; + struct pollfd pollfd; + pid_t pid; + int status, ret; + + memset(ctx, 0, sizeof(ctx)); + memset(&load_arg, 0, sizeof(load_arg)); + + /* + * create the context + */ + + ctxid = pfm_create_context(ctx, NULL, NULL, 0); + if (ctxid == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel 
does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., + * fd not visible to child. + */ + if (fcntl(ctxid, F_SETFD, FD_CLOEXEC)) + fatal_error("cannot set CLOEXEC: %s\n", strerror(errno)); + + /* + * write registers for first set + */ + if (pfm_write_pmcs(ctxid, all_pmcs+current_set->pmcs_base, current_set->npmcs) == -1) { + fatal_error("error pfm_write_pmcs: %s\n", strerror(errno)); + } + + if (pfm_write_pmds(ctxid, all_pmds+current_set->pmds_base, current_set->npmds) == -1) { + fatal_error("error pfm_write_pmds: %s\n", strerror(errno)); + } + /* + * now launch the child code + */ + if (options.attach_pid == 0) { + if ((pid= fork()) == -1) fatal_error("Cannot fork process\n"); + if (pid == 0) exit(child(argv)); + } else { + pid = options.attach_pid; + ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); + if (ret) { + fatal_error("cannot attach to task %d: %s\n",options.attach_pid, strerror(errno)); + } + } + + ret = waitpid(pid, &status, WUNTRACED); + if (ret < 0 || WIFEXITED(status)) + fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status)); + + vbprintf("child created and stopped\n"); + + /* + * now attach the context + */ + load_arg.load_pid = pid; + if (pfm_load_context(ctxid, &load_arg) == -1) { + fatal_error("pfm_load_context error errno %d\n",errno); + } + + current_set->set_runs = 1; + + /* + * start monitoring + */ + if (pfm_start(ctxid, NULL) == -1) { + fatal_error("pfm_start error errno %d\n",errno); + } + + ptrace(PTRACE_DETACH, pid, NULL, 0); + + if (setjmp(jbuf) == 1) { + if (time_to_quit == 1) { + printf("timeout expired\n"); + } + if (time_to_quit == 2) + printf("session interrupted\n"); + goto finish_line; + } + + + if (options.session_timeout) { + printf("\n", options.session_timeout); + alarm(options.session_timeout); + } + pollfd.fd = ctxid; + pollfd.events = POLLIN; + 
pollfd.revents = 0; + + while(time_to_quit == 0) { + /* + * mainloop. poll timeout is in msecs + */ + ret = poll(&pollfd, 1, 1000 / options.smpl_freq); + switch(ret) { + case 0: + ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); + if (ret) { + time_to_quit = 1; + break; + } + + ret = waitpid(pid, &status, WUNTRACED); + /* + * exit with time_to_quit = 0 + * to avoid unloading from dead thread + */ + if (WIFEXITED(status)) + goto finish_line; + + switch_sets(ctxid); + + ptrace(PTRACE_DETACH, pid, NULL, 0); + break; + + case -1: fatal_error("poll error: %s\n", strerror(errno)); + + default: /* we don't even read END_MSG */ + time_to_quit = 1; + } + } +finish_line: + /* + * cleanup after an alarm timeout + */ + if (time_to_quit) { + /* stop monitored task */ + ptrace(PTRACE_ATTACH, pid, NULL, 0); + waitpid(pid, NULL, WUNTRACED); + + /* detach context */ + pfm_unload_context(ctxid); + } + + if (options.attach_pid == 0) { + kill(pid, SIGKILL); + waitpid(pid, &status, 0); + } else { + ptrace(PTRACE_DETACH, pid, NULL, 0); + } + + if (time_to_quit < 2) print_results(); + + close(ctxid); + + return 0; +} + +static int +measure_one_cpu(char **argv) +{ + int ctxid, status; + pfarg_ctx_t ctx[1]; + pfarg_load_t load_arg; + struct pollfd pollfd; + pid_t pid = 0; + int ret, timeout; + + memset(ctx, 0, sizeof(ctx)); + memset(&load_arg, 0, sizeof(load_arg)); + + if (options.pin_cpu == -1) { + options.pin_cpu = 0; + printf("forcing monitoring onto CPU core 0\n"); + pin_cpu(getpid(), 0); + } + + ctx[0].ctx_flags = PFM_FL_SYSTEM_WIDE; + /* + * create the context + */ + ctxid = pfm_create_context(ctx, NULL, NULL, 0); + if (ctxid == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + /* + * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., + * fd not visible to child. 
+ */ + if (fcntl(ctxid, F_SETFD, FD_CLOEXEC)) + fatal_error("cannot set CLOEXEC: %s\n", strerror(errno)); + + /* + * Now program the all the registers in one call + * + * Note that there is a limitation on the size of the argument vector + * that can be passed. It is usually set to a page size (16KB). + */ + if (pfm_write_pmcs(ctxid, all_pmcs+current_set->pmcs_base, current_set->npmcs) == -1) + fatal_error("error: pfm_write_pmcs errno: %s\n", strerror(errno)); + + /* + * initialize the PMD registers. + * + * To be read, each PMD must be either written or declared + * as being part of a sample (reg_smpl_pmds) + */ + if (pfm_write_pmds(ctxid, all_pmds+current_set->pmds_base, current_set->npmds) == -1) + fatal_error("pfm_write_pmds error errno %d\n", strerror(errno)); + + /* + * now launch the child code + */ + if (*argv) { + if ((pid = fork()) == -1) fatal_error("Cannot fork process\n"); + if (pid == 0) exit(child(argv)); + } + + /* + * wait for the child to exec or be stopped + * We do this even in system-wide mode to ensure + * that the task does not start until we are ready + * to monitor. + */ + if (pid) { + ret = waitpid(pid, &status, WUNTRACED); + if (ret < 0 || WIFEXITED(status)) + fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status)); + + vbprintf("child created and stopped\n"); + } + + /* + * now attach the context + */ + load_arg.load_pid = options.pin_cpu; + if (pfm_load_context(ctxid, &load_arg) == -1) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * start monitoring + */ + if (pfm_start(ctxid, NULL) == -1) + fatal_error("pfm_start error errno %d\n",errno); + + if (pid) { + signal(SIGCHLD, sigchld_handler); + ptrace(PTRACE_DETACH, pid, NULL, 0); + } + + /* + * mainloop + */ + pollfd.fd = ctxid; + pollfd.events = POLLIN; + pollfd.revents = 0; + + timeout = options.opt_ovfl_switch ? 
-1 : (1000 / options.smpl_freq); + + while (time_to_quit == 0) { + ret = poll(&pollfd, 1, timeout); + switch(ret) { + case 1: + case 0: + /* + * we are consuming the message. + * to avoid this phase we could use PFM_FL_OVFL_NO_MSG + * and use signal based notification + */ + if (options.opt_ovfl_switch) { + ssize_t r; + pfarg_msg_t msg; + r = read(ctxid, &msg, sizeof(msg)); + (void) r; + } + switch_sets(ctxid); + break; + default: + if (errno != EINTR) + fatal_error("poll fails\n"); + } + } + if (full_periods < MIN_FULL_PERIODS) + fatal_error("Not enough periods (%lu) to print results\n", full_periods); + + if (pid) + waitpid(pid, &status, 0); + + print_results(); + + close(ctxid); + + return 0; +} + + +/* + * compute the PMC/PMD assignments for every event set, then run the + * measurement in system-wide or per-task mode depending on the options + */ +int +mainloop(char **argv) +{ + event_set_t *e; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_regmask_t impl_counters, used_pmcs; + pfmlib_event_t cycle_event; + unsigned int i, j; + char *p, *str; + unsigned int max_counters, allowed_counters; + int ret; + + pfm_get_num_counters(&max_counters); + + if (max_counters < 2 && options.opt_ovfl_switch) + fatal_error("not enough counter to get overflow switching to work\n"); + + allowed_counters = max_counters; + + /* + * account for overflow counter (cpu cycles) + */ + if (options.opt_ovfl_switch) allowed_counters--; + + memset(&used_pmcs, 0, sizeof(used_pmcs)); + memset(&impl_counters, 0, sizeof(impl_counters)); + + pfm_get_impl_counters(&impl_counters); + + options.smpl_period = (options.cpu_mhz*1000000)/options.smpl_freq; + + /* smpl_freq is a uint32_t, hence %u and not %lu */ + vbprintf("%u Hz period = %"PRIu64" cycles @ %lu Mhz\n", options.smpl_freq, options.smpl_period, options.cpu_mhz); + + for (e = all_sets; e; e = e->next) { + for (p = str = e->event_str; p ; ) { + p = strchr(str, ','); + if (p) str = p +1; + total_events++; + } + } + + /* + * account for extra event per set (cycle event) + */ + if (options.opt_ovfl_switch) { + total_events += num_sets; + /* + * look for our trigger event + */ + if (pfm_get_cycle_event(&cycle_event) != 
PFMLIB_SUCCESS) { + fatal_error("Cannot find cycle event\n"); + } + } + + vbprintf("total_events=%u\n", total_events); + + all_pmcs = calloc(1, sizeof(pfarg_pmc_t)*total_events); + all_pmds = calloc(1, sizeof(pfarg_pmd_t)*total_events); + all_values = calloc(1, sizeof(uint64_t)*total_events); + + if (all_pmcs == NULL || all_pmds == NULL || all_values == NULL) + fatal_error("cannot allocate event tables\n"); + + /* + * use the library to figure out assignments for all events of all sets + */ + for (i=0, e = all_sets; i < num_sets; i++, e = e->next) { + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + /* + * build the pfp_unavail_pmcs bitmask by looking + * at what perfmon has available. It is not always + * the case that all PMU registers are actually available + * to applications. For instance, on IA-32 platforms, some + * registers may be reserved for the NMI watchdog timer. + * + * With this bitmap, the library knows which registers NOT to + * use. Of source, it is possible that no valid assignement may + * be possible if certina PMU registers are not available. + */ + detect_unavail_pmcs(-1, &inp.pfp_unavail_pmcs); + + + str = e->event_str; + for(j=0, p = str; p && j < allowed_counters; j++) { + + p = strchr(str, ','); + if (p) *p = '\0'; + + if (pfm_find_full_event(str, &inp.pfp_events[j]) != PFMLIB_SUCCESS) { + fatal_error("Cannot find %s event for set %d event %d\n", str, i, j); + } + if (p) { + *p = ','; + str = p + 1; + } + } + if (p) { + fatal_error("error in set %d: cannot have more than %d event(s) per set %s\n", + i, + allowed_counters, + options.opt_ovfl_switch ? 
"(overflow switch mode)": "(hardware limit)"); + } + /* + * add the cycle event as the last event when we switch on overflow + */ + if (options.opt_ovfl_switch) { + inp.pfp_events[j] = cycle_event; + inp.pfp_event_count = j+1; + e->n_events = j+1; + } else { + e->n_events = j; + inp.pfp_event_count = j; + } + + inp.pfp_dfl_plm = options.opt_plm; + + if (options.opt_is_system) + inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE; + + vbprintf("PMU programming for set %d\n", i); + + /* + * let the library do the hard work + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) { + fatal_error("cannot configure events for set %d: %s\n", i, pfm_strerror(ret)); + } + e->id = i; + + e->pmcs_base = num_pmcs; + e->pmds_base = num_pmds; + + /* + * propagate from libpfm to kernel data structures + */ + for (j=0; j < outp.pfp_pmc_count; j++, num_pmcs++) { + all_pmcs[num_pmcs].reg_num = outp.pfp_pmcs[j].reg_num; + all_pmcs[num_pmcs].reg_value = outp.pfp_pmcs[j].reg_value; + } + for (j=0; j < outp.pfp_pmd_count; j++, num_pmds++) + all_pmds[num_pmds].reg_num = outp.pfp_pmds[j].reg_num; + + e->npmcs = num_pmcs - e->pmcs_base; + e->npmds = num_pmds - e->pmds_base; + + if (options.opt_ovfl_switch) { + /* + * We do this even in system-wide mode to ensure + * that the task does not start until we are ready + * to monitor. 
+ * setup the sampling period + */ + all_pmds[num_pmds-1].reg_value = - options.smpl_period; + all_pmds[num_pmds-1].reg_short_reset = - options.smpl_period; + all_pmds[num_pmds-1].reg_long_reset = - options.smpl_period; + all_pmds[num_pmds-1].reg_flags = PFM_REGFL_OVFL_NOTIFY; + } + vbprintf("set%d pmc_base=%d pmd_base=%d npmcs=%d npmds=%d\n", + e->id, + e->pmcs_base, + e->pmds_base, + e->npmcs, + e->npmds); + } + + current_set = all_sets; + + signal(SIGALRM, sigintr_handler); + signal(SIGINT, sigintr_handler); + + if (options.opt_is_system) + return measure_one_cpu(argv); + + return measure_one_task(argv); +} + +static struct option multiplex_options[]={ + { "help", 0, 0, 1}, + { "freq", 1, 0, 2 }, + { "kernel-level", 0, 0, 3 }, + { "user-level", 0, 0, 4 }, + { "version", 0, 0, 5 }, + { "set", 1, 0, 6 }, + { "session-timeout", 1, 0, 7 }, + { "attach-task", 1, 0, 8 }, + { "pin-cmd", 1, 0, 9 }, + { "cpu", 1, 0, 10 }, + + { "verbose", 0, &options.opt_verbose, 1 }, + { "debug", 0, &options.opt_debug, 1 }, + { "us-counter-format", 0, &options.opt_us_format, 1}, + { "ovfl-switch", 0, &options.opt_ovfl_switch, 1}, + { "system-wide", 0, &options.opt_is_system, 1}, + { "no-cmd-output", 0, &options.opt_no_cmd_out, 1}, + { "no-header", 0, &options.opt_no_header, 1}, + { 0, 0, 0, 0} +}; + +static void +generate_default_sets(void) +{ + event_set_t *es, *tail = NULL; + pfmlib_event_t events[2]; + size_t len; + char *name; + unsigned int i; + int ret; + + ret = pfm_get_cycle_event(&events[0]); + if (ret != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + ret = pfm_get_inst_retired_event(&events[1]); + if (ret != PFMLIB_SUCCESS) + fatal_error("cannot find instruction retired event\n"); + + pfm_get_max_event_name_len(&len); + + for (i=0; i < 2; i++) { + name = malloc(len+1); + if (name == NULL) { + fatal_error("cannot allocate space for event name\n"); + } + pfm_get_full_event_name(&events[i], name, len+1); + + es = (event_set_t *)malloc(sizeof(event_set_t)); + if 
(es == NULL) + fatal_error("cannot allocate new event set\n"); + + memset(es, 0, sizeof(*es)); + + es->event_str = name; + es->next = NULL; + es->n_events = 0; + + if (all_sets == NULL) + all_sets = es; + else + tail->next = es; + tail = es; + } + num_sets = i; +} + +static void +print_usage(char **argv) +{ + printf("usage: %s [OPTIONS]... COMMAND\n", argv[0]); + + printf( "-h, --help\t\t\t\tdisplay this help and exit\n" + "-V, --version\t\t\t\toutput version information and exit\n" + "-u, --user-level\t\t\tmonitor at the user level for all events\n" + "-k, --kernel-level\t\t\tmonitor at the kernel level for all events\n" + "-c, --us-counter-format\tprint large counts with comma for thousands\n" + "-p pid, --attach-task pid\tattach to a running task\n" + "--set=ev1[,ev2,ev3,ev4,...]\t\tdescribe one set\n" + "--freq=number\t\t\t\tset set switching frequency in Hz\n" + "-c cpu, --cpu=cpu\t\t\tCPU to use for system-wide [default current]\n" + "--ovfl-switch\t\t\t\t\tuse overflow based multiplexing (default: time-based)\n" + "--verbose\t\t\t\tprint more information during execution\n" + "--system-wide\t\t\t\tuse system-wide (only one CPU at a time)\n" + "--excl-idle\t\t\texclude idle task(system-wide only)\n" + "--excl-intr\t\t\texclude interrupt triggered execution(system-wide only)\n" + "--intr-only\t\t\tinclude only interrupt triggered execution(system-wide only)\n" + "--session-timeout=sec\t\t\tsession timeout in seconds (system-wide only)\n" + "--no-cmd-output\t\t\t\toutput of executed command redirected to /dev/null\n" + "--pin-cmd=cpu\t\t\t\tpin executed command onto a specific cpu\n" + ); +} + +int +main(int argc, char **argv) +{ + char *endptr = NULL; + pfmlib_options_t pfmlib_options; + event_set_t *tail = NULL, *es; + unsigned long long_val; + int c, ret; + + options.pin_cmd_cpu = options.pin_cpu = -1; + + while ((c=getopt_long(argc, argv,"+vhkuVct:p:", multiplex_options, 0)) != -1) { + switch(c) { + case 0: continue; /* fast path for options */ + + case 1: 
+ case 'h': /* -h is accepted by the getopt string and listed by print_usage() */ + print_usage(argv); + exit(0); + + case 'v': options.opt_verbose = 1; + break; + case 'c': + options.opt_us_format = 1; + break; + case 2: + if (options.smpl_freq) fatal_error("sampling frequency set twice\n"); + options.smpl_freq = strtoul(optarg, &endptr, 10); + if (*endptr != '\0') + fatal_error("invalid frequency: %s\n", optarg); + break; + case 3: + case 'k': + options.opt_plm |= PFM_PLM0; + break; + case 4: + case 'u': + options.opt_plm |= PFM_PLM3; + break; + case 'V': + case 5: + printf("multiplex version " MULTIPLEX_VERSION " Date: " __DATE__ "\n" + "Copyright (C) 2004 Hewlett-Packard Company\n"); + exit(0); + case 6: + /* + * zero the whole set, as generate_default_sets() does: + * set_runs is incremented by switch_sets() and used as a + * divisor by print_results() without any other initialization + */ + es = calloc(1, sizeof(*es)); + if (es == NULL) fatal_error("cannot allocate new event set\n"); + + es->event_str = optarg; + es->next = NULL; + es->n_events = 0; + + if (all_sets == NULL) + all_sets = es; + else + tail->next = es; + tail = es; + num_sets++; + break; + case 't': + case 7: + if (options.session_timeout) fatal_error("too many timeouts\n"); + if (*optarg == '\0') fatal_error("--session-timeout needs an argument\n"); + long_val = strtoul(optarg,&endptr, 10); + if (*endptr != '\0') + fatal_error("invalid number of seconds for timeout: %s\n", optarg); + + if (long_val >= UINT_MAX) + fatal_error("timeout is too big, must be < %u\n", UINT_MAX); + + options.session_timeout = (unsigned int)long_val; + break; + case 'p': + case 8: + if (options.attach_pid) fatal_error("process to attach specified twice\n"); + options.attach_pid = (pid_t)atoi(optarg); + break; + case 9: + if (options.pin_cmd_cpu != -1) fatal_error("cannot pin command twice\n"); + options.pin_cmd_cpu = atoi(optarg); + break; + + case 10: + if (options.pin_cpu != -1) fatal_error("cannot pin to more than one cpu\n"); + options.pin_cpu = atoi(optarg); + break; + default: + fatal_error(""); /* just quit silently now */ + } + } + + if (optind == argc && options.opt_is_system == 0 && options.attach_pid == 0) + fatal_error("you need to specify a command to 
measure\n"); + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = options.opt_verbose; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + if ((options.cpu_mhz = get_cpu_speed()) == 0) + fatal_error("can't get CPU speed\n"); + + if (options.smpl_freq == 0UL) + options.smpl_freq = SMPL_FREQ_IN_HZ; + + if (options.opt_plm == 0) + options.opt_plm = PFM_PLM3; + + if (num_sets == 0) + generate_default_sets(); + + return mainloop(argv+optind); +} diff --git a/src/libpfm-3.y/examples_v2.x/multiplex2.c b/src/libpfm-3.y/examples_v2.x/multiplex2.c new file mode 100644 index 0000000..d93c519 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/multiplex2.c @@ -0,0 +1,1182 @@ +/* + * multiplex2.c - example of kernel-level time-based or overflow-based event multiplexing + * + * Copyright (c) 2004-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#ifndef _GNU_SOURCE + #define _GNU_SOURCE /* for getline */ +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "detect_pmcs.h" + +#define MAX_EVT_NAME_LEN 128 + +#define MULTIPLEX_VERSION "0.2" + +#define SMPL_FREQ_IN_HZ 100 + +#define NUM_PMCS 256 + +typedef struct { + struct { + int opt_plm; /* which privilege level to monitor (more than one possible) */ + int opt_debug; /* print debug information */ + int opt_verbose; /* verbose output */ + int opt_us_format; /* print large numbers with comma for thousands */ + int opt_ovfl_switch; /* overflow-based switching */ + int opt_is_system; /* use system-wide */ + int opt_excl_idle; /* exclude idle task */ + int opt_excl_intr; /* exclude interrupts */ + int opt_intr_only; /* interrupts only*/ + int opt_no_cmd_out; /* redirect cmd output to /dev/null */ + int opt_no_header; /* no header */ + } program_opt_flags; + + unsigned long max_counters; /* maximum number of counter for the platform */ + uint64_t smpl_freq_hz; + uint64_t smpl_freq_ns; + unsigned long session_timeout; + uint64_t smpl_period; + uint64_t clock_res; + + unsigned long cpu_mhz; + + pid_t attach_pid; + int pin_cmd_cpu; + int pin_cpu; +} program_options_t; + +#define opt_plm program_opt_flags.opt_plm +#define opt_debug program_opt_flags.opt_debug +#define opt_verbose program_opt_flags.opt_verbose +#define opt_us_format program_opt_flags.opt_us_format +#define opt_ovfl_switch program_opt_flags.opt_ovfl_switch +#define opt_is_system program_opt_flags.opt_is_system +#define opt_excl_idle program_opt_flags.opt_excl_idle +#define opt_excl_intr 
program_opt_flags.opt_excl_intr +#define opt_intr_only program_opt_flags.opt_intr_only +#define opt_no_cmd_out program_opt_flags.opt_no_cmd_out +#define opt_no_header program_opt_flags.opt_no_header + +typedef struct _event_set_t { + struct _event_set_t *next; + char *event_str; + unsigned int n_events; +} event_set_t; + +typedef int pfm_ctxid_t; + +static program_options_t options; + +static pfarg_pmc_t *all_pmcs; +static pfarg_pmd_t *all_pmds; +static pfarg_setdesc_t *all_sets; +static event_set_t *all_events; + +static unsigned int num_pmds, num_pmcs, num_sets, total_events; +static volatile int time_to_quit; +static jmp_buf jbuf; + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +vbprintf(char *fmt, ...) +{ + va_list ap; + + if (options.opt_verbose == 0) return; + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * unreliable for CPU with variable clock speed + */ +static unsigned long +get_cpu_speed(void) +{ + FILE *fp1; + unsigned long f1 = 0, f2 = 0; + char buffer[128], *p, *value; + + memset(buffer, 0, sizeof(buffer)); + + fp1 = fopen("/proc/cpuinfo", "r"); + if (fp1 == NULL) return 0; + + for (;;) { + buffer[0] = '\0'; + + p = fgets(buffer, 127, fp1); + if (p == NULL) + break; + + /* skip blank lines */ + if (*p == '\n') continue; + + p = strchr(buffer, ':'); + if (p == NULL) + break; + + /* + * p+2: +1 = space, +2= firt character + * strlen()-1 gets rid of \n + */ + *p = '\0'; + value = p+2; + + value[strlen(value)-1] = '\0'; + + if (!strncasecmp("cpu MHz", buffer, 7)) { + float fl; + sscanf(value, "%f", &fl); + f1 = lroundf(fl); + break; + } + if (!strncasecmp("BogoMIPS", buffer, 8)) { + float fl; + sscanf(value, "%f", &fl); + f2 = lroundf(fl); + } + } + fclose(fp1); + return f1 == 0 ? 
f2 : f1; +} + +/* + * pin task to CPU + */ +#ifndef __NR_sched_setaffinity +#error "you need to define __NR_sched_setaffinity" +#endif + +#define MAX_CPUS 2048 +#define NR_CPU_BITS (MAX_CPUS>>3) +int +pin_cpu(pid_t pid, unsigned int cpu) +{ + uint64_t my_mask[NR_CPU_BITS]; + + if (cpu >= MAX_CPUS) + fatal_error("this program supports only up to %d CPUs\n", MAX_CPUS); + + my_mask[cpu>>6] = 1ULL << (cpu&63); + + return syscall(__NR_sched_setaffinity, pid, sizeof(my_mask), &my_mask); +} + +int +child(char **arg) +{ + ptrace(PTRACE_TRACEME, 0, NULL, NULL); + + if (options.pin_cmd_cpu != -1) { + pin_cpu(getpid(), options.pin_cmd_cpu); + vbprintf("command running on CPU core %d\n", options.pin_cmd_cpu); + } + + if (options.opt_no_cmd_out) { + close(1); + close(2); + } + execvp(arg[0], arg); + /* not reached */ + + exit(1); +} + +static void +dec2sep(char *str2, char *str, char sep) +{ + int i, l, b, j, c=0; + + l = strlen(str2); + if (l <= 3) { + strcpy(str, str2); + return; + } + b = l + l /3 - (l%3 == 0); /* l%3=correction to avoid extraneous comma at the end */ + for(i=l, j=0; i >= 0; i--, j++) { + if (j) c++; + str[b-j] = str2[i]; + if (c == 3 && i>0) { + str[b-++j] = sep; + c = 0; + } + } +} + +static void +print_results(int ctxid, uint64_t *eff_timeout) +{ + unsigned int i, j, cnt, ovfl_event; + uint64_t value, tot_runs = 0; + uint64_t tot_dur = 0, c; + pfarg_setinfo_t *all_setinfos; + event_set_t *e; + char *p; + char tmp1[32], tmp2[32], *str; + char mtotal_str[32], *mtotal; + char stotal_str[32], *stotal; + int ret; + + all_setinfos = malloc(sizeof(pfarg_setinfo_t)*num_sets); + if (all_setinfos == NULL) + fatal_error("cannot allocate all_setinfo\n"); + + memset(all_setinfos, 0, sizeof(pfarg_setinfo_t)*num_sets); + + for(i=0; i < num_sets; i++) + all_setinfos[i].set_id = i; + + /* + * read all counters in one call + * + * There is a limitation on the size of the argument vector and + * it may be necesarry to split into multiple calls. 
That limit + * is usally at page size (16KB) + */ + ret = pfm_read_pmds(ctxid, all_pmds, num_pmds); + if (ret == -1) + fatal_error("cannot read pmds: %s\n", strerror(errno)); + + /* + * extract all set information + * + * There is a limitation on the size of the argument vector and + * it may be necesarry to split into multiple calls. That limit + * is usually at page size (16KB) + */ + ret = pfm_getinfo_evtsets(ctxid, all_setinfos, num_sets); + if (ret == -1) + fatal_error("cannot get set info: %s\n", strerror(errno)); + + /* + * compute average number of runs + * + * the number of runs per set can be at most off by 1 between all sets + */ + for (i=0, cnt = 0; i < num_sets; i++) { + if (all_setinfos[i].set_runs == 0) + fatal_error("not enough runs to collect meaningful results: set%u did not run\n", i); + tot_runs += all_setinfos[i].set_runs; + tot_dur += all_setinfos[i].set_act_duration; + } + + /* + * print the results + * + * It is important to realize, that the first event we specified may not + * be in PMD4. Not all events can be measured by any monitor. That's why + * we need to use the pc[] array to figure out where event i was allocated. 
+ * + */ + if (options.opt_no_header == 0) { + printf("# %.2fHz period = %"PRIu64"nsecs\n# %"PRIu64" cycles @ %lu MHz\n", + 1000000000.0 / options.smpl_freq_ns, + options.smpl_freq_ns, + options.smpl_period, + options.cpu_mhz); + + if (options.opt_ovfl_switch == 0) + printf("# using time-based multiplexing\n" + "# %"PRIu64" nsecs effective switch timeout\n", + *eff_timeout); + else + printf("# using overflow-based multiplexing\n"); + + if (options.opt_is_system) + printf("# system-wide mode on CPU core %d\n",options.pin_cpu); + printf("# %d sets\n", num_sets); + printf("# %.2f average run per set\n", (double)tot_runs/num_sets); + printf("# %.2f average ns per set\n", (double)tot_dur/num_sets); + printf("# set measured total #runs scaled total event name\n"); + printf("# ------------------------------------------------------------------\n"); + } + ovfl_event = options.opt_ovfl_switch ? 1 : 0; + + for (i=0, e = all_events, cnt = 0; i < num_sets; i++, e = e->next) { + + str = e->event_str; + + for(j=0; j < e->n_events-ovfl_event; j++, cnt++) { + value = all_pmds[cnt].reg_value; + + sprintf(tmp1, "%"PRIu64, value); + + if (options.opt_us_format) { + dec2sep(tmp1, mtotal_str, ','); + } else { + strcpy(mtotal_str, tmp1); + } + mtotal = mtotal_str; + + /* + * scaling + * We use duration rather than number of runs to compute a more precise + * scaled value. This avoids overcounting when the last set only partially + * ran. 
+ * + * We use double to avoid overflowing of the 64-bit count in case of very + * large total duration + */ + c = llround(((double)value*tot_dur)/(double)all_setinfos[i].set_act_duration); + sprintf(tmp2, "%"PRIu64, c); + + if (options.opt_us_format) { + dec2sep(tmp2, stotal_str, ','); + } else { + strcpy(stotal_str, tmp2); + } + stotal = stotal_str; + + printf(" %03d %20s %8"PRIu64" %20s %s\n", + i, + mtotal, + all_setinfos[i].set_runs, + stotal, + str); + p = strchr(str, '\0'); + if (p) + str = p+1; + } + /* + * skip first event + */ + if (options.opt_ovfl_switch) cnt++; + } +} + +static void +sigintr_handler(int sig) +{ + if (sig == SIGALRM) + time_to_quit = 1; + else + time_to_quit = 2; + longjmp(jbuf, 1); +} + +static int +measure_one_task(char **argv) +{ + int ctxid; + pfarg_ctx_t ctx[1]; + pfarg_setdesc_t *my_sets; + pfarg_pmc_t *my_pmcs; + pfarg_pmd_t *my_pmds; + pfarg_load_t load_arg; + uint64_t eff_timeout; + pfarg_msg_t msg; + pid_t pid; + int status, ret; + + my_pmcs = malloc(sizeof(pfarg_pmc_t)*num_pmcs); + my_pmds = malloc(sizeof(pfarg_pmd_t)*num_pmds); + my_sets = malloc(sizeof(pfarg_setdesc_t)*num_sets); + + if (my_pmcs == NULL || my_pmds == NULL || my_sets == NULL) + fatal_error("cannot allocate event tables\n"); + /* + * make private copies + */ + memcpy(my_pmcs, all_pmcs, sizeof(pfarg_pmc_t)*num_pmcs); + memcpy(my_pmds, all_pmds, sizeof(pfarg_pmd_t)*num_pmds); + memcpy(my_sets, all_sets, sizeof(pfarg_setdesc_t)*num_sets); + + memset(ctx, 0, sizeof(ctx)); + memset(&load_arg, 0, sizeof(load_arg)); + + /* + * create the context + */ + ctxid = pfm_create_context(ctx, NULL, NULL, 0); + if (ctxid == -1 ) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., + * fd not visible to child. 
+ */ + if (fcntl(ctxid, F_SETFD, FD_CLOEXEC)) + fatal_error("cannot set CLOEXEC: %s\n", strerror(errno)); + + /* + * create the event sets + * + * event set 0 is always exist by default for backward compatibility + * reason. However to avoid special casing set0 for creation, a PFM_CREATE_EVTSETS + * for set0 does not complain and behaves as a PFM_CHANGE_EVTSETS + */ + vbprintf("requested timeout %"PRIu64" nsecs\n", my_sets[0].set_timeout); + + if (pfm_create_evtsets(ctxid, my_sets, num_sets)) + fatal_error("cannot create sets\n"); + + eff_timeout = my_sets[0].set_timeout; + + vbprintf("effective timeout %"PRIu64" nsecs\n", my_sets[0].set_timeout); + /* + * Now program the all the registers in one call + * + * Note that there is a limitation on the size of the argument vector + * that can be passed. It is usually set to a page size (16KB). + */ + if (pfm_write_pmcs(ctxid, my_pmcs, num_pmcs) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + /* + * initialize the PMD registers. 
+ * + * To be read, each PMD must be either written or declared + * as being part of a sample (reg_smpl_pmds) + */ + if (pfm_write_pmds(ctxid, my_pmds, num_pmds) == -1) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now launch the child code + */ + if (options.attach_pid == 0) { + if ((pid= fork()) == -1) fatal_error("Cannot fork process\n"); + if (pid == 0) exit(child(argv)); + } else { + pid = options.attach_pid; + ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); + if (ret) { + fatal_error("cannot attach to task %d: %s\n",options.attach_pid, strerror(errno)); + } + } + + ret = waitpid(pid, &status, WUNTRACED); + if (ret < 0 || WIFEXITED(status)) + fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status)); + + vbprintf("child created and stopped\n"); + + /* + * now attach the context + */ + load_arg.load_pid = pid; + if (pfm_load_context(ctxid, &load_arg) == -1) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * start monitoring + */ + if (pfm_start(ctxid, NULL) == -1) + fatal_error("pfm_start error errno %d\n",errno); + + ptrace(PTRACE_DETACH, pid, NULL, 0); + vbprintf("child restarted\n"); + + if (setjmp(jbuf) == 1) { + if (time_to_quit == 1) { + printf("timeout expired\n"); + } + if (time_to_quit == 2) + printf("session interrupted\n"); + goto finish_line; + } + signal(SIGALRM, sigintr_handler); + signal(SIGINT, sigintr_handler); + + if (options.session_timeout) { + printf("\n", options.session_timeout); + alarm(options.session_timeout); + } + /* + * mainloop + */ + ret = read(ctxid, &msg, sizeof(msg)); + if (ret < sizeof(msg)) + fatal_error("interrupted read\n"); + + switch(msg.type) { + case PFM_MSG_OVFL: + fatal_error("unexpected ovfl message\n"); + break; + case PFM_MSG_END: + break; + default: printf("unknown message type %d\n", msg.type); + } + +finish_line: + /* + * cleanup after an alarm timeout + */ + if (time_to_quit) { + /* stop monitored task */ + ptrace(PTRACE_ATTACH, pid, NULL, 0); + 
waitpid(pid, NULL, WUNTRACED); + + /* detach context */ + pfm_unload_context(ctxid); + } + + if (options.attach_pid == 0) { + kill(pid, SIGKILL); + waitpid(pid, &status, 0); + } else { + ptrace(PTRACE_DETACH, pid, NULL, 0); + } + + if (time_to_quit < 2) + print_results(ctxid, &eff_timeout); + + close(ctxid); + + return 0; +} + + +static int +measure_one_cpu(char **argv) +{ + int ctxid, status; + pfarg_ctx_t ctx[1]; + pfarg_pmc_t *my_pmcs; + pfarg_pmd_t *my_pmds; + pfarg_setdesc_t *my_sets; + pfarg_load_t load_arg; + pid_t pid = 0; + int ret; + + /* + * size the private tables with the same counts used by the memcpy() + * calls below: the number of PMCs/PMDs is not guaranteed to equal + * total_events + */ + my_pmcs = malloc(sizeof(pfarg_pmc_t)*num_pmcs); + my_pmds = malloc(sizeof(pfarg_pmd_t)*num_pmds); + my_sets = malloc(sizeof(pfarg_setdesc_t)*num_sets); + + if (my_pmcs == NULL || my_pmds == NULL || my_sets == NULL) + fatal_error("cannot allocate event tables\n"); + /* + * make private copies + */ + memcpy(my_pmcs, all_pmcs, sizeof(pfarg_pmc_t)*num_pmcs); + memcpy(my_pmds, all_pmds, sizeof(pfarg_pmd_t)*num_pmds); + memcpy(my_sets, all_sets, sizeof(pfarg_setdesc_t)*num_sets); + + memset(ctx, 0, sizeof(ctx)); + memset(&load_arg, 0, sizeof(load_arg)); + + if (options.pin_cpu == -1) { + options.pin_cpu = 0; + printf("forcing monitoring onto CPU core 0\n"); + pin_cpu(getpid(), 0); + } + + ctx[0].ctx_flags = PFM_FL_SYSTEM_WIDE; + /* + * create the context + */ + ctxid = pfm_create_context(ctx, NULL, NULL, 0); + if (ctxid == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + /* + * set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., + * fd not visible to child. + */ + if (fcntl(ctxid, F_SETFD, FD_CLOEXEC)) + fatal_error("cannot set CLOEXEC: %s\n", strerror(errno)); + + /* + * create the event sets + * + * event set 0 is always created by default for backward compatibility + * reason. 
However to avoid special casing set0 for creation, a PFM_CREATE_EVTSETS + * for set0 does not complain and behaves as a PFM_CHANGE_EVTSETS + */ + if (pfm_create_evtsets(ctxid, my_sets, num_sets)) + fatal_error("cannot create sets\n"); + + /* + * Now program the all the registers in one call + * + * Note that there is a limitation on the size of the argument vector + * that can be passed. It is usually set to a page size (16KB). + */ + if (pfm_write_pmcs(ctxid, my_pmcs, num_pmcs) == -1) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + /* + * initialize the PMD registers. + * + * To be read, each PMD must be either written or declared + * as being part of a sample (reg_smpl_pmds) + */ + if (pfm_write_pmds(ctxid, my_pmds, num_pmds) == -1) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * now launch the child code + */ + if (*argv) { + if ((pid = fork()) == -1) fatal_error("Cannot fork process\n"); + if (pid == 0) exit(child(argv)); + } + + /* + * wait for the child to exec or be stopped + * We do this even in system-wide mode to ensure + * that the task does not start until we are ready + * to monitor. + */ + if (pid) { + ret = waitpid(pid, &status, WUNTRACED); + if (ret < 0 || WIFEXITED(status)) + fatal_error("error command already terminated, exit code %d\n", WEXITSTATUS(status)); + + vbprintf("child created and stopped\n"); + } + + /* + * now attach the context + */ + load_arg.load_pid = options.opt_is_system ? 
getpid() : pid; + if (pfm_load_context(ctxid, &load_arg) == -1) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * start monitoring + */ + if (pfm_start(ctxid, NULL) == -1) + fatal_error("pfm_start error errno %d\n",errno); + + if (pid) ptrace(PTRACE_DETACH, pid, NULL, 0); + + if (pid == 0) { + if (options.session_timeout == 0) { + printf("\n"); + getchar(); + } else { + printf("\n", options.session_timeout); + sleep(options.session_timeout); + } + } else { + ret = waitpid(pid, &status, 0); + } + print_results(ctxid, &my_sets[0].set_timeout); + + if (ctxid) close(ctxid); + + return 0; +} + +int +mainloop(char **argv) +{ + event_set_t *e; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfmlib_regmask_t impl_counters, used_pmcs; + pfmlib_event_t cycle_event; + unsigned int i, j; + char *p, *str; + int ret; + unsigned int max_counters, allowed_counters; + + pfm_get_num_counters(&max_counters); + + if (max_counters < 2 && options.opt_ovfl_switch) + fatal_error("not enough counter to get overflow switching to work\n"); + + allowed_counters = max_counters; + + /* + * account for overflow counter (cpu cycles) + */ + if (options.opt_ovfl_switch) allowed_counters--; + + memset(&used_pmcs, 0, sizeof(used_pmcs)); + memset(&impl_counters, 0, sizeof(impl_counters)); + + pfm_get_impl_counters(&impl_counters); + + options.smpl_period = (options.cpu_mhz*1000000)/options.smpl_freq_hz; + + vbprintf("%"PRIu64"Hz period = %"PRIu64" cycles @ %luMhz\n", options.smpl_freq_hz, options.smpl_period, options.cpu_mhz); + + for (e = all_events; e; e = e->next) { + for (p = str = e->event_str; p ; ) { + p = strchr(str, ','); + if (p) str = p +1; + total_events++; + } + } + + /* + * account for extra event per set (cycle event) + */ + if (options.opt_ovfl_switch) { + total_events += num_sets; + /* + * look for our trigger event + */ + if (pfm_get_cycle_event(&cycle_event) != PFMLIB_SUCCESS) + fatal_error("Cannot find cycle event\n"); + } + + 
vbprintf("total_events=%u\n", total_events); + + /* + * assumes number of pmds = number of events + * cannot assume number of pmcs = num of events (e.g., P4 2 PMCS per event) + */ + all_pmcs = calloc(NUM_PMCS, sizeof(pfarg_pmc_t)); + all_pmds = calloc(total_events, sizeof(pfarg_pmd_t)); + all_sets = calloc(num_sets, sizeof(pfarg_setdesc_t)); + + if (all_pmcs == NULL || all_pmds == NULL || all_sets == NULL) + fatal_error("cannot allocate event tables\n"); + + /* + * use the library to figure out assignments for all events of all sets + */ + for (i=0, e = all_events; i < num_sets; i++, e = e->next) { + + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + /* + * build the pfp_unavail_pmcs bitmask by looking + * at what perfmon has available. It is not always + * the case that all PMU registers are actually available + * to applications. For instance, on IA-32 platforms, some + * registers may be reserved for the NMI watchdog timer. + * + * With this bitmap, the library knows which registers NOT to + * use. Of source, it is possible that no valid assignement may + * be possible if certina PMU registers are not available. + */ + detect_unavail_pmcs(-1, &inp.pfp_unavail_pmcs); + + str = e->event_str; + for(j=0, p = str; p && j < allowed_counters; j++) { + + p = strchr(str, ','); + if (p) + *p = '\0'; + ret = pfm_find_full_event(str, &inp.pfp_events[j]); + if (ret != PFMLIB_SUCCESS) + fatal_error("event %s for set %d event %d: %s\n", str, i, j, pfm_strerror(ret)); + if (p) + str = p + 1; + } + if (p) { + fatal_error("error in set %d: cannot have more than %d event(s) per set %s\n", + i, + allowed_counters, + options.opt_ovfl_switch ? 
"(overflow switch mode)": "(hardware limit)"); + } + /* + * add the cycle event as the last event when we switch on overflow + */ + if (options.opt_ovfl_switch) { + inp.pfp_events[j] = cycle_event; + inp.pfp_event_count = j+1; + inp.pfp_dfl_plm = options.opt_plm; + e->n_events = j+1; + } else { + e->n_events = j; + inp.pfp_event_count = j; + } + + inp.pfp_dfl_plm = options.opt_plm; + + if (options.opt_is_system) + inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE; + + vbprintf("PMU programming for set %d\n", i); + /* + * let the library do the hard work + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("cannot configure events for set %d: %s\n", i, pfm_strerror(ret)); + + /* + * propagate from libpfm to kernel data structures + */ + for (j=0; j < outp.pfp_pmc_count; j++, num_pmcs++) { + all_pmcs[num_pmcs].reg_num = outp.pfp_pmcs[j].reg_num; + all_pmcs[num_pmcs].reg_value = outp.pfp_pmcs[j].reg_value; + all_pmcs[num_pmcs].reg_set = i; + } + for (j=0; j < outp.pfp_pmd_count; j++, num_pmds++) { + all_pmds[num_pmds].reg_num = outp.pfp_pmds[j].reg_num; + all_pmds[num_pmds].reg_set = i; + } + + /* + * setup event set properties + */ + all_sets[i].set_id = i; + + if (options.opt_ovfl_switch) { + + all_sets[i].set_flags = PFM_SETFL_OVFL_SWITCH; + + /* + * last counter contains our sampling counter + * + * the first overflow of our trigger counter does + * trigger a switch. + */ + all_pmds[num_pmds-1].reg_ovfl_switch_cnt = 1; + + /* + * We do this even in system-wide mode to ensure + * that the task does not start until we are ready + * to monitor. + * setup the sampling period + */ + all_pmds[num_pmds-1].reg_value = - options.smpl_period; + all_pmds[num_pmds-1].reg_short_reset = - options.smpl_period; + all_pmds[num_pmds-1].reg_long_reset = - options.smpl_period; + } else { + /* + * setup the switch timeout (in nanoseconds) + * Note that the actual timeout may be bigger than requested + * due to timer tick granularity. 
It is always advised to + * check the set_timeout value upon return from set creation. + * The structure will by then contain the actual timeout. + */ + all_sets[i].set_flags = PFM_SETFL_TIME_SWITCH; + all_sets[i].set_timeout = options.smpl_freq_ns; + } +#ifdef __ia64__ + if (options.opt_excl_intr && options.opt_is_system) + all_sets[i].set_flags |= PFM_ITA_SETFL_EXCL_INTR; + + if (options.opt_intr_only && options.opt_is_system) + all_sets[i].set_flags |= PFM_ITA_SETFL_INTR_ONLY; +#endif + } + + if (options.opt_is_system) + return measure_one_cpu(argv); + return measure_one_task(argv); + +} + +static struct option multiplex_options[]={ + { "help", 0, 0, 1}, + { "freq", 1, 0, 2 }, + { "kernel-level", 0, 0, 3 }, + { "user-level", 0, 0, 4 }, + { "version", 0, 0, 5 }, + { "set", 1, 0, 6 }, + { "session-timeout", 1, 0, 7 }, + { "attach-task", 1, 0, 8 }, + { "pin-cmd", 1, 0, 9 }, + { "cpu", 1, 0, 10 }, + + { "verbose", 0, &options.opt_verbose, 1 }, + { "debug", 0, &options.opt_debug, 1 }, + { "us-counter-format", 0, &options.opt_us_format, 1}, + { "ovfl-switch", 0, &options.opt_ovfl_switch, 1}, + { "system-wide", 0, &options.opt_is_system, 1}, +#ifdef __ia64__ + { "excl-intr", 0, &options.opt_excl_intr, 1}, + { "intr-only", 0, &options.opt_intr_only, 1}, +#endif + { "no-cmd-output", 0, &options.opt_no_cmd_out, 1}, + { "no-header", 0, &options.opt_no_header, 1}, + { 0, 0, 0, 0} +}; + +static void +generate_default_sets(void) +{ + event_set_t *es, *tail = NULL; + pfmlib_event_t events[2]; + size_t len; + char *name; + unsigned int i; + int ret; + + ret = pfm_get_cycle_event(&events[0]); + if (ret != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + ret = pfm_get_inst_retired_event(&events[1]); + if (ret != PFMLIB_SUCCESS) + fatal_error("cannot find instruction retired event\n"); + + pfm_get_max_event_name_len(&len); + + for (i=0; i < 2; i++) { + name = malloc(len+1); + if (name == NULL) + fatal_error("cannot allocate space for event name\n"); + + 
pfm_get_full_event_name(events+i, name, len+1); + + es = (event_set_t *)malloc(sizeof(event_set_t)); + if (es == NULL) + fatal_error("cannot allocate new event set\n"); + + memset(es, 0, sizeof(*es)); + + es->event_str = name; + es->next = NULL; + es->n_events = 0; + + if (all_events == NULL) + all_events = es; + else + tail->next = es; + tail = es; + } + num_sets = i; +} + +static void +print_usage(char **argv) +{ + printf("usage: %s [OPTIONS]... COMMAND\n", argv[0]); + + printf( "-h, --help\t\t\t\tdisplay this help and exit\n" + "-V, --version\t\t\t\toutput version information and exit\n" + "-u, --user-level\t\t\tmonitor at the user level for all events\n" + "-k, --kernel-level\t\t\tmonitor at the kernel level for all events\n" + "-c, --us-counter-format\t\t\tprint large counts with comma for thousands\n" + "-p pid, --attach-task pid\t\tattach to a running task\n" + "--set=ev1[,ev2,ev3,ev4,...]\t\tdescribe one set\n" + "--freq=number\t\t\t\tset set switching frequency in Hz\n" + "-c cpu, --cpu=cpu\t\t\tCPU to use for system-wide [default current]\n" + "--ovfl-switch\t\t\t\tuse overflow based multiplexing (default: time-based)\n" + "--verbose\t\t\t\tprint more information during execution\n" + "--system-wide\t\t\t\tuse system-wide (only one CPU at a time)\n" + "--excl-idle\t\t\t\texclude idle task(system-wide only)\n" + "--excl-intr\t\t\t\texclude interrupt triggered execution(system-wide only)\n" + "--intr-only\t\t\t\tinclude only interrupt triggered execution(system-wide only)\n" + "--session-timeout=sec\t\t\tsession timeout in seconds (system-wide only)\n" + "--no-cmd-output\t\t\t\toutput of executed command redirected to /dev/null\n" + "--pin-cmd=cpu\t\t\t\tpin executed command onto a specific cpu\n" + ); +} + +int +main(int argc, char **argv) +{ + char *endptr = NULL; + pfmlib_options_t pfmlib_options; + event_set_t *tail = NULL, *es; + unsigned long long_val; + struct timespec ts; + uint64_t f_ns, d, f_final; + int c, ret; + + options.pin_cmd_cpu = 
options.pin_cpu = -1; + + while ((c=getopt_long(argc, argv,"+vhkuVct:p:", multiplex_options, 0)) != -1) { + switch(c) { + case 0: continue; /* fast path for options */ + + case 'h': + case 1: + print_usage(argv); + exit(0); + + case 'v': options.opt_verbose = 1; + break; + case 'c': + options.opt_us_format = 1; + break; + case 2: + if (options.smpl_freq_hz) fatal_error("sampling frequency set twice\n"); + options.smpl_freq_hz = strtoull(optarg, &endptr, 10); + if (*endptr != '\0') + fatal_error("invalid frequency: %s\n", optarg); + break; + case 3: + case 'k': + options.opt_plm |= PFM_PLM0; + break; + case 4: + case 'u': + options.opt_plm |= PFM_PLM3; + break; + case 'V': + case 5: + printf("multiplex version " MULTIPLEX_VERSION " Date: " __DATE__ "\n" + "Copyright (C) 2004 Hewlett-Packard Company\n"); + exit(0); + case 6: + es = (event_set_t *)malloc(sizeof(event_set_t)); + if (es == NULL) fatal_error("cannot allocate new event set\n"); + + es->event_str = optarg; + es->next = NULL; + es->n_events = 0; + + if (all_events == NULL) + all_events = es; + else + tail->next = es; + tail = es; + num_sets++; + break; + case 't': + case 7: + if (options.session_timeout) fatal_error("too many timeouts\n"); + if (*optarg == '\0') fatal_error("--session-timeout needs an argument\n"); + long_val = strtoul(optarg,&endptr, 10); + if (*endptr != '\0') + fatal_error("invalid number of seconds for timeout: %s\n", optarg); + + if (long_val >= UINT_MAX) + fatal_error("timeout is too big, must be < %u\n", UINT_MAX); + + options.session_timeout = (unsigned int)long_val; + break; + case 'p': + case 8: + if (options.attach_pid) fatal_error("process to attach specified twice\n"); + options.attach_pid = (pid_t)atoi(optarg); + break; + case 9: + if (options.pin_cmd_cpu != -1) fatal_error("cannot pin command twice\n"); + options.pin_cmd_cpu = atoi(optarg); + break; + + case 10: + if (options.pin_cpu != -1) fatal_error("cannot pin to more than one cpu\n"); + options.pin_cpu = atoi(optarg); + 
break; + default: + fatal_error(""); /* just quit silently now */ + } + } + + if (optind == argc && options.opt_is_system == 0 && options.attach_pid == 0) + fatal_error("you need to specify a command to measure\n"); + + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = options.opt_verbose; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + if ((options.cpu_mhz = get_cpu_speed()) == 0) + fatal_error("can't get CPU speed\n"); + + + /* + * extract kernel clock resolution + */ + clock_getres(CLOCK_MONOTONIC, &ts); + options.clock_res = ts.tv_sec * 1000000000 + ts.tv_nsec; + + /* + * adjust frequency to be a multiple of clock resolution + * otherwise kernel will fail pfm_create_evtsets() + */ + + /* + * f_ns = run period in ns (1s/hz) + * default switch period is clock resolution + */ + if (options.smpl_freq_hz == 0) + f_ns = options.clock_res; + else + f_ns = 1000000000 / options.smpl_freq_hz; + + /* round up period in nanoseconds */ + d = (f_ns+options.clock_res-1) / options.clock_res; + + /* final period (multilple of clock_res */ + f_final = d * options.clock_res; + + if (options.opt_ovfl_switch) + printf("clock_res=%"PRIu64"ns(%.2fHz) ask period=%"PRIu64"ns(%.2fHz) get period=%"PRIu64"ns(%.2fHz)\n", + options.clock_res, + 1000000000.0 / options.clock_res, + f_ns, + 1000000000.0 / f_ns, + f_final, + 1000000000.0 / f_final); + + if (f_ns != f_final) + printf("Not getting the expected frequency due to kernel/hw limitation\n"); + + /* adjust period */ + options.smpl_freq_ns = f_final; + + /* not used */ + options.smpl_freq_hz = 1000000000 / f_final; + + if (options.opt_plm == 0) options.opt_plm = PFM_PLM3; + + if (num_sets 
== 0) + generate_default_sets(); + + return mainloop(argv+optind); +} diff --git a/src/libpfm-3.y/examples_v2.x/notify_self.c b/src/libpfm-3.y/examples_v2.x/notify_self.c new file mode 100644 index 0000000..bf8d68a --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/notify_self.c @@ -0,0 +1,331 @@ +/* + * notify_self.c - example of how you can use overflow notifications + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include + +#include "detect_pmcs.h" + +#define SMPL_PERIOD 1000000000ULL + +static volatile unsigned long notification_received; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +static pfarg_pmd_t pd[NUM_PMDS]; +static int ctx_fd; +static char *event1_name; + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +static void +warning(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static void +sigio_handler(int n) +{ + pfarg_msg_t msg; + int fd = ctx_fd; + int r; + + if (event1_name && pfm_read_pmds(fd, pd+1, 1) == -1) + fatal_error("pfm_read_pmds: %s", strerror(errno)); + +retry: + r = read(fd, &msg, sizeof(msg)); + if (r != sizeof(msg)) { + if(r == -1 && errno == EINTR) { + warning("read interrupted, retrying\n"); + goto retry; + } + fatal_error("cannot read overflow message: %s\n", strerror(errno)); + } + + if (msg.type != PFM_MSG_OVFL) + fatal_error("unexpected msg type: %d\n",msg.type); + + /* + * increment our notification counter + */ + notification_received++; + + /* + * XXX: risky to do printf() in signal handler! 
+ */ + if (event1_name) + printf("Notification %lu: %"PRIu64" %s ip=0x%llx\n", + notification_received, pd[1].reg_value, + event1_name, + (unsigned long long)msg.pfm_ovfl_msg.msg_ovfl_ip); + else + printf("Notification %lu ip=0x%llx\n", + notification_received, + (unsigned long long)msg.pfm_ovfl_msg.msg_ovfl_ip); + + /* + * And resume monitoring + */ + if (pfm_restart(fd) == -1) + fatal_error("pfm_restart: %d\n", errno); +} + +/* + * infinite loop waiting for notification to get out + */ +void +busyloop(void) +{ + /* + * busy loop to burn CPU cycles + */ + for(;notification_received < 3;) ; +} + +#define BPL (sizeof(uint64_t)<<3) +#define LBPL 6 + +static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) +{ + bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); +} + +int +main(int argc, char **argv) +{ + pfarg_ctx_t ctx; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i, num_counters; + size_t len; + int ret; + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Install the signal handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = sigio_handler; + sigaction (SIGIO, &act, 0); + + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + pfm_get_num_counters(&num_counters); + + if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + if 
(pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) + fatal_error("cannot find inst retired event\n"); + + i = 2; + + /* + * set the default privilege mode for all counters: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + if (i > num_counters) { + i = num_counters; + printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); + } + + inp.pfp_event_count = i; + + /* + * how many counters we use + */ + if (i > 1) { + + pfm_get_max_event_name_len(&len); + + event1_name = malloc(len+1); + if (event1_name == NULL) + fatal_error("cannot allocate event name\n"); + + pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); + } + + /* + * now create the context for self monitoring/per-task + */ + ctx_fd = pfm_create_context(&ctx, NULL, NULL, 0); + if (ctx_fd == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + /* + * build the pfp_unavail_pmcs bitmask by looking + * at what perfmon has available. It is not always + * the case that all PMU registers are actually available + * to applications. For instance, on IA-32 platforms, some + * registers may be reserved for the NMI watchdog timer. + * + * With this bitmap, the library knows which registers NOT to + * use. Of source, it is possible that no valid assignement may + * be possible if certina PMU registers are not available. + */ + detect_unavail_pmcs(ctx_fd, &inp.pfp_unavail_pmcs); + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. 
+ */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + /* + * We want to get notified when the counter used for our first + * event overflows + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * nothing to sample when only one counter + */ + if (inp.pfp_event_count > 1) + pfm_bv_set(pd[0].reg_reset_pmds, pd[1].reg_num); + + /* + * we arm the first counter, such that it will overflow + * after SMPL_PERIOD events have been observed + */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * Now program the registers + */ + if (pfm_write_pmcs(ctx_fd, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(ctx_fd, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * we want to monitor ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(ctx_fd, &load_args)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(ctx_fd, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now + */ + pfm_self_start(ctx_fd); + + busyloop(); + + pfm_self_stop(ctx_fd); + + /* + * free our context + */ + close(ctx_fd); + + if (event1_name) + free(event1_name); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/notify_self2.c b/src/libpfm-3.y/examples_v2.x/notify_self2.c new file mode 100644 index 0000000..375ef5e --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/notify_self2.c @@ -0,0 +1,356 @@ 
+/* + * notify_self2.c - example of how you can use overflow notifications with F_SETSIG + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. + */ +#ifndef _GNU_SOURCE + #define _GNU_SOURCE /* for getline */ +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "detect_pmcs.h" + +#define SMPL_PERIOD 1000000000ULL + +static volatile unsigned long notification_received; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +static pfarg_pmd_t pd[NUM_PMDS]; +static int ctx_fd; +static char *event1_name; + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +static void +warning(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static void +sigio_handler(int n, struct siginfo *info, void *data) +{ + pfarg_msg_t msg; + int fd; + int r =0; + + if (info == NULL) + fatal_error("info is NULL\n"); + + fd = info->si_fd; + + if (info->si_code < 0) + fatal_error("signal not generated by kernel\n"); + + if (info->si_code != POLL_IN) + fatal_error("unexpected si_code=0x%x\n", info->si_code); + + if (fd != ctx_fd) + fatal_error("handler does not get valid file descriptor\n"); + + if (event1_name && pfm_read_pmds(fd, pd+1, 1)) + fatal_error("pfm_read_pmds: %s", strerror(errno)); +retry: + r = read(fd, &msg, sizeof(msg)); + if (r != sizeof(msg)) { + if(r == -1 && errno == EINTR) { + warning("read interrupted, retrying\n"); + goto retry; + } + fatal_error("cannot read overflow message: %s\n", strerror(errno)); + } + + if (msg.type != PFM_MSG_OVFL) + fatal_error("unexpected msg type: %d\n",msg.type); + + /* + * increment our notification counter + */ + notification_received++; + + /* + * XXX: risky to do printf() in signal handler! 
+ */ + if (event1_name) + printf("Notification %lu: %"PRIu64" %s\n", notification_received, pd[1].reg_value, event1_name); + else + printf("Notification %lu\n", notification_received); + + /* + * And resume monitoring + */ + if (pfm_restart(fd)) + fatal_error("pfm_restart: %d\n", errno); +} + +/* + * infinite loop waiting for notification to get out + */ +void +busyloop(void) +{ + /* + * busy loop to burn CPU cycles + */ + for(;notification_received < 20;) ; +} + +#define BPL (sizeof(uint64_t)<<3) +#define LBPL 6 + +static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) +{ + bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); +} + +int +main(int argc, char **argv) +{ + pfarg_ctx_t ctx; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i, num_counters; + size_t len; + int ret; + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Install the signal handler (SIGIO) + * + * SA_SIGINFO required on some platforms + * to get siginfo passed to handler. 
+ */ + memset(&act, 0, sizeof(act)); + act.sa_sigaction = sigio_handler; + act.sa_flags = SA_SIGINFO; + sigaction (SIGIO, &act, 0); + + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + pfm_get_num_counters(&num_counters); + + if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) + fatal_error("cannot find inst retired event\n"); + + i = 2; + + /* + * set the default privilege mode for all counters: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + if (i > num_counters) { + i = num_counters; + printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); + } + + inp.pfp_event_count = i; + /* + * how many counters we use + */ + if (i > 1) { + pfm_get_max_event_name_len(&len); + event1_name = malloc(len+1); + if (event1_name == NULL) + fatal_error("cannot allocate event name\n"); + + pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); + } + + /* + * now create the context for self monitoring/per-task + */ + ctx_fd = pfm_create_context(&ctx, NULL, NULL, 0); + if (ctx_fd == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + /* + * build the pfp_unavail_pmcs bitmask by looking + * at what perfmon has available. It is not always + * the case that all PMU registers are actually available + * to applications. For instance, on IA-32 platforms, some + * registers may be reserved for the NMI watchdog timer. + * + * With this bitmap, the library knows which registers NOT to + * use. Of course, it is possible that no valid assignment may + * be possible if certain PMU registers are not available. 
+ */ + detect_unavail_pmcs(ctx_fd, &inp.pfp_unavail_pmcs); + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + */ + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + /* + * We want to get notified when the counter used for our first + * event overflows + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + if (inp.pfp_event_count > 1) + pfm_bv_set(pd[0].reg_reset_pmds, pd[1].reg_num); + + /* + * we arm the first counter, such that it will overflow + * after SMPL_PERIOD events have been observed + */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * Now program the registers + */ + if (pfm_write_pmcs(ctx_fd, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(ctx_fd, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * we want to monitor ourself + */ + load_args.load_pid = getpid(); + if (pfm_load_context(ctx_fd, &load_args)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(ctx_fd, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); +#ifndef _GNU_SOURCE +#error "this program must be compiled with -D_GNU_SOURCE" +#else + /* + * when you explicitly declare that you want a particular 
signal, + * even when you use the default signal, the kernel will send more + * information concerning the event to the signal handler. + * + * In particular, it will send the file descriptor from which the + * event is originating which can be quite useful when monitoring + * multiple tasks from a single thread. + */ + ret = fcntl(ctx_fd, F_SETSIG, SIGIO); + if (ret == -1) + fatal_error("cannot setsig: %s\n", strerror(errno)); +#endif + /* + * Let's roll now + */ + pfm_self_start(ctx_fd); + + busyloop(); + + pfm_self_stop(ctx_fd); + + /* + * free our context + */ + close(ctx_fd); + + if (event1_name) + free(event1_name); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/notify_self3.c b/src/libpfm-3.y/examples_v2.x/notify_self3.c new file mode 100644 index 0000000..8300b62 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/notify_self3.c @@ -0,0 +1,304 @@ +/* + * notify_self3.c - example of how you can use overflow notifications with no messages + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include + +#include "detect_pmcs.h" + +#define SMPL_PERIOD 1000000000ULL + +static volatile unsigned long notification_received; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +static pfarg_pmd_t pd[NUM_PMDS]; +static int ctx_fd; +static char *event1_name; + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +static void +sigio_handler(int n) +{ + if (pfm_read_pmds(ctx_fd, pd+1, 1) == -1) { + fatal_error("pfm_read_pmds: %s", strerror(errno)); + } + + /* + * we do not need to extract the overflow message, we know + * where it is coming from. + */ + /* + * increment our notification counter + */ + notification_received++; + + /* + * XXX: risky to do printf() in signal handler! 
+ */ + if (event1_name) + printf("Notification %02lu: %"PRIu64" %s\n", notification_received, pd[1].reg_value, event1_name); + else + printf("Notification %02lu:\n", notification_received); + + /* + * And resume monitoring + */ + if (pfm_restart(ctx_fd)) + fatal_error("error pfm_restart: %d\n", errno); +} + +/* + * infinite loop waiting for notification to get out + */ +void +busyloop(void) +{ + /* + * busy loop to burn CPU cycles + */ + for(;notification_received < 40;) ; +} + +int +main(int argc, char **argv) +{ + int ret; + pfarg_ctx_t ctx; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + size_t len; + unsigned int i, num_counters; + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Install the signal handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = sigio_handler; + sigaction (SIGIO, &act, 0); + + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + pfm_get_num_counters(&num_counters); + + if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) + fatal_error("cannot find inst retired event\n"); + + i = 2; + + /* + * set the default privilege mode for all counters: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + + if (i > num_counters) { + i = 
num_counters; + printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); + } + + /* + * how many counters we use + */ + inp.pfp_event_count = i; + + if (i > 1) { + pfm_get_max_event_name_len(&len); + event1_name = malloc(len+1); + if (event1_name == NULL) + fatal_error("cannot allocate event name\n"); + + pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); + } + + /* + * when we know we are self-monitoring and we have only one context, then + * when we get an overflow we know where it is coming from. Therefore we can + * save the call to the kernel to extract the notification message. By default, + * a message is generated. The queue of messages has a limited size, therefore + * it is important to clear the queue by reading the message on overflow. Failure + * to do so may result in a full queue and you will lose notification messages. + * + * With the PFM_FL_OVFL_NO_MSG, no message will be queued, but you will still get + * the signal. Similarly, the PFM_MSG_END will be generated. + */ + ctx.ctx_flags = PFM_FL_OVFL_NO_MSG; + + /* + * now create the context for self monitoring/per-task + */ + ctx_fd = pfm_create_context(&ctx, NULL, NULL, 0); + if (ctx_fd == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + /* + * build the pfp_unavail_pmcs bitmask by looking + * at what perfmon has available. It is not always + * the case that all PMU registers are actually available + * to applications. For instance, on IA-32 platforms, some + * registers may be reserved for the NMI watchdog timer. + * + * With this bitmap, the library knows which registers NOT to + * use. Of course, it is possible that no valid assignment may + * be possible if certain PMU registers are not available. 
+ */ + detect_unavail_pmcs(ctx_fd, &inp.pfp_unavail_pmcs); + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); + + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + */ + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + /* + * We want to get notified when the counter used for our first + * event overflows + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + if (inp.pfp_event_count > 1) + pd[0].reg_reset_pmds[0] |= 1UL << pd[1].reg_num; + + /* + * we arm the first counter, such that it will overflow + * after SMPL_PERIOD events have been observed + */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * Now program the registers + */ + if (pfm_write_pmcs(ctx_fd, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(ctx_fd, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * we want to monitor ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(ctx_fd, &load_args)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(ctx_fd, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now + */ + pfm_self_start(ctx_fd); + + busyloop(); + + pfm_self_stop(ctx_fd); + + /* + * free our context + */ + close(ctx_fd); + + 
if (event1_name) + free(event1_name); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/notify_self_fork.c b/src/libpfm-3.y/examples_v2.x/notify_self_fork.c new file mode 100644 index 0000000..2a11ef4 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/notify_self_fork.c @@ -0,0 +1,350 @@ +/* + * notify_self_fork.c - example of how you can use overflow notifications across fork + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * Modified by Phil Mucci to add the fork() + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include + +#include "detect_pmcs.h" + +#define SMPL_PERIOD 1000000000ULL + +static volatile unsigned long notification_received; + +#define NUM_PMCS PFMLIB_MAX_PMCS +#define NUM_PMDS PFMLIB_MAX_PMDS + +static pfarg_pmd_t pd[NUM_PMDS]; +static int ctx_fd; +static char *event1_name; + +static void fatal_error(char *fmt,...) __attribute__((noreturn)); + +static void +fatal_error(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + exit(1); +} + +static void +warning(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + +static void +sigio_handler(int n, struct siginfo *info, struct sigcontext *sc) +{ + pfarg_msg_t msg; + int fd = ctx_fd; + int r; + + if (fd != ctx_fd) + fatal_error("handler does not get valid file descriptor\n"); + + if (event1_name && pfm_read_pmds(fd, pd+1, 1) == -1) + fatal_error("pfm_read_pmds: %s", strerror(errno)); + +retry: + r = read(fd, &msg, sizeof(msg)); + if (r != sizeof(msg)) { + if(r == -1 && errno == EINTR) { + warning("read interrupted, retrying\n"); + goto retry; + } + fatal_error("cannot read overflow message: %s\n", strerror(errno)); + } + + if (msg.type != PFM_MSG_OVFL) + fatal_error("unexpected msg type: %d\n",msg.type); + + /* + * increment our notification counter + */ + notification_received++; + + /* + * XXX: risky to do printf() in signal handler! 
+ */ + if (event1_name) + printf("Notification %lu: %"PRIu64" %s ip=0x%llx\n", + notification_received, pd[1].reg_value, + event1_name, + (unsigned long long)msg.pfm_ovfl_msg.msg_ovfl_ip); + else + printf("Notification %lu ip=0x%llx\n", + notification_received, + (unsigned long long)msg.pfm_ovfl_msg.msg_ovfl_ip); + fflush(stdout); + + /* + * And resume monitoring + */ + if (pfm_restart(fd) == -1) + fatal_error("pfm_restart: %d\n", errno); +} + +/* + * infinite loop waiting for notification to get out + */ +void +busyloop(void) +{ + /* + * busy loop to burn CPU cycles + */ + for(;notification_received < 3;) ; + + /* + * forking causes the context to be shared with the child + * When the child terminates, it closes its descriptor. + * The parent's remains and notification keep on coming. + */ + if (fork() == 0) { + printf("child terminates\n"); + fflush(stdout); + exit(0); + } + printf("after fork\n"); + fflush(stdout); + for(;notification_received < 6;) ; +} + +#define BPL (sizeof(uint64_t)<<3) +#define LBPL 6 + +static inline void pfm_bv_set(uint64_t *bv, uint16_t rnum) +{ + bv[rnum>>LBPL] |= 1UL << (rnum&(BPL-1)); +} + +int +main(int argc, char **argv) +{ + pfarg_ctx_t ctx; + pfmlib_input_param_t inp; + pfmlib_output_param_t outp; + pfarg_pmc_t pc[NUM_PMCS]; + pfarg_load_t load_args; + pfmlib_options_t pfmlib_options; + struct sigaction act; + unsigned int i, num_counters; + size_t len; + int ret; + + /* + * pass options to library (optional) + */ + memset(&pfmlib_options, 0, sizeof(pfmlib_options)); + pfmlib_options.pfm_debug = 0; /* set to 1 for debug */ + pfmlib_options.pfm_verbose = 1; /* set to 1 for verbose */ + pfm_set_options(&pfmlib_options); + + /* + * Initialize pfm library (required before we can use it) + */ + ret = pfm_initialize(); + if (ret != PFMLIB_SUCCESS) + fatal_error("Cannot initialize library: %s\n", pfm_strerror(ret)); + + /* + * Install the signal handler (SIGIO) + */ + memset(&act, 0, sizeof(act)); + act.sa_handler = (sig_t)sigio_handler; 
+ sigaction (SIGIO, &act, 0); + + memset(pc, 0, sizeof(pc)); + memset(&ctx, 0, sizeof(ctx)); + memset(&load_args, 0, sizeof(load_args)); + memset(&inp,0, sizeof(inp)); + memset(&outp,0, sizeof(outp)); + + pfm_get_num_counters(&num_counters); + + if (pfm_get_cycle_event(&inp.pfp_events[0]) != PFMLIB_SUCCESS) + fatal_error("cannot find cycle event\n"); + + if (pfm_get_inst_retired_event(&inp.pfp_events[1]) != PFMLIB_SUCCESS) + fatal_error("cannot find inst retired event\n"); + + i = 2; + + /* + * set the default privilege mode for all counters: + * PFM_PLM3 : user level only + */ + inp.pfp_dfl_plm = PFM_PLM3; + if (i > num_counters) { + i = num_counters; + printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); + } + + inp.pfp_event_count = i; + + /* + * how many counters we use + */ + if (i > 1) { + + pfm_get_max_event_name_len(&len); + + event1_name = malloc(len+1); + if (event1_name == NULL) + fatal_error("cannot allocate event name\n"); + + pfm_get_full_event_name(&inp.pfp_events[1], event1_name, len+1); + } + + /* + * now create the context for self monitoring/per-task + */ + ctx_fd = pfm_create_context(&ctx, NULL, NULL, 0); + if (ctx_fd == -1) { + if (errno == ENOSYS) { + fatal_error("Your kernel does not have performance monitoring support!\n"); + } + fatal_error("Can't create PFM context %s\n", strerror(errno)); + } + + /* + * build the pfp_unavail_pmcs bitmask by looking + * at what perfmon has available. It is not always + * the case that all PMU registers are actually available + * to applications. For instance, on IA-32 platforms, some + * registers may be reserved for the NMI watchdog timer. + * + * With this bitmap, the library knows which registers NOT to + * use. Of course, it is possible that no valid assignment may + * be possible if certain PMU registers are not available. 
+ */ + detect_unavail_pmcs(ctx_fd, &inp.pfp_unavail_pmcs); + + /* + * let the library figure out the values for the PMCS + */ + if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) + fatal_error("Cannot configure events: %s\n", pfm_strerror(ret)); + + /* + * Now prepare the argument to initialize the PMDs and PMCS. + */ + + for (i=0; i < outp.pfp_pmc_count; i++) { + pc[i].reg_num = outp.pfp_pmcs[i].reg_num; + pc[i].reg_value = outp.pfp_pmcs[i].reg_value; + } + for (i=0; i < outp.pfp_pmd_count; i++) + pd[i].reg_num = outp.pfp_pmds[i].reg_num; + /* + * We want to get notified when the counter used for our first + * event overflows + */ + pd[0].reg_flags |= PFM_REGFL_OVFL_NOTIFY; + + /* + * nothing to sample when only one counter + */ + if (inp.pfp_event_count > 1) + pfm_bv_set(pd[0].reg_reset_pmds, pd[1].reg_num); + + /* + * we arm the first counter, such that it will overflow + * after SMPL_PERIOD events have been observed + */ + pd[0].reg_value = - SMPL_PERIOD; + pd[0].reg_long_reset = - SMPL_PERIOD; + pd[0].reg_short_reset = - SMPL_PERIOD; + + /* + * Now program the registers + */ + if (pfm_write_pmcs(ctx_fd, pc, outp.pfp_pmc_count)) + fatal_error("pfm_write_pmcs error errno %d\n",errno); + + if (pfm_write_pmds(ctx_fd, pd, outp.pfp_pmd_count)) + fatal_error("pfm_write_pmds error errno %d\n",errno); + + /* + * we want to monitor ourself + */ + load_args.load_pid = getpid(); + + if (pfm_load_context(ctx_fd, &load_args)) + fatal_error("pfm_load_context error errno %d\n",errno); + + /* + * setup asynchronous notification on the file descriptor + */ + ret = fcntl(ctx_fd, F_SETFL, fcntl(ctx_fd, F_GETFL, 0) | O_ASYNC); + if (ret == -1) + fatal_error("cannot set ASYNC: %s\n", strerror(errno)); + + /* + * get ownership of the descriptor + */ + ret = fcntl(ctx_fd, F_SETOWN, getpid()); + if (ret == -1) + fatal_error("cannot setown: %s\n", strerror(errno)); + + /* + * Let's roll now + */ + pfm_self_start(ctx_fd); + + busyloop(); + + pfm_self_stop(ctx_fd); 
+ + /* + * free our context + */ + close(ctx_fd); + + if (event1_name) + free(event1_name); + + return 0; +} diff --git a/src/libpfm-3.y/examples_v2.x/pfmsetup.c b/src/libpfm-3.y/examples_v2.x/pfmsetup.c new file mode 100644 index 0000000..67eaa40 --- /dev/null +++ b/src/libpfm-3.y/examples_v2.x/pfmsetup.c @@ -0,0 +1,1978 @@ +/* + * (C) Copyright IBM Corp. 2006 + * Contributed by Kevin Corry + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sellcopies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * + * pfmsetup + * + * Very simple command-line tool to drive the perfmon2 kernel API. Inspired + * by the dmsetup tool from device-mapper. + * + * Compile with: + * gcc -Wall -o pfmsetup pfmsetup.c -lpfm + * + * Run with: + * pfmsetup + * + * Available commands for the command_file: + * + * create_context [options] + * Create a new context for accessing the performance counters. Each new + * context automatically gets one event-set with an ID of 0. 
+ * - options: --system + * --no-overflow-msg + * --block-on-notify + * --sampler + * - : specify an integer that you want to associate with + * the new context for use in other commands. + * + * load_context + * Attach the specified context and event-set to the specified program. + * - : ID that you specified when creating the context. + * - : ID that you specified when creating an event-set + * within the given context. All contexts automatically + * have an event-set with ID of 0. + * - : ID that you specified when starting a program + * with the run_program command, or the number of + * the CPU to attach to for system-wide mode. + * + * unload_context + * Detach the specified context from the program that it's currently + * attached to. + * - : ID that you specified when creating the context. + * + * close_context + * Clean up the specified context. After this call, the context_id will no + * longer be valid. + * - : ID that you specified when creating the context. + * + * write_pmc < >+ + * Write one or more control register values. + * - : ID that you specified when creating the context. + * - : ID that you specified when creating an event-set + * within the given context. All contexts automatically + * have an event-set with ID of 0. + * - : ID of the desired control register. See the register + * mappings in the Perfmon kernel code to determine which + * PMC represents the control register you're interested in. + * - : Value to write into the specified PMC. You need to know + * the exact numeric value - no translations are done from + * event names or masks. Multiple PMC id/value pairs can + * be given in one write_pmc command. + * + * write_pmd < >+ + * Write one or more data register values. + * - : ID that you specified when creating the context. + * - : ID that you specified when creating an event-set + * within the given context. All contexts automatically + * have an event-set with ID of 0. + * - : ID of the desired data register. 
See the register + * mappings in the Perfmon kernel code to determine which + * PMD represents the control register you're interested in. + * - : Value to write into the specified PMD. Multiple PMD + * id/value pairs can be given in one write_pmd command. + * + * read_pmd + + * Read one or more data register values. + * - : ID that you specified when creating the context. + * - : ID that you specified when creating an event-set + * within the given context. All contexts automatically + * have an event-set with ID of 0. + * - : ID of the desired data register. See the register + * mappings in the Perfmon kernel code to determine which + * PMD represents the control register you're interested in. + * Multiple PMD IDs can be given in one read_pmd command. + * + * start_counting + * Start counting using the specified context and event-set. + * - : ID that you specified when creating the context. + * - : ID that you specified when creating an event-set + * within the given context. All contexts automatically + * have an event-set with ID of 0. + * + * stop_counting + * Stop counting on the specified context. + * - : ID that you specified when creating the context. + * + * restart_counting + * Restart counting on the specified context. + * - : ID that you specified when creating the context. + * + * create_eventset [options] + * Create a new event-set for an existing context. + * - options: --next-set + * --timeout + * --switch-on-overflow + * --exclude-idle + * - : ID that you specified when creating the context. + * - : specify an integer that you want to associate with + * the new event-set for use in other commands. + * + * delete_eventset + * Delete an existing event-set from an existing context. + * - : ID that you specified when creating the context. + * - : ID that you specified when creating the event-set. + * + * getinfo_eventset + * Display information about an event-set. + * - : ID that you specified when creating the context. 
+ * - : ID that you specified when creating the event-set. + * + * run_program + * First step in starting a program to monitor. In order to allow time to + * set up the counters to monitor the program, this command only forks a + * child process. It then suspends itself using ptrace. You must call the + * resume_program command to wake up the new child process and exec the + * desired program. + * - : Specify an integer that you want to associate with + * the program for use in other commands. + * - : Specify the program and its arguments + * exactly as you would on the command + * line. + * + * resume_program + * When a program is 'run', a child process is forked, but the child is + * ptrace'd before exec'ing the specified program. This gives you time to + * do any necessary setup to monitor the program. This resume_program + * command wakes up the child process and finishes exec'ing the desired + * program. If a context has been loaded and started for this program, + * then the counters will have actually started following this command. + * - : ID that you specified when starting the program. + * + * wait_on_program + * Wait for a program to complete and exit. After this call, the program_id + * will no longer be valid. + * - : ID that you specified when starting the program. + * + * sleep +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FALSE 0 +#define TRUE 1 + +#define WHITESPACE " \t\n" +#define MAX_TOKENS 32 +#define PFMSETUP_NAME "pfmsetup" + +#define USAGE(f, x...) printf(PFMSETUP_NAME ": USAGE: " f "\n" , ## x) +#define LOG_ERROR(f, x...) printf(PFMSETUP_NAME ": Error: %s: " f "\n", __FUNCTION__ , ## x) +#define LOG_INFO(f, x...) 
printf(PFMSETUP_NAME ": " f "\n" , ## x) + +typedef int (*command_fn)(int argc, char **argv); + +struct command { + const char *full_name; + const char *short_name; + const char *help; + command_fn fn; + int min_args; +}; + +struct context { + int id; + int fd; + int cpu; + pfarg_ctx_t ctx_arg; + pfm_dfl_smpl_arg_t smpl_arg; + struct event_set *event_sets; + struct context *next; +}; + +struct event_set { + int id; + struct event_set *next; +}; + +struct program { + int id; + pid_t pid; + struct program *next; +}; + + +/* Global list of all contexts that have been created. List is ordered by + * context id. Each context contains a list of event-sets belonging to that + * context, which is ordered by event-set id. + */ +static struct context *contexts = NULL; + + +/* Global list of all programs that have been started. + * List is ordered by program id. + */ +static struct program *programs = NULL; + + +/* + * Routines to manipulate the context, event-set, and program lists. + */ + +static struct context *find_context(int ctx_id) +{ + struct context *ctx; + + for (ctx = contexts; ctx; ctx = ctx->next) { + if (ctx->id == ctx_id) { + break; + } + } + + return ctx; +} + +static void insert_context(struct context *ctx) +{ + struct context **next_ctx; + + for (next_ctx = &contexts; + *next_ctx && (*next_ctx)->id < ctx->id; + next_ctx = &((*next_ctx)->next)) { + ; + } + + ctx->next = *next_ctx; + *next_ctx = ctx; +} + +static void remove_context(struct context *ctx) +{ + struct context **next_ctx; + + for (next_ctx = &contexts; *next_ctx; next_ctx = &((*next_ctx)->next)) { + if (*next_ctx == ctx) { + *next_ctx = ctx->next; + break; + } + } +} + +static struct event_set *find_event_set(struct context *ctx, int event_set_id) +{ + struct event_set *evt; + + for (evt = ctx->event_sets; evt; evt = evt->next) { + if (evt->id == event_set_id) { + break; + } + } + + return evt; +} + +static void insert_event_set(struct context *ctx, struct event_set *evt) +{ + struct event_set 
**next_evt; + + for (next_evt = &ctx->event_sets; + *next_evt && (*next_evt)->id < evt->id; + next_evt = &((*next_evt)->next)) { + ; + } + + evt->next = *next_evt; + *next_evt = evt; +} + +static void remove_event_set(struct context *ctx, struct event_set *evt) +{ + struct event_set **next_evt; + + for (next_evt = &ctx->event_sets; + *next_evt; + next_evt = &((*next_evt)->next)) { + if (*next_evt == evt) { + *next_evt = evt->next; + break; + } + } +} + +static struct program *find_program(int program_id) +{ + struct program *prog; + + for (prog = programs; prog; prog = prog->next) { + if (prog->id == program_id) { + break; + } + } + + return prog; +} + +static void insert_program(struct program *prog) +{ + struct program **next_prog; + + for (next_prog = &programs; + *next_prog && (*next_prog)->id < prog->id; + next_prog = &((*next_prog)->next)) { + ; + } + + prog->next = *next_prog; + *next_prog = prog; +} + +static void remove_program(struct program *prog) +{ + struct program **next_prog; + + for (next_prog = &programs; + *next_prog; + next_prog = &((*next_prog)->next)) { + if (*next_prog == prog) { + *next_prog = prog->next; + break; + } + } +} + +/** + * set_affinity + * + * When loading or unloading a system-wide context, we must pin the pfmsetup + * process to that CPU before making the system call. Also, get the current + * affinity and return it to the caller so we can change it back later. 
+ **/ +static int set_affinity(int cpu, cpu_set_t *old_cpu_set) +{ + cpu_set_t new_cpu_set; + int rc; + + rc = sched_getaffinity(0, sizeof(*old_cpu_set), old_cpu_set); + if (rc) { + rc = errno; + LOG_ERROR("Can't get current process affinity mask: %d\n", rc); + return rc; + } + + CPU_ZERO(&new_cpu_set); + CPU_SET(cpu, &new_cpu_set); + rc = sched_setaffinity(0, sizeof(new_cpu_set), &new_cpu_set); + if (rc) { + rc = errno; + LOG_ERROR("Can't set process affinity to CPU %d: %d\n", cpu, rc); + return rc; + } + + return 0; +} + +/** + * revert_affinity + * + * Reset the process affinity to the specified mask. + **/ +static void revert_affinity(cpu_set_t *old_cpu_set) +{ + int rc; + + rc = sched_setaffinity(0, sizeof(*old_cpu_set), old_cpu_set); + if (rc) { + /* Not a fatal error if we can't reset the affinity. */ + LOG_INFO("Can't revert process affinity to original value.\n"); + } +} + +/** + * create_context + * + * Arguments: [options] + * Options: --system + * --no-overflow-msg + * --block-on-notify + * --sampler + * + * Call the pfm_create_context system-call to create a new perfmon context. + * Add a new entry to the global 'contexts' list. 
+ **/ +static int create_context(int argc, char **argv) +{ + pfarg_ctx_t ctx_arg; + pfm_dfl_smpl_arg_t smpl_arg; + struct context *new_ctx = NULL; + char *sampler_name = NULL; + void *smpl_p; + int no_overflow_msg = FALSE; + int block_on_notify = FALSE; + int system_wide = FALSE; + int c, ctx_id = 0; + int rc; + size_t sz; + + struct option long_opts[] = { + {"sampler", required_argument, NULL, 1}, + {"system", no_argument, NULL, 2}, + {"no-overflow-msg", no_argument, NULL, 3}, + {"block-on-notify", no_argument, NULL, 4}, + {NULL, 0, NULL, 0} }; + + memset(&ctx_arg, 0, sizeof(ctx_arg)); + + opterr = 0; + optind = 0; + while ((c = getopt_long_only(argc, argv, "", + long_opts, NULL)) != EOF) { + switch (c) { + case 1: + sampler_name = optarg; + break; + case 2: + system_wide = TRUE; + break; + case 3: + no_overflow_msg = TRUE; + break; + case 4: + block_on_notify = TRUE; + break; + default: + LOG_ERROR("invalid option: %c", optopt); + rc = EINVAL; + goto error; + } + } + + if (argc < optind + 1) { + USAGE("create_context [options] "); + rc = EINVAL; + goto error; + } + + ctx_id = strtoul(argv[optind], NULL, 0); + if (ctx_id <= 0) { + LOG_ERROR("Invalid context ID (%s). Must be a positive " + "integer.", argv[optind]); + rc = EINVAL; + goto error; + } + + /* Make sure we don't already have a context with this ID. */ + new_ctx = find_context(ctx_id); + if (new_ctx) { + LOG_ERROR("Context with ID %d already exists.", ctx_id); + rc = EINVAL; + goto error; + } + + if (sampler_name) { + smpl_arg.buf_size = getpagesize(); + smpl_p = &smpl_arg; + sz = sizeof(smpl_arg); + } else { + smpl_p = NULL; + sz = 0; + } + + ctx_arg.ctx_flags = (system_wide ? PFM_FL_SYSTEM_WIDE : 0) | + (no_overflow_msg ? PFM_FL_OVFL_NO_MSG : 0) | + (block_on_notify ? 
PFM_FL_NOTIFY_BLOCK : 0); + + rc = pfm_create_context(&ctx_arg, sampler_name, smpl_p, sz); + if (rc == -1) { + rc = errno; + LOG_ERROR("pfm_create_context system call returned " + "an error: %d.", rc); + goto error; + } + + /* Allocate and initialize a new context structure and add it to the + * global list. Every new context automatically gets one event_set + * with an event ID of 0. + */ + new_ctx = calloc(1, sizeof(*new_ctx)); + if (!new_ctx) { + LOG_ERROR("Can't allocate structure for new context %d.", + ctx_id); + rc = ENOMEM; + goto error; + } + + new_ctx->event_sets = calloc(1, sizeof(*(new_ctx->event_sets))); + if (!new_ctx->event_sets) { + LOG_ERROR("Can't allocate event-set structure for new " + "context %d.", ctx_id); + rc = ENOMEM; + goto error; + } + + new_ctx->id = ctx_id; + new_ctx->fd = rc; + new_ctx->cpu = -1; + new_ctx->ctx_arg = ctx_arg; + new_ctx->smpl_arg = smpl_arg; + + insert_context(new_ctx); + + LOG_INFO("Created context %d with file-descriptor %d.", + new_ctx->id, new_ctx->fd); + + return 0; + +error: + if (new_ctx) { + close(new_ctx->fd); + free(new_ctx->event_sets); + free(new_ctx); + } + return rc; +} + +/** + * load_context + * + * Arguments: + * + * Call the pfm_load_context system-call to load a perfmon context into the + * system's performance monitoring unit. + **/ +static int load_context(int argc, char **argv) +{ + struct context *ctx; + struct event_set *evt; + struct program *prog; + pfarg_load_t load_arg; + cpu_set_t old_cpu_set; + int ctx_id, event_set_id, program_id; + int system_wide, rc; + + ctx_id = strtoul(argv[1], NULL, 0); + event_set_id = strtoul(argv[2], NULL, 0); + program_id = strtoul(argv[3], NULL, 0); + + if (ctx_id <= 0 || event_set_id < 0 || program_id < 0) { + LOG_ERROR("context ID, event-set ID, and program/CPU ID must " + "be positive integers."); + return EINVAL; + } + + /* Find the context, event_set, and program in the global lists. 
*/ + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + evt = find_event_set(ctx, event_set_id); + if (!evt) { + LOG_ERROR("Can't find event-set with ID %d in context %d.", + event_set_id, ctx_id); + return EINVAL; + } + load_arg.load_set = evt->id; + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide) { + if (ctx->cpu >= 0) { + LOG_ERROR("Trying to load context %d which is already " + "loaded on CPU %d.\n", ctx_id, ctx->cpu); + return EBUSY; + } + + rc = set_affinity(program_id, &old_cpu_set); + if (rc) { + return rc; + } + + /* Specify the CPU as the PID. */ + load_arg.load_pid = program_id; + } else { + prog = find_program(program_id); + if (!prog) { + LOG_ERROR("Can't find program with ID %d.", program_id); + return EINVAL; + } + load_arg.load_pid = prog->pid; + } + + rc = pfm_load_context(ctx->fd, &load_arg); + if (rc) { + rc = errno; + LOG_ERROR("pfm_load_context system call returned " + "an error: %d.", rc); + return rc; + } + + if (system_wide) { + /* Keep track of which CPU this context is loaded on. */ + ctx->cpu = program_id; + + revert_affinity(&old_cpu_set); + } + + LOG_INFO("Loaded context %d, event-set %d onto %s %d.", + ctx_id, event_set_id, system_wide ? "cpu" : "program", + program_id); + + return 0; +} + +/** + * unload_context + * + * Arguments: + * + * Call the pfm_unload_context system-call to unload a perfmon context from + * the system's performance monitoring unit. 
+ **/ +static int unload_context(int argc, char **argv) +{ + struct context *ctx; + cpu_set_t old_cpu_set; + int system_wide; + int ctx_id; + int rc; + + ctx_id = strtoul(argv[1], NULL, 0); + if (ctx_id <= 0) { + LOG_ERROR("context ID must be a positive integer."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide) { + if (ctx->cpu < 0) { + /* This context isn't loaded on any CPU. */ + LOG_ERROR("Trying to unload context %d that isn't " + "loaded.\n", ctx_id); + return EINVAL; + } + + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + return rc; + } + } + + rc = pfm_unload_context(ctx->fd); + if (rc) { + rc = errno; + LOG_ERROR("pfm_unload_context system call returned " + "an error: %d.", rc); + return rc; + } + + if (system_wide) { + ctx->cpu = -1; + revert_affinity(&old_cpu_set); + } + + LOG_INFO("Unloaded context %d.", ctx_id); + + return 0; +} + +/** + * close_context + * + * Arguments: + * + * Close the context's file descriptor, remove it from the global list, and + * free the context data structures. + **/ +static int close_context(int argc, char **argv) +{ + struct context *ctx; + struct event_set *evt, *next_evt; + int ctx_id; + + ctx_id = strtoul(argv[1], NULL, 0); + if (ctx_id <= 0) { + LOG_ERROR("context ID must be a positive integer."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + /* There's no perfmon system-call to delete a context. We simply call + * close on the file handle. 
+ */ + close(ctx->fd); + remove_context(ctx); + + for (evt = ctx->event_sets; evt; evt = next_evt) { + next_evt = evt->next; + free(evt); + } + free(ctx); + + LOG_INFO("Closed and freed context %d.", ctx_id); + + return 0; +} + +/** + * write_pmc + * + * Arguments: < >+ + * + * Write values to one or more control registers. + **/ +static int write_pmc(int argc, char **argv) +{ + struct context *ctx; + struct event_set *evt; + pfarg_pmc_t *pmc_args = NULL; + cpu_set_t old_cpu_set; + int ctx_id, event_set_id; + int pmc_id, num_pmcs; + unsigned long long pmc_value; + int system_wide, i, rc; + + ctx_id = strtoul(argv[1], NULL, 0); + event_set_id = strtoul(argv[2], NULL, 0); + + if (ctx_id <= 0 || event_set_id < 0) { + LOG_ERROR("context ID and event-set ID must be " + "positive integers."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + evt = find_event_set(ctx, event_set_id); + if (!evt) { + LOG_ERROR("Can't find event-set with ID %d in context %d.", + event_set_id, ctx_id); + return EINVAL; + } + + /* Allocate an array of PMC structures. 
*/ + num_pmcs = (argc - 3) / 2; + pmc_args = calloc(num_pmcs, sizeof(*pmc_args)); + if (!pmc_args) { + LOG_ERROR("Can't allocate PMC argument array."); + return ENOMEM; + } + + for (i = 0; i < num_pmcs; i++) { + pmc_id = strtoul(argv[3 + i*2], NULL, 0); + pmc_value = strtoull(argv[4 + i*2], NULL, 0); + + if (pmc_id < 0) { + LOG_ERROR("PMC ID must be a positive integer."); + rc = EINVAL; + goto out; + } + + pmc_args[i].reg_num = pmc_id; + pmc_args[i].reg_set = evt->id; + pmc_args[i].reg_value = pmc_value; + } + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide && ctx->cpu >= 0) { + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + goto out; + } + } + + rc = pfm_write_pmcs(ctx->fd, pmc_args, num_pmcs); + if (rc) { + rc = errno; + LOG_ERROR("pfm_write_pmcs system call returned " + "an error: %d.", rc); + goto out; + } + + if (system_wide && ctx->cpu >= 0) { + revert_affinity(&old_cpu_set); + } +out: + free(pmc_args); + return rc; +} + +/** + * write_pmd + * + * Arguments: < >+ + * + * FIXME: Add options for other fields in pfarg_pmd_t. + **/ +static int write_pmd(int argc, char **argv) +{ + struct context *ctx; + struct event_set *evt; + pfarg_pmd_t *pmd_args = NULL; + cpu_set_t old_cpu_set; + int ctx_id, event_set_id; + int pmd_id, num_pmds; + unsigned long long pmd_value; + int system_wide, i, rc; + + ctx_id = strtoul(argv[1], NULL, 0); + event_set_id = strtoul(argv[2], NULL, 0); + + if (ctx_id <= 0 || event_set_id < 0) { + LOG_ERROR("context ID and event-set ID must be " + "positive integers."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + evt = find_event_set(ctx, event_set_id); + if (!evt) { + LOG_ERROR("Can't find event-set with ID %d in context %d.", + event_set_id, ctx_id); + return EINVAL; + } + + /* Allocate an array of PMD structures. 
*/ + num_pmds = (argc - 3) / 2; + pmd_args = calloc(num_pmds, sizeof(*pmd_args)); + if (!pmd_args) { + LOG_ERROR("Can't allocate PMD argument array."); + return ENOMEM; + } + + for (i = 0; i < num_pmds; i++) { + pmd_id = strtoul(argv[3 + i*2], NULL, 0); + pmd_value = strtoull(argv[4 + i*2], NULL, 0); + + if (pmd_id < 0) { + LOG_ERROR("PMD ID must be a positive integer."); + rc = EINVAL; + goto out; + } + + pmd_args[i].reg_num = pmd_id; + pmd_args[i].reg_set = evt->id; + pmd_args[i].reg_value = pmd_value; + } + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide && ctx->cpu >= 0) { + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + goto out; + } + } + + rc = pfm_write_pmds(ctx->fd, pmd_args, num_pmds); + if (rc) { + rc = errno; + LOG_ERROR("pfm_write_pmds system call returned " + "an error: %d.", rc); + goto out; + } + + if (system_wide && ctx->cpu >= 0) { + revert_affinity(&old_cpu_set); + } +out: + free(pmd_args); + return rc; +} + +/** + * read_pmd + * + * Arguments: + + * + * FIXME: Add options for other fields in pfarg_pmd_t. + **/ +static int read_pmd(int argc, char **argv) +{ + struct context *ctx; + struct event_set *evt; + pfarg_pmd_t *pmd_args = NULL; + cpu_set_t old_cpu_set; + int ctx_id, event_set_id; + int pmd_id, num_pmds; + int system_wide, i, rc; + + ctx_id = strtoul(argv[1], NULL, 0); + event_set_id = strtoul(argv[2], NULL, 0); + + if (ctx_id <= 0 || event_set_id < 0) { + LOG_ERROR("context ID and event-set ID must be " + "positive integers."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + evt = find_event_set(ctx, event_set_id); + if (!evt) { + LOG_ERROR("Can't find event-set with ID %d in context %d.", + event_set_id, ctx_id); + return EINVAL; + } + + /* Allocate an array of PMD structures. 
*/ + num_pmds = argc - 3; + pmd_args = calloc(num_pmds, sizeof(*pmd_args)); + if (!pmd_args) { + LOG_ERROR("Can't allocate PMD argument array."); + return ENOMEM; + } + + for (i = 0; i < num_pmds; i++) { + pmd_id = strtoul(argv[3 + i], NULL, 0); + if (pmd_id < 0) { + LOG_ERROR("PMD ID must be a positive integer."); + rc = EINVAL; + goto out; + } + + pmd_args[i].reg_num = pmd_id; + pmd_args[i].reg_set = evt->id; + } + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide && ctx->cpu >= 0) { + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + goto out; + } + } + + rc = pfm_read_pmds(ctx->fd, pmd_args, num_pmds); + if (rc) { + rc = errno; + LOG_ERROR("pfm_read_pmds system call returned " + "an error: %d.", rc); + goto out; + } + + if (system_wide && ctx->cpu >= 0) { + revert_affinity(&old_cpu_set); + } +out: + free(pmd_args); + return rc; +} + +/** + * start_counting + * + * Arguments: + * + * Call the pfm_start system-call to start counting for a perfmon context + * that was previously stopped. 
+ **/ +static int start_counting(int argc, char **argv) +{ + pfarg_start_t start_arg; + struct context *ctx; + struct event_set *evt; + cpu_set_t old_cpu_set; + int ctx_id, event_set_id; + int system_wide, rc; + + memset(&start_arg, 0, sizeof(start_arg)); + + ctx_id = strtoul(argv[1], NULL, 0); + event_set_id = strtoul(argv[2], NULL, 0); + + if (ctx_id <= 0 || event_set_id < 0) { + LOG_ERROR("context ID and event-set ID must be " + "positive integers."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + evt = find_event_set(ctx, event_set_id); + if (!evt) { + LOG_ERROR("Can't find event-set with ID %d in context %d.", + event_set_id, ctx_id); + return EINVAL; + } + + start_arg.start_set = evt->id; + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide && ctx->cpu >= 0) { + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + return rc; + } + } + + rc = pfm_start(ctx->fd, &start_arg); + if (rc) { + rc = errno; + LOG_ERROR("pfm_start system call returned an error: %d.", rc); + return rc; + } + + if (system_wide && ctx->cpu >= 0) { + revert_affinity(&old_cpu_set); + } + + LOG_INFO("Started counting for context %d, event-set %d.", + ctx_id, event_set_id); + + return 0; +} + +/** + * stop_counting + * + * Arguments: + * + * Call the pfm_stop system-call to stop counting for a perfmon context that + * was previously loaded. 
+ **/ +static int stop_counting(int argc, char **argv) +{ + struct context *ctx; + cpu_set_t old_cpu_set; + int system_wide; + int ctx_id; + int rc; + + ctx_id = strtoul(argv[1], NULL, 0); + + if (ctx_id <= 0) { + LOG_ERROR("context ID must be a positive integer."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide && ctx->cpu >= 0) { + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + return rc; + } + } + + rc = pfm_stop(ctx->fd); + if (rc) { + rc = errno; + LOG_ERROR("pfm_stop system call returned an error: %d.", rc); + return rc; + } + + if (system_wide && ctx->cpu >= 0) { + revert_affinity(&old_cpu_set); + } + + LOG_INFO("Stopped counting for context %d.", ctx_id); + + return 0; +} + +/** + * restart_counting + * + * Arguments: + * + * Call the pfm_restart system-call to clear the data counters and start + * counting from zero for a perfmon context that was previously loaded. 
+ **/ +static int restart_counting(int argc, char **argv) +{ + struct context *ctx; + cpu_set_t old_cpu_set; + int system_wide; + int ctx_id; + int rc; + + ctx_id = strtoul(argv[1], NULL, 0); + + if (ctx_id <= 0) { + LOG_ERROR("context ID must be a positive integer."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide && ctx->cpu >= 0) { + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + return rc; + } + } + + rc = pfm_restart(ctx->fd); + if (rc) { + rc = errno; + LOG_ERROR("pfm_restart system call returned an error: %d.", rc); + return rc; + } + + if (system_wide && ctx->cpu >= 0) { + revert_affinity(&old_cpu_set); + } + + LOG_INFO("Restarted counting for context %d.", ctx_id); + + return 0; +} + +/** + * create_eventset + * + * Arguments: [options] + * Options: --timeout + * --switch-on-overflow + * --exclude-idle + **/ +static int create_eventset(int argc, char **argv) +{ + pfarg_setdesc_t set_arg; + struct context *ctx; + struct event_set *evt; + cpu_set_t old_cpu_set; + int ctx_id, event_set_id; + unsigned long timeout = 0; + int switch_on_overflow = FALSE; + int switch_on_timeout = FALSE; + int exclude_idle = FALSE; + int new_set = FALSE; + int system_wide,c, rc; + struct option long_opts[] = { + {"next-set", required_argument, NULL, 1}, + {"timeout", required_argument, NULL, 2}, + {"switch-on-overflow", no_argument, NULL, 3}, + {"exclude-idle", no_argument, NULL, 4}, + {NULL, 0, NULL, 0} }; + + memset(&set_arg, 0, sizeof(set_arg)); + + opterr = 0; + optind = 0; + while ((c = getopt_long_only(argc, argv, "", + long_opts, NULL)) != EOF) { + switch (c) { + case 1: + timeout = strtoul(optarg, NULL, 0); + if (!timeout) { + LOG_ERROR("timeout must be a " + "non-zero integer."); + return EINVAL; + } + switch_on_timeout = TRUE; + break; + case 2: + switch_on_overflow = TRUE; + break; + 
case 3: + exclude_idle = TRUE; + break; + default: + LOG_ERROR("invalid option: %c", optopt); + return EINVAL; + } + } + (void) exclude_idle; + + if (argc < optind + 2) { + USAGE("create_eventset [options] "); + return EINVAL; + } + + ctx_id = strtoul(argv[optind], NULL, 0); + event_set_id = strtoul(argv[optind+1], NULL, 0); + + if (ctx_id <= 0 || event_set_id < 0) { + LOG_ERROR("context ID and event-set ID must be " + "positive integers."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + if (switch_on_timeout && switch_on_overflow) { + LOG_ERROR("Cannot switch set %d (context %d) on both " + "timeout and overflow.", event_set_id, ctx_id); + return EINVAL; + } + + evt = find_event_set(ctx, event_set_id); + if (!evt) { + evt = calloc(1, sizeof(*evt)); + if (!evt) { + LOG_ERROR("Can't allocate structure for new event-set " + "%d in context %d.", event_set_id, ctx_id); + return ENOMEM; + } + evt->id = event_set_id; + new_set = TRUE; + } + + set_arg.set_id = event_set_id; + set_arg.set_timeout = timeout; /* in nanseconds */ + set_arg.set_flags = (switch_on_overflow ? PFM_SETFL_OVFL_SWITCH : 0) | + (switch_on_timeout ? PFM_SETFL_TIME_SWITCH : 0); + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide && ctx->cpu >= 0) { + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + free(evt); + return rc; + } + } + + rc = pfm_create_evtsets(ctx->fd, &set_arg, 1); + if (rc) { + rc = errno; + LOG_ERROR("pfm_create_evtsets system call returned " + "an error: %d.", rc); + free(evt); + return rc; + } + + if (system_wide && ctx->cpu >= 0) { + revert_affinity(&old_cpu_set); + } + + if (new_set) { + insert_event_set(ctx, evt); + } + + LOG_INFO("%s event-set %d in context %d.", + new_set ? 
"Created" : "Modified", event_set_id, ctx_id); + if (switch_on_timeout) { + LOG_INFO(" Actual timeout set to %llu ns.", + (unsigned long long)set_arg.set_timeout); + } + + return 0; +} + +/** + * delete_eventset + * + * Arguments: + **/ +static int delete_eventset(int argc, char **argv) +{ + pfarg_setdesc_t set_arg; + struct context *ctx; + struct event_set *evt; + cpu_set_t old_cpu_set; + int ctx_id, event_set_id; + int system_wide, rc; + + memset(&set_arg, 0, sizeof(set_arg)); + + ctx_id = strtoul(argv[1], NULL, 0); + event_set_id = strtoul(argv[2], NULL, 0); + + if (ctx_id <= 0 || event_set_id < 0) { + LOG_ERROR("context ID and event-set ID must be " + "positive integers."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + evt = find_event_set(ctx, event_set_id); + if (!evt) { + LOG_ERROR("Can't find event-set with ID %d in context %d.", + event_set_id, ctx_id); + return EINVAL; + } + + set_arg.set_id = evt->id; + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide && ctx->cpu >= 0) { + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + return rc; + } + } + + rc = pfm_delete_evtsets(ctx->fd, &set_arg, 1); + if (rc) { + rc = errno; + LOG_ERROR("pfm_delete_evtsets system call returned " + "an error: %d.", rc); + return rc; + } + + if (system_wide && ctx->cpu >= 0) { + revert_affinity(&old_cpu_set); + } + + remove_event_set(ctx, evt); + free(evt); + + LOG_INFO("Deleted event-set %d from context %d.", event_set_id, ctx_id); + + return 0; +} + +/** + * getinfo_eventset + * + * Arguments: + **/ +static int getinfo_eventset(int argc, char **argv) +{ + pfarg_setinfo_t set_arg; + struct context *ctx; + struct event_set *evt; + cpu_set_t old_cpu_set; + int ctx_id, event_set_id; + int system_wide, rc; + + memset(&set_arg, 0, sizeof(set_arg)); + + ctx_id = strtoul(argv[1], NULL, 0); + event_set_id = strtoul(argv[2], NULL, 0); + + if (ctx_id <= 0 
|| event_set_id < 0) { + LOG_ERROR("context ID and event-set ID must be " + "positive integers."); + return EINVAL; + } + + ctx = find_context(ctx_id); + if (!ctx) { + LOG_ERROR("Can't find context with ID %d.", ctx_id); + return EINVAL; + } + + evt = find_event_set(ctx, event_set_id); + if (!evt) { + LOG_ERROR("Can't find event-set with ID %d in context %d.", + event_set_id, ctx_id); + return EINVAL; + } + + set_arg.set_id = evt->id; + + system_wide = ctx->ctx_arg.ctx_flags & PFM_FL_SYSTEM_WIDE; + if (system_wide && ctx->cpu >= 0) { + rc = set_affinity(ctx->cpu, &old_cpu_set); + if (rc) { + return rc; + } + } + + rc = pfm_getinfo_evtsets(ctx->fd, &set_arg, 1); + if (rc) { + rc = errno; + LOG_ERROR("pfm_getinfo_evtsets system call returned " + "an error: %d.", rc); + return rc; + } + + if (system_wide && ctx->cpu >= 0) { + revert_affinity(&old_cpu_set); + } + + LOG_INFO("Got info for event-set %d in context %d.", event_set_id, ctx_id); + LOG_INFO(" Flags: 0x%x", set_arg.set_flags); + LOG_INFO(" Runs: %llu", (unsigned long long)set_arg.set_runs); + LOG_INFO(" Timeout: %"PRIu64, set_arg.set_timeout); + + return 0; +} + +/** + * run_program + * + * Arguments: + * + * Start the specified program. After fork'ing but before exec'ing, ptrace + * the child so it will remain suspended until a corresponding resume_program + * command. We do this so we can load a context for the program before it + * actually starts running. This logic is taken from the task.c example in + * the libpfm source code tree. + **/ +static int run_program(int argc, char **argv) +{ + struct program *prog; + int program_id; + pid_t pid; + int rc; + + program_id = strtoul(argv[1], NULL, 0); + if (program_id <= 0) { + LOG_ERROR("program ID must be a positive integer."); + return EINVAL; + } + + /* Make sure we haven't already started a program with this ID. 
*/ + prog = find_program(program_id); + if (prog) { + LOG_ERROR("Program with ID %d already exists.", program_id); + return EINVAL; + } + + prog = calloc(1, sizeof(*prog)); + if (!prog) { + LOG_ERROR("Can't allocate new program structure to run '%s'.", + argv[2]); + return ENOMEM; + } + + prog->id = program_id; + + pid = fork(); + if (pid == -1) { + /* Error fork'ing. */ + LOG_ERROR("Unable to fork child process."); + return EINVAL; + + } else if (!pid) { + /* Child */ + + /* This will cause the program to stop before executing the + * first user level instruction. We can only load a context + * if the program is in the STOPPED state. This child + * process will sit here until we've process a resume_program + * command. + */ + rc = ptrace(PTRACE_TRACEME, 0, NULL, NULL); + if (rc) { + rc = errno; + LOG_ERROR("Error ptrace'ing '%s': %d", argv[2], rc); + exit(rc); + } + + execvp(argv[2], argv + 2); + + rc = errno; + LOG_ERROR("Error exec'ing '%s': %d", argv[2], rc); + exit(rc); + } + + /* Parent */ + prog->pid = pid; + insert_program(prog); + + /* Wait for the child to exec. */ + waitpid(pid, &rc, WUNTRACED); + + /* Check if process exited early. */ + if (WIFEXITED(rc)) { + LOG_ERROR("Program '%s' exited too early with status " + "%d", argv[2], WEXITSTATUS(rc)); + return WEXITSTATUS(rc); + } + + LOG_INFO("Started program %d: '%s'.", program_id, argv[2]); + + return 0; +} + +/** + * resume_program + * + * Arguments: + * + * A program started with run_program must be 'resumed' before it actually + * begins running. This allows us to load a context to the process and + * start the counters before the program executes any code. 
+ **/ +static int resume_program(int argc, char **argv) +{ + struct program *prog; + int program_id; + int rc; + + program_id = strtoul(argv[1], NULL, 0); + if (program_id <= 0) { + LOG_ERROR("program ID must be a positive integer."); + return EINVAL; + } + + prog = find_program(program_id); + if (!prog) { + LOG_ERROR("Can't find program with ID %d.", program_id); + return EINVAL; + } + + /* Call ptrace to resume execution of the process. If a context has + * been loaded and the counters started, this is where monitoring + * is effectively activated. + */ + rc = ptrace(PTRACE_DETACH, prog->pid, NULL, 0); + if (rc) { + rc = errno; + LOG_ERROR("Error detaching program %d.\n", prog->id); + return rc; + } + + LOG_INFO("Resumed program %d.", program_id); + + return 0; +} + +/** + * wait_on_program + * + * Arguments: + * + * Wait for the specified program to complete and exit. + **/ +static int wait_on_program(int argc, char **argv) +{ + struct program *prog; + int program_id; + int rc; + + program_id = strtoul(argv[1], NULL, 0); + if (program_id <= 0) { + LOG_ERROR("program ID must be a positive integer."); + return EINVAL; + } + + prog = find_program(program_id); + if (!prog) { + LOG_ERROR("Can't find program with ID %d.", program_id); + return EINVAL; + } + + waitpid(prog->pid, &rc, 0); + + /* The program has exitted, but if there was a context loaded on that + * process, it will still have the latest counts available to read. + */ + + remove_program(prog); + free(prog); + + LOG_INFO("Waited for program %d to complete.", program_id); + + return 0; +} + +/** + * _sleep + * + * Arguments: