/* $Id: p4.c,v 1.7 2004/02/20 21:33:25 mikpe Exp $
*
* pipe stdout through 'sort -u' to see:
* - which ESCRs are usable, and the events they support
* - which COUNTERs/CCCRs are usable, and the usable ESCRs they support
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
/* Element count of array A.  A must be a real array object, not a pointer. */
#define ARRAY_SIZE(A) (sizeof(A) / sizeof(*(A)))
/*
 * The 18 40-bit counters.
 *
 * The array index is the counter number used throughout this file
 * (see counter_msr() and counter_name()).  The trailing comment on each
 * entry lists the ESCRs with defined events that the counter can be
 * paired with.
 */
struct counter {
    const char name[16];    /* symbolic name of the counter */
};
static const struct counter counters[18] = {
    /* nr     name                    available ESCRs */
    [ 0] = { "BPU_COUNTER0" },   /* {BPU,BSU,FSB,ITLB,MOB,PMH}_ESCR0 */
    [ 1] = { "BPU_COUNTER1" },   /* {BPU,BSU,FSB,ITLB,MOB,PMH}_ESCR0 */
    [ 2] = { "BPU_COUNTER2" },   /* {BPU,BSU,FSB,ITLB,MOB,PMH}_ESCR1 */
    [ 3] = { "BPU_COUNTER3" },   /* {BPU,BSU,FSB,ITLB,MOB,PMH}_ESCR1 */
    [ 4] = { "MS_COUNTER0" },    /* {MS,TBPU,TC}_ESCR0 */
    [ 5] = { "MS_COUNTER1" },    /* {MS,TBPU,TC}_ESCR0 */
    [ 6] = { "MS_COUNTER2" },    /* {MS,TBPU,TC}_ESCR1 */
    [ 7] = { "MS_COUNTER3" },    /* {MS,TBPU,TC}_ESCR1 */
    [ 8] = { "FLAME_COUNTER0" }, /* {DAC,FIRM,SAAT}_ESCR0 */
    [ 9] = { "FLAME_COUNTER1" }, /* {DAC,FIRM,SAAT}_ESCR0 */
    [10] = { "FLAME_COUNTER2" }, /* {DAC,FIRM,SAAT}_ESCR1 */
    [11] = { "FLAME_COUNTER3" }, /* {DAC,FIRM,SAAT}_ESCR1 */
    [12] = { "IQ_COUNTER0" },    /* ALF_ESCR0, CRU_ESCR0, CRU_ESCR2, RAT_ESCR0 */
    [13] = { "IQ_COUNTER1" },    /* ALF_ESCR0, CRU_ESCR0, CRU_ESCR2, RAT_ESCR0 */
    [14] = { "IQ_COUNTER2" },    /* ALF_ESCR1, CRU_ESCR1, CRU_ESCR3, RAT_ESCR1 */
    [15] = { "IQ_COUNTER3" },    /* ALF_ESCR1, CRU_ESCR1, CRU_ESCR3, RAT_ESCR1 */
    [16] = { "IQ_COUNTER4" },    /* ALF_ESCR0, CRU_ESCR0, CRU_ESCR2, RAT_ESCR0 */
    [17] = { "IQ_COUNTER5" },    /* ALF_ESCR1, CRU_ESCR1, CRU_ESCR3, RAT_ESCR1 */
};
/*
 * Return the MSR address of counter number counter_num.
 * The counter MSRs are numbered consecutively starting at 0x300.
 * counter_num must be a valid index into counters[] (asserted).
 */
static unsigned int counter_msr(unsigned int counter_num)
{
    const unsigned int first_counter_msr = 0x300;

    assert(counter_num < ARRAY_SIZE(counters));
    return first_counter_msr + counter_num;
}
static const char *counter_name(unsigned int counter_num)
{
assert(counter_num < ARRAY_SIZE(counters));
return counters[counter_num].name;
}
/*
 * The 18 counter configuration control registers (CCCRs).
 * They are in a one-to-one relation with the counters:
 * cccrs[i] configures counters[i].
 */
struct cccr {
    const char name[16];    /* symbolic name of the CCCR */
};
static const struct cccr cccrs[18] = {
    [ 0] = { "BPU_CCCR0" },
    [ 1] = { "BPU_CCCR1" },
    [ 2] = { "BPU_CCCR2" },
    [ 3] = { "BPU_CCCR3" },
    [ 4] = { "MS_CCCR0" },
    [ 5] = { "MS_CCCR1" },
    [ 6] = { "MS_CCCR2" },
    [ 7] = { "MS_CCCR3" },
    [ 8] = { "FLAME_CCCR0" },
    [ 9] = { "FLAME_CCCR1" },
    [10] = { "FLAME_CCCR2" },
    [11] = { "FLAME_CCCR3" },
    [12] = { "IQ_CCCR0" },
    [13] = { "IQ_CCCR1" },
    [14] = { "IQ_CCCR2" },
    [15] = { "IQ_CCCR3" },
    [16] = { "IQ_CCCR4" },
    [17] = { "IQ_CCCR5" },
};
/*
 * Return the MSR address of CCCR number cccr_num.
 * The CCCR MSRs are numbered consecutively starting at 0x360.
 * cccr_num must be a valid index into cccrs[] (asserted).
 */
static unsigned int cccr_msr(unsigned int cccr_num)
{
    const unsigned int first_cccr_msr = 0x360;

    assert(cccr_num < ARRAY_SIZE(cccrs));
    return first_cccr_msr + cccr_num;
}
static const char *cccr_name(unsigned int cccr_num)
{
assert(cccr_num < ARRAY_SIZE(cccrs));
return cccrs[cccr_num].name;
}
/*
* The 45 event selection control registers (ESCRs).
*
* The enumerator ORDER is significant: escr_msr() derives each ESCR's
* MSR address from its enumerator value, in four contiguous runs:
*   BSU_ESCR0 .. SSU_ESCR0  ->  0x3A0 + n
*   MS_ESCR0  .. TC_ESCR1   ->  0x3C0 + (n - MS_ESCR0)
*   IX_ESCR0  .. CRU_ESCR3  ->  0x3C8 + (n - IX_ESCR0)
*   CRU_ESCR4 .. CRU_ESCR5  ->  0x3E0 + (n - CRU_ESCR4)
* Do not reorder or insert enumerators without updating escr_msr()
* and the escrs[] name table.
*
* The comment on each enumerator lists the events (see events[]) that
* can be selected through that ESCR; UNUSED means no event in events[]
* refers to it.
*/
enum escr_num {
BSU_ESCR0, /* BSQ_allocation, BSQ_cache_reference */
BSU_ESCR1, /* bsq_active_entries, BSQ_cache_reference */
FSB_ESCR0, /* FSB_data_activity, IOQ_allocation, b2b_cycles, bnr, global_power_events, response, snoop */
FSB_ESCR1, /* FSB_data_activity, IOQ_active_entries, IOQ_allocation, b2b_cycles, bnr, global_power_events, response, snoop */
FIRM_ESCR0, /* 128bit_MMX_uop, 64bit_MMX_uop, SSE_input_assist, packed_DP_uop, packed_SP_uop, scalar_DP_uop, scalar_SP_uop, x87_FP_uop, x87_SIMD_moves_uop */
FIRM_ESCR1, /* 128bit_MMX_uop, 64bit_MMX_uop, SSE_input_assist, packed_DP_uop, packed_SP_uop, scalar_DP_uop, scalar_SP_uop, x87_FP_uop, x87_SIMD_moves_uop */
FLAME_ESCR0,/* UNUSED */
FLAME_ESCR1,/* UNUSED */
DAC_ESCR0, /* WC_Buffer, memory_cancel */
DAC_ESCR1, /* WC_Buffer, memory_cancel */
MOB_ESCR0, /* MOB_load_replay */
MOB_ESCR1, /* MOB_load_replay */
PMH_ESCR0, /* page_walk_type */
PMH_ESCR1, /* page_walk_type */
SAAT_ESCR0, /* load_port_replay, memory_complete, store_port_replay */
SAAT_ESCR1, /* load_port_replay, memory_complete, store_port_replay */
U2L_ESCR0, /* UNUSED */
U2L_ESCR1, /* UNUSED */
BPU_ESCR0, /* BPU_fetch_request */
BPU_ESCR1, /* BPU_fetch_request */
IS_ESCR0, /* UNUSED */
IS_ESCR1, /* UNUSED */
ITLB_ESCR0, /* ITLB_reference */
ITLB_ESCR1, /* ITLB_reference */
CRU_ESCR0, /* instr_retired, mispred_branch_retired, uops_retired, instr_completed */
CRU_ESCR1, /* instr_retired, mispred_branch_retired, uops_retired, instr_completed */
IQ_ESCR0, /* UNUSED; available in family 0x0F models 1 and 2, removed from later models */
IQ_ESCR1, /* UNUSED; available in family 0x0F models 1 and 2, removed from later models */
RAT_ESCR0, /* uop_type */
RAT_ESCR1, /* uop_type */
SSU_ESCR0, /* UNUSED */
MS_ESCR0, /* tc_ms_xfer, uop_queue_writes */
MS_ESCR1, /* tc_ms_xfer, uop_queue_writes */
TBPU_ESCR0, /* retired_branch_type, retired_mispred_branch_type */
TBPU_ESCR1, /* retired_branch_type, retired_mispred_branch_type */
TC_ESCR0, /* TC_deliver_mode, TC_misc */
TC_ESCR1, /* TC_deliver_mode, TC_misc */
IX_ESCR0, /* UNUSED */
IX_ESCR1, /* UNUSED */
ALF_ESCR0, /* resource_stall */
ALF_ESCR1, /* resource_stall */
CRU_ESCR2, /* branch_retired, execution_event, front_end_event, machine_clear, replay_event, x87_assist */
CRU_ESCR3, /* branch_retired, execution_event, front_end_event, machine_clear, replay_event, x87_assist */
CRU_ESCR4, /* UNUSED */
CRU_ESCR5, /* UNUSED */
};
struct escr {
const char name[16];
};
static const struct escr escrs[45] = {
[BSU_ESCR0] { "BSU_ESCR0" },
[BSU_ESCR1] { "BSU_ESCR1" },
[FSB_ESCR0] { "FSB_ESCR0" },
[FSB_ESCR1] { "FSB_ESCR1" },
[FIRM_ESCR0] { "FIRM_ESCR0" },
[FIRM_ESCR1] { "FIRM_ESCR1" },
[FLAME_ESCR0] { "FLAME_ESCR0" },
[FLAME_ESCR1] { "FLAME_ESCR1" },
[DAC_ESCR0] { "DAC_ESCR0" },
[DAC_ESCR1] { "DAC_ESCR1" },
[MOB_ESCR0] { "MOB_ESCR0" },
[MOB_ESCR1] { "MOB_ESCR1" },
[PMH_ESCR0] { "PMH_ESCR0" },
[PMH_ESCR1] { "PMH_ESCR1" },
[SAAT_ESCR0] { "SAAT_ESCR0" },
[SAAT_ESCR1] { "SAAT_ESCR1" },
[U2L_ESCR0] { "U2L_ESCR0" },
[U2L_ESCR1] { "U2L_ESCR1" },
[BPU_ESCR0] { "BPU_ESCR0" },
[BPU_ESCR1] { "BPU_ESCR1" },
[IS_ESCR0] { "IS_ESCR0" },
[IS_ESCR1] { "IS_ESCR1" },
[ITLB_ESCR0] { "ITLB_ESCR0" },
[ITLB_ESCR1] { "ITLB_ESCR1" },
[CRU_ESCR0] { "CRU_ESCR0" },
[CRU_ESCR1] { "CRU_ESCR1" },
[IQ_ESCR0] { "IQ_ESCR0" },
[IQ_ESCR1] { "IQ_ESCR1" },
[RAT_ESCR0] { "RAT_ESCR0" },
[RAT_ESCR1] { "RAT_ESCR1" },
[SSU_ESCR0] { "SSU_ESCR0" },
[MS_ESCR0] { "MS_ESCR0" },
[MS_ESCR1] { "MS_ESCR1" },
[TBPU_ESCR0] { "TBPU_ESCR0" },
[TBPU_ESCR1] { "TBPU_ESCR1" },
[TC_ESCR0] { "TC_ESCR0" },
[TC_ESCR1] { "TC_ESCR1" },
[IX_ESCR0] { "IX_ESCR0" },
[IX_ESCR1] { "IX_ESCR1" },
[ALF_ESCR0] { "ALF_ESCR0" },
[ALF_ESCR1] { "ALF_ESCR1" },
[CRU_ESCR2] { "CRU_ESCR2" },
[CRU_ESCR3] { "CRU_ESCR3" },
[CRU_ESCR4] { "CRU_ESCR4" },
[CRU_ESCR5] { "CRU_ESCR5" },
};
static unsigned int escr_msr(enum escr_num escr_num)
{
assert(escr_num < ARRAY_SIZE(escrs));
if( escr_num >= CRU_ESCR4 )
return 0x3E0 + (escr_num - CRU_ESCR4);
if( escr_num >= IX_ESCR0 )
return 0x3C8 + (escr_num - IX_ESCR0);
if( escr_num >= MS_ESCR0 )
return 0x3C0 + (escr_num - MS_ESCR0);
return 0x3A0 + escr_num;
}
static const char *escr_name(enum escr_num escr_num)
{
assert(escr_num < ARRAY_SIZE(escrs));
return escrs[escr_num].name;
}
/*
 * The map from CCCR number and ESCR select value to ESCR MSR address.
 * This is the manual's original uncompacted table.
 *
 * First index: CCCR/counter number (0x00..0x11).
 * Second index: ESCR select value (0..7).
 * Entries not listed are zero, meaning "no ESCR for this combination".
 * The trailing comments name the ESCR at each address, per escr_msr().
 */
static const unsigned short p4_cccr_escr_map_orig[18][8] = {
    [0x00] = { [7] = 0x3A0,    /* BSU_ESCR0 */
               [6] = 0x3A2,    /* FSB_ESCR0 */
               [2] = 0x3AA,    /* MOB_ESCR0 */
               [4] = 0x3AC,    /* PMH_ESCR0 */
               [0] = 0x3B2,    /* BPU_ESCR0 */
               [1] = 0x3B4,    /* IS_ESCR0 */
               [3] = 0x3B6,    /* ITLB_ESCR0 */
               [5] = 0x3C8, }, /* IX_ESCR0 */
    [0x01] = { [7] = 0x3A0,
               [6] = 0x3A2,
               [2] = 0x3AA,
               [4] = 0x3AC,
               [0] = 0x3B2,
               [1] = 0x3B4,
               [3] = 0x3B6,
               [5] = 0x3C8, },
    [0x02] = { [7] = 0x3A1,    /* BSU_ESCR1 */
               [6] = 0x3A3,    /* FSB_ESCR1 */
               [2] = 0x3AB,    /* MOB_ESCR1 */
               [4] = 0x3AD,    /* PMH_ESCR1 */
               [0] = 0x3B3,    /* BPU_ESCR1 */
               [1] = 0x3B5,    /* IS_ESCR1 */
               [3] = 0x3B7,    /* ITLB_ESCR1 */
               [5] = 0x3C9, }, /* IX_ESCR1 */
    [0x03] = { [7] = 0x3A1,
               [6] = 0x3A3,
               [2] = 0x3AB,
               [4] = 0x3AD,
               [0] = 0x3B3,
               [1] = 0x3B5,
               [3] = 0x3B7,
               [5] = 0x3C9, },
    [0x04] = { [0] = 0x3C0,    /* MS_ESCR0 */
               [2] = 0x3C2,    /* TBPU_ESCR0 */
               [1] = 0x3C4, }, /* TC_ESCR0 */
    [0x05] = { [0] = 0x3C0,
               [2] = 0x3C2,
               [1] = 0x3C4, },
    [0x06] = { [0] = 0x3C1,    /* MS_ESCR1 */
               [2] = 0x3C3,    /* TBPU_ESCR1 */
               [1] = 0x3C5, }, /* TC_ESCR1 */
    [0x07] = { [0] = 0x3C1,
               [2] = 0x3C3,
               [1] = 0x3C5, },
    [0x08] = { [1] = 0x3A4,    /* FIRM_ESCR0 */
               [0] = 0x3A6,    /* FLAME_ESCR0 */
               [5] = 0x3A8,    /* DAC_ESCR0 */
               [2] = 0x3AE,    /* SAAT_ESCR0 */
               [3] = 0x3B0, }, /* U2L_ESCR0 */
    [0x09] = { [1] = 0x3A4,
               [0] = 0x3A6,
               [5] = 0x3A8,
               [2] = 0x3AE,
               [3] = 0x3B0, },
    [0x0A] = { [1] = 0x3A5,    /* FIRM_ESCR1 */
               [0] = 0x3A7,    /* FLAME_ESCR1 */
               [5] = 0x3A9,    /* DAC_ESCR1 */
               [2] = 0x3AF,    /* SAAT_ESCR1 */
               [3] = 0x3B1, }, /* U2L_ESCR1 */
    [0x0B] = { [1] = 0x3A5,
               [0] = 0x3A7,
               [5] = 0x3A9,
               [2] = 0x3AF,
               [3] = 0x3B1, },
    [0x0C] = { [4] = 0x3B8,    /* CRU_ESCR0 */
               [5] = 0x3CC,    /* CRU_ESCR2 */
               [6] = 0x3E0,    /* CRU_ESCR4 */
               [0] = 0x3BA,    /* IQ_ESCR0 */
               [2] = 0x3BC,    /* RAT_ESCR0 */
               [3] = 0x3BE,    /* SSU_ESCR0 */
               [1] = 0x3CA, }, /* ALF_ESCR0 */
    [0x0D] = { [4] = 0x3B8,
               [5] = 0x3CC,
               [6] = 0x3E0,
               [0] = 0x3BA,
               [2] = 0x3BC,
               [3] = 0x3BE,
               [1] = 0x3CA, },
    [0x0E] = { [4] = 0x3B9,    /* CRU_ESCR1 */
               [5] = 0x3CD,    /* CRU_ESCR3 */
               [6] = 0x3E1,    /* CRU_ESCR5 */
               [0] = 0x3BB,    /* IQ_ESCR1 */
               [2] = 0x3BD,    /* RAT_ESCR1 */
               [1] = 0x3CB, }, /* ALF_ESCR1 */
    [0x0F] = { [4] = 0x3B9,
               [5] = 0x3CD,
               [6] = 0x3E1,
               [0] = 0x3BB,
               [2] = 0x3BD,
               [1] = 0x3CB, },
    [0x10] = { [4] = 0x3B8,
               [5] = 0x3CC,
               [6] = 0x3E0,
               [0] = 0x3BA,
               [2] = 0x3BC,
               [3] = 0x3BE,
               [1] = 0x3CA, },
    [0x11] = { [4] = 0x3B9,
               [5] = 0x3CD,
               [6] = 0x3E1,
               [0] = 0x3BB,
               [2] = 0x3BD,
               [1] = 0x3CB, },
};

/*
 * Reference lookup: return the MSR address of the ESCR selected by
 * escr_select on CCCR number pmc, or 0 if pmc or escr_select is out of
 * range or the combination has no ESCR.
 */
static unsigned int p4_escr_addr_orig(unsigned int pmc, unsigned int escr_select)
{
    if( pmc > 0x11 || escr_select > 7 )
        return 0;
    return p4_cccr_escr_map_orig[pmc][escr_select];
}
/*
 * The map from CCCR number and ESCR select value to ESCR MSR address.
 * This is the compacted map, derived from the manual's table.
 *
 * Each row covers one group of four CCCRs (row index is CCCR number / 4;
 * CCCRs 0x10 and 0x11 are folded into the last group by p4_escr_addr()).
 * Entries hold the low 8 bits of the MSR address (0x300 is added back on
 * lookup) for the first counter pair of the group; the second pair uses
 * the same entry + 1.  A zero entry means "no ESCR".
 */
static const unsigned char p4_cccr_escr_map[4][8] = {
    /* 0x00 and 0x01 as is, 0x02 and 0x03 are +1 */
    [0x00/4] = { [7] = 0xA0,
                 [6] = 0xA2,
                 [2] = 0xAA,
                 [4] = 0xAC,
                 [0] = 0xB2,
                 [1] = 0xB4,
                 [3] = 0xB6,
                 [5] = 0xC8, },
    /* 0x04 and 0x05 as is, 0x06 and 0x07 are +1 */
    [0x04/4] = { [0] = 0xC0,
                 [2] = 0xC2,
                 [1] = 0xC4, },
    /* 0x08 and 0x09 as is, 0x0A and 0x0B are +1 */
    [0x08/4] = { [1] = 0xA4,
                 [0] = 0xA6,
                 [5] = 0xA8,
                 [2] = 0xAE,
                 [3] = 0xB0, },
    /* 0x0C, 0x0D, and 0x10 as is,
       0x0E, 0x0F, and 0x11 are +1 except [3] is not in the domain */
    [0x0C/4] = { [4] = 0xB8,
                 [5] = 0xCC,
                 [6] = 0xE0,
                 [0] = 0xBA,
                 [2] = 0xBC,
                 [3] = 0xBE,
                 [1] = 0xCA, },
};

/*
 * Compact lookup: return the MSR address of the ESCR selected by
 * escr_select on CCCR number pmc, or 0 if pmc or escr_select is out of
 * range or the combination has no ESCR.  Must agree with
 * p4_escr_addr_orig() for every input.
 */
static unsigned int p4_escr_addr(unsigned int pmc, unsigned int escr_select)
{
    unsigned int pair, escr_offset;

    if( pmc > 0x11 )
        return 0;   /* pmc range error */
    if( escr_select > 7 )
        return 0;   /* ESCR SELECT range error; also keeps the row index in bounds */
    if( pmc > 0x0F )
        pmc -= 3;   /* fold 0x10 -> 0x0D and 0x11 -> 0x0E; now 0 <= pmc <= 0x0F */
    pair = pmc / 2; /* 0 <= pair <= 7 */
    escr_offset = p4_cccr_escr_map[pair / 2][escr_select];
    /* The odd pair of the last group (0x0E, 0x0F, 0x11) has no select 3. */
    if( !escr_offset || (pair == 7 && escr_select == 3) )
        return 0;   /* ESCR SELECT range error */
    return escr_offset + (pair & 1) + 0x300;
}
/*
 * Self-test: compare the compact lookup p4_escr_addr() against the
 * reference lookup p4_escr_addr_orig() for every CCCR number
 * (0x00..0x11) and every ESCR select value (0..7), and print one line
 * to stdout for each disagreement.  Prints nothing when the two agree.
 */
static void check_p4_escr_addr(void)
{
    const unsigned int nr_pmcs = 0x12;
    const unsigned int nr_selects = 8;
    unsigned int idx;

    /* A single flat index visits (pmc, escr_select) in row-major order. */
    for (idx = 0; idx < nr_pmcs * nr_selects; ++idx) {
        unsigned int pmc = idx / nr_selects;
        unsigned int escr_select = idx % nr_selects;
        unsigned int expected = p4_escr_addr_orig(pmc, escr_select);
        unsigned int actual = p4_escr_addr(pmc, escr_select);

        if (expected == actual)
            continue;
        printf("p4_escr_addr(%u, %u) is 0x%03x, should be 0x%03x\n",
               pmc, escr_select, actual, expected);
    }
}
/*
 * The events.
 *
 * Each event is selectable through one or two ESCRs.  An event that is
 * available on a single ESCR only stores -1 in escr1.
 */
struct event {
    const char name[32];
    unsigned int select;    /* ESCR[31:25] */
    enum escr_num escr0;
    unsigned int escr1;     /* escr_num or -1 */
};
static const struct event events[] = {
    /* Non-Retirement Events: */
    { .name = "TC_deliver_mode", .select = 0x01, .escr0 = TC_ESCR0, .escr1 = TC_ESCR1 },
    { .name = "BPU_fetch_request", .select = 0x03, .escr0 = BPU_ESCR0, .escr1 = BPU_ESCR1 },
    { .name = "ITLB_reference", .select = 0x18, .escr0 = ITLB_ESCR0, .escr1 = ITLB_ESCR1 },
    { .name = "memory_cancel", .select = 0x02, .escr0 = DAC_ESCR0, .escr1 = DAC_ESCR1 },
    { .name = "memory_complete", .select = 0x08, .escr0 = SAAT_ESCR0, .escr1 = SAAT_ESCR1 },
    { .name = "load_port_replay", .select = 0x04, .escr0 = SAAT_ESCR0, .escr1 = SAAT_ESCR1 },
    { .name = "store_port_replay", .select = 0x05, .escr0 = SAAT_ESCR0, .escr1 = SAAT_ESCR1 },
    { .name = "MOB_load_replay", .select = 0x03, .escr0 = MOB_ESCR0, .escr1 = MOB_ESCR1 },
    { .name = "page_walk_type", .select = 0x01, .escr0 = PMH_ESCR0, .escr1 = PMH_ESCR1 },
    { .name = "BSQ_cache_reference", .select = 0x0C, .escr0 = BSU_ESCR0, .escr1 = BSU_ESCR1 },
    /* IOQ_allocation: ESCR1 unavailable if CPUID < 0xF27 */
    { .name = "IOQ_allocation", .select = 0x03, .escr0 = FSB_ESCR0, .escr1 = FSB_ESCR1 },
    { .name = "IOQ_active_entries", .select = 0x1A, .escr0 = FSB_ESCR1, .escr1 = -1 },
    { .name = "FSB_data_activity", .select = 0x17, .escr0 = FSB_ESCR0, .escr1 = FSB_ESCR1 },
    { .name = "BSQ_allocation", .select = 0x05, .escr0 = BSU_ESCR0, .escr1 = -1 },
    { .name = "bsq_active_entries", .select = 0x06, .escr0 = BSU_ESCR1, .escr1 = -1 },
    { .name = "SSE_input_assist", .select = 0x34, .escr0 = FIRM_ESCR0, .escr1 = FIRM_ESCR1 },
    { .name = "packed_SP_uop", .select = 0x08, .escr0 = FIRM_ESCR0, .escr1 = FIRM_ESCR1 },
    { .name = "packed_DP_uop", .select = 0x0C, .escr0 = FIRM_ESCR0, .escr1 = FIRM_ESCR1 },
    { .name = "scalar_SP_uop", .select = 0x0A, .escr0 = FIRM_ESCR0, .escr1 = FIRM_ESCR1 },
    { .name = "scalar_DP_uop", .select = 0x0E, .escr0 = FIRM_ESCR0, .escr1 = FIRM_ESCR1 },
    { .name = "64bit_MMX_uop", .select = 0x02, .escr0 = FIRM_ESCR0, .escr1 = FIRM_ESCR1 },
    { .name = "128bit_MMX_uop", .select = 0x1A, .escr0 = FIRM_ESCR0, .escr1 = FIRM_ESCR1 },
    { .name = "x87_FP_uop", .select = 0x04, .escr0 = FIRM_ESCR0, .escr1 = FIRM_ESCR1 },
    { .name = "x87_SIMD_moves_uop", .select = 0x2E, .escr0 = FIRM_ESCR0, .escr1 = FIRM_ESCR1 },
    { .name = "TC_misc", .select = 0x06, .escr0 = TC_ESCR0, .escr1 = TC_ESCR1 },
    { .name = "global_power_events", .select = 0x13, .escr0 = FSB_ESCR0, .escr1 = FSB_ESCR1 },
    { .name = "tc_ms_xfer", .select = 0x05, .escr0 = MS_ESCR0, .escr1 = MS_ESCR1 },
    { .name = "uop_queue_writes", .select = 0x09, .escr0 = MS_ESCR0, .escr1 = MS_ESCR1 },
    { .name = "retired_mispred_branch_type", .select = 0x05, .escr0 = TBPU_ESCR0, .escr1 = TBPU_ESCR1 },
    { .name = "retired_branch_type", .select = 0x04, .escr0 = TBPU_ESCR0, .escr1 = TBPU_ESCR1 },
    { .name = "resource_stall", .select = 0x01, .escr0 = ALF_ESCR0, .escr1 = ALF_ESCR1 },
    { .name = "WC_Buffer", .select = 0x05, .escr0 = DAC_ESCR0, .escr1 = DAC_ESCR1 },
    { .name = "b2b_cycles", .select = 0x16, .escr0 = FSB_ESCR0, .escr1 = FSB_ESCR1 },
    { .name = "bnr", .select = 0x08, .escr0 = FSB_ESCR0, .escr1 = FSB_ESCR1 },
    { .name = "snoop", .select = 0x06, .escr0 = FSB_ESCR0, .escr1 = FSB_ESCR1 },
    { .name = "response", .select = 0x04, .escr0 = FSB_ESCR0, .escr1 = FSB_ESCR1 },
    /* At-Retirement Events: */
    /* front_end_event: filters uop_type */
    { .name = "front_end_event", .select = 0x08, .escr0 = CRU_ESCR2, .escr1 = CRU_ESCR3 },
    /* execution_event: filters packed_SP_uop, packed_DP_uop, scalar_SP_uop,
       scalar_DP_uop, 128bit_MMX_uop, 64bit_MMX_uop, x87_FP_uop,
       x87_SIMD_moves_uop */
    { .name = "execution_event", .select = 0x0C, .escr0 = CRU_ESCR2, .escr1 = CRU_ESCR3 },
    /* replay_event: filters MOB_load_replay, load_port_replay(SAAT_ESCR1),
       store_port_replay(SAAT_ESCR0), MSR_IA32_PEBS_ENABLE,
       MSR_PEBS_MATRIX_VERT */
    { .name = "replay_event", .select = 0x09, .escr0 = CRU_ESCR2, .escr1 = CRU_ESCR3 },
    /* instr_retired: seems to be sensitive to tagged uops */
    { .name = "instr_retired", .select = 0x02, .escr0 = CRU_ESCR0, .escr1 = CRU_ESCR1 },
    { .name = "uops_retired", .select = 0x01, .escr0 = CRU_ESCR0, .escr1 = CRU_ESCR1 },
    /* uop_type: can tag uops for front_end_event */
    { .name = "uop_type", .select = 0x02, .escr0 = RAT_ESCR0, .escr1 = RAT_ESCR1 },
    { .name = "branch_retired", .select = 0x06, .escr0 = CRU_ESCR2, .escr1 = CRU_ESCR3 },
    { .name = "mispred_branch_retired", .select = 0x03, .escr0 = CRU_ESCR0, .escr1 = CRU_ESCR1 },
    { .name = "x87_assist", .select = 0x03, .escr0 = CRU_ESCR2, .escr1 = CRU_ESCR3 },
    { .name = "machine_clear", .select = 0x02, .escr0 = CRU_ESCR2, .escr1 = CRU_ESCR3 },
    /* Model 3 only */
    { .name = "instr_completed", .select = 0x07, .escr0 = CRU_ESCR0, .escr1 = CRU_ESCR1 },
};
/*
 * Print one "counter <name> escr <name>" line to stdout for every
 * counter that can reach the given ESCR, i.e. every counter number for
 * which some ESCR select value maps to this ESCR's MSR address in the
 * compact map.
 */
static void do_escr(unsigned int escr_num)
{
    const unsigned int target_msr = escr_msr(escr_num);
    const char *const escr_label = escr_name(escr_num);
    unsigned int pmc;

    for (pmc = 0; pmc < ARRAY_SIZE(counters); ++pmc) {
        unsigned int escr_select = 0;

        while (escr_select < 8) {
            if (p4_escr_addr(pmc, escr_select) == target_msr)
                printf("counter %s escr %s\n", counter_name(pmc), escr_label);
            ++escr_select;
        }
    }
}
static void do_events(void)
{
unsigned int i;
for(i = 0; i < ARRAY_SIZE(events); ++i) {
const struct event *event = &events[i];
printf("escr %s event %s\n",
escr_name(event->escr0),
event->name);
do_escr(event->escr0);
if( event->escr1 != -1 ) {
printf("escr %s event %s\n",
escr_name(event->escr1),
event->name);
do_escr(event->escr1);
}
}
}
/*
 * Entry point: first cross-check the compact ESCR address map against
 * the reference table (mismatches are printed to stdout), then dump the
 * ESCR/event and counter/ESCR relations.  Always exits successfully.
 */
int main(void)
{
    check_p4_escr_addr();
    do_events();
    return EXIT_SUCCESS;
}