/* $Id: x86_tests.c,v 1.23.2.14 2007/10/07 17:18:32 mikpe Exp $ * Performance-monitoring counters driver. * Optional x86/x86_64-specific init-time tests. * * Copyright (C) 1999-2007 Mikael Pettersson */ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) #include #endif #define __NO_VERSION__ #include #include #include #include #include #include #undef MSR_P6_PERFCTR0 #undef MSR_P6_EVNTSEL0 #undef MSR_K7_PERFCTR0 #undef MSR_K7_EVNTSEL0 #undef MSR_CORE_PERF_FIXED_CTR_CTRL #undef MSR_P4_IQ_CCCR0 #undef MSR_P4_CRU_ESCR0 #include #include #include "x86_compat.h" #include "x86_tests.h" #define MSR_P5_CESR 0x11 #define MSR_P5_CTR0 0x12 #define P5_CESR_VAL (0x16 | (3<<6)) #define MSR_P6_PERFCTR0 0xC1 #define MSR_P6_EVNTSEL0 0x186 #define P6_EVNTSEL0_VAL (0xC0 | (3<<16) | (1<<22)) #define MSR_K7_EVNTSEL0 0xC0010000 #define MSR_K7_PERFCTR0 0xC0010004 #define K7_EVNTSEL0_VAL (0xC0 | (3<<16) | (1<<22)) #define VC3_EVNTSEL1_VAL 0xC0 #define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38D #define CORE2_PMC_FIXED_CTR0 ((1<<30) | 0) #define MSR_P4_IQ_COUNTER0 0x30C #define MSR_P4_IQ_CCCR0 0x36C #define MSR_P4_CRU_ESCR0 0x3B8 #define P4_CRU_ESCR0_VAL ((2<<25) | (1<<9) | (0x3<<2)) #define P4_IQ_CCCR0_VAL ((0x3<<16) | (4<<13) | (1<<12)) #define NITER 64 #define X2(S) S";"S #define X8(S) X2(X2(X2(S))) #ifdef __x86_64__ #define CR4MOV "movq" #else #define CR4MOV "movl" #endif #ifndef CONFIG_X86_LOCAL_APIC #undef apic_write #define apic_write(reg,vector) do{}while(0) #endif #define rdtsc_low(low) \ __asm__ __volatile__("rdtsc" : "=a"(low) : : "edx") static void __init do_rdpmc(unsigned pmc, unsigned unused2) { unsigned i; for(i = 0; i < NITER/8; ++i) __asm__ __volatile__(X8("rdpmc") : : "c"(pmc) : "eax", "edx"); } static void __init do_rdmsr(unsigned msr, unsigned unused2) { unsigned i; for(i = 0; i < NITER/8; ++i) __asm__ __volatile__(X8("rdmsr") : : "c"(msr) : "eax", "edx"); } static void __init do_wrmsr(unsigned msr, unsigned data) { unsigned i; for(i = 0; i < NITER/8; ++i) __asm__ __volatile__(X8("wrmsr") : : "c"(msr), "a"(data), "d"(0)); } static void __init do_rdcr4(unsigned unused1, unsigned unused2) { unsigned i; unsigned long dummy; for(i = 0; i < NITER/8; ++i) __asm__ __volatile__(X8(CR4MOV" %%cr4,%0") : "=r"(dummy)); } static void __init do_wrcr4(unsigned cr4, unsigned unused2) { unsigned i; for(i = 0; i < NITER/8; ++i) __asm__ __volatile__(X8(CR4MOV" %0,%%cr4") : : "r"((long)cr4)); } static void __init do_rdtsc(unsigned unused1, unsigned unused2) { unsigned i; for(i = 0; i < NITER/8; ++i) __asm__ __volatile__(X8("rdtsc") : : : "eax", "edx"); } static void __init do_wrlvtpc(unsigned val, unsigned unused2) { unsigned i; for(i = 0; i < NITER/8; ++i) { apic_write(APIC_LVTPC, val); apic_write(APIC_LVTPC, val); apic_write(APIC_LVTPC, val); apic_write(APIC_LVTPC, val); apic_write(APIC_LVTPC, val); apic_write(APIC_LVTPC, val); apic_write(APIC_LVTPC, val); apic_write(APIC_LVTPC, val); } } static void __init do_sync_core(unsigned unused1, unsigned unused2) { unsigned i; for(i = 0; i < NITER/8; ++i) { sync_core(); sync_core(); sync_core(); sync_core(); sync_core(); sync_core(); sync_core(); sync_core(); } } static void __init do_empty_loop(unsigned unused1, unsigned unused2) { unsigned i; for(i = 0; i < NITER/8; ++i) __asm__ __volatile__("" : : "c"(0)); } static unsigned __init run(void (*doit)(unsigned, unsigned), unsigned arg1, unsigned arg2) { unsigned start, stop; sync_core(); rdtsc_low(start); (*doit)(arg1, arg2); /* should take < 2^32 cycles to complete */ sync_core(); rdtsc_low(stop); return stop - start; } static void __init init_tests_message(void) { printk(KERN_INFO "Please email the following PERFCTR INIT lines " "to mikpe@it.uu.se\n" KERN_INFO "To remove this message, rebuild the driver " "with CONFIG_PERFCTR_INIT_TESTS=n\n"); printk(KERN_INFO "PERFCTR INIT: vendor %u, family %u, model %u, stepping %u, clock %u kHz\n", current_cpu_data.x86_vendor, current_cpu_data.x86, current_cpu_data.x86_model, current_cpu_data.x86_mask, perfctr_cpu_khz()); } static void __init measure_overheads(unsigned msr_evntsel0, unsigned evntsel0, unsigned msr_perfctr0, unsigned msr_cccr, unsigned cccr_val, unsigned is_core2) { int i; unsigned int loop, ticks[15]; const char *name[15]; if (msr_evntsel0) wrmsr(msr_evntsel0, 0, 0); if (msr_cccr) wrmsr(msr_cccr, 0, 0); name[0] = "rdtsc"; ticks[0] = run(do_rdtsc, 0, 0); name[1] = "rdpmc"; ticks[1] = (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) ? run(do_rdpmc,1,0) : 0; name[2] = "rdmsr (counter)"; ticks[2] = msr_perfctr0 ? run(do_rdmsr, msr_perfctr0, 0) : 0; name[3] = msr_cccr ? "rdmsr (escr)" : "rdmsr (evntsel)"; ticks[3] = msr_evntsel0 ? run(do_rdmsr, msr_evntsel0, 0) : 0; name[4] = "wrmsr (counter)"; ticks[4] = msr_perfctr0 ? run(do_wrmsr, msr_perfctr0, 0) : 0; name[5] = msr_cccr ? "wrmsr (escr)" : "wrmsr (evntsel)"; ticks[5] = msr_evntsel0 ? run(do_wrmsr, msr_evntsel0, evntsel0) : 0; name[6] = "read cr4"; ticks[6] = run(do_rdcr4, 0, 0); name[7] = "write cr4"; ticks[7] = run(do_wrcr4, read_cr4(), 0); name[8] = "rdpmc (fast)"; ticks[8] = msr_cccr ? run(do_rdpmc, 0x80000001, 0) : 0; name[9] = "rdmsr (cccr)"; ticks[9] = msr_cccr ? run(do_rdmsr, msr_cccr, 0) : 0; name[10] = "wrmsr (cccr)"; ticks[10] = msr_cccr ? run(do_wrmsr, msr_cccr, cccr_val) : 0; name[11] = "write LVTPC"; ticks[11] = (perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) ? run(do_wrlvtpc, APIC_DM_NMI|APIC_LVT_MASKED, 0) : 0; name[12] = "sync_core"; ticks[12] = run(do_sync_core, 0, 0); name[13] = "read fixed_ctr0"; ticks[13] = is_core2 ? run(do_rdpmc, CORE2_PMC_FIXED_CTR0, 0) : 0; name[14] = "wrmsr fixed_ctr_ctrl"; ticks[14] = is_core2 ? run(do_wrmsr, MSR_CORE_PERF_FIXED_CTR_CTRL, 0) : 0; loop = run(do_empty_loop, 0, 0); if (msr_evntsel0) wrmsr(msr_evntsel0, 0, 0); if (msr_cccr) wrmsr(msr_cccr, 0, 0); init_tests_message(); printk(KERN_INFO "PERFCTR INIT: NITER == %u\n", NITER); printk(KERN_INFO "PERFCTR INIT: loop overhead is %u cycles\n", loop); for(i = 0; i < ARRAY_SIZE(ticks); ++i) { unsigned int x; if (!ticks[i]) continue; x = ((ticks[i] - loop) * 10) / NITER; printk(KERN_INFO "PERFCTR INIT: %s cost is %u.%u cycles (%u total)\n", name[i], x/10, x%10, ticks[i]); } } #ifndef __x86_64__ static inline void perfctr_p5_init_tests(void) { measure_overheads(MSR_P5_CESR, P5_CESR_VAL, MSR_P5_CTR0, 0, 0, 0); } #if !defined(CONFIG_X86_TSC) static inline void perfctr_c6_init_tests(void) { unsigned int cesr, dummy; rdmsr(MSR_P5_CESR, cesr, dummy); init_tests_message(); printk(KERN_INFO "PERFCTR INIT: boot CESR == %#08x\n", cesr); } #endif static inline void perfctr_vc3_init_tests(void) { measure_overheads(MSR_P6_EVNTSEL0+1, VC3_EVNTSEL1_VAL, MSR_P6_PERFCTR0+1, 0, 0, 0); } #endif /* !__x86_64__ */ static inline void perfctr_p6_init_tests(void) { measure_overheads(MSR_P6_EVNTSEL0, P6_EVNTSEL0_VAL, MSR_P6_PERFCTR0, 0, 0, 0); } static inline void perfctr_core2_init_tests(void) { measure_overheads(MSR_P6_EVNTSEL0, P6_EVNTSEL0_VAL, MSR_P6_PERFCTR0, 0, 0, 1); } static inline void perfctr_p4_init_tests(void) { measure_overheads(MSR_P4_CRU_ESCR0, P4_CRU_ESCR0_VAL, MSR_P4_IQ_COUNTER0, MSR_P4_IQ_CCCR0, P4_IQ_CCCR0_VAL, 0); } static inline void perfctr_k7_init_tests(void) { measure_overheads(MSR_K7_EVNTSEL0, K7_EVNTSEL0_VAL, MSR_K7_PERFCTR0, 0, 0, 0); } static inline void perfctr_generic_init_tests(void) { measure_overheads(0, 0, 0, 0, 0, 0); } enum perfctr_x86_tests_type perfctr_x86_tests_type __initdata = PTT_UNKNOWN; void __init perfctr_x86_init_tests(void) { switch (perfctr_x86_tests_type) { #ifndef __x86_64__ case PTT_P5: /* Intel P5, P5MMX; Cyrix 6x86MX, MII, III */ perfctr_p5_init_tests(); break; #if !defined(CONFIG_X86_TSC) case PTT_WINCHIP: /* WinChip C6, 2, 3 */ perfctr_c6_init_tests(); break; #endif case PTT_VC3: /* VIA C3 */ perfctr_vc3_init_tests(); break; #endif /* !__x86_64__ */ case PTT_P6: /* Intel PPro, PII, PIII, PENTM, CORE */ perfctr_p6_init_tests(); break; case PTT_CORE2: /* Intel Core 2 */ perfctr_core2_init_tests(); break; case PTT_P4: /* Intel P4 */ perfctr_p4_init_tests(); break; case PTT_AMD: /* AMD K7, K8 */ perfctr_k7_init_tests(); break; case PTT_GENERIC: perfctr_generic_init_tests(); break; default: printk(KERN_INFO "%s: unknown CPU type %u\n", __FUNCTION__, perfctr_x86_tests_type); break; } }