/*
 * PPC64 performance-monitoring counters driver.
 *
 * Based on Mikael Pettersson's 32-bit ppc code.
 * Copyright (C) 2004 David Gibson, IBM Corporation.
 * Copyright (C) 2004, 2007 Mikael Pettersson
 */
#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
#include <linux/config.h>
#endif
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/perfctr.h>
#include <asm/prom.h>
#include <asm/time.h>		/* tb_ticks_per_jiffy */
#include <asm/pmc.h>
#include <asm/cputable.h>
#include "ppc64_tests.h"

extern void ppc64_enable_pmcs(void);

/* Support for lazy perfctr SPR updates. */
struct per_cpu_cache {	/* roughly a subset of perfctr_cpu_state */
	unsigned int id;	/* cache owner id */
	/* Physically indexed cache of the MMCRs. */
	unsigned long ppc64_mmcr0, ppc64_mmcr1, ppc64_mmcra;
};
static DEFINE_PER_CPU(struct per_cpu_cache, per_cpu_cache);
#define __get_cpu_cache(cpu)	(&per_cpu(per_cpu_cache, cpu))
#define get_cpu_cache()		(&__get_cpu_var(per_cpu_cache))

/* Structure for counter snapshots: the timebase is 64-bit,
   the PMCs are 32-bit. */
struct perfctr_low_ctrs {
	u64 tsc;
	u32 pmc[8];
};

static unsigned int new_id(void)
{
	static DEFINE_SPINLOCK(lock);
	static unsigned int counter;
	unsigned int id;

	spin_lock(&lock);
	id = ++counter;
	spin_unlock(&lock);
	return id;
}

static inline u32 read_pmc(int pmc)
{
	switch (pmc) {
	case 0:
		return mfspr(SPRN_PMC1);
	case 1:
		return mfspr(SPRN_PMC2);
	case 2:
		return mfspr(SPRN_PMC3);
	case 3:
		return mfspr(SPRN_PMC4);
	case 4:
		return mfspr(SPRN_PMC5);
	case 5:
		return mfspr(SPRN_PMC6);
	case 6:
		return mfspr(SPRN_PMC7);
	case 7:
		return mfspr(SPRN_PMC8);
	default:
		return -EINVAL;
	}
}

static inline void write_pmc(int pmc, u32 val)
{
	switch (pmc) {
	case 0:
		mtspr(SPRN_PMC1, val);
		break;
	case 1:
		mtspr(SPRN_PMC2, val);
		break;
	case 2:
		mtspr(SPRN_PMC3, val);
		break;
	case 3:
		mtspr(SPRN_PMC4, val);
		break;
	case 4:
		mtspr(SPRN_PMC5, val);
		break;
	case 5:
		mtspr(SPRN_PMC6, val);
		break;
	case 6:
		mtspr(SPRN_PMC7, val);
		break;
	case 7:
		mtspr(SPRN_PMC8, val);
		break;
	}
}

#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT
static void perfctr_default_ihandler(unsigned long pc)
{
	unsigned int mmcr0 = mfspr(SPRN_MMCR0);

	/* The default handler simply masks further PMIs. */
	mmcr0 &= ~MMCR0_PMXE;
	mtspr(SPRN_MMCR0, mmcr0);
}

static perfctr_ihandler_t perfctr_ihandler = perfctr_default_ihandler;

void do_perfctr_interrupt(struct pt_regs *regs)
{
	unsigned long mmcr0;

	/* interrupts are disabled here, so we don't need to
	 * preempt_disable() */

	(*perfctr_ihandler)(instruction_pointer(regs));

	/* clear PMAO so the interrupt doesn't reassert immediately */
	mmcr0 = mfspr(SPRN_MMCR0) & ~MMCR0_PMAO;
	mtspr(SPRN_MMCR0, mmcr0);
}

void perfctr_cpu_set_ihandler(perfctr_ihandler_t ihandler)
{
	perfctr_ihandler = ihandler ? ihandler : perfctr_default_ihandler;
}
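/*
 * A client can install its own PMI handler with
 * perfctr_cpu_set_ihandler().  A minimal sketch (hypothetical client
 * code, not part of this driver; the body of my_ihandler is only an
 * assumption about what a client would do):
 *
 *	static void my_ihandler(unsigned long pc)
 *	{
 *		... record `pc', then identify and reprogram the
 *		    overflowed i-mode counters; see the
 *		    suspend/identify_overflow/ireload/resume
 *		    sequence documented further down ...
 *	}
 *
 *	perfctr_cpu_set_ihandler(my_ihandler);
 *	...
 *	perfctr_cpu_set_ihandler(NULL);	(restores the default handler)
 */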
#else
#define perfctr_cstatus_has_ictrs(cstatus)	0
#endif

#if defined(CONFIG_SMP) && defined(CONFIG_PERFCTR_INTERRUPT_SUPPORT)

static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu)
{
	state->isuspend_cpu = cpu;
}

static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu)
{
	return state->isuspend_cpu == cpu;
}

static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state)
{
	state->isuspend_cpu = NR_CPUS;
}

#else
static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) { }
static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) { return 1; }
static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) { }
#endif

static void ppc64_clear_counters(void)
{
	mtspr(SPRN_MMCR0, 0);
	mtspr(SPRN_MMCR1, 0);
	mtspr(SPRN_MMCRA, 0);

	if (cur_cpu_spec->num_pmcs >= 1)
		mtspr(SPRN_PMC1, 0);
	if (cur_cpu_spec->num_pmcs >= 2)
		mtspr(SPRN_PMC2, 0);
	if (cur_cpu_spec->num_pmcs >= 3)
		mtspr(SPRN_PMC3, 0);
	if (cur_cpu_spec->num_pmcs >= 4)
		mtspr(SPRN_PMC4, 0);
	if (cur_cpu_spec->num_pmcs >= 5)
		mtspr(SPRN_PMC5, 0);
	if (cur_cpu_spec->num_pmcs >= 6)
		mtspr(SPRN_PMC6, 0);
	if (cur_cpu_spec->num_pmcs >= 7)
		mtspr(SPRN_PMC7, 0);
	if (cur_cpu_spec->num_pmcs >= 8)
		mtspr(SPRN_PMC8, 0);
}

/*
 * Driver methods, internal and exported.
 */
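/*
 * Overview of the method lifecycle (a summary of the code below, not
 * additional semantics):
 *
 *	perfctr_cpu_resume():  reload i-mode PMCs if needed, write the
 *	                       MMCRs (lazily, via the per-CPU cache),
 *	                       then snapshot the counters into `start'.
 *	perfctr_cpu_sample():  accumulate `now - start' into `sum' and
 *	                       restart the interval from `now'.
 *	perfctr_cpu_suspend(): freeze the counters (MMCR0_FC), then do
 *	                       a final accumulation.
 */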
static void perfctr_cpu_write_control(const struct perfctr_cpu_state *state)
{
	struct per_cpu_cache *cache;
	unsigned long long value;

	cache = get_cpu_cache();
	/*
	 * Order matters here: update threshmult and event
	 * selectors before updating global control, which
	 * potentially enables PMIs.
	 *
	 * Since mtspr doesn't accept a runtime value for the
	 * SPR number, unroll the loop so each mtspr targets
	 * a constant SPR.
	 *
	 * For processors without MMCR2, we ensure that the
	 * cache and the state indicate the same value for it,
	 * preventing any actual mtspr to it.  Ditto for MMCR1.
	 */
	value = state->control.mmcra;
	if (value != cache->ppc64_mmcra) {
		cache->ppc64_mmcra = value;
		mtspr(SPRN_MMCRA, value);
	}
	value = state->control.mmcr1;
	if (value != cache->ppc64_mmcr1) {
		cache->ppc64_mmcr1 = value;
		mtspr(SPRN_MMCR1, value);
	}
	value = state->control.mmcr0;
	if (perfctr_cstatus_has_ictrs(state->user.cstatus))
		value |= MMCR0_PMXE;
	if (value != cache->ppc64_mmcr0) {
		cache->ppc64_mmcr0 = value;
		mtspr(SPRN_MMCR0, value);
	}
	cache->id = state->id;
}

static void perfctr_cpu_read_counters(struct perfctr_cpu_state *state,
				      struct perfctr_low_ctrs *ctrs)
{
	unsigned int cstatus, i, pmc;

	cstatus = state->user.cstatus;
	if (perfctr_cstatus_has_tsc(cstatus))
		ctrs->tsc = mftb();

	for (i = 0; i < perfctr_cstatus_nractrs(cstatus); ++i) {
		pmc = state->control.pmc_map[i];
		ctrs->pmc[i] = read_pmc(pmc);
	}
}

#ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT
static void perfctr_cpu_isuspend(struct perfctr_cpu_state *state)
{
	unsigned int cstatus, nrctrs, i;
	int cpu;

	cpu = smp_processor_id();
	set_isuspend_cpu(state, cpu); /* early to limit cpu's live range */
	cstatus = state->user.cstatus;
	nrctrs = perfctr_cstatus_nrctrs(cstatus);
	for (i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) {
		int pmc = state->control.pmc_map[i];
		u32 now = read_pmc(pmc);

		state->user.pmc[i].sum += (u32)(now - state->user.pmc[i].start);
		state->user.pmc[i].start = now;
	}
}

static void perfctr_cpu_iresume(const struct perfctr_cpu_state *state)
{
	struct per_cpu_cache *cache;
	unsigned int cstatus, nrctrs, i;
	int cpu;

	cpu = smp_processor_id();
	cache = __get_cpu_cache(cpu);
	if (cache->id == state->id) {
		/* Clearing cache->id to force write_control()
		   to unfreeze MMCR0 would be done here, but it
		   is subsumed by resume()'s MMCR0 reload logic. */
		if (is_isuspend_cpu(state, cpu))
			return; /* skip reload of PMCs */
	}
	/*
	 * The CPU state wasn't ours.
	 *
	 * The counters must be frozen before being reinitialised,
	 * to prevent unexpected increments and missed overflows.
	 *
	 * All unused counters must be reset to a non-overflow state.
	 */
	if (!(cache->ppc64_mmcr0 & MMCR0_FC)) {
		cache->ppc64_mmcr0 |= MMCR0_FC;
		mtspr(SPRN_MMCR0, cache->ppc64_mmcr0);
	}
	cstatus = state->user.cstatus;
	nrctrs = perfctr_cstatus_nrctrs(cstatus);
	for (i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i)
		write_pmc(state->control.pmc_map[i], state->user.pmc[i].start);
}

/* Call perfctr_cpu_ireload() just before perfctr_cpu_resume() to
   bypass internal caching and force a reload of the i-mode PMCs. */
void perfctr_cpu_ireload(struct perfctr_cpu_state *state)
{
#ifdef CONFIG_SMP
	clear_isuspend_cpu(state);
#else
	get_cpu_cache()->id = 0;
#endif
}
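/*
 * Typical overflow-handling sequence in a client's PMI handler, per
 * the preconditions documented on the functions involved (a sketch;
 * `state' is assumed to be the client's perfctr_cpu_state):
 *
 *	perfctr_cpu_suspend(state);
 *	pmc_mask = perfctr_cpu_identify_overflow(state);
 *	... deliver the overflow event for the counters in pmc_mask ...
 *	perfctr_cpu_ireload(state);
 *	perfctr_cpu_resume(state);
 */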
/* PRE: the counters have been suspended and sampled by
   perfctr_cpu_suspend() */
unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state *state)
{
	unsigned int cstatus, nractrs, nrctrs, i;
	unsigned int pmc_mask = 0;
	int nr_pmcs = cur_cpu_spec->num_pmcs;

	cstatus = state->user.cstatus;
	nractrs = perfctr_cstatus_nractrs(cstatus);
	nrctrs = perfctr_cstatus_nrctrs(cstatus);

	/* Ickity, ickity, ick.  We don't have fine enough interrupt
	 * control to disable interrupts on all the counters we're not
	 * interested in.  So, we have to deal with overflows on actrs
	 * and unused PMCs as well as the ones we actually care
	 * about. */
	for (i = 0; i < nractrs; ++i) {
		int pmc = state->control.pmc_map[i];
		u32 val = read_pmc(pmc);

		/* For actrs, force a sample if they overflowed */
		if ((s32)val < 0) {
			state->user.pmc[i].sum += (u32)(val - state->user.pmc[i].start);
			state->user.pmc[i].start = 0;
			write_pmc(pmc, 0);
		}
	}
	for (; i < nrctrs; ++i) {
		if ((s32)state->user.pmc[i].start < 0) { /* PPC64-specific */
			int pmc = state->control.pmc_map[i];

			/* XXX: "+=" to correct for overshots */
			state->user.pmc[i].start = state->control.ireset[pmc];
			pmc_mask |= (1 << i);
		}
	}
	/* Clear any unused overflowed counters, so we don't loop on
	 * the interrupt */
	for (i = 0; i < nr_pmcs; ++i) {
		if (!(state->unused_pmcs & (1 << i)))
			continue;
		if ((s32)read_pmc(i) < 0)
			write_pmc(i, 0);
	}
	return pmc_mask;
}

static int check_ireset(struct perfctr_cpu_state *state)
{
	unsigned int nrctrs, i;

	i = state->control.header.nractrs;
	nrctrs = i + state->control.header.nrictrs;
	for (; i < nrctrs; ++i) {
		unsigned int pmc = state->control.pmc_map[i];

		if ((int)state->control.ireset[pmc] < 0) /* PPC64-specific */
			return -EINVAL;
		state->user.pmc[i].start = state->control.ireset[pmc];
	}
	return 0;
}

#else	/* CONFIG_PERFCTR_INTERRUPT_SUPPORT */
static inline void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) { }
static inline void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) { }
static inline int check_ireset(struct perfctr_cpu_state *state) { return 0; }
#endif	/* CONFIG_PERFCTR_INTERRUPT_SUPPORT */

static int check_control(struct perfctr_cpu_state *state)
{
	unsigned int i, nractrs, nrctrs, pmc_mask, pmc;
	unsigned int nr_pmcs = cur_cpu_spec->num_pmcs;

	nractrs = state->control.header.nractrs;
	nrctrs = nractrs + state->control.header.nrictrs;
	if (nrctrs < nractrs || nrctrs > nr_pmcs)
		return -EINVAL;

	pmc_mask = 0;
	for (i = 0; i < nrctrs; ++i) {
		pmc = state->control.pmc_map[i];
		if (pmc >= nr_pmcs || (pmc_mask & (1 << pmc)))
			return -EINVAL;
		pmc_mask |= (1 << pmc);
	}

	/* The driver needs to retain control of PMXE and PMAO,
	   and doesn't handle time-base transition events (TBEE). */
	if ((state->control.mmcr0 & MMCR0_PMXE)
	    || (state->control.mmcr0 & MMCR0_PMAO)
	    || (state->control.mmcr0 & MMCR0_TBEE))
		return -EINVAL;

	state->unused_pmcs = ((1 << nr_pmcs) - 1) & ~pmc_mask;

	state->id = new_id();
	return 0;
}

int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global)
{
	int err;

	clear_isuspend_cpu(state);
	state->user.cstatus = 0;

	/* disallow i-mode counters if we cannot catch the interrupts */
	if (!(perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT)
	    && state->control.header.nrictrs)
		return -EPERM;

	err = check_control(state); /* may initialise state->cstatus */
	if (err < 0)
		return err;
	err = check_ireset(state);
	if (err < 0)
		return err;
	state->user.cstatus |= perfctr_mk_cstatus(state->control.header.tsc_on,
						  state->control.header.nractrs,
						  state->control.header.nrictrs);
	return 0;
}
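/*
 * Client-side control setup, sketched from the fields validated above
 * (hypothetical values; MMCR event encodings are CPU-model specific
 * and not defined in this file):
 *
 *	memset(&state->control, 0, sizeof state->control);
 *	state->control.header.tsc_on = 1;
 *	state->control.header.nractrs = 1;
 *	state->control.pmc_map[0] = 0;		(count on PMC1)
 *	state->control.mmcr0 = ...;		(model-specific selects)
 *	state->control.mmcr1 = ...;
 *	err = perfctr_cpu_update_control(state, 0);
 */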
/*
 * get_reg_offset() maps SPR numbers to offsets into
 * struct perfctr_cpu_control.
 */
static const struct {
	unsigned int spr;
	unsigned int offset;
	unsigned int size;
} reg_offsets[] = {
	{ SPRN_MMCR0, offsetof(struct perfctr_cpu_control, mmcr0), sizeof(long) },
	{ SPRN_MMCR1, offsetof(struct perfctr_cpu_control, mmcr1), sizeof(long) },
	{ SPRN_MMCRA, offsetof(struct perfctr_cpu_control, mmcra), sizeof(long) },
	{ SPRN_PMC1, offsetof(struct perfctr_cpu_control, ireset[1-1]), sizeof(int) },
	{ SPRN_PMC2, offsetof(struct perfctr_cpu_control, ireset[2-1]), sizeof(int) },
	{ SPRN_PMC3, offsetof(struct perfctr_cpu_control, ireset[3-1]), sizeof(int) },
	{ SPRN_PMC4, offsetof(struct perfctr_cpu_control, ireset[4-1]), sizeof(int) },
	{ SPRN_PMC5, offsetof(struct perfctr_cpu_control, ireset[5-1]), sizeof(int) },
	{ SPRN_PMC6, offsetof(struct perfctr_cpu_control, ireset[6-1]), sizeof(int) },
	{ SPRN_PMC7, offsetof(struct perfctr_cpu_control, ireset[7-1]), sizeof(int) },
	{ SPRN_PMC8, offsetof(struct perfctr_cpu_control, ireset[8-1]), sizeof(int) },
};

static int get_reg_offset(unsigned int spr, unsigned int *size)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(reg_offsets); ++i)
		if (spr == reg_offsets[i].spr) {
			*size = reg_offsets[i].size;
			return reg_offsets[i].offset;
		}
	return -1;
}

static int access_regs(struct perfctr_cpu_control *control,
		       void *argp, unsigned int argbytes, int do_write)
{
	struct perfctr_cpu_reg *regs;
	unsigned int i, nr_regs, size;
	int offset;

	nr_regs = argbytes / sizeof(struct perfctr_cpu_reg);
	if (nr_regs * sizeof(struct perfctr_cpu_reg) != argbytes)
		return -EINVAL;
	regs = (struct perfctr_cpu_reg *)argp;

	for (i = 0; i < nr_regs; ++i) {
		offset = get_reg_offset(regs[i].nr, &size);
		if (offset < 0)
			return -EINVAL;
		if (size == sizeof(long)) {
			unsigned long *where = (unsigned long *)((char *)control + offset);

			if (do_write)
				*where = regs[i].value;
			else
				regs[i].value = *where;
		} else {
			unsigned int *where = (unsigned int *)((char *)control + offset);

			if (do_write)
				*where = regs[i].value;
			else
				regs[i].value = *where;
		}
	}
	return argbytes;
}

int perfctr_cpu_control_write(struct perfctr_cpu_control *control,
			      unsigned int domain,
			      const void *srcp, unsigned int srcbytes)
{
	if (domain != PERFCTR_DOMAIN_CPU_REGS)
		return -EINVAL;
	return access_regs(control, (void *)srcp, srcbytes, 1);
}

int perfctr_cpu_control_read(const struct perfctr_cpu_control *control,
			     unsigned int domain,
			     void *dstp, unsigned int dstbytes)
{
	if (domain != PERFCTR_DOMAIN_CPU_REGS)
		return -EINVAL;
	return access_regs((struct perfctr_cpu_control *)control,
			   dstp, dstbytes, 0);
}

void perfctr_cpu_suspend(struct perfctr_cpu_state *state)
{
	unsigned int i, cstatus;
	struct perfctr_low_ctrs now;

	write_perfseq_begin(&state->user.sequence);

	/* quiesce the counters */
	mtspr(SPRN_MMCR0, MMCR0_FC);
	get_cpu_cache()->ppc64_mmcr0 = MMCR0_FC;

	if (perfctr_cstatus_has_ictrs(state->user.cstatus))
		perfctr_cpu_isuspend(state);

	perfctr_cpu_read_counters(state, &now);
	cstatus = state->user.cstatus;
	if (perfctr_cstatus_has_tsc(cstatus))
		state->user.tsc_sum += now.tsc - state->user.tsc_start;

	for (i = 0; i < perfctr_cstatus_nractrs(cstatus); ++i)
		state->user.pmc[i].sum += (u32)(now.pmc[i] - state->user.pmc[i].start);

	write_perfseq_end(&state->user.sequence);
}
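/*
 * The (u32)(now - start) arithmetic above (and in perfctr_cpu_sample()
 * below) relies on modulo-2^32 wraparound of the 32-bit PMCs: e.g.
 * start == 0xfffffff0 and now == 0x10 gives a delta of 0x20, which is
 * correct as long as no counter advances by 2^32 or more between two
 * samples.
 */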
void perfctr_cpu_resume(struct perfctr_cpu_state *state)
{
	struct perfctr_low_ctrs now;
	unsigned int i, cstatus;

	write_perfseq_begin(&state->user.sequence);

	if (perfctr_cstatus_has_ictrs(state->user.cstatus))
		perfctr_cpu_iresume(state);
	perfctr_cpu_write_control(state);

	perfctr_cpu_read_counters(state, &now);
	cstatus = state->user.cstatus;
	if (perfctr_cstatus_has_tsc(cstatus))
		state->user.tsc_start = now.tsc;

	for (i = 0; i < perfctr_cstatus_nractrs(cstatus); ++i)
		state->user.pmc[i].start = now.pmc[i];

	write_perfseq_end(&state->user.sequence);
}

void perfctr_cpu_sample(struct perfctr_cpu_state *state)
{
	unsigned int i, cstatus, nractrs;
	struct perfctr_low_ctrs now;

	write_perfseq_begin(&state->user.sequence);

	perfctr_cpu_read_counters(state, &now);
	cstatus = state->user.cstatus;
	if (perfctr_cstatus_has_tsc(cstatus)) {
		state->user.tsc_sum += now.tsc - state->user.tsc_start;
		state->user.tsc_start = now.tsc;
	}
	nractrs = perfctr_cstatus_nractrs(cstatus);
	for (i = 0; i < nractrs; ++i) {
		state->user.pmc[i].sum += (u32)(now.pmc[i] - state->user.pmc[i].start);
		state->user.pmc[i].start = now.pmc[i];
	}

	write_perfseq_end(&state->user.sequence);
}

static void perfctr_cpu_clear_counters(void)
{
	struct per_cpu_cache *cache;

	cache = get_cpu_cache();
	memset(cache, 0, sizeof *cache);
	cache->id = 0;

	ppc64_clear_counters();
}

/****************************************************************
 *								*
 * Processor detection and initialisation procedures.		*
 *								*
 ****************************************************************/

static void ppc64_cpu_setup(void)
{
	/* allow user to initialize these???? */
	unsigned long long mmcr0 = mfspr(SPRN_MMCR0);
	unsigned long long mmcra = mfspr(SPRN_MMCRA);

	ppc64_enable_pmcs();

	mmcr0 |= MMCR0_FC;
	mtspr(SPRN_MMCR0, mmcr0);

	mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE;
	mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE;
	mtspr(SPRN_MMCR0, mmcr0);

	mmcra |= MMCRA_SAMPLE_ENABLE;
	mtspr(SPRN_MMCRA, mmcra);

	printk("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(),
	       mfspr(SPRN_MMCR0));
	printk("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
	       mfspr(SPRN_MMCR1));
	printk("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
	       mfspr(SPRN_MMCRA));

	/* mtmsrd(mfmsr() | MSR_PMM); */

	ppc64_clear_counters();

	mmcr0 = mfspr(SPRN_MMCR0);
	mmcr0 &= ~MMCR0_PMAO;
	mmcr0 &= ~MMCR0_FC;
	mtspr(SPRN_MMCR0, mmcr0);

	printk("start on cpu %d, mmcr0 %llx\n", smp_processor_id(), mmcr0);
}

static void perfctr_cpu_clear_one(void *ignore)
{
	/* PREEMPT note: when called via on_each_cpu(),
	   this is in IRQ context with preemption disabled. */
	perfctr_cpu_clear_counters();
}

static void perfctr_cpu_reset(void)
{
	on_each_cpu(perfctr_cpu_clear_one, NULL, 1, 1);
	perfctr_cpu_set_ihandler(NULL);
}

int __init perfctr_cpu_init(void)
{
	extern unsigned long ppc_proc_freq;
	extern unsigned long ppc_tb_freq;

	perfctr_info.cpu_features = PERFCTR_FEATURE_RDTSC
		| PERFCTR_FEATURE_RDPMC | PERFCTR_FEATURE_PCINT;

	perfctr_cpu_name = "PowerPC64";

	perfctr_info.cpu_khz = ppc_proc_freq / 1000;
	/* We need to round up here rather than truncating, because in
	 * a few cases the raw ratio can end up being 7.9999 or
	 * suchlike */
	perfctr_info.tsc_to_cpu_mult =
		(ppc_proc_freq + ppc_tb_freq - 1) / ppc_tb_freq;

	on_each_cpu((void *)ppc64_cpu_setup, NULL, 0, 1);

	perfctr_ppc64_init_tests();

	perfctr_cpu_reset();
	return 0;
}

void __exit perfctr_cpu_exit(void)
{
	perfctr_cpu_reset();
}
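/*
 * Usage sketch for the reservation interface below (hypothetical
 * client; the service string is free-form and only used in the
 * release-mismatch diagnostic):
 *
 *	const char *other = perfctr_cpu_reserve("my-service");
 *	if (other)
 *		return -EBUSY;	(PMC hardware held by `other')
 *	... program and use the counters ...
 *	perfctr_cpu_release("my-service");
 */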
/****************************************************************
 *								*
 * Hardware reservation.					*
 *								*
 ****************************************************************/

static DEFINE_SPINLOCK(service_mutex);
static const char *current_service = NULL;

const char *perfctr_cpu_reserve(const char *service)
{
	const char *ret;

	spin_lock(&service_mutex);

	ret = current_service;
	if (ret)
		goto out;

	ret = "unknown driver (oprofile?)";
	if (reserve_pmc_hardware(do_perfctr_interrupt) != 0)
		goto out;

	current_service = service;
	ret = NULL;

 out:
	spin_unlock(&service_mutex);
	return ret;
}

void perfctr_cpu_release(const char *service)
{
	spin_lock(&service_mutex);

	if (service != current_service) {
		printk(KERN_ERR "%s: attempt by %s to release while reserved by %s\n",
		       __FUNCTION__, service, current_service);
		goto out;
	}

	/* power down the counters */
	perfctr_cpu_reset();
	current_service = NULL;
	release_pmc_hardware();

 out:
	spin_unlock(&service_mutex);
}