#include #include #include #include #include #include #include #include #include #include #include #include #include "libpfms.h" //#define dprint(format, arg...) fprintf(stderr, "%s.%d: " format , __FUNCTION__ , __LINE__, ## arg) #define dprint(format, arg...) typedef enum { CMD_NONE, CMD_CTX, CMD_LOAD, CMD_UNLOAD, CMD_WPMCS, CMD_WPMDS, CMD_RPMDS, CMD_STOP, CMD_START, CMD_CLOSE } pfms_cmd_t; typedef struct _barrier { pthread_mutex_t mutex; pthread_cond_t cond; uint32_t counter; uint32_t max; uint64_t generation; /* avoid race condition on wake-up */ } barrier_t; typedef struct { uint32_t cpu; uint32_t fd; void *smpl_vaddr; size_t smpl_buf_size; } pfms_cpu_t; typedef struct _pfms_thread { uint32_t cpu; pfms_cmd_t cmd; void *data; uint32_t ndata; sem_t cmd_sem; int ret; pthread_t tid; barrier_t *barrier; } pfms_thread_t; typedef struct { barrier_t barrier; uint32_t ncpus; } pfms_session_t; static uint32_t ncpus; static pfms_thread_t *tds; static pthread_mutex_t tds_lock = PTHREAD_MUTEX_INITIALIZER; static int barrier_init(barrier_t *b, uint32_t count) { int r; r = pthread_mutex_init(&b->mutex, NULL); if (r == -1) return -1; r = pthread_cond_init(&b->cond, NULL); if (r == -1) return -1; b->max = b->counter = count; b->generation = 0; return 0; } static void cleanup_barrier(void *arg) { barrier_t *b = (barrier_t *)arg; int r; r = pthread_mutex_unlock(&b->mutex); dprint("free barrier mutex r=%d\n", r); (void) r; } static int barrier_wait(barrier_t *b) { uint64_t generation; int oldstate; pthread_cleanup_push(cleanup_barrier, b); pthread_mutex_lock(&b->mutex); pthread_testcancel(); if (--b->counter == 0) { /* reset barrier */ b->counter = b->max; /* * bump generation number, this avoids thread getting stuck in the * wake up loop below in case a thread just out of the barrier goes * back in right away before all the thread from the previous "round" * have "escaped". */ b->generation++; pthread_cond_broadcast(&b->cond); } else { generation = b->generation; pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); while (b->counter != b->max && generation == b->generation) { pthread_cond_wait(&b->cond, &b->mutex); } pthread_setcancelstate(oldstate, NULL); } pthread_mutex_unlock(&b->mutex); pthread_cleanup_pop(0); return 0; } /* * placeholder for pthread_setaffinity_np(). This stuff is ugly * and I could not figure out a way to get it compiled while also preserving * the pthread_*cancel(). There are issues with LinuxThreads and NPTL. I * decided to quit on this and implement my own affinity call until this * settles. */ static int pin_cpu(uint32_t cpu) { uint64_t *mask; size_t size; pid_t pid; int ret; pid = syscall(__NR_gettid); size = ncpus * sizeof(uint64_t); mask = calloc(1, size); if (mask == NULL) { dprint("CPU%u: cannot allocate bitvector\n", cpu); return -1; } mask[cpu>>6] = 1ULL << (cpu & 63); ret = syscall(__NR_sched_setaffinity, pid, size, mask); free(mask); return ret; } static void pfms_thread_mainloop(void *arg) { long k = (long )arg; uint32_t mycpu = (uint32_t)k; pfarg_ctx_t myctx, *ctx; pfarg_load_t load_args; int fd = -1; pfms_thread_t *td; sem_t *cmd_sem; int ret = 0; memset(&load_args, 0, sizeof(load_args)); load_args.load_pid = mycpu; td = tds+mycpu; ret = pin_cpu(mycpu); dprint("CPU%u wthread created and pinned ret=%d\n", mycpu, ret); cmd_sem = &tds[mycpu].cmd_sem; for(;;) { dprint("CPU%u waiting for cmd\n", mycpu); sem_wait(cmd_sem); switch(td->cmd) { case CMD_NONE: ret = 0; break; case CMD_CTX: /* * copy context to get private fd */ ctx = td->data; myctx = *ctx; fd = pfm_create_context(&myctx, NULL, NULL, 0); ret = fd < 0 ? -1 : 0; dprint("CPU%u CMD_CTX ret=%d errno=%d fd=%d\n", mycpu, ret, errno, fd); break; case CMD_LOAD: ret = pfm_load_context(fd, &load_args); dprint("CPU%u CMD_LOAD ret=%d errno=%d fd=%d\n", mycpu, ret, errno, fd); break; case CMD_UNLOAD: ret = pfm_unload_context(fd); dprint("CPU%u CMD_UNLOAD ret=%d errno=%d fd=%d\n", mycpu, ret, errno, fd); break; case CMD_START: ret = pfm_start(fd, NULL); dprint("CPU%u CMD_START ret=%d errno=%d fd=%d\n", mycpu, ret, errno, fd); break; case CMD_STOP: ret = pfm_stop(fd); dprint("CPU%u CMD_STOP ret=%d errno=%d fd=%d\n", mycpu, ret, errno, fd); break; case CMD_WPMCS: ret = pfm_write_pmcs(fd,(pfarg_pmc_t *)td->data, td->ndata); dprint("CPU%u CMD_WPMCS ret=%d errno=%d fd=%d\n", mycpu, ret, errno, fd); break; case CMD_WPMDS: ret = pfm_write_pmds(fd,(pfarg_pmd_t *)td->data, td->ndata); dprint("CPU%u CMD_WPMDS ret=%d errno=%d fd=%d\n", mycpu, ret, errno, fd); break; case CMD_RPMDS: ret = pfm_read_pmds(fd,(pfarg_pmd_t *)td->data, td->ndata); dprint("CPU%u CMD_RPMDS ret=%d errno=%d fd=%d\n", mycpu, ret, errno, fd); break; case CMD_CLOSE: dprint("CPU%u CMD_CLOSE fd=%d\n", mycpu, fd); ret = close(fd); fd = -1; break; default: break; } td->ret = ret; dprint("CPU%u td->ret=%d\n", mycpu, ret); barrier_wait(td->barrier); } } static int create_one_wthread(int cpu) { int ret; sem_init(&tds[cpu].cmd_sem, 0, 0); ret = pthread_create(&tds[cpu].tid, NULL, (void *(*)(void *))pfms_thread_mainloop, (void *)(long)cpu); return ret; } /* * must be called with tds_lock held */ static int create_wthreads(uint64_t *cpu_list, uint32_t n) { uint64_t v; uint32_t i,k, cpu; int ret = 0; for(k=0, cpu = 0; k < n; k++, cpu+= 64) { v = cpu_list[k]; for(i=0; v && i < 63; i++, v>>=1, cpu++) { if ((v & 0x1) && tds[cpu].tid == 0) { ret = create_one_wthread(cpu); if (ret) break; } } } if (ret) dprint("cannot create wthread on CPU%u\n", cpu); return ret; } int pfms_initialize(void) { printf("cpu_t=%zu thread=%zu session_t=%zu\n", sizeof(pfms_cpu_t), sizeof(pfms_thread_t), sizeof(pfms_session_t)); ncpus = (uint32_t)sysconf(_SC_NPROCESSORS_ONLN); if (ncpus == -1) { dprint("cannot retrieve number of online processors\n"); return -1; } dprint("configured for %u CPUs\n", ncpus); /* * XXX: assuming CPU are contiguously indexed */ tds = calloc(ncpus, sizeof(*tds)); if (tds == NULL) { dprint("cannot allocate thread descriptors\n"); return -1; } return 0; } int pfms_create(uint64_t *cpu_list, size_t n, pfarg_ctx_t *ctx, pfms_ovfl_t *ovfl, void **desc) { uint64_t v; size_t k, i; uint32_t num, cpu; pfms_session_t *s; int ret; if (cpu_list == NULL || n == 0 || ctx == NULL || desc == NULL) { dprint("invalid parameters\n"); return -1; } if ((ctx->ctx_flags & PFM_FL_SYSTEM_WIDE) == 0) { dprint("only works for system wide\n"); return -1; } *desc = NULL; /* * XXX: assuming CPU are contiguously indexed */ num = 0; for(k=0, cpu = 0; k < n; k++, cpu+=64) { v = cpu_list[k]; for(i=0; v && i < 63; i++, v>>=1, cpu++) { if (v & 0x1) { if (cpu >= ncpus) { dprint("unavailable CPU%u\n", cpu); return -1; } num++; } } } if (num == 0) return 0; s = calloc(1, sizeof(*s)); if (s == NULL) { dprint("cannot allocate %u contexts\n", num); return -1; } s->ncpus = num; printf("%u-way session\n", num); /* * +1 to account for main thread waiting */ ret = barrier_init(&s->barrier, num + 1); if (ret) { dprint("cannot init barrier\n"); goto error_free; } /* * lock thread descriptor table, no other create_session, close_session * can occur */ pthread_mutex_lock(&tds_lock); if (create_wthreads(cpu_list, n)) goto error_free_unlock; /* * check all needed threads are available */ for(k=0, cpu = 0; k < n; k++, cpu += 64) { v = cpu_list[k]; for(i=0; v && i < 63; i++, v>>=1, cpu++) { if (v & 0x1) { if (tds[cpu].barrier) { dprint("CPU%u already managing a session\n", cpu); goto error_free_unlock; } } } } /* * send create context order */ for(k=0, cpu = 0; k < n; k++, cpu += 64) { v = cpu_list[k]; for(i=0; v && i < 63; i++, v>>=1, cpu++) { if (v & 0x1) { tds[cpu].cmd = CMD_CTX; tds[cpu].data = ctx; tds[cpu].barrier = &s->barrier; sem_post(&tds[cpu].cmd_sem); } } } barrier_wait(&s->barrier); ret = 0; /* * check for errors */ for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { ret = tds[k].ret; if (ret) break; } } /* * undo if error found */ if (k < ncpus) { for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { if (tds[k].ret == 0) { tds[k].cmd = CMD_CLOSE; sem_post(&tds[k].cmd_sem); } /* mark as free */ tds[k].barrier = NULL; } } } pthread_mutex_unlock(&tds_lock); if (ret == 0) *desc = s; return ret ? -1 : 0; error_free_unlock: pthread_mutex_unlock(&tds_lock); error_free: free(s); return -1; } int pfms_load(void *desc) { uint32_t k; pfms_session_t *s; int ret; if (desc == NULL) { dprint("invalid parameters\n"); return -1; } s = (pfms_session_t *)desc; if (s->ncpus == 0) { dprint("invalid session content 0 CPUS\n"); return -1; } /* * send create context order */ for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { tds[k].cmd = CMD_LOAD; sem_post(&tds[k].cmd_sem); } } barrier_wait(&s->barrier); ret = 0; /* * check for errors */ for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { ret = tds[k].ret; if (ret) { dprint("failure on CPU%u\n", k); break; } } } /* * if error, unload all others */ if (k < ncpus) { for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { if (tds[k].ret == 0) { tds[k].cmd = CMD_UNLOAD; sem_post(&tds[k].cmd_sem); } } } } return ret ? -1 : 0; } static int __pfms_do_simple_cmd(pfms_cmd_t cmd, void *desc, void *data, uint32_t n) { size_t k; pfms_session_t *s; int ret; if (desc == NULL) { dprint("invalid parameters\n"); return -1; } s = (pfms_session_t *)desc; if (s->ncpus == 0) { dprint("invalid session content 0 CPUS\n"); return -1; } /* * send create context order */ for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { tds[k].cmd = cmd; tds[k].data = data; tds[k].ndata = n; sem_post(&tds[k].cmd_sem); } } barrier_wait(&s->barrier); ret = 0; /* * check for errors */ for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { ret = tds[k].ret; if (ret) { dprint("failure on CPU%zu\n", k); break; } } } /* * simple commands cannot be undone */ return ret ? -1 : 0; } int pfms_unload(void *desc) { return __pfms_do_simple_cmd(CMD_UNLOAD, desc, NULL, 0); } int pfms_start(void *desc) { return __pfms_do_simple_cmd(CMD_START, desc, NULL, 0); } int pfms_stop(void *desc) { return __pfms_do_simple_cmd(CMD_STOP, desc, NULL, 0); } int pfms_write_pmcs(void *desc, pfarg_pmc_t *pmcs, uint32_t n) { return __pfms_do_simple_cmd(CMD_WPMCS, desc, pmcs, n); } int pfms_write_pmds(void *desc, pfarg_pmd_t *pmds, uint32_t n) { return __pfms_do_simple_cmd(CMD_WPMDS, desc, pmds, n); } int pfms_close(void *desc) { size_t k; pfms_session_t *s; int ret; if (desc == NULL) { dprint("invalid parameters\n"); return -1; } s = (pfms_session_t *)desc; if (s->ncpus == 0) { dprint("invalid session content 0 CPUS\n"); return -1; } for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { tds[k].cmd = CMD_CLOSE; sem_post(&tds[k].cmd_sem); } } barrier_wait(&s->barrier); ret = 0; pthread_mutex_lock(&tds_lock); /* * check for errors */ for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { if (tds[k].ret) { dprint("failure on CPU%zu\n", k); } ret |= tds[k].ret; tds[k].barrier = NULL; } } pthread_mutex_unlock(&tds_lock); free(s); /* * XXX: we cannot undo close */ return ret ? -1 : 0; } int pfms_read_pmds(void *desc, pfarg_pmd_t *pmds, uint32_t n) { pfms_session_t *s; uint32_t k, pmds_per_cpu; int ret; if (desc == NULL) { dprint("invalid parameters\n"); return -1; } s = (pfms_session_t *)desc; if (s->ncpus == 0) { dprint("invalid session content 0 CPUS\n"); return -1; } if (n % s->ncpus) { dprint("invalid number of pfarg_pmd_t provided, must be multiple of %u\n", s->ncpus); return -1; } pmds_per_cpu = n / s->ncpus; dprint("n=%u ncpus=%u per_cpu=%u\n", n, s->ncpus, pmds_per_cpu); for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { tds[k].cmd = CMD_RPMDS; tds[k].data = pmds; tds[k].ndata= pmds_per_cpu; sem_post(&tds[k].cmd_sem); pmds += pmds_per_cpu; } } barrier_wait(&s->barrier); ret = 0; /* * check for errors */ for(k=0; k < ncpus; k++) { if (tds[k].barrier == &s->barrier) { ret = tds[k].ret; if (ret) { dprint("failure on CPU%u\n", k); break; } } } /* * cannot undo pfm_read_pmds */ return ret ? -1 : 0; } #if 0 /* * beginning of test program */ #include #define NUM_PMCS PFMLIB_MAX_PMCS #define NUM_PMDS PFMLIB_MAX_PMDS static void fatal_error(char *fmt,...) __attribute__((noreturn)); static void fatal_error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); exit(1); } static uint32_t popcount(uint64_t c) { uint32_t count = 0; for(; c; c>>=1) { if (c & 0x1) count++; } return count; } int main(int argc, char **argv) { pfarg_ctx_t ctx; pfarg_pmc_t pc[NUM_PMCS]; pfarg_pmd_t *pd; pfmlib_input_param_t inp; pfmlib_output_param_t outp; uint64_t cpu_list; void *desc; unsigned int num_counters; uint32_t i, j, k, l, ncpus, npmds; size_t len; int ret; char *name; if (pfm_initialize() != PFMLIB_SUCCESS) fatal_error("cannot initialize libpfm\n"); if (pfms_initialize()) fatal_error("cannot initialize libpfms\n"); pfm_get_num_counters(&num_counters); pfm_get_max_event_name_len(&len); name = malloc(len+1); if (name == NULL) fatal_error("cannot allocate memory for event name\n"); memset(&ctx, 0, sizeof(ctx)); memset(pc, 0, sizeof(pc)); memset(&inp,0, sizeof(inp)); memset(&outp,0, sizeof(outp)); cpu_list = argc > 1 ? strtoul(argv[1], NULL, 0) : 0x3; ncpus = popcount(cpu_list); if (pfm_get_cycle_event(&inp.pfp_events[0].event) != PFMLIB_SUCCESS) fatal_error("cannot find cycle event\n"); if (pfm_get_inst_retired_event(&inp.pfp_events[1].event) != PFMLIB_SUCCESS) fatal_error("cannot find inst retired event\n"); i = 2; inp.pfp_dfl_plm = PFM_PLM3|PFM_PLM0; if (i > num_counters) { i = num_counters; printf("too many events provided (max=%d events), using first %d event(s)\n", num_counters, i); } /* * how many counters we use */ inp.pfp_event_count = i; /* * indicate we are using the monitors for a system-wide session. * This may impact the way the library sets up the PMC values. */ inp.pfp_flags = PFMLIB_PFP_SYSTEMWIDE; /* * let the library figure out the values for the PMCS */ if ((ret=pfm_dispatch_events(&inp, NULL, &outp, NULL)) != PFMLIB_SUCCESS) fatal_error("cannot configure events: %s\n", pfm_strerror(ret)); npmds = ncpus * inp.pfp_event_count; dprint("ncpus=%u npmds=%u\n", ncpus, npmds); pd = calloc(npmds, sizeof(pfarg_pmd_t)); if (pd == NULL) fatal_error("cannot allocate pd array\n"); for (i=0; i < outp.pfp_pmc_count; i++) { pc[i].reg_num = outp.pfp_pmcs[i].reg_num; pc[i].reg_value = outp.pfp_pmcs[i].reg_value; } for(l=0, k = 0; l < ncpus; l++) { for (i=0, j=0; i < inp.pfp_event_count; i++, k++) { pd[k].reg_num = outp.pfp_pmcs[j].reg_pmd_num; for(; j < outp.pfp_pmc_count; j++) if (outp.pfp_pmcs[j].reg_evt_idx != i) break; } } /* * create a context on all CPUs we asked for * * libpfms only works for system-wide, so we set the flag in * the master context. the context argument is not modified by * call. * * desc is an opaque descriptor used to identify session. */ ctx.ctx_flags = PFM_FL_SYSTEM_WIDE; ret = pfms_create(&cpu_list, 1, &ctx, NULL, &desc); if (ret == -1) fatal_error("create error %d\n", ret); /* * program the PMC registers on all CPUs of interest */ ret = pfms_write_pmcs(desc, pc, outp.pfp_pmc_count); if (ret == -1) fatal_error("write_pmcs error %d\n", ret); /* * program the PMD registers on all CPUs of interest */ ret = pfms_write_pmds(desc, pd, inp.pfp_event_count); if (ret == -1) fatal_error("write_pmds error %d\n", ret); /* * load context on all CPUs of interest */ ret = pfms_load(desc); if (ret == -1) fatal_error("load error %d\n", ret); /* * start monitoring on all CPUs of interest */ ret = pfms_start(desc); if (ret == -1) fatal_error("start error %d\n", ret); /* * simulate some work */ sleep(10); /* * stop monitoring on all CPUs of interest */ ret = pfms_stop(desc); if (ret == -1) fatal_error("stop error %d\n", ret); /* * read the PMD registers on all CPUs of interest. * The pd[] array must be organized such that to * read 2 PMDs on each CPU you need: * - 2 * number of CPUs of interest * - the first 2 elements of pd[] read on 1st CPU * - the next 2 elements of pd[] read on the 2nd CPU * - and so on */ ret = pfms_read_pmds(desc, pd, npmds); if (ret == -1) fatal_error("read_pmds error %d\n", ret); /* * pre per-CPU results */ for(j=0, k= 0; j < ncpus; j++) { for (i=0; i < inp.pfp_event_count; i++, k++) { pfm_get_full_event_name(&inp.pfp_events[i], name, len); printf("CPU%-3d PMD%u %20"PRIu64" %s\n", j, pd[k].reg_num, pd[k].reg_value, name); } } /* * destroy context on all CPUs of interest. * After this call desc is invalid */ ret = pfms_close(desc); if (ret == -1) fatal_error("close error %d\n", ret); free(name); return 0; } #endif