/*
 * block queue tracing application
 *
 * Copyright (C) 2005 Jens Axboe
 * Copyright (C) 2006 Jens Axboe
 *
 * Rewrite to have a single thread per CPU (managing all devices on that CPU)
 *	Alan D. Brunelle - January 2009
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/*
 * NOTE(review): the following #include directives carry no header name in
 * this copy of the file (the names appear to have been lost before this
 * text was captured). They are kept as found; restore the header names
 * from a pristine copy before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "btt/list.h"
#include "blktrace.h"

/*
 * You may want to increase this even more, if you are logging at a high
 * rate and see skipped/missed events
 */
#define BUF_SIZE (512 * 1024)	/* initial value of the buf_size global */
#define BUF_NR (4)		/* initial value of the buf_nr global */

/* NOTE(review): presumably the size of the stdio buffer for output files - confirm at the use site */
#define FILE_VBUF_SIZE (128 * 1024)

/*
 * The bytes 0x64 0x62 0x67 0x20 are ASCII 'd' 'b' 'g' ' '.
 * NOTE(review): presumably the filesystem magic expected for debugfs - confirm.
 */
#define DEBUGFS_TYPE (0x64626720)

/* Initial value of the net_port global */
#define TRACE_NET_PORT (8462)

/*
 * Networking modes; Net_none is explicitly 0.
 * NOTE(review): presumably the values held by the net_mode global - confirm.
 */
enum {
	Net_none = 0,
	Net_server,
	Net_client,
};

/*
 * States a tracer thread can report.
 * NOTE(review): presumably stored in struct tracer's status field and
 * tallied in the nthreads_running/_leaving/_error counters - confirm.
 */
enum thread_status {
	Th_running,
	Th_leaving,
	Th_error
};

/*
 * Generic stats collected: nevents can be _roughly_ estimated by data_read
 * (discounting pdu...)
 *
 * These fields are updated w/ pdc_dr_update & pdc_nev_update below.
*/
struct pdc_stats {
	unsigned long long data_read;	/* amount of trace data read (presumably bytes - confirm) */
	unsigned long long nevents;	/* number of events seen */
};

/*
 * One per traced device. Linked on a list through 'head'
 * (NOTE(review): presumably the global devpaths list - confirm).
 */
struct devpath {
	struct list_head head;
	char *path;			/* path to device special file */
	char *buts_name;		/* name returned from bt kernel code */
	struct pdc_stats *stats;	/* NOTE(review): presumably one entry per CPU - confirm */
	int fd, ncpus;
	unsigned long long drops;	/* NOTE(review): presumably dropped-event count - confirm */

	/*
	 * For piped output only:
	 *
	 * Each tracer will have a tracer_devpath_head that it will add new
	 * data onto. Its list is protected by tracer_devpath_head.mutex,
	 * and the tracer will signal the processing thread using the
	 * dp_cond, dp_mutex & dp_entries variables (declared later in
	 * this file).
	 */
	struct tracer_devpath_head *heads;

	/*
	 * For network server mode only:
	 */
	struct cl_host *ch;
	u32 cl_id;
	time_t cl_connect_time;
	struct io_info *ios;
};

/*
 * For piped output to stdout we will have each tracer thread (one per dev)
 * tack buffers read from the relay queues on a per-device list.
 *
 * The main thread will then collect trace buffers from each of the lists
 * in turn.
 *
 * We will use a mutex to guard each trace_buf list. The tracers
 * can then signal the main thread using dp_cond, dp_mutex and
 * dp_entries. (When dp_entries is 0, and a tracer adds an entry it will
 * signal. When dp_entries is 0, the main thread will wait for that condition
 * to be signalled.)
 *
 * adb: It may be better just to have a large buffer per tracer per dev,
 * and then use it as a ring-buffer. This would certainly cut down a lot
 * of malloc/free thrashing, at the cost of more memory movements (potentially).
 */
struct trace_buf {
	struct list_head head;	/* linkage on a tracer_devpath_head list */
	struct devpath *dpp;	/* device this buffer belongs to */
	void *buf;		/* the trace data itself */
	int cpu, len;		/* originating CPU; len is presumably the data length in buf - confirm */
};

/*
 * Head of one trace_buf list, guarded by its own mutex.
 */
struct tracer_devpath_head {
	pthread_mutex_t mutex;	/* protects 'head' (see devpath.heads) */
	struct list_head head;
	struct trace_buf *prev;	/* NOTE(review): presumably a partially consumed buffer carried over - confirm */
};

/*
 * Used to handle the mmap() interfaces for output file (containing traces)
 */
struct mmap_info {
	void *fs_buf;		/* NOTE(review): presumably the current mmap()ed window - confirm */
	unsigned long long fs_size, fs_max_size, fs_off, fs_buf_len;
	unsigned long buf_size, buf_nr;
	int pagesize;
};

/*
 * Each thread doing work on a (client) side of blktrace will have one
 * of these.
The ios array contains input/output information, pfds holds
 * poll() data. The volatile's provide flags to/from the main executing
 * thread.
 */
struct tracer {
	struct list_head head;		/* NOTE(review): presumably linkage on the global tracers list - confirm */
	struct io_info *ios;		/* input/output information array */
	struct pollfd *pfds;		/* poll() data */
	pthread_t thread;
	int cpu, nios;			/* nios: presumably the number of entries in ios - confirm */
	volatile int status, is_done;	/* flags exchanged with the main thread */
};

/*
 * networking stuff follows. we include a magic number so we know whether
 * to endianness convert or not.
 *
 * The len field is overloaded:
 *	0 - Indicates an "open" - allowing the server to set up for a dev/cpu
 *	1 - Indicates a "close" - Shut down connection orderly
 *
 * The cpu field is overloaded on close: it will contain the number of drops.
 *
 * NOTE(review): this header is exchanged between client and server, so the
 * order and size of the fields presumably must not change - confirm before
 * touching the layout.
 */
struct blktrace_net_hdr {
	u32 magic;		/* same as trace magic */
	char buts_name[32];	/* trace name */
	u32 cpu;		/* for which cpu */
	u32 max_cpus;
	u32 len;		/* length of following trace data */
	u32 cl_id;		/* id for set of client per-cpu connections */
	u32 buf_size;		/* client buf_size for this trace */
	u32 buf_nr;		/* client buf_nr for this trace */
	u32 page_size;		/* client page_size for this trace */
};

/*
 * Each host encountered has one of these. The head is used to link this
 * on to the network server's ch_list. Connections associated with this
 * host are linked on conn_list, and any devices traced on that host
 * are connected on the devpaths list.
 */
struct cl_host {
	struct list_head head;		/* linkage on net_server_s.ch_list */
	struct list_head conn_list;	/* connections from this host */
	struct list_head devpaths;	/* devices traced on this host */
	struct net_server_s *ns;	/* owning network server */
	char *hostname;
	struct in_addr cl_in_addr;
	int connects, ndevs, cl_opens;
};

/*
 * Each connection (client to server socket ('fd')) has one of these. A
 * back reference to the host ('ch'), and lists headers (for the host
 * list, and the network server conn_list) are also included.
 */
struct cl_conn {
	struct list_head ch_head, ns_head;	/* host list / server conn_list linkage */
	struct cl_host *ch;			/* back reference to the host */
	int fd, ncpus;				/* fd: the client-to-server socket */
	time_t connect_time;
};

/*
 * The network server requires some poll structures to be maintained -
 * one per conection currently on conn_list.
The nchs/ch_list values
 * are for each host connected to this server. The addr field is used
 * for scratch as new connections are established.
 */
struct net_server_s {
	struct list_head conn_list;	/* current connections */
	struct list_head ch_list;	/* hosts connected to this server */
	struct pollfd *pfds;		/* poll structures, one per connection on conn_list */
	int listen_fd, connects, nchs;	/* nchs goes with ch_list */
	struct sockaddr_in addr;	/* scratch, used while accepting new connections */
};

/*
 * This structure is (generically) used to provide information
 * for a read-to-write set of values.
 *
 * ifn & ifd represent input information
 *
 * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally).
 */
struct io_info {
	struct devpath *dpp;
	FILE *ofp;
	char *obuf;
	struct cl_conn *nc;	/* Server network connection */

	/*
	 * mmap controlled output files
	 */
	struct mmap_info mmap_info;

	/*
	 * Client network fields
	 */
	unsigned int ready;
	unsigned long long data_queued;

	/*
	 * Input/output file descriptors & names
	 */
	int ifd, ofd;
	char ifn[MAXPATHLEN + 64];
	char ofn[MAXPATHLEN + 64];
};

static char blktrace_version[] = "2.0.0";

/*
 * Linkage to blktrace helper routines (trace conversions)
 *
 * Not static: this symbol is visible to other translation units.
 * Starts at -1 (NOTE(review): presumably meaning "not yet determined" -
 * confirm against the helper routines).
 */
int data_is_native = -1;

/*
 * Run-time configuration and state.
 * NOTE(review): most of these are presumably filled in from the command
 * line options listed in l_opts - confirm in the argument handling code.
 */
static int ndevs;
static int max_cpus;
static int ncpus;
static cpu_set_t *online_cpus;
static int pagesize;
static int act_mask = ~0U;	/* all bits set by default */
static int kill_running_trace;
static int stop_watch;
static int piped_output;
static char *debugfs_path = "/sys/kernel/debug";
static char *output_name;
static char *output_dir;
static unsigned long buf_size = BUF_SIZE;
static unsigned long buf_nr = BUF_NR;
static FILE *pfp;
static LIST_HEAD(devpaths);
static LIST_HEAD(tracers);
static volatile int done;

/*
 * tracer threads add entries, the main thread takes them off and processes
 * them. These protect the dp_entries variable.
 */
static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER;
static volatile int dp_entries;

/*
 * These synchronize master / thread interactions.
*/
static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER;
/* NOTE(review): presumably one counter per enum thread_status value - confirm */
static volatile int nthreads_running;
static volatile int nthreads_leaving;
static volatile int nthreads_error;
static volatile int tracers_run;

/*
 * network cmd line params
 */
static struct sockaddr_in hostname_addr;
static char hostname[MAXHOSTNAMELEN];
static int net_port = TRACE_NET_PORT;
static int net_use_sendfile = 1;	/* on by default; see the "no-sendfile" option below */
static int net_mode;
static int *cl_fds;

/*
 * Handlers reached through function pointers.
 * NOTE(review): presumably assigned once at start-up according to the
 * selected output/network mode - confirm where they are set.
 */
static int (*handle_pfds)(struct tracer *, int, int);
static int (*handle_list)(struct tracer_devpath_head *, struct list_head *);

/*
 * Short option string; a letter followed by ':' takes an argument. Each
 * letter matches the .val of an entry in l_opts below.
 */
#define S_OPTS "d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"

/*
 * Long option table, terminated by an entry whose name is NULL.
 * Both 'v' and 'V' are registered under the long name "version".
 */
static struct option l_opts[] = {
	{
		.name = "dev",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'd'
	},
	{
		.name = "input-devs",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'I'
	},
	{
		.name = "act-mask",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'a'
	},
	{
		.name = "set-mask",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'A'
	},
	{
		.name = "relay",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'r'
	},
	{
		.name = "output",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'o'
	},
	{
		.name = "kill",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'k'
	},
	{
		.name = "stopwatch",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'w'
	},
	{
		.name = "version",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'v'
	},
	{
		.name = "version",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'V'
	},
	{
		.name = "buffer-size",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'b'
	},
	{
		.name = "num-sub-buffers",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'n'
	},
	{
		.name = "output-dir",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'D'
	},
	{
		.name = "listen",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'l'
	},
	{
		.name = "host",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'h'
	},
	{
		.name = "port",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'p'
	},
	{
		.name = "no-sendfile",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 's'
	},
	{
		.name = NULL,
	}
};

static char usage_str[] = "\n\n" \
	"-d | --dev=\n" \
	"[ -r | --relay= ]\n" \
	"[ -o | --output=]\n" \
	"[ -D | --output-dir=\n" \
	"[ -w