Blame libglusterfs/src/monitoring.c

Packit b2c0d9
/*
Packit b2c0d9
  Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
Packit b2c0d9
  This file is part of GlusterFS.
Packit b2c0d9
Packit b2c0d9
  This file is licensed to you under your choice of the GNU Lesser
Packit b2c0d9
  General Public License, version 3 or any later version (LGPLv3 or
Packit b2c0d9
  later), or the GNU General Public License, version 2 (GPLv2), in all
Packit b2c0d9
  cases as published by the Free Software Foundation.
Packit b2c0d9
*/
Packit b2c0d9
Packit b2c0d9
#include "glusterfs/monitoring.h"
Packit b2c0d9
#include "glusterfs/xlator.h"
Packit b2c0d9
#include "glusterfs/syscall.h"
Packit b2c0d9
Packit b2c0d9
#include <stdlib.h>
Packit b2c0d9
Packit b2c0d9
static void
Packit b2c0d9
dump_mem_acct_details(xlator_t *xl, int fd)
Packit b2c0d9
{
Packit b2c0d9
    struct mem_acct_rec *mem_rec;
Packit b2c0d9
    int i = 0;
Packit b2c0d9
Packit b2c0d9
    if (!xl || !xl->mem_acct || (xl->ctx->active != xl->graph))
Packit b2c0d9
        return;
Packit b2c0d9
Packit b2c0d9
    dprintf(fd, "# %s.%s.total.num_types %d\n", xl->type, xl->name,
Packit b2c0d9
            xl->mem_acct->num_types);
Packit b2c0d9
Packit b2c0d9
    dprintf(fd,
Packit b2c0d9
            "# type, in-use-size, in-use-units, max-size, "
Packit b2c0d9
            "max-units, total-allocs\n");
Packit b2c0d9
Packit b2c0d9
    for (i = 0; i < xl->mem_acct->num_types; i++) {
Packit b2c0d9
        mem_rec = &xl->mem_acct->rec[i];
Packit b2c0d9
        if (mem_rec->num_allocs == 0)
Packit b2c0d9
            continue;
Packit b2c0d9
        dprintf(fd, "# %s, %" PRIu64 ", %u, %" PRIu64 ", %u, %" PRIu64 "\n",
Packit b2c0d9
                mem_rec->typestr, mem_rec->size, mem_rec->num_allocs,
Packit b2c0d9
                mem_rec->max_size, mem_rec->max_num_allocs,
Packit b2c0d9
                mem_rec->total_allocs);
Packit b2c0d9
    }
Packit b2c0d9
}
Packit b2c0d9
Packit b2c0d9
/*
 * Write the process-wide allocator counters to 'fd'.
 *
 * The body is compiled in only when MEMORY_ACCOUNTING_STATS is enabled;
 * otherwise the function writes nothing.
 *
 * All counters are 64-bit, so they are printed with PRIu64 instead of "%lu",
 * which mismatches the argument type where 'long' is 32 bits wide.
 */
static void
dump_global_memory_accounting(int fd)
{
#if MEMORY_ACCOUNTING_STATS
    /* This is not a metric to be watched in admin guide,
       but keeping it here till we resolve all leak-issues
       would be great */
    int i = 0;
    uint64_t count = 0;

    uint64_t tcalloc = GF_ATOMIC_GET(gf_memory_stat_counts.total_calloc);
    uint64_t tmalloc = GF_ATOMIC_GET(gf_memory_stat_counts.total_malloc);
    uint64_t trealloc = GF_ATOMIC_GET(gf_memory_stat_counts.total_realloc);
    uint64_t tfree = GF_ATOMIC_GET(gf_memory_stat_counts.total_free);

    dprintf(fd, "memory.total.calloc %" PRIu64 "\n", tcalloc);
    dprintf(fd, "memory.total.malloc %" PRIu64 "\n", tmalloc);
    dprintf(fd, "memory.total.realloc %" PRIu64 "\n", trealloc);
    dprintf(fd, "memory.total.free %" PRIu64 "\n", tfree);
    /* In-use is derived from calloc + malloc - free; realloc is not part
       of this figure. */
    dprintf(fd, "memory.total.in-use %" PRIu64 "\n",
            ((tcalloc + tmalloc) - tfree));

    /* One line per allocation block-size bucket. */
    for (i = 0; i < GF_BLK_MAX_VALUE; i++) {
        count = GF_ATOMIC_GET(gf_memory_stat_counts.blk_size[i]);
        dprintf(fd, "memory.total.blk_size.%s %" PRIu64 "\n",
                gf_mem_stats_blk[i].blk_size_str, count);
    }

    dprintf(fd, "#----\n");
#endif
}
Packit b2c0d9
Packit b2c0d9
static void
Packit b2c0d9
dump_latency_and_count(xlator_t *xl, int fd)
Packit b2c0d9
{
Packit b2c0d9
    int32_t index = 0;
Packit b2c0d9
    uint64_t fop;
Packit b2c0d9
    uint64_t cbk;
Packit b2c0d9
    uint64_t count;
Packit b2c0d9
Packit b2c0d9
    if (xl->winds) {
Packit b2c0d9
        dprintf(fd, "%s.total.pending-winds.count %" PRIu64 "\n", xl->name,
Packit b2c0d9
                xl->winds);
Packit b2c0d9
    }
Packit b2c0d9
Packit b2c0d9
    /* Need 'fuse' data, and don't need all the old graph info */
Packit b2c0d9
    if ((xl != xl->ctx->master) && (xl->ctx->active != xl->graph))
Packit b2c0d9
        return;
Packit b2c0d9
Packit b2c0d9
    count = GF_ATOMIC_GET(xl->stats.total.count);
Packit b2c0d9
    dprintf(fd, "%s.total.fop-count %" PRIu64 "\n", xl->name, count);
Packit b2c0d9
Packit b2c0d9
    count = GF_ATOMIC_GET(xl->stats.interval.count);
Packit b2c0d9
    dprintf(fd, "%s.interval.fop-count %" PRIu64 "\n", xl->name, count);
Packit b2c0d9
    GF_ATOMIC_INIT(xl->stats.interval.count, 0);
Packit b2c0d9
Packit b2c0d9
    for (index = 0; index < GF_FOP_MAXVALUE; index++) {
Packit b2c0d9
        fop = GF_ATOMIC_GET(xl->stats.total.metrics[index].fop);
Packit b2c0d9
        if (fop) {
Packit b2c0d9
            dprintf(fd, "%s.total.%s.count %" PRIu64 "\n", xl->name,
Packit b2c0d9
                    gf_fop_list[index], fop);
Packit b2c0d9
        }
Packit b2c0d9
        fop = GF_ATOMIC_GET(xl->stats.interval.metrics[index].fop);
Packit b2c0d9
        if (fop) {
Packit b2c0d9
            dprintf(fd, "%s.interval.%s.count %" PRIu64 "\n", xl->name,
Packit b2c0d9
                    gf_fop_list[index], fop);
Packit b2c0d9
        }
Packit b2c0d9
        cbk = GF_ATOMIC_GET(xl->stats.interval.metrics[index].cbk);
Packit b2c0d9
        if (cbk) {
Packit b2c0d9
            dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name,
Packit b2c0d9
                    gf_fop_list[index], cbk);
Packit b2c0d9
        }
Packit b2c0d9
        if (xl->stats.interval.latencies[index].count != 0.0) {
Packit b2c0d9
            dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name,
Packit b2c0d9
                    gf_fop_list[index],
Packit b2c0d9
                    (xl->stats.interval.latencies[index].total /
Packit b2c0d9
                     xl->stats.interval.latencies[index].count));
Packit b2c0d9
            dprintf(fd, "%s.interval.%s.max %lf\n", xl->name,
Packit b2c0d9
                    gf_fop_list[index],
Packit b2c0d9
                    xl->stats.interval.latencies[index].max);
Packit b2c0d9
            dprintf(fd, "%s.interval.%s.min %lf\n", xl->name,
Packit b2c0d9
                    gf_fop_list[index],
Packit b2c0d9
                    xl->stats.interval.latencies[index].min);
Packit b2c0d9
        }
Packit b2c0d9
        GF_ATOMIC_INIT(xl->stats.interval.metrics[index].cbk, 0);
Packit b2c0d9
        GF_ATOMIC_INIT(xl->stats.interval.metrics[index].fop, 0);
Packit b2c0d9
    }
Packit b2c0d9
    memset(xl->stats.interval.latencies, 0,
Packit b2c0d9
           sizeof(xl->stats.interval.latencies));
Packit b2c0d9
}
Packit b2c0d9
Packit b2c0d9
static inline void
Packit b2c0d9
dump_call_stack_details(glusterfs_ctx_t *ctx, int fd)
Packit b2c0d9
{
Packit b2c0d9
    dprintf(fd, "total.stack.count %" PRIu64 "\n",
Packit b2c0d9
            GF_ATOMIC_GET(ctx->pool->total_count));
Packit b2c0d9
    dprintf(fd, "total.stack.in-flight %" PRIu64 "\n", ctx->pool->cnt);
Packit b2c0d9
}
Packit b2c0d9
Packit b2c0d9
static inline void
Packit b2c0d9
dump_dict_details(glusterfs_ctx_t *ctx, int fd)
Packit b2c0d9
{
Packit b2c0d9
    uint64_t total_dicts = 0;
Packit b2c0d9
    uint64_t total_pairs = 0;
Packit b2c0d9
Packit b2c0d9
    total_dicts = GF_ATOMIC_GET(ctx->stats.total_dicts_used);
Packit b2c0d9
    total_pairs = GF_ATOMIC_GET(ctx->stats.total_pairs_used);
Packit b2c0d9
Packit b2c0d9
    dprintf(fd, "total.dict.max-pairs-per %" PRIu64 "\n",
Packit b2c0d9
            GF_ATOMIC_GET(ctx->stats.max_dict_pairs));
Packit b2c0d9
    dprintf(fd, "total.dict.pairs-used %" PRIu64 "\n", total_pairs);
Packit b2c0d9
    dprintf(fd, "total.dict.used %" PRIu64 "\n", total_dicts);
Packit b2c0d9
    dprintf(fd, "total.dict.average-pairs %" PRIu64 "\n",
Packit b2c0d9
            (total_pairs / total_dicts));
Packit b2c0d9
}
Packit b2c0d9
Packit b2c0d9
static void
Packit b2c0d9
dump_inode_stats(glusterfs_ctx_t *ctx, int fd)
Packit b2c0d9
{
Packit b2c0d9
}
Packit b2c0d9
Packit b2c0d9
static void
Packit b2c0d9
dump_global_metrics(glusterfs_ctx_t *ctx, int fd)
Packit b2c0d9
{
Packit b2c0d9
    struct timeval tv;
Packit b2c0d9
    time_t nowtime;
Packit b2c0d9
    struct tm *nowtm;
Packit b2c0d9
    char tmbuf[64] = {
Packit b2c0d9
        0,
Packit b2c0d9
    };
Packit b2c0d9
Packit b2c0d9
    gettimeofday(&tv, NULL);
Packit b2c0d9
    nowtime = tv.tv_sec;
Packit b2c0d9
    nowtm = localtime(&nowtime);
Packit b2c0d9
    strftime(tmbuf, sizeof tmbuf, "%Y-%m-%d %H:%M:%S", nowtm);
Packit b2c0d9
Packit b2c0d9
    /* Let every file have information on which process dumped info */
Packit b2c0d9
    dprintf(fd, "## %s\n", ctx->cmdlinestr);
Packit b2c0d9
    dprintf(fd, "### %s\n", tmbuf);
Packit b2c0d9
    dprintf(fd, "### BrickName: %s\n", ctx->cmd_args.brick_name);
Packit b2c0d9
    dprintf(fd, "### MountName: %s\n", ctx->cmd_args.mount_point);
Packit b2c0d9
    dprintf(fd, "### VolumeName: %s\n", ctx->cmd_args.volume_name);
Packit b2c0d9
Packit b2c0d9
    /* Dump memory accounting */
Packit b2c0d9
    dump_global_memory_accounting(fd);
Packit b2c0d9
    dprintf(fd, "# -----\n");
Packit b2c0d9
Packit b2c0d9
    dump_call_stack_details(ctx, fd);
Packit b2c0d9
    dump_dict_details(ctx, fd);
Packit b2c0d9
    dprintf(fd, "# -----\n");
Packit b2c0d9
Packit b2c0d9
    dump_inode_stats(ctx, fd);
Packit b2c0d9
    dprintf(fd, "# -----\n");
Packit b2c0d9
}
Packit b2c0d9
Packit b2c0d9
static void
Packit b2c0d9
dump_xl_metrics(glusterfs_ctx_t *ctx, int fd)
Packit b2c0d9
{
Packit b2c0d9
    xlator_t *xl;
Packit b2c0d9
Packit b2c0d9
    xl = ctx->active->top;
Packit b2c0d9
Packit b2c0d9
    while (xl) {
Packit b2c0d9
        dump_latency_and_count(xl, fd);
Packit b2c0d9
        dump_mem_acct_details(xl, fd);
Packit b2c0d9
        if (xl->dump_metrics)
Packit b2c0d9
            xl->dump_metrics(xl, fd);
Packit b2c0d9
        xl = xl->next;
Packit b2c0d9
    }
Packit b2c0d9
Packit b2c0d9
    if (ctx->master) {
Packit b2c0d9
        xl = ctx->master;
Packit b2c0d9
Packit b2c0d9
        dump_latency_and_count(xl, fd);
Packit b2c0d9
        dump_mem_acct_details(xl, fd);
Packit b2c0d9
        if (xl->dump_metrics)
Packit b2c0d9
            xl->dump_metrics(xl, fd);
Packit b2c0d9
    }
Packit b2c0d9
Packit b2c0d9
    return;
Packit b2c0d9
}
Packit b2c0d9
Packit b2c0d9
char *
Packit b2c0d9
gf_monitor_metrics(glusterfs_ctx_t *ctx)
Packit b2c0d9
{
Packit b2c0d9
    int ret = -1;
Packit b2c0d9
    int fd = 0;
Packit b2c0d9
    char *filepath = NULL, *dumppath = NULL;
Packit b2c0d9
Packit b2c0d9
    dumppath = ctx->config.metrics_dumppath;
Packit b2c0d9
    if (dumppath == NULL) {
Packit b2c0d9
        dumppath = GLUSTER_METRICS_DIR;
Packit b2c0d9
    }
Packit b2c0d9
    ret = mkdir_p(dumppath, 0755, true);
Packit b2c0d9
    if (ret) {
Packit b2c0d9
        /* EEXIST is handled in mkdir_p() itself */
Packit b2c0d9
        gf_msg("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
Packit b2c0d9
               "failed to create metrics dir %s (%s)", dumppath,
Packit b2c0d9
               strerror(errno));
Packit b2c0d9
        return NULL;
Packit b2c0d9
    }
Packit b2c0d9
Packit b2c0d9
    ret = gf_asprintf(&filepath, "%s/gmetrics.XXXXXX", dumppath);
Packit b2c0d9
    if (ret < 0) {
Packit b2c0d9
        return NULL;
Packit b2c0d9
    }
Packit b2c0d9
Packit b2c0d9
    /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
Packit b2c0d9
    fd = mkstemp(filepath);
Packit b2c0d9
    if (fd < 0) {
Packit b2c0d9
        gf_msg("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
Packit b2c0d9
               "failed to open tmp file %s (%s)", filepath, strerror(errno));
Packit b2c0d9
        GF_FREE(filepath);
Packit b2c0d9
        return NULL;
Packit b2c0d9
    }
Packit b2c0d9
Packit b2c0d9
    dump_global_metrics(ctx, fd);
Packit b2c0d9
Packit b2c0d9
    dump_xl_metrics(ctx, fd);
Packit b2c0d9
Packit b2c0d9
    /* This below line is used just to capture any errors with dprintf() */
Packit b2c0d9
    ret = dprintf(fd, "\n# End of metrics\n");
Packit b2c0d9
    if (ret < 0) {
Packit b2c0d9
        gf_msg("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,
Packit b2c0d9
               "dprintf() failed: %s", strerror(errno));
Packit b2c0d9
    }
Packit b2c0d9
Packit b2c0d9
    ret = sys_fsync(fd);
Packit b2c0d9
    if (ret < 0) {
Packit b2c0d9
        gf_msg("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,
Packit b2c0d9
               "fsync() failed: %s", strerror(errno));
Packit b2c0d9
    }
Packit b2c0d9
    sys_close(fd);
Packit b2c0d9
Packit b2c0d9
    /* Figure this out, not happy with returning this string */
Packit b2c0d9
    return filepath;
Packit b2c0d9
}