/**
* @file normalizer.c
* @brief A small tool to normalize data.
*
* This is the most basic example demonstrating how to use liblognorm.
* It loads log samples from the files specified on the command line,
* reads to-be-normalized data from stdin and writes the normalized
* form to stdout. Besides being an example, it also carries out useful
* processing.
*
* @author Rainer Gerhards <rgerhards@adiscon.com>
*
*//*
* liblognorm - a fast samples-based log normalization library
* Copyright 2010-2016 by Rainer Gerhards and Adiscon GmbH.
*
* This file is part of liblognorm.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution.
*/
#include "config.h"
#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <libestr.h>
#include "liblognorm.h"
#include "lognorm.h"
#include "enc.h"
/* we need to turn off this warning, as it also comes up in C99 mode, which
* we use.
*/
#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
static ln_ctx ctx;
static int verbose = 0;
#define OUTPUT_PARSED_RECS 0x01
#define OUTPUT_UNPARSED_RECS 0x02
static int recOutput = OUTPUT_PARSED_RECS | OUTPUT_UNPARSED_RECS;
/**< controls which records to output */
static int outputSummaryLine = 0;
static int outputNbrUnparsed = 0;
static int addErrLineNbr = 0; /**< add line number info to unparsed events */
static int flatTags = 0; /**< print event.tags in JSON? */
static FILE *fpDOT;
static es_str_t *encFmt = NULL; /**< a format string for encoder use */
static es_str_t *mandatoryTag = NULL; /**< tag which must be given so that mesg will
be output. NULL=all */
static enum { f_syslog, f_json, f_xml, f_csv, f_raw } outfmt = f_json;
static void
errCallBack(void __attribute__((unused)) *cookie, const char *msg,
size_t __attribute__((unused)) lenMsg)
{
fprintf(stderr, "liblognorm error: %s\n", msg);
}
static void
dbgCallBack(void __attribute__((unused)) *cookie, const char *msg,
size_t __attribute__((unused)) lenMsg)
{
fprintf(stderr, "liblognorm: %s\n", msg);
}
static void
complain(const char *errmsg)
{
fprintf(stderr, "%s\n", errmsg);
}
/* rawmsg is, as the name says, the raw message, in case we have
* "raw" formatter requested.
*/
static void
outputEvent(struct json_object *json, const char *const rawmsg)
{
char *cstr = NULL;
es_str_t *str = NULL;
if(outfmt == f_raw) {
printf("%s\n", rawmsg);
return;
}
switch(outfmt) {
case f_json:
if(!flatTags) {
json_object_object_del(json, "event.tags");
}
cstr = (char*)json_object_to_json_string(json);
break;
case f_syslog:
ln_fmtEventToRFC5424(json, &str);
break;
case f_xml:
ln_fmtEventToXML(json, &str);
break;
case f_csv:
ln_fmtEventToCSV(json, &str, encFmt);
break;
case f_raw:
fprintf(stderr, "program error: f_raw should not occur "
"here (file %s, line %d)\n", __FILE__, __LINE__);
abort();
break;
default:
fprintf(stderr, "program error: default case should not occur "
"here (file %s, line %d)\n", __FILE__, __LINE__);
abort();
break;
}
if (str != NULL)
cstr = es_str2cstr(str, NULL);
if(verbose > 0) fprintf(stderr, "normalized: '%s'\n", cstr);
printf("%s\n", cstr);
if (str != NULL)
free(cstr);
es_deleteStr(str);
}
/* test if the tag exists */
static int
eventHasTag(struct json_object *json, const char *tag)
{
struct json_object *tagbucket, *tagObj;
int i;
const char *tagCstr;
if (tag == NULL)
return 1;
if (json_object_object_get_ex(json, "event.tags", &tagbucket)) {
if (json_object_get_type(tagbucket) == json_type_array) {
for (i = json_object_array_length(tagbucket) - 1; i >= 0; i--) {
tagObj = json_object_array_get_idx(tagbucket, i);
tagCstr = json_object_get_string(tagObj);
if (!strcmp(tag, tagCstr))
return 1;
}
}
}
if (verbose > 1)
printf("Mandatory tag '%s' has not been found\n", tag);
return 0;
}
static void
amendLineNbr(json_object *const json, const int line_nbr)
{
if(addErrLineNbr) {
struct json_object *jval;
jval = json_object_new_int(line_nbr);
json_object_object_add(json, "lognormalizer.line_nbr", jval);
}
}
#define DEFAULT_LINE_SIZE (10 * 1024)
static char *
read_line(FILE *fp)
{
size_t line_capacity = DEFAULT_LINE_SIZE;
char *line = NULL;
size_t line_len = 0;
int ch = 0;
do {
ch = fgetc(fp);
if (ch == EOF) break;
if (line == NULL) {
line = malloc(line_capacity);
} else if (line_len == line_capacity) {
line_capacity *= 2;
line = realloc(line, line_capacity);
}
if (line == NULL) {
fprintf(stderr, "Couldn't allocate working-buffer for log-line\n");
return NULL;
}
line[line_len++] = ch;
} while(ch != '\n');
if (line != NULL) {
line[--line_len] = '\0';
if(line_len > 0 && line[line_len - 1] == '\r')
line[--line_len] = '\0';
}
return line;
}
/* normalize input data
*/
static void
normalize(void)
{
FILE *fp = stdin;
char *line = NULL;
struct json_object *json = NULL;
long long unsigned numParsed = 0;
long long unsigned numUnparsed = 0;
long long unsigned numWrongTag = 0;
char *mandatoryTagCstr = NULL;
int line_nbr = 0; /* must be int to keep compatible with older json-c */
if (mandatoryTag != NULL) {
mandatoryTagCstr = es_str2cstr(mandatoryTag, NULL);
}
while((line = read_line(fp)) != NULL) {
++line_nbr;
if(verbose > 0) fprintf(stderr, "To normalize: '%s'\n", line);
ln_normalize(ctx, line, strlen(line), &json);
if(json != NULL) {
if(eventHasTag(json, mandatoryTagCstr)) {
struct json_object *dummy;
const int parsed = !json_object_object_get_ex(json,
"unparsed-data", &dummy);
if(parsed) {
numParsed++;
if(recOutput & OUTPUT_PARSED_RECS) {
outputEvent(json, line);
}
} else {
numUnparsed++;
amendLineNbr(json, line_nbr);
if(recOutput & OUTPUT_UNPARSED_RECS) {
outputEvent(json, line);
}
}
} else {
numWrongTag++;
}
json_object_put(json);
json = NULL;
}
free(line);
}
if(outputNbrUnparsed && numUnparsed > 0)
fprintf(stderr, "%llu unparsable entries\n", numUnparsed);
if(numWrongTag > 0)
fprintf(stderr, "%llu entries with wrong tag dropped\n", numWrongTag);
if(outputSummaryLine) {
fprintf(stderr, "%llu records processed, %llu parsed, %llu unparsed\n",
numParsed+numUnparsed, numParsed, numUnparsed);
}
free(mandatoryTagCstr);
}
/**
* Generate a command file for the GNU DOT tools.
*/
static void
genDOT(void)
{
es_str_t *str;
str = es_newStr(1024);
ln_genDotPDAGGraph(ctx->pdag, &str);
fwrite(es_getBufAddr(str), 1, es_strlen(str), fpDOT);
}
static
void printVersion(void)
{
fprintf(stderr, "lognormalizer version: " VERSION "\n");
fprintf(stderr, "liblognorm version: %s\n", ln_version());
fprintf(stderr, "\tadvanced stats: %s\n",
ln_hasAdvancedStats() ? "available" : "not available");
}
static void
handle_generic_option(const char* opt) {
if (strcmp("allowRegex", opt) == 0) {
ln_setCtxOpts(ctx, LN_CTXOPT_ALLOW_REGEX);
} else if (strcmp("addExecPath", opt) == 0) {
ln_setCtxOpts(ctx, LN_CTXOPT_ADD_EXEC_PATH);
} else if (strcmp("addOriginalMsg", opt) == 0) {
ln_setCtxOpts(ctx, LN_CTXOPT_ADD_ORIGINALMSG);
} else if (strcmp("addRule", opt) == 0) {
ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE);
} else if (strcmp("addRuleLocation", opt) == 0) {
ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE_LOCATION);
} else {
fprintf(stderr, "invalid -o option '%s'\n", opt);
exit(1);
}
}
static void usage(void)
{
fprintf(stderr,
"Options:\n"
" -r<rulebase> Rulebase to use. This is required option\n"
" -H print summary line (nbr of msgs Handled)\n"
" -U print number of unparsed messages (only if non-zero)\n"
" -e<json|xml|csv|cee-syslog|raw>\n"
" Change output format. By default, json is used\n"
" Raw is exactly like the input. It is useful in combination\n"
" with -p/-P options to extract known good/bad messages\n"
" -E<format> Encoder-specific format (used for CSV, read docs)\n"
" -T Include 'event.tags' in JSON format\n"
" -oallowRegex Allow regexp matching (read docs about performance penalty)\n"
" -oaddRule Add a mockup of the matching rule.\n"
" -oaddRuleLocation Add location of matching rule to metadata\n"
" -oaddExecPath Add exec_path attribute to output\n"
" -oaddOriginalMsg Always add original message to output, not just in error case\n"
" -p Print back only if the message has been parsed succesfully\n"
" -P Print back only if the message has NOT been parsed succesfully\n"
" -L Add source file line number information to unparsed line output\n"
" -t<tag> Print back only messages matching the tag\n"
" -v Print debug. When used 3 times, prints parse DAG\n"
" -V Print version information\n"
" -d Print DOT file to stdout and exit\n"
" -d<filename> Save DOT file to the filename\n"
" -s<filename> Print parse dag statistics and exit\n"
" -S<filename> Print extended parse dag statistics and exit (includes -s)\n"
" -x<filename> Print statistics as dot file (called only)\n"
"\n"
);
}
int main(int argc, char *argv[])
{
int opt;
char *repository = NULL;
int usedRB = 0; /* 0=no rule; 1=rule from rulebase; 2=rule from string */
int ret = 0;
FILE *fpStats = NULL;
FILE *fpStatsDOT = NULL;
int extendedStats = 0;
if((ctx = ln_initCtx()) == NULL) {
complain("Could not initialize liblognorm context");
ret = 1;
goto exit;
}
while((opt = getopt(argc, argv, "d:s:S:e:r:R:E:vVpPt:To:hHULx:")) != -1) {
switch (opt) {
case 'V':
printVersion();
exit(1);
break;
case 'd': /* generate DOT file */
if(!strcmp(optarg, "")) {
fpDOT = stdout;
} else {
if((fpDOT = fopen(optarg, "w")) == NULL) {
perror(optarg);
complain("Cannot open DOT file");
ret = 1;
goto exit;
}
}
break;
case 'x': /* generate statistics DOT file */
if(!strcmp(optarg, "")) {
fpStatsDOT = stdout;
} else {
if((fpStatsDOT = fopen(optarg, "w")) == NULL) {
perror(optarg);
complain("Cannot open statistics DOT file");
ret = 1;
goto exit;
}
}
break;
case 'S': /* generate pdag statistic file */
extendedStats = 1;
/* INTENTIONALLY NO BREAK! - KEEP order! */
/*FALLTHROUGH*/
case 's': /* generate pdag statistic file */
if(!strcmp(optarg, "-")) {
fpStats = stdout;
} else {
if((fpStats = fopen(optarg, "w")) == NULL) {
perror(optarg);
complain("Cannot open parser statistics file");
ret = 1;
goto exit;
}
}
break;
case 'v':
verbose++;
break;
case 'E': /* encoder-specific format string (will be validated by encoder) */
encFmt = es_newStrFromCStr(optarg, strlen(optarg));
break;
case 'p':
recOutput = OUTPUT_PARSED_RECS;
break;
case 'P':
recOutput = OUTPUT_UNPARSED_RECS;
break;
case 'H':
outputSummaryLine = 1;
break;
case 'U':
outputNbrUnparsed = 1;
break;
case 'L':
addErrLineNbr = 1;
break;
case 'T':
flatTags = 1;
break;
case 'e': /* encoder to use */
if(!strcmp(optarg, "json")) {
outfmt = f_json;
} else if(!strcmp(optarg, "xml")) {
outfmt = f_xml;
} else if(!strcmp(optarg, "cee-syslog")) {
outfmt = f_syslog;
} else if(!strcmp(optarg, "csv")) {
outfmt = f_csv;
} else if(!strcmp(optarg, "raw")) {
outfmt = f_raw;
}
break;
case 'r': /* rule base to use */
if(usedRB != 2) {
repository = optarg;
usedRB = 1;
} else {
usedRB = -1;
}
break;
case 'R':
if(usedRB != 1) {
repository = optarg;
usedRB = 2;
} else {
usedRB = -1;
}
break;
case 't': /* if given, only messages tagged with the argument
are output */
mandatoryTag = es_newStrFromCStr(optarg, strlen(optarg));
break;
case 'o':
handle_generic_option(optarg);
break;
case 'h':
default:
usage();
ret = 1;
goto exit;
break;
}
}
if(repository == NULL) {
complain("Samples repository or String must be given (-r or -R)");
ret = 1;
goto exit;
}
if(usedRB == -1) {
complain("Only use one rulebase (-r or -R)");
ret = 1;
goto exit;
}
ln_setErrMsgCB(ctx, errCallBack, NULL);
if(verbose) {
ln_setDebugCB(ctx, dbgCallBack, NULL);
ln_enableDebug(ctx, 1);
}
if(usedRB == 1) {
if(ln_loadSamples(ctx, repository)) {
fprintf(stderr, "fatal error: cannot load rulebase\n");
exit(1);
}
} else if(usedRB == 2) {
if(ln_loadSamplesFromString(ctx, repository)) {
fprintf(stderr, "fatal error: cannot load rule from String\n");
exit(1);
}
}
if(verbose > 0)
fprintf(stderr, "number of tree nodes: %d\n", ctx->nNodes);
if(fpDOT != NULL) {
genDOT();
ret=1;
goto exit;
}
if(verbose > 2) ln_displayPDAG(ctx);
normalize();
if(fpStats != NULL) {
ln_fullPdagStats(ctx, fpStats, extendedStats);
}
if(fpStatsDOT != NULL) {
ln_fullPDagStatsDOT(ctx, fpStatsDOT);
}
exit:
if (ctx) ln_exitCtx(ctx);
if (encFmt != NULL)
free(encFmt);
return ret;
}