Blob Blame History Raw
/**
 * @file ptree.h
 * @brief The parse tree object.
 * @class ln_ptree ptree.h
 *//*
 * Copyright 2013 by Rainer Gerhards and Adiscon GmbH.
 *
 * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013
 *
 * This file is meant to be included by applications using liblognorm.
 * For lognorm library files themselves, include "lognorm.h".
 *
 * This file is part of liblognorm.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution.
 */
#ifndef LIBLOGNORM_PTREE_H_INCLUDED
#define	LIBLOGNORM_PTREE_H_INCLUDED
#include <stdio.h>
#include <libestr.h>

#define ORIGINAL_MSG_KEY "originalmsg"
#define UNPARSED_DATA_KEY "unparsed-data"

typedef struct ln_ptree ln_ptree; /**< the parse tree object */
typedef struct ln_fieldList_s ln_fieldList_t;

/**
 * List of supported fields inside parse tree.
 * This list holds all fields and their description. While normalizing,
 * fields are tried in the order of this list. So the enqeue order
 * dictates precedence during parsing.
 *
 * value list. This is a single-linked list. In a later stage, we should
 * optimize it so that frequently used fields are moved "up" towards
 * the root of the list. In any case, we do NOT expect this list to
 * be long, as the parser should already have gotten quite specific when
 * we hit a fieldconst .
 */
struct ln_fieldList_s {
	es_str_t *name;		/**< field name */
	es_str_t *data;		/**< extra data to be passed to parser */
	es_str_t *raw_data;	/**< extra untouched (unescaping is not done) data availble to be used by parser */
	void *parser_data; /** opaque data that the field-parser understands */
	void (*parser_data_destructor)(void **); /** destroy opaque data that field-parser understands */
	int (*parser)(const char*, size_t, size_t*, const ln_fieldList_t *,
				  size_t*, struct json_object **); /**< parser to use */
	ln_ptree *subtree;	/**< subtree to follow if parser succeeded */
	ln_fieldList_t *next;	/**< list housekeeping, next node (or NULL) */
	unsigned char isIPTables; /**< special parser: iptables! */
};


/* parse tree object
 */
struct ln_ptree {
	ln_ctx		ctx;	/**< our context */
	ln_ptree	**parentptr; /**< pointer to *us* *inside* the parent
				BUT this is NOT a pointer to the parent! */
	ln_fieldList_t	*froot; /**< root of field list */
	ln_fieldList_t	*ftail; /**< tail of field list */
	struct {
		unsigned isTerminal:1;	/**< designates this node a terminal sequence? */
	} flags;
	struct json_object *tags;	/* tags to assign to events of this type */
	/* the respresentation below requires a lof of memory but is
	 * very fast. As an alternate approach, we can use a hash table
	 * where we ignore control characters. That should work quite well.
	 * But we do not do this in the initial step.
	 */
	ln_ptree	*subtree[256];
	unsigned short	lenPrefix;	/**< length of common prefix, 0->none */
	union {
		unsigned char *ptr;	/**< use if data element is too large */
		unsigned char data[16]; /**< fast lookup for small string */
	} prefix;	/**< a common prefix string for all of this node */
	struct {
		unsigned visited;
		unsigned backtracked;	/**< incremented when backtracking was initiated */
		unsigned terminated;
	} stats;	/**< usage statistics */
};


/* Methods */

/**
 * Allocates and initializes a new parse tree node.
 * @memberof ln_ptree
 *
 * @param[in] ctx current library context. This MUST match the
 * 		context of the parent.
 * @param[in] parent pointer to the new node inside the parent
 *
 * @return pointer to new node or NULL on error
 */
struct ln_ptree* ln_newPTree(ln_ctx ctx, struct ln_ptree** parent);


/**
 * Free a parse tree and destruct all members.
 * @memberof ln_ptree
 *
 * @param[in] tree pointer to ptree to free
 */
void ln_deletePTree(struct ln_ptree *tree);

/**
 * Free a parse tree node and destruct all members.
 * @memberof ln_ptree
 *
 * @param[in] node pointer to free
 */
void ln_deletePTreeNode(ln_fieldList_t *node);

/**
 * Add a field description to the a tree.
 * The field description will be added as last field. Fields are
 * parsed in the order they have been added, so be sure to care
 * about the order if that matters.
 * @memberof ln_ptree
 *
 * @param[in] tree pointer to ptree to modify
 * @param[in] fielddescr a fully populated (and initialized)
 * 		field description node
 * @returns 0 on success, something else otherwise
 */
int ln_addFDescrToPTree(struct ln_ptree **tree, ln_fieldList_t *node);


/**
 * Add a literal to a ptree.
 * Creates new tree nodes as necessary.
 * @memberof ln_ptree
 *
 * @param[in] tree root of tree where to add
 * @param[in] str literal (string) to add
 * @param[in] offs offset of where in literal adding should start
 *
 * @return NULL on error, otherwise pointer to deepest tree added
 */
struct ln_ptree*
ln_addPTree(struct ln_ptree *tree, es_str_t *str, size_t offs);


/**
 * Display the content of a ptree (debug function).
 * This is a debug aid that spits out a textual representation
 * of the provided ptree via multiple calls of the debug callback.
 *
 * @param tree ptree to display
 * @param level recursion level, must be set to 0 on initial call
 */
void ln_displayPTree(struct ln_ptree *tree, int level);


/**
 * Generate a DOT graph.
 * Well, actually it does not generate the graph itself, but a
 * control file that is suitable for the GNU DOT tool. Such a file
 * can be very useful to understand complex sample databases
 * (not to mention that it is probably fun for those creating
 * samples).
 * The dot commands are appended to the provided string.
 *
 * @param[in] tree ptree to display
 * @param[out] str string which receives the DOT commands.
 */
void ln_genDotPTreeGraph(struct ln_ptree *tree, es_str_t **str);


/**
 * Build a ptree based on the provided string, but only if necessary.
 * The passed-in tree is searched and traversed for str. If a node exactly
 * matching str is found, that node is returned. If no exact match is found,
 * a new node is added. Existing nodes may be split, if a so-far common
 * prefix needs to be split in order to add the new node.
 *
 * @param[in] tree root of the current tree
 * @param[in] str string to be added
 * @param[in] offs offset into str where match needs to start
 *             (this is required for recursive calls to handle
 *             common prefixes)
 * @return NULL on error, otherwise the ptree leaf that
 *         corresponds to the parameters passed.
 */
struct ln_ptree * ln_buildPTree(struct ln_ptree *tree, es_str_t *str, size_t offs);

/* internal helper for displaying stats */
void ln_fullPTreeStats(ln_ctx ctx, FILE *const fp, const int extendedStats);

#endif /* #ifndef LOGNORM_PTREE_H_INCLUDED */