Blame src/pdag.h

Packit 1422b7
/**
Packit 1422b7
 * @file pdag.h
Packit 1422b7
 * @brief The parse DAG object.
Packit 1422b7
 * @class ln_pdag pdag.h
Packit 1422b7
 *//*
Packit 1422b7
 * Copyright 2015 by Rainer Gerhards and Adiscon GmbH.
Packit 1422b7
 *
Packit 1422b7
 * Released under ASL 2.0.
Packit 1422b7
 */
Packit 1422b7
#ifndef LIBLOGNORM_PDAG_H_INCLUDED
Packit 1422b7
#define	LIBLOGNORM_PDAG_H_INCLUDED
Packit 1422b7
#include <stdio.h>
Packit 1422b7
#include <libestr.h>
Packit 1422b7
#include <stdint.h>
Packit 1422b7
Packit 1422b7
/* String constants used as key names.
 * NOTE(review): presumably member names of the JSON produced during
 * normalization -- the users are outside this header; confirm there.
 */
#define META_KEY "metadata"
Packit 1422b7
#define ORIGINAL_MSG_KEY "originalmsg"
Packit 1422b7
#define UNPARSED_DATA_KEY "unparsed-data"
Packit 1422b7
#define EXEC_PATH_KEY "exec-path"
Packit 1422b7
#define META_RULE_KEY "rule"
Packit 1422b7
#define RULE_MOCKUP_KEY "mockup"
Packit 1422b7
#define RULE_LOCATION_KEY "location"
Packit 1422b7
Packit 1422b7
typedef struct ln_pdag ln_pdag; /**< the parse DAG object */
Packit 1422b7
typedef struct ln_parser_s ln_parser_t; /**< a specific parser instance */
Packit 1422b7
typedef struct npb npb_t; /**< the normalization parameter block */
Packit 1422b7
typedef uint8_t prsid_t; /**< parser ID; 8 bits wide, so IDs are limited to 0..255 */
Packit 1422b7
Packit 1422b7
/* forward declaration only; this header uses the type solely via pointers */
struct ln_type_pdag;
Packit 1422b7
Packit 1422b7
/**
Packit 1422b7
 * parser IDs.
Packit 1422b7
 *
Packit 1422b7
 * These identify a parser. VERY IMPORTANT: they must start at zero
Packit 1422b7
 * and continuously increment. They must exactly match the index
Packit 1422b7
 * of the respective parser inside the parser lookup table.
Packit 1422b7
 */
Packit 1422b7
#define PRS_LITERAL			0
Packit 1422b7
#define PRS_REPEAT			1
Packit 1422b7
/* The list below is compiled out (#if 0). Its numbering clashes with the
 * active definitions above: PRS_DATE_RFC3164 would also be 1, the value
 * now taken by PRS_REPEAT. Do not re-enable it without renumbering.
 */
#if 0
Packit 1422b7
#define PRS_DATE_RFC3164		1
Packit 1422b7
#define PRS_DATE_RFC5424		2
Packit 1422b7
#define PRS_NUMBER			3
Packit 1422b7
#define PRS_FLOAT			4
Packit 1422b7
#define PRS_HEXNUMBER			5
Packit 1422b7
#define PRS_KERNEL_TIMESTAMP		6
Packit 1422b7
#define PRS_WHITESPACE			7
Packit 1422b7
#define PRS_IPV4			8
Packit 1422b7
#define PRS_IPV6			9
Packit 1422b7
#define PRS_WORD			10
Packit 1422b7
#define PRS_ALPHA			11
Packit 1422b7
#define PRS_REST			12
Packit 1422b7
#define PRS_OP_QUOTED_STRING		13
Packit 1422b7
#define PRS_QUOTED_STRING		14
Packit 1422b7
#define PRS_DATE_ISO			15
Packit 1422b7
#define PRS_TIME_24HR			16
Packit 1422b7
#define PRS_TIME_12HR			17
Packit 1422b7
#define PRS_DURATION			18
Packit 1422b7
#define PRS_CISCO_INTERFACE_SPEC	19
Packit 1422b7
#define PRS_NAME_VALUE_LIST		20
Packit 1422b7
#define PRS_JSON			21
Packit 1422b7
#define PRS_CEE_SYSLOG			22
Packit 1422b7
#define PRS_MAC48			23
Packit 1422b7
#define PRS_CEF				24
Packit 1422b7
#define PRS_CHECKPOINT_LEA		25
Packit 1422b7
#define PRS_v2_IPTABLES			26
Packit 1422b7
#define PRS_STRING_TO			27
Packit 1422b7
#define PRS_CHAR_TO			28
Packit 1422b7
#define PRS_CHAR_SEP			29
Packit 1422b7
#endif
Packit 1422b7
Packit 1422b7
/* special IDs placed at the top end of the prsid_t value range */
#define PRS_CUSTOM_TYPE			254
Packit 1422b7
#define PRS_INVALID			255
Packit 1422b7
/* NOTE: current max limit on parser ID is 255, because we use uint8_t
Packit 1422b7
 * for the prsid_t type (which gains cache performance). If more parsers
Packit 1422b7
 * come up, the type must be modified.
Packit 1422b7
 */
Packit 1422b7
/**
Packit 1422b7
 * object describing a specific parser instance.
Packit 1422b7
 * A struct ln_pdag node keeps an array of these in its `parsers` member
 * ("array of parsers to try").
 */
Packit 1422b7
struct ln_parser_s {
Packit 1422b7
	prsid_t prsid;		/**< parser ID (for lookup table), see the PRS_* constants */
Packit 1422b7
	ln_pdag *node;		/**< node to branch to if parser succeeded */
Packit 1422b7
	void *parser_data;	/**< opaque data that the field-parser understands */
Packit 1422b7
	/* NOTE(review): presumably only meaningful when prsid is PRS_CUSTOM_TYPE -- confirm */
	struct ln_type_pdag *custType;	/**< points to custom type, if such is used */
Packit 1422b7
	int prio;		/**< priority (combination of user- and parser-specific parts) */
Packit 1422b7
	const char *name;	/**< field name */
Packit 1422b7
	const char *conf;	/**< configuration as printable json for comparison reasons */
Packit 1422b7
};
Packit 1422b7
Packit 1422b7
/**
 * Description of one parser type: its name in the rule base, its
 * parser-specific priority and the callbacks that implement it.
 * NOTE(review): presumably the element type of the parser lookup table
 * that prsid_t values index -- the table is not in this header; confirm.
 */
struct ln_parser_info {
Packit 1422b7
	const char *name;	/**< parser name as used in rule base */
Packit 1422b7
	int prio;		/**< parser specific prio in range 0..255 */
Packit 1422b7
	/* NOTE(review): presumably builds the parser data from the json
	 * configuration and hands it back through the void ** -- confirm
	 */
	int (*construct)(ln_ctx ctx, json_object *const json, void **);
Packit 1422b7
	/* NOTE(review): the meaning of the two size_t * and the void *const
	 * arguments is not documented here -- check an actual parser
	 */
	int (*parser)(npb_t *npb, size_t*, void *const,
Packit 1422b7
				  size_t*, struct json_object **); /**< parser to use */
Packit 1422b7
	void (*destruct)(ln_ctx, void *const); /* note: destructor is only needed if parser data exists */
Packit 1422b7
#ifdef ADVANCED_STATS
Packit 1422b7
	/* counters that exist only in ADVANCED_STATS builds
	 * NOTE(review): presumably number of invocations and of successful
	 * parses of this parser type -- confirm
	 */
	uint64_t called;
Packit 1422b7
	uint64_t success;
Packit 1422b7
#endif
Packit 1422b7
};
Packit 1422b7
Packit 1422b7
Packit 1422b7
/* parse DAG object
Packit 1422b7
 * One node of the parse DAG: it holds the parsers to try at this point
 * plus per-node flags, tags, statistics and rulebase information.
 */
Packit 1422b7
struct ln_pdag {
Packit 1422b7
	ln_ctx ctx;			/**< our context */ // TODO: why do we need it?
Packit 1422b7
	ln_parser_t *parsers;		/* array of parsers to try */
Packit 1422b7
	prsid_t nparsers;		/**< current table size (prsid_t slightly abused);
					     being a prsid_t, it cannot exceed 255 */
Packit 1422b7
	struct {
Packit 1422b7
		unsigned isTerminal:1;	/**< designates this node a terminal sequence */
Packit 1422b7
		unsigned visited:1;	/**< work var for recursive procedures */
Packit 1422b7
	} flags;
Packit 1422b7
	struct json_object *tags;	/**< tags to assign to events of this type */
Packit 1422b7
	int refcnt;			/**< reference count for deleting tracking */
Packit 1422b7
	struct {
Packit 1422b7
		unsigned called;	/**< NOTE(review): presumably how often this node was entered -- confirm */
Packit 1422b7
		unsigned backtracked;	/**< incremented when backtracking was initiated */
Packit 1422b7
		unsigned terminated;	/**< NOTE(review): presumably how often a match ended here -- confirm */
Packit 1422b7
	} stats;	/**< usage statistics */
Packit 1422b7
	const char *rb_id;		/**< human-readable rulebase identifier, for stats etc */
Packit 1422b7
	
Packit 1422b7
	// experimental, move outside later
	// NOTE(review): presumably rulebase file name and line number this
	// node originates from -- confirm
Packit 1422b7
	const char *rb_file;
Packit 1422b7
	unsigned int rb_lineno;
Packit 1422b7
};
Packit 1422b7
Packit 1422b7
#ifdef ADVANCED_STATS
Packit 1422b7
/* Extended statistics record, compiled in only when ADVANCED_STATS is
 * defined. struct npb embeds one as its `astats` member.
 */
struct advstats {
Packit 1422b7
	int pathlen;			/**< NOTE(review): presumably length of the path taken -- confirm */
Packit 1422b7
	int parser_calls;		/**< parser calls in general during path */
Packit 1422b7
	int lit_parser_calls;		/**< same just for the literal parser */
Packit 1422b7
	int backtracked;		/**< NOTE(review): presumably number of backtracks -- confirm */
Packit 1422b7
	int recursion_level;
Packit 1422b7
	es_str_t *exec_path;
Packit 1422b7
};
Packit 1422b7
/* number of elements in the two statistics arrays declared below */
#define ADVSTATS_MAX_ENTITIES 100
Packit 1422b7
/* Globals defined outside this header.
 * NOTE(review): presumably the arrays are histograms indexed by path
 * length resp. backtrack count, and the *_max_* variables track the
 * largest value seen -- confirm in the defining source file.
 */
extern int advstats_max_pathlen;
Packit 1422b7
extern int advstats_pathlens[ADVSTATS_MAX_ENTITIES];
Packit 1422b7
extern int advstats_max_backtracked;
Packit 1422b7
extern int advstats_backtracks[ADVSTATS_MAX_ENTITIES];
Packit 1422b7
#endif
Packit 1422b7
Packit 1422b7
/** the "normalization parameter block" (npb)
Packit 1422b7
 * This structure is passed to all normalization routines including
Packit 1422b7
 * parsers. It contains data that commonly needs to be passed,
Packit 1422b7
 * like the to be parsed string and its length, as well as read/write
Packit 1422b7
 * data which is used to track information over the general
Packit 1422b7
 * normalization process (like the execution path, if requested).
Packit 1422b7
 * The main purpose is to save stack writes by eliminating the
Packit 1422b7
 * need for using multiple function parameters. Note that it
Packit 1422b7
 * must be carefully considered which items to add to the
Packit 1422b7
 * npb - those that change from recursion level to recursion
Packit 1422b7
 * level are NOT to be placed here.
Packit 1422b7
 */
Packit 1422b7
struct npb {
Packit 1422b7
	ln_ctx ctx;			/**< library context */
Packit 1422b7
	const char *str;		/**< to-be-normalized message */
Packit 1422b7
	size_t strLen;			/**< length of it */
Packit 1422b7
	size_t parsedTo;		/**< up to which byte could this be parsed? */
Packit 1422b7
	es_str_t *rule;			/**< a mock-up of the rule used to parse */
Packit 1422b7
	es_str_t *exec_path;		/**< the execution path, if requested */
Packit 1422b7
#ifdef ADVANCED_STATS
Packit 1422b7
	/* NOTE(review): pathlen, backtracked and recursion_level also exist
	 * inside astats (struct advstats); this header does not show which
	 * copy is authoritative -- confirm in the implementation.
	 */
	int pathlen;
Packit 1422b7
	int backtracked;
Packit 1422b7
	int recursion_level;
Packit 1422b7
	struct advstats astats;
Packit 1422b7
#endif
Packit 1422b7
};
Packit 1422b7
Packit 1422b7
/* Methods */
Packit 1422b7
Packit 1422b7
/**
Packit 1422b7
 * Allocates and initializes a new parse DAG node.
Packit 1422b7
 * @memberof ln_pdag
Packit 1422b7
 *
Packit 1422b7
 * @param[in] ctx current library context. This MUST match the
Packit 1422b7
 * 		context of the parent.
Packit 1422b7
 *
 * NOTE(review): this comment used to list a `parent` parameter as well,
 * but the prototype only takes ctx -- confirm what "the parent" refers to.
Packit 1422b7
 *
Packit 1422b7
 * @return pointer to new node or NULL on error
Packit 1422b7
 */
Packit 1422b7
struct ln_pdag* ln_newPDAG(ln_ctx ctx);
Packit 1422b7
Packit 1422b7
Packit 1422b7
/**
Packit 1422b7
 * Free a parse DAG and destruct all members.
Packit 1422b7
 * @memberof ln_pdag
Packit 1422b7
 *
 * NOTE(review): struct ln_pdag carries a refcnt "for deleting tracking";
 * presumably a node shared by several parents is only released with the
 * last reference -- confirm in the implementation.
 *
Packit 1422b7
 * @param[in] DAG pointer to pdag to free
Packit 1422b7
 */
Packit 1422b7
void ln_pdagDelete(struct ln_pdag *DAG);
Packit 1422b7
Packit 1422b7
Packit 1422b7
/**
Packit 1422b7
 * Add parser to dag node.
Packit 1422b7
 * Works on unoptimized dag.
Packit 1422b7
 *
Packit 1422b7
 * @param[in] ctx library context
 * @param[in] pdag pointer to pdag to modify; passed as pointer to the
 *            node pointer (NOTE(review): presumably so the callee can
 *            update the caller's pointer -- confirm)
Packit 1422b7
 * @param[in] parser parser definition (the json_object argument, which
 *            is unnamed in the prototype)
Packit 1422b7
 * @returns 0 on success, something else otherwise
Packit 1422b7
 */
Packit 1422b7
int ln_pdagAddParser(ln_ctx ctx, struct ln_pdag **pdag, json_object *);
Packit 1422b7
Packit 1422b7
Packit 1422b7
/**
Packit 1422b7
 * Display the content of a pdag (debug function).
Packit 1422b7
 * This is a debug aid that spits out a textual representation
Packit 1422b7
 * of the provided pdag via multiple calls of the debug callback.
Packit 1422b7
 *
Packit 1422b7
 * @param ctx library context. NOTE(review): the prototype takes no pdag
 *        argument; presumably the pdag held by ctx is displayed -- confirm.
Packit 1422b7
 */
Packit 1422b7
void ln_displayPDAG(ln_ctx ctx);
Packit 1422b7
Packit 1422b7
Packit 1422b7
/**
Packit 1422b7
 * Generate a DOT graph.
Packit 1422b7
 * Well, actually it does not generate the graph itself, but a
Packit 1422b7
 * control file that is suitable for the GNU DOT tool. Such a file
Packit 1422b7
 * can be very useful to understand complex sample databases
Packit 1422b7
 * (not to mention that it is probably fun for those creating
Packit 1422b7
 * samples).
Packit 1422b7
 * The dot commands are appended to the provided string.
Packit 1422b7
 *
Packit 1422b7
 * @param[in] DAG pdag to display
Packit 1422b7
 * @param[in,out] str string to which the DOT commands are appended.
Packit 1422b7
 */
Packit 1422b7
void ln_genDotPDAGGraph(struct ln_pdag *DAG, es_str_t **str);
Packit 1422b7
Packit 1422b7
Packit 1422b7
/**
Packit 1422b7
 * Build a pdag based on the provided string, but only if necessary.
Packit 1422b7
 * The passed-in DAG is searched and traversed for str. If a node exactly
Packit 1422b7
 * matching str is found, that node is returned. If no exact match is found,
Packit 1422b7
 * a new node is added. An existing node may be split if the prefix it
Packit 1422b7
 * shared so far has to be divided in order to add the new node.
Packit 1422b7
 *
Packit 1422b7
 * @param[in] DAG root of the current DAG
Packit 1422b7
 * @param[in] str string to be added
Packit 1422b7
 * @param[in] offs offset into str where match needs to start
Packit 1422b7
 *             (this is required for recursive calls to handle
Packit 1422b7
 *             common prefixes)
Packit 1422b7
 * @return NULL on error, otherwise the pdag leaf that
Packit 1422b7
 *         corresponds to the parameters passed.
Packit 1422b7
 */
Packit 1422b7
struct ln_pdag * ln_buildPDAG(struct ln_pdag *DAG, es_str_t *str, size_t offs);
Packit 1422b7
Packit 1422b7
Packit 1422b7
/* NOTE(review): the functions below carry no documentation in this header.
 * The remarks are inferred from names and signatures only -- confirm each
 * against the implementation before relying on it.
 */
/* presumably maps a parser name (as used in the rule base) to its parser ID */
prsid_t ln_parserName2ID(const char *const __restrict__ name);
Packit 1422b7
/* presumably optimizes the pdag(s) held by ctx; returns an int status */
int ln_pdagOptimize(ln_ctx ctx);
Packit 1422b7
/* presumably writes pdag statistics for ctx to fp; the unnamed int is undocumented */
void ln_fullPdagStats(ln_ctx ctx, FILE *const fp, const int);
Packit 1422b7
/* presumably creates a parser instance for the single literal character lit */
ln_parser_t * ln_newLiteralParser(ln_ctx ctx, char lit);
Packit 1422b7
/* presumably creates a parser instance from the json parser configuration prscnf */
ln_parser_t* ln_newParser(ln_ctx ctx, json_object *const prscnf);
Packit 1422b7
/* presumably looks up the custom type called name, adding it if bAdd is set */
struct ln_type_pdag * ln_pdagFindType(ln_ctx ctx, const char *const __restrict__ name, const int bAdd);
Packit 1422b7
/* presumably writes pdag statistics for ctx to fp in DOT format */
void ln_fullPDagStatsDOT(ln_ctx ctx, FILE *const fp);
Packit 1422b7
Packit 1422b7
/* friends */
Packit 1422b7
/* Recursive normalization routine working on the npb.
 * NOTE(review): inferred from the signature only -- presumably matches
 * npb->str starting at offset offs against dag, adds parsed fields to
 * json and reports the node where matching ended via endNode. Confirm
 * the exact contract, the role of bPartialMatch and the return code
 * convention in the implementation.
 */
int
Packit 1422b7
ln_normalizeRec(npb_t *const __restrict__ npb,
Packit 1422b7
	struct ln_pdag *dag,
Packit 1422b7
	const size_t offs,
Packit 1422b7
	const int bPartialMatch,
Packit 1422b7
	struct json_object *json,
Packit 1422b7
	struct ln_pdag **endNode
Packit 1422b7
);
Packit 1422b7
Packit 1422b7
#endif /* #ifndef LIBLOGNORM_PDAG_H_INCLUDED */