/**
 * @file pdag.h
 * @brief The parse DAG object.
 * @class ln_pdag pdag.h
 *//*
 * Copyright 2015 by Rainer Gerhards and Adiscon GmbH.
 *
 * Released under ASL 2.0.
 */
#ifndef LIBLOGNORM_PDAG_H_INCLUDED
#define	LIBLOGNORM_PDAG_H_INCLUDED
#include <stdio.h>
#include <libestr.h>
#include <stdint.h>

/* String constants for well-known key names.
 * NOTE(review): presumably these are the JSON field names used when
 * building the normalized event and its metadata - confirm against the
 * users of these macros, which are not part of this header.
 */
#define META_KEY "metadata"
#define ORIGINAL_MSG_KEY "originalmsg"
#define UNPARSED_DATA_KEY "unparsed-data"
#define EXEC_PATH_KEY "exec-path"
#define META_RULE_KEY "rule"
#define RULE_MOCKUP_KEY "mockup"
#define RULE_LOCATION_KEY "location"

typedef struct ln_pdag ln_pdag; /**< the parse DAG object */
typedef struct ln_parser_s ln_parser_t; /**< a specific parser instance (see struct ln_parser_s) */
typedef struct npb npb_t; /**< the normalization parameter block (see struct npb) */
typedef uint8_t prsid_t; /**< parser ID; 8 bits wide, so IDs are limited to 0..255 */

struct ln_type_pdag; /* forward declaration; this header only uses pointers to it */

/**
 * parser IDs.
 *
 * These identify a parser. VERY IMPORTANT: they must start at zero
 * and continuously increment. They must exactly match the index
 * of the respective parser inside the parser lookup table.
 *
 * NOTE(review): the IDs inside the "#if 0" block below are compiled out.
 * If that block were re-enabled as-is, PRS_DATE_RFC3164 (1) would clash
 * with PRS_REPEAT (1).
 */
#define PRS_LITERAL			0
#define PRS_REPEAT			1
#if 0
#define PRS_DATE_RFC3164		1
#define PRS_DATE_RFC5424		2
#define PRS_NUMBER			3
#define PRS_FLOAT			4
#define PRS_HEXNUMBER			5
#define PRS_KERNEL_TIMESTAMP		6
#define PRS_WHITESPACE			7
#define PRS_IPV4			8
#define PRS_IPV6			9
#define PRS_WORD			10
#define PRS_ALPHA			11
#define PRS_REST			12
#define PRS_OP_QUOTED_STRING		13
#define PRS_QUOTED_STRING		14
#define PRS_DATE_ISO			15
#define PRS_TIME_24HR			16
#define PRS_TIME_12HR			17
#define PRS_DURATION			18
#define PRS_CISCO_INTERFACE_SPEC	19
#define PRS_NAME_VALUE_LIST		20
#define PRS_JSON			21
#define PRS_CEE_SYSLOG			22
#define PRS_MAC48			23
#define PRS_CEF				24
#define PRS_CHECKPOINT_LEA		25
#define PRS_v2_IPTABLES			26
#define PRS_STRING_TO			27
#define PRS_CHAR_TO			28
#define PRS_CHAR_SEP			29
#endif

/* Special IDs placed at the top of the prsid_t value range. */
#define PRS_CUSTOM_TYPE			254
#define PRS_INVALID			255
/* NOTE: current max limit on parser ID is 255, because we use uint8_t
 * for the prsid_t type (which gains cache performance). If more parsers
 * come up, the type must be modified.
 */
/**
 * Object describing a specific parser instance.
 * A pdag node holds an array of these in its `parsers` member
 * (see struct ln_pdag).
 */
struct ln_parser_s {
	prsid_t prsid;		/**< parser ID (for lookup table) */
	ln_pdag *node;		/**< node to branch to if parser succeeded */
	void *parser_data;	/**< opaque data that the field-parser understands */
	struct ln_type_pdag *custType;	/**< points to custom type, if such is used */
	int prio;		/**< priority (combination of user- and parser-specific parts) */
	const char *name;	/**< field name */
	const char *conf;	/**< configuration as printable json for comparison reasons */
};

/**
 * Description of a parser type: its rule base name, its priority and the
 * callbacks that implement it.
 * NOTE(review): presumably the entries of the parser lookup table that is
 * indexed by prsid_t are of this type - confirm in the implementation.
 */
struct ln_parser_info {
	const char *name;	/**< parser name as used in rule base */
	int prio;		/**< parser specific prio in range 0..255 */
	/* constructor callback; NOTE(review): presumably builds the parser data
	 * from the json configuration and hands it back via the void** - confirm */
	int (*construct)(ln_ctx ctx, json_object *const json, void **);
	int (*parser)(npb_t *npb, size_t*, void *const,
				  size_t*, struct json_object **); /**< parser to use */
	void (*destruct)(ln_ctx, void *const); /* note: destructor is only needed if parser data exists */
#ifdef ADVANCED_STATS
	/* The counters below exist only in ADVANCED_STATS builds.
	 * NOTE(review): presumably number of invocations and number of
	 * successful parses - confirm where they are updated. */
	uint64_t called;
	uint64_t success;
#endif
};


/* parse DAG object
 * One node of the parse DAG. Each node carries the array of parsers to
 * try at this point; every parser in turn names the node to branch to
 * on success (see struct ln_parser_s).
 */
struct ln_pdag {
	ln_ctx ctx;			/**< our context */ // TODO: why do we need it?
	ln_parser_t *parsers;		/* array of parsers to try */
	prsid_t nparsers;		/**< current table size (prsid_t slightly abused) */
	struct {
		unsigned isTerminal:1;	/**< designates this node a terminal sequence */
		unsigned visited:1;	/**< work var for recursive procedures */
	} flags;
	struct json_object *tags;	/**< tags to assign to events of this type */
	int refcnt;			/**< reference count for deletion tracking */
	struct {
		unsigned called;	/* NOTE(review): presumably how often this node was entered - confirm */
		unsigned backtracked;	/**< incremented when backtracking was initiated */
		unsigned terminated;	/* NOTE(review): presumably how often parsing ended at this node - confirm */
	} stats;	/**< usage statistics */
	const char *rb_id;		/**< human-readable rulebase identifier, for stats etc */
	
	// experimental, move outside later
	// NOTE(review): presumably the rulebase file and line number this node
	// originates from - confirm where they are set
	const char *rb_file;
	unsigned int rb_lineno;
};

#ifdef ADVANCED_STATS
/* Per-normalization statistics record; only available in ADVANCED_STATS
 * builds. An instance is embedded in struct npb as member `astats`.
 */
struct advstats {
	int pathlen;			/* NOTE(review): presumably length of the path taken through the DAG - confirm */
	int parser_calls;		/**< parser calls in general during path */
	int lit_parser_calls;		/**< same just for the literal parser */
	int backtracked;		/* NOTE(review): presumably number of backtracks during path - confirm */
	int recursion_level;
	es_str_t *exec_path;
};
/* Size of the two histogram-like arrays declared below. */
#define ADVSTATS_MAX_ENTITIES 100
/* Global counters, defined outside of this header.
 * NOTE(review): presumably maxima and per-value distributions of path
 * lengths and backtrack counts - confirm in the implementation.
 */
extern int advstats_max_pathlen;
extern int advstats_pathlens[ADVSTATS_MAX_ENTITIES];
extern int advstats_max_backtracked;
extern int advstats_backtracks[ADVSTATS_MAX_ENTITIES];
#endif

/** the "normalization parameter block" (npb)
 * This structure is passed to all normalization routines including
 * parsers. It contains data that commonly needs to be passed,
 * like the to be parsed string and its length, as well as read/write
 * data which is used to track information over the general
 * normalization process (like the execution path, if requested).
 * The main purpose is to save stack writes by eliminating the
 * need for using multiple function parameters. Note that it
 * must be carefully considered which items to add to the
 * npb - those that change from recursion level to recursion
 * level are NOT to be placed here.
 */
struct npb {
	ln_ctx ctx;			/* library context */
	const char *str;		/**< to-be-normalized message */
	size_t strLen;			/**< length of it */
	size_t parsedTo;		/**< up to which byte could this be parsed? */
	es_str_t *rule;			/**< a mock-up of the rule used to parse */
	es_str_t *exec_path;		/* execution path tracking (see description above) */
#ifdef ADVANCED_STATS
	/* The members below exist only in ADVANCED_STATS builds. */
	int pathlen;
	int backtracked;
	int recursion_level;
	struct advstats astats;
#endif
};

/* Methods */

/**
 * Allocates and initializes a new parse DAG node.
 * @memberof ln_pdag
 *
 * @param[in] ctx current library context. This MUST match the
 * 		context of the parent.
 *
 * @return pointer to new node or NULL on error
 */
struct ln_pdag* ln_newPDAG(ln_ctx ctx);


/**
 * Free a parse DAG and destruct all members.
 * @memberof ln_pdag
 *
 * NOTE(review): struct ln_pdag carries a reference count (refcnt);
 * presumably the node is only released once that count drops to
 * zero - confirm in the implementation.
 *
 * @param[in] DAG pointer to pdag to free
 */
void ln_pdagDelete(struct ln_pdag *DAG);


/**
 * Add parser to dag node.
 * Works on unoptimized dag.
 *
 * @param[in] ctx library context
 * @param[in,out] pdag address of the pointer to the pdag to modify
 *                (NOTE(review): it is passed by reference, so the
 *                caller's pointer can presumably be updated - confirm
 *                in the implementation)
 * @param[in] parser parser definition as a json object (this third
 *                parameter is unnamed in the prototype)
 * @returns 0 on success, something else otherwise
 */
int ln_pdagAddParser(ln_ctx ctx, struct ln_pdag **pdag, json_object *);


/**
 * Display the content of a pdag (debug function).
 * This is a debug aid that spits out a textual representation
 * of the pdag via multiple calls of the debug callback.
 *
 * @param ctx library context. NOTE(review): the function takes no pdag
 *            argument, so presumably the pdag(s) held by the context
 *            are displayed - confirm in the implementation.
 */
void ln_displayPDAG(ln_ctx ctx);


/**
 * Generate a DOT graph.
 * Well, actually it does not generate the graph itself, but a
 * control file that is suitable for the Graphviz dot tool. Such a file
 * can be very useful to understand complex sample databases
 * (not to mention that it is probably fun for those creating
 * samples).
 * The dot commands are appended to the provided string.
 *
 * @param[in] DAG pdag to display
 * @param[in,out] str string to which the DOT commands are appended.
 */
void ln_genDotPDAGGraph(struct ln_pdag *DAG, es_str_t **str);


/**
 * Build a pdag based on the provided string, but only if necessary.
 * The passed-in DAG is searched and traversed for str. If a node exactly
 * matching str is found, that node is returned. If no exact match is found,
 * a new node is added. Existing nodes may be split if a prefix that was
 * common so far needs to be divided in order to add the new node.
 *
 * @param[in] DAG root of the current DAG
 * @param[in] str string to be added
 * @param[in] offs offset into str where match needs to start
 *             (this is required for recursive calls to handle
 *             common prefixes)
 * @return NULL on error, otherwise the pdag leaf that
 *         corresponds to the parameters passed.
 */
struct ln_pdag * ln_buildPDAG(struct ln_pdag *DAG, es_str_t *str, size_t offs);


/* The prototypes below carry no documentation of their own. The notes are
 * derived from names and signatures only and must be confirmed against the
 * implementation before being relied upon.
 */
/* NOTE(review): presumably maps a parser name (as used in the rule base) to its parser ID - confirm */
prsid_t ln_parserName2ID(const char *const __restrict__ name);
/* NOTE(review): presumably optimizes the pdag(s) held by ctx; meaning of the int result not visible here - confirm */
int ln_pdagOptimize(ln_ctx ctx);
/* NOTE(review): presumably writes pdag statistics to fp; the unnamed int parameter is undocumented - confirm */
void ln_fullPdagStats(ln_ctx ctx, FILE *const fp, const int);
/* NOTE(review): presumably creates a parser instance for the single literal character lit - confirm */
ln_parser_t * ln_newLiteralParser(ln_ctx ctx, char lit);
/* NOTE(review): presumably creates a parser instance from the json parser configuration prscnf - confirm */
ln_parser_t* ln_newParser(ln_ctx ctx, json_object *const prscnf);
/* NOTE(review): presumably looks up a custom type by name and, if bAdd is set, creates it when missing - confirm */
struct ln_type_pdag * ln_pdagFindType(ln_ctx ctx, const char *const __restrict__ name, const int bAdd);
/* NOTE(review): presumably writes pdag statistics to fp in DOT format - confirm */
void ln_fullPDagStatsDOT(ln_ctx ctx, FILE *const fp);

/* friends */
/* Normalization routine operating on a pdag.
 * NOTE(review): everything below is inferred from the name and the
 * parameter names/types only - confirm against the implementation:
 *   npb           normalization parameter block (message, length, tracking data)
 *   dag           pdag node at which to start
 *   offs          offset into the message at which to start
 *   bPartialMatch presumably whether a match that does not consume the
 *                 whole message is acceptable
 *   json          presumably the object receiving the extracted fields
 *   endNode       presumably receives the node at which parsing ended
 * The meaning of the int return value is not visible in this header.
 */
int
ln_normalizeRec(npb_t *const __restrict__ npb,
	struct ln_pdag *dag,
	const size_t offs,
	const int bPartialMatch,
	struct json_object *json,
	struct ln_pdag **endNode
);

#endif /* #ifndef LIBLOGNORM_PDAG_H_INCLUDED */