|
Packit |
1422b7 |
/**
|
|
Packit |
1422b7 |
* @file pdag.h
|
|
Packit |
1422b7 |
* @brief The parse DAG object.
|
|
Packit |
1422b7 |
* @class ln_pdag pdag.h
|
|
Packit |
1422b7 |
*//*
|
|
Packit |
1422b7 |
* Copyright 2015 by Rainer Gerhards and Adiscon GmbH.
|
|
Packit |
1422b7 |
*
|
|
Packit |
1422b7 |
* Released under ASL 2.0.
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
#ifndef LIBLOGNORM_PDAG_H_INCLUDED
|
|
Packit |
1422b7 |
#define LIBLOGNORM_PDAG_H_INCLUDED
|
|
Packit |
1422b7 |
#include <stdio.h>
|
|
Packit |
1422b7 |
#include <libestr.h>
|
|
Packit |
1422b7 |
#include <stdint.h>
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/* Fixed string keys. NOTE(review): judging by the names these are the JSON
 * member names used for metadata attached to a normalized event -- confirm
 * against the code that uses them. */
#define META_KEY "metadata"
|
|
Packit |
1422b7 |
#define ORIGINAL_MSG_KEY "originalmsg"
|
|
Packit |
1422b7 |
#define UNPARSED_DATA_KEY "unparsed-data"
|
|
Packit |
1422b7 |
#define EXEC_PATH_KEY "exec-path"
|
|
Packit |
1422b7 |
#define META_RULE_KEY "rule"
|
|
Packit |
1422b7 |
#define RULE_MOCKUP_KEY "mockup"
|
|
Packit |
1422b7 |
#define RULE_LOCATION_KEY "location"
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
typedef struct ln_pdag ln_pdag; /**< the parse DAG object */
|
|
Packit |
1422b7 |
typedef struct ln_parser_s ln_parser_t; /**< a parser instance, see struct ln_parser_s below */
|
|
Packit |
1422b7 |
typedef struct npb npb_t; /**< the normalization parameter block, see struct npb below */
|
|
Packit |
1422b7 |
typedef uint8_t prsid_t; /**< parser ID; 8 bits wide, so values are limited to 0..255 */
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
struct ln_type_pdag; /* forward declaration only; the type is not defined in this header */
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/**
|
|
Packit |
1422b7 |
* parser IDs.
|
|
Packit |
1422b7 |
*
|
|
Packit |
1422b7 |
* These identify a parser. VERY IMPORTANT: they must start at zero
|
|
Packit |
1422b7 |
* and continuously increment. They must exactly match the index
|
|
Packit |
1422b7 |
* of the respective parser inside the parser lookup table.
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
#define PRS_LITERAL 0
|
|
Packit |
1422b7 |
#define PRS_REPEAT 1
|
|
Packit |
1422b7 |
/* NOTE(review): the list below is compiled out. Its numbering starts at 1
 * and therefore collides with PRS_REPEAT above, so it cannot simply be
 * re-enabled as is. */
#if 0
|
|
Packit |
1422b7 |
#define PRS_DATE_RFC3164 1
|
|
Packit |
1422b7 |
#define PRS_DATE_RFC5424 2
|
|
Packit |
1422b7 |
#define PRS_NUMBER 3
|
|
Packit |
1422b7 |
#define PRS_FLOAT 4
|
|
Packit |
1422b7 |
#define PRS_HEXNUMBER 5
|
|
Packit |
1422b7 |
#define PRS_KERNEL_TIMESTAMP 6
|
|
Packit |
1422b7 |
#define PRS_WHITESPACE 7
|
|
Packit |
1422b7 |
#define PRS_IPV4 8
|
|
Packit |
1422b7 |
#define PRS_IPV6 9
|
|
Packit |
1422b7 |
#define PRS_WORD 10
|
|
Packit |
1422b7 |
#define PRS_ALPHA 11
|
|
Packit |
1422b7 |
#define PRS_REST 12
|
|
Packit |
1422b7 |
#define PRS_OP_QUOTED_STRING 13
|
|
Packit |
1422b7 |
#define PRS_QUOTED_STRING 14
|
|
Packit |
1422b7 |
#define PRS_DATE_ISO 15
|
|
Packit |
1422b7 |
#define PRS_TIME_24HR 16
|
|
Packit |
1422b7 |
#define PRS_TIME_12HR 17
|
|
Packit |
1422b7 |
#define PRS_DURATION 18
|
|
Packit |
1422b7 |
#define PRS_CISCO_INTERFACE_SPEC 19
|
|
Packit |
1422b7 |
#define PRS_NAME_VALUE_LIST 20
|
|
Packit |
1422b7 |
#define PRS_JSON 21
|
|
Packit |
1422b7 |
#define PRS_CEE_SYSLOG 22
|
|
Packit |
1422b7 |
#define PRS_MAC48 23
|
|
Packit |
1422b7 |
#define PRS_CEF 24
|
|
Packit |
1422b7 |
#define PRS_CHECKPOINT_LEA 25
|
|
Packit |
1422b7 |
#define PRS_v2_IPTABLES 26
|
|
Packit |
1422b7 |
#define PRS_STRING_TO 27
|
|
Packit |
1422b7 |
#define PRS_CHAR_TO 28
|
|
Packit |
1422b7 |
#define PRS_CHAR_SEP 29
|
|
Packit |
1422b7 |
#endif
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
#define PRS_CUSTOM_TYPE 254 /* presumably marks a user-defined (custom) type, cf. ln_parser_s.custType -- TODO confirm */
|
|
Packit |
1422b7 |
#define PRS_INVALID 255 /* presumably the "no such parser" value -- TODO confirm; also the largest value prsid_t can hold */
|
|
Packit |
1422b7 |
/* NOTE: current max limit on parser ID is 255, because we use uint8_t
|
|
Packit |
1422b7 |
* for the prsid_t type (which gains cache performance). If more parsers
|
|
Packit |
1422b7 |
* come up, the type must be modified.
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
/**
|
|
Packit |
1422b7 |
* object describing a specific parser instance.
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
struct ln_parser_s {
|
|
Packit |
1422b7 |
prsid_t prsid; /**< parser ID (for lookup table); one of the PRS_* values */
|
|
Packit |
1422b7 |
ln_pdag *node; /**< node to branch to if parser succeeded */
|
|
Packit |
1422b7 |
void *parser_data; /**< opaque data that only the field parser itself understands */
|
|
Packit |
1422b7 |
struct ln_type_pdag *custType; /**< points to custom type, if such is used (presumably only with PRS_CUSTOM_TYPE -- TODO confirm) */
|
|
Packit |
1422b7 |
int prio; /**< priority (combination of user- and parser-specific parts; the latter presumably is ln_parser_info.prio -- TODO confirm) */
|
|
Packit |
1422b7 |
const char *name; /**< field name */
|
|
Packit |
1422b7 |
const char *conf; /**< configuration as printable JSON, kept so that parser configurations can be compared */
|
|
Packit |
1422b7 |
};
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/**
 * Description of one parser type: its rule-base name, its priority and the
 * callbacks used to construct, run and destroy parser data.
 * NOTE(review): presumably one entry per parser ID in the parser lookup
 * table mentioned in the PRS_* comment -- confirm against the table.
 */
struct ln_parser_info {
|
|
Packit |
1422b7 |
const char *name; /**< parser name as used in rule base */
|
|
Packit |
1422b7 |
int prio; /**< parser specific prio in range 0..255 */
|
|
Packit |
1422b7 |
int (*construct)(ln_ctx ctx, json_object *const json, void **); /**< constructor; the void ** presumably receives the new parser data -- TODO confirm */
|
|
Packit |
1422b7 |
int (*parser)(npb_t *npb, size_t*, void *const,
|
|
Packit |
1422b7 |
size_t*, struct json_object **); /**< parser to use; parameters are unnamed here, see the parser implementations for their meaning */
|
|
Packit |
1422b7 |
void (*destruct)(ln_ctx, void *const); /* note: destructor is only needed if parser data exists */
|
|
Packit |
1422b7 |
#ifdef ADVANCED_STATS
|
|
Packit |
1422b7 |
uint64_t called; /* statistics counter, only present with ADVANCED_STATS; presumably number of invocations */
|
|
Packit |
1422b7 |
uint64_t success; /* statistics counter, only present with ADVANCED_STATS; presumably number of successful invocations */
|
|
Packit |
1422b7 |
#endif
|
|
Packit |
1422b7 |
};
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/* parse DAG object
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
struct ln_pdag {
|
|
Packit |
1422b7 |
ln_ctx ctx; /**< our context */ // TODO: why do we need it?
|
|
Packit |
1422b7 |
ln_parser_t *parsers; /* array of parsers to try; nparsers is the current table size */
|
|
Packit |
1422b7 |
prsid_t nparsers; /**< current table size (prsid_t slightly abused as a counter; being uint8_t it cannot exceed 255) */
|
|
Packit |
1422b7 |
struct {
|
|
Packit |
1422b7 |
unsigned isTerminal:1; /**< designates this node a terminal sequence */
|
|
Packit |
1422b7 |
unsigned visited:1; /**< work var for recursive procedures */
|
|
Packit |
1422b7 |
} flags;
|
|
Packit |
1422b7 |
struct json_object *tags; /**< tags to assign to events of this type */
|
|
Packit |
1422b7 |
int refcnt; /**< reference count for deletion tracking */
|
|
Packit |
1422b7 |
struct {
|
|
Packit |
1422b7 |
unsigned called; /* presumably: how often this node was entered -- TODO confirm */
|
|
Packit |
1422b7 |
unsigned backtracked; /**< incremented when backtracking was initiated */
|
|
Packit |
1422b7 |
unsigned terminated; /* presumably: how often normalization ended at this node -- TODO confirm */
|
|
Packit |
1422b7 |
} stats; /**< usage statistics */
|
|
Packit |
1422b7 |
const char *rb_id; /**< human-readable rulebase identifier, for stats etc */
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
// experimental, move outside later
|
|
Packit |
1422b7 |
const char *rb_file; /* presumably the rulebase file this node originates from -- TODO confirm */
|
|
Packit |
1422b7 |
unsigned int rb_lineno; /* presumably the line number inside rb_file -- TODO confirm */
|
|
Packit |
1422b7 |
};
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
#ifdef ADVANCED_STATS
|
|
Packit |
1422b7 |
/**
 * Extended statistics record. It only exists when ADVANCED_STATS is
 * defined and is embedded in struct npb as member astats.
 */
struct advstats {
|
|
Packit |
1422b7 |
int pathlen; /* presumably the length of the path walked through the pdag -- TODO confirm */
|
|
Packit |
1422b7 |
int parser_calls; /**< parser calls in general during path */
|
|
Packit |
1422b7 |
int lit_parser_calls; /**< same just for the literal parser */
|
|
Packit |
1422b7 |
int backtracked; /* presumably the number of backtracking steps -- TODO confirm */
|
|
Packit |
1422b7 |
int recursion_level; /* presumably the recursion depth reached -- TODO confirm */
|
|
Packit |
1422b7 |
es_str_t *exec_path; /* libestr string; presumably the recorded execution path -- TODO confirm */
|
|
Packit |
1422b7 |
};
|
|
Packit |
1422b7 |
#define ADVSTATS_MAX_ENTITIES 100 /* number of elements in advstats_pathlens and advstats_backtracks */
|
|
Packit |
1422b7 |
extern int advstats_max_pathlen;
|
|
Packit |
1422b7 |
extern int advstats_pathlens[ADVSTATS_MAX_ENTITIES];
|
|
Packit |
1422b7 |
extern int advstats_max_backtracked;
|
|
Packit |
1422b7 |
extern int advstats_backtracks[ADVSTATS_MAX_ENTITIES];
|
|
Packit |
1422b7 |
#endif
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/** the "normalization parameter block" (npb)
|
|
Packit |
1422b7 |
* This structure is passed to all normalization routines including
|
|
Packit |
1422b7 |
* parsers. It contains data that commonly needs to be passed,
|
|
Packit |
1422b7 |
* like the to be parsed string and its length, as well as read/write
|
|
Packit |
1422b7 |
* data which is used to track information over the general
|
|
Packit |
1422b7 |
* normalization process (like the execution path, if requested).
|
|
Packit |
1422b7 |
* The main purpose is to save stack writes by eliminating the
|
|
Packit |
1422b7 |
* need for using multiple function parameters. Note that it
|
|
Packit |
1422b7 |
* must be carefully considered which items to add to the
|
|
Packit |
1422b7 |
* npb - those that change from recursion level to recursion
|
|
Packit |
1422b7 |
* level are NOT to be placed here.
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
struct npb {
|
|
Packit |
1422b7 |
ln_ctx ctx; /* library context */
|
|
Packit |
1422b7 |
const char *str; /**< to-be-normalized message */
|
|
Packit |
1422b7 |
size_t strLen; /**< length of str */
|
|
Packit |
1422b7 |
size_t parsedTo; /**< up to which byte could this be parsed? */
|
|
Packit |
1422b7 |
es_str_t *rule; /**< a mock-up of the rule used to parse */
|
|
Packit |
1422b7 |
es_str_t *exec_path; /* execution path; per the description of this struct it is tracked only if requested */
|
|
Packit |
1422b7 |
#ifdef ADVANCED_STATS
|
|
Packit |
1422b7 |
/* NOTE(review): the next three members have the same names as members of
 * the embedded astats record below -- check whether both sets are needed. */
int pathlen;
|
|
Packit |
1422b7 |
int backtracked;
|
|
Packit |
1422b7 |
int recursion_level;
|
|
Packit |
1422b7 |
struct advstats astats; /* extended statistics record, see struct advstats */
|
|
Packit |
1422b7 |
#endif
|
|
Packit |
1422b7 |
};
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/* Methods */
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/**
|
|
Packit |
1422b7 |
* Allocates and initializes a new parse DAG node.
|
|
Packit |
1422b7 |
* @memberof ln_pdag
|
|
Packit |
1422b7 |
*
|
|
Packit |
1422b7 |
* @param[in] ctx current library context. This MUST match the
|
|
Packit |
1422b7 |
* context of the parent.
|
|
Packit |
1422b7 |
* (The function takes no parent argument; only @p ctx is passed.)
|
|
Packit |
1422b7 |
*
|
|
Packit |
1422b7 |
* @return pointer to new node or NULL on error
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
struct ln_pdag* ln_newPDAG(ln_ctx ctx);
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/**
|
|
Packit |
1422b7 |
* Free a parse DAG and destruct all members.
|
|
Packit |
1422b7 |
* @memberof ln_pdag
|
|
Packit |
1422b7 |
*
|
|
Packit |
1422b7 |
* @param[in] DAG pointer to pdag to free
|
|
Packit |
1422b7 |
* NOTE(review): struct ln_pdag carries a refcnt member; whether this call
* frees unconditionally or only drops one reference is not visible from
* this header -- confirm in the implementation.
*/
|
|
Packit |
1422b7 |
void ln_pdagDelete(struct ln_pdag *DAG);
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/**
|
|
Packit |
1422b7 |
* Add parser to dag node.
|
|
Packit |
1422b7 |
* Works on unoptimized dag.
|
|
Packit |
1422b7 |
*
|
|
Packit |
1422b7 |
* @param[in] ctx current library context
* @param[in,out] pdag address of the pointer to the pdag to modify (presumably updated by the call -- TODO confirm)
|
|
Packit |
1422b7 |
* @param[in] parser parser definition as a json_object (third argument, unnamed in this prototype)
|
|
Packit |
1422b7 |
* @returns 0 on success, something else otherwise
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
int ln_pdagAddParser(ln_ctx ctx, struct ln_pdag **pdag, json_object *);
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/**
|
|
Packit |
1422b7 |
* Display the content of a pdag (debug function).
|
|
Packit |
1422b7 |
* This is a debug aid that spits out a textual representation
|
|
Packit |
1422b7 |
* of the provided pdag via multiple calls of the debug callback.
|
|
Packit |
1422b7 |
*
|
|
Packit |
1422b7 |
* @param ctx library context. The function takes no pdag argument, so the pdag to display is presumably reached through the context -- TODO confirm
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
void ln_displayPDAG(ln_ctx ctx);
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/**
|
|
Packit |
1422b7 |
* Generate a DOT graph.
|
|
Packit |
1422b7 |
* Well, actually it does not generate the graph itself, but a
|
|
Packit |
1422b7 |
* control file that is suitable for the Graphviz dot tool. Such a file
|
|
Packit |
1422b7 |
* can be very useful to understand complex sample databases
|
|
Packit |
1422b7 |
* (not to mention that it is probably fun for those creating
|
|
Packit |
1422b7 |
* samples).
|
|
Packit |
1422b7 |
* The dot commands are appended to the provided string.
|
|
Packit |
1422b7 |
*
|
|
Packit |
1422b7 |
* @param[in] DAG pdag to display
|
|
Packit |
1422b7 |
* @param[in,out] str string to which the DOT commands are appended.
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
void ln_genDotPDAGGraph(struct ln_pdag *DAG, es_str_t **str);
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/**
|
|
Packit |
1422b7 |
* Build a pdag based on the provided string, but only if necessary.
|
|
Packit |
1422b7 |
* The passed-in DAG is searched and traversed for str. If a node exactly
|
|
Packit |
1422b7 |
* matching str is found, that node is returned. If no exact match is found,
|
|
Packit |
1422b7 |
* a new node is added. Existing nodes may be split, if a so-far common
|
|
Packit |
1422b7 |
* prefix needs to be split in order to add the new node.
|
|
Packit |
1422b7 |
*
|
|
Packit |
1422b7 |
* @param[in,out] DAG root of the current DAG (nodes may be added or split, see above)
|
|
Packit |
1422b7 |
* @param[in] str string to be added (a libestr string)
|
|
Packit |
1422b7 |
* @param[in] offs offset into str where match needs to start
|
|
Packit |
1422b7 |
* (this is required for recursive calls to handle
|
|
Packit |
1422b7 |
* common prefixes)
|
|
Packit |
1422b7 |
* @return NULL on error, otherwise the pdag leaf that
|
|
Packit |
1422b7 |
* corresponds to the parameters passed.
|
|
Packit |
1422b7 |
*/
|
|
Packit |
1422b7 |
struct ln_pdag * ln_buildPDAG(struct ln_pdag *DAG, es_str_t *str, size_t offs);
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/**
 * Translate a parser name into a parser ID.
 * NOTE(review): inferred from the name and types only; presumably returns
 * PRS_INVALID for an unknown name -- confirm in the implementation.
 */
prsid_t ln_parserName2ID(const char *const __restrict__ name);
|
|
Packit |
1422b7 |
/**
 * NOTE(review): undocumented. Presumably optimizes the pdag(s) held by
 * @p ctx (ln_pdagAddParser is documented to work on the unoptimized dag).
 * The return-value convention is not visible from this header -- TODO document.
 */
int ln_pdagOptimize(ln_ctx ctx);
|
|
Packit |
1422b7 |
/**
 * NOTE(review): undocumented. Presumably writes pdag statistics for @p ctx
 * to the stream @p fp; the meaning of the unnamed int argument is not
 * visible from this header -- TODO document and name it.
 */
void ln_fullPdagStats(ln_ctx ctx, FILE *const fp, const int);
|
|
Packit |
1422b7 |
/**
 * NOTE(review): undocumented. Presumably creates a parser instance that
 * matches the single literal character @p lit (cf. PRS_LITERAL); ownership
 * of the returned object and the error return are not visible here -- TODO document.
 */
ln_parser_t * ln_newLiteralParser(ln_ctx ctx, char lit);
|
|
Packit |
1422b7 |
/**
 * NOTE(review): undocumented. Presumably creates a parser instance from the
 * JSON parser configuration @p prscnf; ownership of the returned object and
 * the error return are not visible here -- TODO document.
 */
ln_parser_t* ln_newParser(ln_ctx ctx, json_object *const prscnf);
|
|
Packit |
1422b7 |
/**
 * NOTE(review): undocumented. Presumably looks up the custom type called
 * @p name in @p ctx, and @p bAdd presumably requests that a missing type be
 * created -- confirm in the implementation.
 */
struct ln_type_pdag * ln_pdagFindType(ln_ctx ctx, const char *const __restrict__ name, const int bAdd);
|
|
Packit |
1422b7 |
/**
 * NOTE(review): undocumented. Presumably writes pdag statistics for @p ctx
 * to @p fp in DOT format -- confirm in the implementation.
 */
void ln_fullPDagStatsDOT(ln_ctx ctx, FILE *const fp);
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
/* friends */
|
|
Packit |
1422b7 |
/**
 * Recursive normalization routine, exported for "friend" modules.
 * NOTE(review): apart from @p npb, the parameter descriptions below are
 * inferred from names and types only -- confirm against the implementation.
 *
 * @param npb           normalization parameter block; holds the data that does
 *                      not change from recursion level to recursion level
 * @param dag           presumably the pdag node at which matching starts
 * @param offs          presumably the offset into npb->str at which to start
 * @param bPartialMatch presumably non-zero if a partial match is acceptable
 * @param json          presumably the object that receives extracted fields
 * @param endNode       presumably receives the node at which matching ended
 * @return status code; the meaning of the values is not visible from this header
 */
int
|
|
Packit |
1422b7 |
ln_normalizeRec(npb_t *const __restrict__ npb,
|
|
Packit |
1422b7 |
struct ln_pdag *dag,
|
|
Packit |
1422b7 |
const size_t offs,
|
|
Packit |
1422b7 |
const int bPartialMatch,
|
|
Packit |
1422b7 |
struct json_object *json,
|
|
Packit |
1422b7 |
struct ln_pdag **endNode
|
|
Packit |
1422b7 |
);
|
|
Packit |
1422b7 |
|
|
Packit |
1422b7 |
#endif /* #ifndef LIBLOGNORM_PDAG_H_INCLUDED */
|