/** * @file pdag.h * @brief The parse DAG object. * @class ln_pdag pdag.h *//* * Copyright 2015 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. */ #ifndef LIBLOGNORM_PDAG_H_INCLUDED #define LIBLOGNORM_PDAG_H_INCLUDED #include #include #include #define META_KEY "metadata" #define ORIGINAL_MSG_KEY "originalmsg" #define UNPARSED_DATA_KEY "unparsed-data" #define EXEC_PATH_KEY "exec-path" #define META_RULE_KEY "rule" #define RULE_MOCKUP_KEY "mockup" #define RULE_LOCATION_KEY "location" typedef struct ln_pdag ln_pdag; /**< the parse DAG object */ typedef struct ln_parser_s ln_parser_t; typedef struct npb npb_t; typedef uint8_t prsid_t; struct ln_type_pdag; /** * parser IDs. * * These identfy a parser. VERY IMPORTANT: they must start at zero * and continously increment. They must exactly match the index * of the respective parser inside the parser lookup table. */ #define PRS_LITERAL 0 #define PRS_REPEAT 1 #if 0 #define PRS_DATE_RFC3164 1 #define PRS_DATE_RFC5424 2 #define PRS_NUMBER 3 #define PRS_FLOAT 4 #define PRS_HEXNUMBER 5 #define PRS_KERNEL_TIMESTAMP 6 #define PRS_WHITESPACE 7 #define PRS_IPV4 8 #define PRS_IPV6 9 #define PRS_WORD 10 #define PRS_ALPHA 11 #define PRS_REST 12 #define PRS_OP_QUOTED_STRING 13 #define PRS_QUOTED_STRING 14 #define PRS_DATE_ISO 15 #define PRS_TIME_24HR 16 #define PRS_TIME_12HR 17 #define PRS_DURATION 18 #define PRS_CISCO_INTERFACE_SPEC 19 #define PRS_NAME_VALUE_LIST 20 #define PRS_JSON 21 #define PRS_CEE_SYSLOG 22 #define PRS_MAC48 23 #define PRS_CEF 24 #define PRS_CHECKPOINT_LEA 25 #define PRS_v2_IPTABLES 26 #define PRS_STRING_TO 27 #define PRS_CHAR_TO 28 #define PRS_CHAR_SEP 29 #endif #define PRS_CUSTOM_TYPE 254 #define PRS_INVALID 255 /* NOTE: current max limit on parser ID is 255, because we use uint8_t * for the prsid_t type (which gains cache performance). If more parsers * come up, the type must be modified. */ /** * object describing a specific parser instance. */ struct ln_parser_s { prsid_t prsid; /**< parser ID (for lookup table) */ ln_pdag *node; /**< node to branch to if parser succeeded */ void *parser_data; /**< opaque data that the field-parser understands */ struct ln_type_pdag *custType; /**< points to custom type, if such is used */ int prio; /**< priority (combination of user- and parser-specific parts) */ const char *name; /**< field name */ const char *conf; /**< configuration as printable json for comparison reasons */ }; struct ln_parser_info { const char *name; /**< parser name as used in rule base */ int prio; /**< parser specific prio in range 0..255 */ int (*construct)(ln_ctx ctx, json_object *const json, void **); int (*parser)(npb_t *npb, size_t*, void *const, size_t*, struct json_object **); /**< parser to use */ void (*destruct)(ln_ctx, void *const); /* note: destructor is only needed if parser data exists */ #ifdef ADVANCED_STATS uint64_t called; uint64_t success; #endif }; /* parse DAG object */ struct ln_pdag { ln_ctx ctx; /**< our context */ // TODO: why do we need it? ln_parser_t *parsers; /* array of parsers to try */ prsid_t nparsers; /**< current table size (prsid_t slighly abused) */ struct { unsigned isTerminal:1; /**< designates this node a terminal sequence */ unsigned visited:1; /**< work var for recursive procedures */ } flags; struct json_object *tags; /**< tags to assign to events of this type */ int refcnt; /**< reference count for deleting tracking */ struct { unsigned called; unsigned backtracked; /**< incremented when backtracking was initiated */ unsigned terminated; } stats; /**< usage statistics */ const char *rb_id; /**< human-readable rulebase identifier, for stats etc */ // experimental, move outside later const char *rb_file; unsigned int rb_lineno; }; #ifdef ADVANCED_STATS struct advstats { int pathlen; int parser_calls; /**< parser calls in general during path */ int lit_parser_calls; /**< same just for the literal parser */ int backtracked; int recursion_level; es_str_t *exec_path; }; #define ADVSTATS_MAX_ENTITIES 100 extern int advstats_max_pathlen; extern int advstats_pathlens[ADVSTATS_MAX_ENTITIES]; extern int advstats_max_backtracked; extern int advstats_backtracks[ADVSTATS_MAX_ENTITIES]; #endif /** the "normalization paramater block" (npb) * This structure is passed to all normalization routines including * parsers. It contains data that commonly needs to be passed, * like the to be parsed string and its length, as well as read/write * data which is used to track information over the general * normalization process (like the execution path, if requested). * The main purpose is to save stack writes by eliminating the * need for using multiple function parameters. Note that it * must be carefully considered which items to add to the * npb - those that change from recursion level to recursion * level are NOT to be placed here. */ struct npb { ln_ctx ctx; const char *str; /**< to-be-normalized message */ size_t strLen; /**< length of it */ size_t parsedTo; /**< up to which byte could this be parsed? */ es_str_t *rule; /**< a mock-up of the rule used to parse */ es_str_t *exec_path; #ifdef ADVANCED_STATS int pathlen; int backtracked; int recursion_level; struct advstats astats; #endif }; /* Methods */ /** * Allocates and initializes a new parse DAG node. * @memberof ln_pdag * * @param[in] ctx current library context. This MUST match the * context of the parent. * @param[in] parent pointer to the new node inside the parent * * @return pointer to new node or NULL on error */ struct ln_pdag* ln_newPDAG(ln_ctx ctx); /** * Free a parse DAG and destruct all members. * @memberof ln_pdag * * @param[in] DAG pointer to pdag to free */ void ln_pdagDelete(struct ln_pdag *DAG); /** * Add parser to dag node. * Works on unoptimzed dag. * * @param[in] pdag pointer to pdag to modify * @param[in] parser parser definition * @returns 0 on success, something else otherwise */ int ln_pdagAddParser(ln_ctx ctx, struct ln_pdag **pdag, json_object *); /** * Display the content of a pdag (debug function). * This is a debug aid that spits out a textual representation * of the provided pdag via multiple calls of the debug callback. * * @param DAG pdag to display */ void ln_displayPDAG(ln_ctx ctx); /** * Generate a DOT graph. * Well, actually it does not generate the graph itself, but a * control file that is suitable for the GNU DOT tool. Such a file * can be very useful to understand complex sample databases * (not to mention that it is probably fun for those creating * samples). * The dot commands are appended to the provided string. * * @param[in] DAG pdag to display * @param[out] str string which receives the DOT commands. */ void ln_genDotPDAGGraph(struct ln_pdag *DAG, es_str_t **str); /** * Build a pdag based on the provided string, but only if necessary. * The passed-in DAG is searched and traversed for str. If a node exactly * matching str is found, that node is returned. If no exact match is found, * a new node is added. Existing nodes may be split, if a so-far common * prefix needs to be split in order to add the new node. * * @param[in] DAG root of the current DAG * @param[in] str string to be added * @param[in] offs offset into str where match needs to start * (this is required for recursive calls to handle * common prefixes) * @return NULL on error, otherwise the pdag leaf that * corresponds to the parameters passed. */ struct ln_pdag * ln_buildPDAG(struct ln_pdag *DAG, es_str_t *str, size_t offs); prsid_t ln_parserName2ID(const char *const __restrict__ name); int ln_pdagOptimize(ln_ctx ctx); void ln_fullPdagStats(ln_ctx ctx, FILE *const fp, const int); ln_parser_t * ln_newLiteralParser(ln_ctx ctx, char lit); ln_parser_t* ln_newParser(ln_ctx ctx, json_object *const prscnf); struct ln_type_pdag * ln_pdagFindType(ln_ctx ctx, const char *const __restrict__ name, const int bAdd); void ln_fullPDagStatsDOT(ln_ctx ctx, FILE *const fp); /* friends */ int ln_normalizeRec(npb_t *const __restrict__ npb, struct ln_pdag *dag, const size_t offs, const int bPartialMatch, struct json_object *json, struct ln_pdag **endNode ); #endif /* #ifndef LOGNORM_PDAG_H_INCLUDED */