/**
 * Seccomp BPF Translator
 *
 * Copyright (c) 2012 Red Hat <pmoore@redhat.com>
 * Author: Paul Moore <paul@paul-moore.com>
 */

/*
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License as
 * published by the Free Software Foundation.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, see <http://www.gnu.org/licenses>.
 */

#include <errno.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

#ifndef _BSD_SOURCE
#define _BSD_SOURCE
#endif
#include <endian.h>

#include <seccomp.h>

#include "arch.h"
#include "arch-x32.h"
#include "gen_bpf.h"
#include "db.h"
#include "hash.h"
#include "system.h"
#include "helper.h"

/* allocation increments */
#define AINC_BLK			2
#define AINC_PROG			64

struct acc_state {
	int32_t offset;
	uint32_t mask;
};
#define _ACC_STATE(x,y) \
	(struct acc_state){ .offset = (x), .mask = (y) }
#define _ACC_STATE_OFFSET(x) \
	_ACC_STATE(x,ARG_MASK_MAX)
#define _ACC_STATE_UNDEF \
	_ACC_STATE_OFFSET(-1)
#define _ACC_CMP_EQ(x,y) \
	((x).offset == (y).offset && (x).mask == (y).mask)
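
/*
 * The accumulator state tracks what is currently loaded in the BPF
 * accumulator: "offset" is the last seccomp_data offset loaded (-1 if
 * unknown) and "mask" is the bitmask applied to it.  For example,
 * _ACC_STATE_OFFSET(_BPF_OFFSET_SYSCALL) describes an accumulator holding
 * the unmasked syscall number, while _ACC_STATE_UNDEF means the contents
 * are unknown and must be (re)loaded before use.
 */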

enum bpf_jump_type {
	TGT_NONE = 0,
	TGT_K,				/* immediate "k" value */
	TGT_NXT,			/* fall through to the next block */
	TGT_IMM,			/* resolved immediate value */
	TGT_PTR_DB,			/* pointer to part of the filter db */
	TGT_PTR_BLK,			/* pointer to an instruction block */
	TGT_PTR_HSH,			/* pointer to a block hash table */
};
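
/*
 * Jump target lifecycle (for reference): blocks are first generated with
 * TGT_PTR_DB, TGT_PTR_BLK, and TGT_NXT targets; the resolution passes
 * (_gen_bpf_chain_lvl_res() and the top-level code in _gen_bpf_build_bpf())
 * convert these into TGT_PTR_HSH references, and the final assembly pass
 * turns those into TGT_IMM jump offsets or TGT_K "k" values.
 */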

struct bpf_jump {
	union {
		uint8_t imm_j;
		uint32_t imm_k;
		uint64_t hash;
		struct db_arg_chain_tree *db;
		struct bpf_blk *blk;
		unsigned int nxt;
	} tgt;
	enum bpf_jump_type type;
};
#define _BPF_OP(a,x) \
	(_htot16(a,x))
#define _BPF_JMP_NO \
	((struct bpf_jump) { .type = TGT_NONE })
#define _BPF_JMP_NXT(x) \
	((struct bpf_jump) { .type = TGT_NXT, .tgt = { .nxt = (x) } })
#define _BPF_JMP_IMM(x) \
	((struct bpf_jump) { .type = TGT_IMM, .tgt = { .imm_j = (x) } })
#define _BPF_JMP_DB(x) \
	((struct bpf_jump) { .type = TGT_PTR_DB, .tgt = { .db = (x) } })
#define _BPF_JMP_BLK(x) \
	((struct bpf_jump) { .type = TGT_PTR_BLK, .tgt = { .blk = (x) } })
#define _BPF_JMP_HSH(x) \
	((struct bpf_jump) { .type = TGT_PTR_HSH, .tgt = { .hash = (x) } })
#define _BPF_K(a,x) \
	((struct bpf_jump) { .type = TGT_K, .tgt = { .imm_k = _htot32(a,x) } })
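
/* NOTE: the classic BPF jt/jf fields are only 8-bits wide, so a conditional
 *	 jump can skip at most 255 instructions; longer jumps are handled by
 *	 inserting intermediate jump blocks (see _gen_bpf_build_jmp()) */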
#define _BPF_JMP_MAX			255
#define _BPF_JMP_MAX_RET		255

struct bpf_instr {
	uint16_t op;
	struct bpf_jump jt;
	struct bpf_jump jf;
	struct bpf_jump k;
};
#define _BPF_OFFSET_SYSCALL		(offsetof(struct seccomp_data, nr))
#define _BPF_SYSCALL(a)			_BPF_K(a,_BPF_OFFSET_SYSCALL)
#define _BPF_OFFSET_ARCH		(offsetof(struct seccomp_data, arch))
#define _BPF_ARCH(a)			_BPF_K(a,_BPF_OFFSET_ARCH)

struct bpf_blk {
	/* bpf instructions */
	struct bpf_instr *blks;
	unsigned int blk_cnt;
	unsigned int blk_alloc;

	/* accumulator state */
	struct acc_state acc_start;
	struct acc_state acc_end;

	/* priority - higher is better */
	unsigned int priority;

	/* status flags */
	bool flag_hash;			/* added to the hash table */
	bool flag_dup;			/* duplicate block and in use */
	bool flag_unique;		/* ->blks is unique to this block */

	/* original db_arg_chain_tree node */
	const struct db_arg_chain_tree *node;

	/* used during block assembly */
	uint64_t hash;
	struct bpf_blk *hash_nxt;
	struct bpf_blk *prev, *next;
	struct bpf_blk *lvl_prv, *lvl_nxt;
};
#define _BLK_MSZE(x) \
	((x)->blk_cnt * sizeof(*((x)->blks)))

struct bpf_hash_bkt {
	struct bpf_blk *blk;
	struct bpf_hash_bkt *next;
	unsigned int found;
};

#define _BPF_HASH_BITS			8
#define _BPF_HASH_SIZE			(1 << _BPF_HASH_BITS)
#define _BPF_HASH_MASK			(_BPF_HASH_SIZE - 1)
struct bpf_state {
	/* block hash table */
	struct bpf_hash_bkt *htbl[_BPF_HASH_SIZE];

	/* filter attributes */
	const struct db_filter_attr *attr;
	/* bad arch action */
	uint64_t bad_arch_hsh;
	/* default action */
	uint64_t def_hsh;

	/* target arch - NOTE: be careful, temporary use only! */
	const struct arch_def *arch;

	/* bpf program */
	struct bpf_program *bpf;
};

/**
 * Populate a BPF instruction
 * @param _ins the BPF instruction
 * @param _op the BPF operand
 * @param _jt the BPF jt value
 * @param _jf the BPF jf value
 * @param _k the BPF k value
 *
 * Set the given values on the provided bpf_instr struct.
 *
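 * For example (a sketch mirroring _gen_bpf_action() below; the "arch" and
 * "action" names are illustrative), a return instruction carrying an action
 * value is built with:
 *
 *	struct bpf_instr instr;
 *	_BPF_INSTR(instr, _BPF_OP(arch, BPF_RET),
 *		   _BPF_JMP_NO, _BPF_JMP_NO, _BPF_K(arch, action));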
 */
#define _BPF_INSTR(_ins,_op,_jt,_jf,_k) \
	do { \
		memset(&(_ins), 0, sizeof(_ins)); \
		(_ins).op = (_op); \
		(_ins).jt = _jt; \
		(_ins).jf = _jf; \
		(_ins).k = _k; \
	} while (0)

static struct bpf_blk *_gen_bpf_chain(struct bpf_state *state,
				      const struct db_sys_list *sys,
				      const struct db_arg_chain_tree *chain,
				      const struct bpf_jump *nxt_jump,
				      struct acc_state *a_state);

static struct bpf_blk *_hsh_remove(struct bpf_state *state, uint64_t h_val);
static struct bpf_blk *_hsh_find(const struct bpf_state *state, uint64_t h_val);

/**
 * Convert a 16-bit host integer into the target's endianness
 * @param arch the architecture definition
 * @param val the 16-bit integer
 *
 * Convert the endianness of the supplied value and return it to the caller.
 *
 */
static uint16_t _htot16(const struct arch_def *arch, uint16_t val)
{
	if (arch->endian == ARCH_ENDIAN_LITTLE)
		return htole16(val);
	else
		return htobe16(val);
}

/**
 * Convert a 32-bit host integer into the target's endianness
 * @param arch the architecture definition
 * @param val the 32-bit integer
 *
 * Convert the endianness of the supplied value and return it to the caller.
 *
 */
static uint32_t _htot32(const struct arch_def *arch, uint32_t val)
{
	if (arch->endian == ARCH_ENDIAN_LITTLE)
		return htole32(val);
	else
		return htobe32(val);
}

/**
 * Free the BPF instruction block
 * @param state the BPF state
 * @param blk the BPF instruction block
 *
 * Free the BPF instruction block; any linked blocks are preserved and the hash
 * table is not modified.  In general, you probably want to use _blk_free()
 * instead.
 *
 */
static void __blk_free(struct bpf_state *state, struct bpf_blk *blk)
{
	struct bpf_blk *b_tmp;

	while (blk->hash_nxt != NULL) {
		b_tmp = blk->hash_nxt;
		blk->hash_nxt = b_tmp->hash_nxt;
		if (!b_tmp->flag_dup)
			free(b_tmp);
	}
	if (blk->blks != NULL && blk->flag_unique)
		free(blk->blks);
	free(blk);
}

/**
 * Free the BPF instruction block
 * @param state the BPF state
 * @param blk the BPF instruction block
 *
 * Free the BPF instruction block including any linked blocks.  The hash table
 * is updated to reflect the newly removed block(s).
 *
 */
static void _blk_free(struct bpf_state *state, struct bpf_blk *blk)
{
	int iter;
	struct bpf_blk *b_iter;
	struct bpf_instr *i_iter;

	if (blk == NULL)
		return;

	/* remove this block from the hash table */
	_hsh_remove(state, blk->hash);

	/* run through the block freeing TGT_PTR_{BLK,HSH} jump targets */
	for (iter = 0; iter < blk->blk_cnt; iter++) {
		i_iter = &blk->blks[iter];
		switch (i_iter->jt.type) {
		case TGT_PTR_BLK:
			_blk_free(state, i_iter->jt.tgt.blk);
			break;
		case TGT_PTR_HSH:
			b_iter = _hsh_find(state, i_iter->jt.tgt.hash);
			_blk_free(state, b_iter);
			break;
		default:
			/* do nothing */
			break;
		}
		switch (i_iter->jf.type) {
		case TGT_PTR_BLK:
			_blk_free(state, i_iter->jf.tgt.blk);
			break;
		case TGT_PTR_HSH:
			b_iter = _hsh_find(state, i_iter->jf.tgt.hash);
			_blk_free(state, b_iter);
			break;
		default:
			/* do nothing */
			break;
		}
	}
	__blk_free(state, blk);
}

/**
 * Allocate and initialize a new instruction block
 *
 * Allocate a new BPF instruction block and perform some very basic
 * initialization.  Returns a pointer to the block on success, NULL on failure.
 *
 */
static struct bpf_blk *_blk_alloc(void)
{
	struct bpf_blk *blk;

	blk = zmalloc(sizeof(*blk));
	if (blk == NULL)
		return NULL;
	blk->flag_unique = true;
	blk->acc_start = _ACC_STATE_UNDEF;
	blk->acc_end = _ACC_STATE_UNDEF;

	return blk;
}

/**
 * Resize an instruction block
 * @param state the BPF state
 * @param blk the existing instruction block, or NULL
 * @param size_add the minimum number of instructions to add
 *
 * Resize the given instruction block such that it is at least as large as the
 * current size plus @size_add.  Returns a pointer to the block on success,
 * NULL on failure.
 *
 */
static struct bpf_blk *_blk_resize(struct bpf_state *state,
				   struct bpf_blk *blk,
				   unsigned int size_add)
{
	unsigned int size_adj = (AINC_BLK > size_add ? AINC_BLK : size_add);
	struct bpf_instr *new;

	if (blk == NULL)
		return NULL;

	if ((blk->blk_cnt + size_adj) <= blk->blk_alloc)
		return blk;

	blk->blk_alloc += size_adj;
	new = realloc(blk->blks, blk->blk_alloc * sizeof(*(blk->blks)));
	if (new == NULL) {
		_blk_free(state, blk);
		return NULL;
	}
	blk->blks = new;

	return blk;
}

/**
 * Append a new BPF instruction to an instruction block
 * @param state the BPF state
 * @param blk the existing instruction block, or NULL
 * @param instr the new instruction
 *
 * Add the new BPF instruction to the end of the given instruction block.  If
 * the given instruction block is NULL, a new block will be allocated.  Returns
 * a pointer to the block on success, NULL on failure, and in the case of
 * failure the instruction block is free'd.
 *
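 * Typical usage (a sketch), treating a NULL return as an allocation failure:
 *
 *	blk = _blk_append(state, blk, &instr);
 *	if (blk == NULL)
 *		return NULL;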
 */
static struct bpf_blk *_blk_append(struct bpf_state *state,
				   struct bpf_blk *blk,
				   const struct bpf_instr *instr)
{
	if (blk == NULL) {
		blk = _blk_alloc();
		if (blk == NULL)
			return NULL;
	}

	if (_blk_resize(state, blk, 1) == NULL)
		return NULL;
	memcpy(&blk->blks[blk->blk_cnt++], instr, sizeof(*instr));

	return blk;
}

/**
 * Prepend a new BPF instruction to an instruction block
 * @param state the BPF state
 * @param blk the existing instruction block, or NULL
 * @param instr the new instruction
 *
 * Add the new BPF instruction to the start of the given instruction block.
 * If the given instruction block is NULL, a new block will be allocated.
 * Returns a pointer to the block on success, NULL on failure, and in the case
 * of failure the instruction block is free'd.
 *
 */
static struct bpf_blk *_blk_prepend(struct bpf_state *state,
				    struct bpf_blk *blk,
				    const struct bpf_instr *instr)
{
	/* empty - we can treat this like a normal append operation */
	if (blk == NULL || blk->blk_cnt == 0)
		return _blk_append(state, blk, instr);

	if (_blk_resize(state, blk, 1) == NULL)
		return NULL;
	memmove(&blk->blks[1], &blk->blks[0], blk->blk_cnt++ * sizeof(*instr));
	memcpy(&blk->blks[0], instr, sizeof(*instr));

	return blk;
}

/**
 * Append a block of BPF instructions to the final BPF program
 * @param prg the BPF program
 * @param blk the BPF instruction block
 *
 * Add the BPF instruction block to the end of the BPF program and perform the
 * necessary translation.  Returns the new program length on success, negative
 * values on failure, and in the case of failure the program's instruction
 * memory is free'd.
 *
 */
static int _bpf_append_blk(struct bpf_program *prg, const struct bpf_blk *blk)
{
	int rc;
	bpf_instr_raw *i_new;
	bpf_instr_raw *i_iter;
	unsigned int old_cnt = prg->blk_cnt;
	unsigned int iter;

	/* (re)allocate the program memory */
	prg->blk_cnt += blk->blk_cnt;
	i_new = realloc(prg->blks, BPF_PGM_SIZE(prg));
	if (i_new == NULL) {
		rc = -ENOMEM;
		goto bpf_append_blk_failure;
	}
	prg->blks = i_new;

	/* transfer and translate the blocks to raw instructions */
	for (iter = 0; iter < blk->blk_cnt; iter++) {
		i_iter = &(prg->blks[old_cnt + iter]);

		i_iter->code = blk->blks[iter].op;
		switch (blk->blks[iter].jt.type) {
		case TGT_NONE:
			i_iter->jt = 0;
			break;
		case TGT_IMM:
			/* jump to the value specified */
			i_iter->jt = blk->blks[iter].jt.tgt.imm_j;
			break;
		default:
			/* fatal error - we should never get here */
			rc = -EFAULT;
			goto bpf_append_blk_failure;
		}
		switch (blk->blks[iter].jf.type) {
		case TGT_NONE:
			i_iter->jf = 0;
			break;
		case TGT_IMM:
			/* jump to the value specified */
			i_iter->jf = blk->blks[iter].jf.tgt.imm_j;
			break;
		default:
			/* fatal error - we should never get here */
			rc = -EFAULT;
			goto bpf_append_blk_failure;
		}
		switch (blk->blks[iter].k.type) {
		case TGT_K:
			i_iter->k = blk->blks[iter].k.tgt.imm_k;
			break;
		default:
			/* fatal error - we should never get here */
			rc = -EFAULT;
			goto bpf_append_blk_failure;
		}
	}

	return prg->blk_cnt;

bpf_append_blk_failure:
	prg->blk_cnt = 0;
	free(prg->blks);
	prg->blks = NULL;
	return rc;
}

/**
 * Free the BPF program
 * @param prg the BPF program
 *
 * Free the BPF program.  None of the associated BPF state used to generate the
 * BPF program is released in this function.
 *
 */
static void _program_free(struct bpf_program *prg)
{
	if (prg == NULL)
		return;

	if (prg->blks != NULL)
		free(prg->blks);
	free(prg);
}

/**
 * Free the BPF state
 * @param state the BPF state
 *
 * Free all of the BPF state, including the BPF program if present.
 *
 */
static void _state_release(struct bpf_state *state)
{
	unsigned int bkt;
	struct bpf_hash_bkt *iter;

	if (state == NULL)
		return;

	/* release all of the hash table entries */
	for (bkt = 0; bkt < _BPF_HASH_SIZE; bkt++) {
		while (state->htbl[bkt]) {
			iter = state->htbl[bkt];
			state->htbl[bkt] = iter->next;
			__blk_free(state, iter->blk);
			free(iter);
		}
	}
	_program_free(state->bpf);

	memset(state, 0, sizeof(*state));
}

/**
 * Add an instruction block to the BPF state hash table
 * @param state the BPF state
 * @param blk_p pointer to the BPF instruction block
 * @param found initial found value (see _hsh_find_once() for description)
 *
 * This function adds an instruction block to the hash table; if an identical
 * block already exists, the new block is linked to the existing entry and,
 * unless @found is set, *blk_p is updated to point at the original block.
 * Returns zero on success and negative values on failure.
 *
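 * In the case of a hash collision between two different blocks, the upper 32
 * bits of the 64-bit hash value act as a collision counter: the new block's
 * hash is bumped by (1 << 32) and the bucket scan is restarted.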
 */
static int _hsh_add(struct bpf_state *state, struct bpf_blk **blk_p,
		    unsigned int found)
{
	uint64_t h_val, h_val_tmp[3];
	struct bpf_hash_bkt *h_new, *h_iter, *h_prev = NULL;
	struct bpf_blk *blk = *blk_p;
	struct bpf_blk *b_iter;

	if (blk->flag_hash)
		return 0;

	h_new = zmalloc(sizeof(*h_new));
	if (h_new == NULL)
		return -ENOMEM;

	/* generate the hash */
	h_val_tmp[0] = hash(blk->blks, _BLK_MSZE(blk));
	h_val_tmp[1] = hash(&blk->acc_start, sizeof(blk->acc_start));
	h_val_tmp[2] = hash(&blk->acc_end, sizeof(blk->acc_end));
	h_val = hash(h_val_tmp, sizeof(h_val_tmp));
	blk->hash = h_val;
	blk->flag_hash = true;
	blk->node = NULL;
	h_new->blk = blk;
	h_new->found = (found ? 1 : 0);

	/* insert the block into the hash table */
hsh_add_restart:
	h_iter = state->htbl[h_val & _BPF_HASH_MASK];
	if (h_iter != NULL) {
		do {
			if ((h_iter->blk->hash == h_val) &&
			    (_BLK_MSZE(h_iter->blk) == _BLK_MSZE(blk)) &&
			    (memcmp(h_iter->blk->blks, blk->blks,
				    _BLK_MSZE(blk)) == 0) &&
			    _ACC_CMP_EQ(h_iter->blk->acc_start,
					blk->acc_start) &&
			    _ACC_CMP_EQ(h_iter->blk->acc_end,
					blk->acc_end)) {
				/* duplicate block */
				free(h_new);

				/* store the duplicate block */
				b_iter = h_iter->blk;
				while (b_iter->hash_nxt != NULL)
					b_iter = b_iter->hash_nxt;
				b_iter->hash_nxt = blk;

				/* in some cases we want to return the
				 * duplicate block */
				if (found) {
					blk->flag_dup = true;
					return 0;
				}

				/* update the priority if needed */
				if (h_iter->blk->priority < blk->priority)
					h_iter->blk->priority = blk->priority;

				/* try to save some memory */
				free(blk->blks);
				blk->blks = h_iter->blk->blks;
				blk->flag_unique = false;

				*blk_p = h_iter->blk;
				return 0;
			} else if (h_iter->blk->hash == h_val) {
				/* hash collision */
				if ((h_val >> 32) == 0xffffffff) {
					/* overflow */
					blk->flag_hash = false;
					blk->hash = 0;
					free(h_new);
					return -EFAULT;
				}
				h_val += ((uint64_t)1 << 32);
				h_new->blk->hash = h_val;

				/* restart at the beginning of the bucket */
				goto hsh_add_restart;
			} else {
				/* no match, move along */
				h_prev = h_iter;
				h_iter = h_iter->next;
			}
		} while (h_iter != NULL);
		h_prev->next = h_new;
	} else
		state->htbl[h_val & _BPF_HASH_MASK] = h_new;

	return 0;
}

/**
 * Remove an entry from the hash table
 * @param state the BPF state
 * @param h_val the hash value
 *
 * Remove an entry from the hash table and return it to the caller; NULL is
 * returned if the entry cannot be found.
 *
 */
static struct bpf_blk *_hsh_remove(struct bpf_state *state, uint64_t h_val)
{
	unsigned int bkt = h_val & _BPF_HASH_MASK;
	struct bpf_blk *blk;
	struct bpf_hash_bkt *h_iter, *h_prev = NULL;

	h_iter = state->htbl[bkt];
	while (h_iter != NULL) {
		if (h_iter->blk->hash == h_val) {
			if (h_prev != NULL)
				h_prev->next = h_iter->next;
			else
				state->htbl[bkt] = h_iter->next;
			blk = h_iter->blk;
			free(h_iter);
			return blk;
		}
		h_prev = h_iter;
		h_iter = h_iter->next;
	}

	return NULL;
}

/**
 * Find and return a hash bucket
 * @param state the BPF state
 * @param h_val the hash value
 *
 * Find the entry associated with the given hash value and return it to the
 * caller; NULL is returned if the entry cannot be found.  This function
 * should not be called directly; use _hsh_find() and _hsh_find_once() instead.
 *
 */
static struct bpf_hash_bkt *_hsh_find_bkt(const struct bpf_state *state,
					  uint64_t h_val)
{
	struct bpf_hash_bkt *h_iter;

	h_iter = state->htbl[h_val & _BPF_HASH_MASK];
	while (h_iter != NULL) {
		if (h_iter->blk->hash == h_val)
			return h_iter;
		h_iter = h_iter->next;
	}

	return NULL;
}

/**
 * Find an entry in the hash table, but only return it once
 * @param state the BPF state
 * @param h_val the hash value
 *
 * Find the entry associated with the given hash value and return it to the
 * caller if it has not been returned previously by this function; returns
 * NULL if the entry cannot be found or has already been returned in a
 * previous call.
 *
 */
static struct bpf_blk *_hsh_find_once(const struct bpf_state *state,
				      uint64_t h_val)
{
	struct bpf_hash_bkt *h_iter;

	h_iter = _hsh_find_bkt(state, h_val);
	if (h_iter == NULL || h_iter->found != 0)
		return NULL;
	h_iter->found = 1;
	return h_iter->blk;
}

/**
 * Find an entry in the hash table
 * @param state the BPF state
 * @param h_val the hash value
 *
 * Find the entry associated with the given hash value and return it to the
 * caller; NULL is returned if the entry cannot be found.
 *
 */
static struct bpf_blk *_hsh_find(const struct bpf_state *state, uint64_t h_val)
{
	struct bpf_hash_bkt *h_iter;

	h_iter = _hsh_find_bkt(state, h_val);
	if (h_iter == NULL)
		return NULL;
	return h_iter->blk;
}

/**
 * Generate a BPF action instruction
 * @param state the BPF state
 * @param blk the BPF instruction block, or NULL
 * @param action the desired action
 *
 * Generate a BPF action instruction and append it to the given instruction
 * block.  Returns a pointer to the instruction block on success, NULL on
 * failure.
 *
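 * For example (a sketch), a standalone block that simply returns the filter's
 * default action can be generated with:
 *
 *	blk = _gen_bpf_action(state, NULL, state->attr->act_default);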
 */
static struct bpf_blk *_gen_bpf_action(struct bpf_state *state,
				       struct bpf_blk *blk, uint32_t action)
{
	struct bpf_instr instr;

	_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_RET),
		   _BPF_JMP_NO, _BPF_JMP_NO, _BPF_K(state->arch, action));
	return _blk_append(state, blk, &instr);
}

/**
 * Generate a BPF action instruction and insert it into the hash table
 * @param state the BPF state
 * @param action the desired action
 *
 * Generate a BPF action instruction and insert it into the hash table.
 * Returns a pointer to the instruction block on success, NULL on failure.
 *
 */
static struct bpf_blk *_gen_bpf_action_hsh(struct bpf_state *state,
					   uint32_t action)
{
	struct bpf_blk *blk;

	blk = _gen_bpf_action(state, NULL, action);
	if (blk == NULL)
		return NULL;
	if (_hsh_add(state, &blk, 0) < 0) {
		_blk_free(state, blk);
		return NULL;
	}

	return blk;
}

/**
 * Generate a BPF instruction block for a given chain node
 * @param state the BPF state
 * @param node the filter chain node
 * @param a_state the accumulator state
 *
 * Generate BPF instructions to execute the filter for the given chain node.
 * Returns a pointer to the instruction block on success, NULL on failure.
 *
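 * With a stale accumulator the generated block looks roughly like the
 * following sketch (the exact sequence depends on the accumulator state and
 * on node->op):
 *
 *	BPF_LD+BPF_ABS           <argument offset>
 *	BPF_ALU+BPF_AND          <mask>   (only if a mask needs to be applied)
 *	BPF_JMP+BPF_JEQ/JGT/JGE  <datum>, <true target>, <false target>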
 */
static struct bpf_blk *_gen_bpf_node(struct bpf_state *state,
				     const struct db_arg_chain_tree *node,
				     struct acc_state *a_state)
{
	int32_t acc_offset;
	uint32_t acc_mask;
	uint64_t act_t_hash = 0, act_f_hash = 0;
	struct bpf_blk *blk, *b_act;
	struct bpf_instr instr;

	blk = _blk_alloc();
	if (blk == NULL)
		return NULL;
	blk->acc_start = *a_state;

	/* generate the action blocks */
	if (node->act_t_flg) {
		b_act = _gen_bpf_action_hsh(state, node->act_t);
		if (b_act == NULL)
			goto node_failure;
		act_t_hash = b_act->hash;
	}
	if (node->act_f_flg) {
		b_act = _gen_bpf_action_hsh(state, node->act_f);
		if (b_act == NULL)
			goto node_failure;
		act_f_hash = b_act->hash;
	}

	/* check the accumulator state */
	acc_offset = node->arg_offset;
	acc_mask = node->mask;
	if (acc_offset < 0)
		goto node_failure;
	if ((acc_offset != a_state->offset) ||
	    ((acc_mask & a_state->mask) != acc_mask)) {
		/* reload the accumulator */
		a_state->offset = acc_offset;
		a_state->mask = ARG_MASK_MAX;
		_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_LD + BPF_ABS),
			   _BPF_JMP_NO, _BPF_JMP_NO,
			   _BPF_K(state->arch, acc_offset));
		blk = _blk_append(state, blk, &instr);
		if (blk == NULL)
			goto node_failure;
		/* we're not dependent on the accumulator anymore */
		blk->acc_start = _ACC_STATE_UNDEF;
	}
	if (acc_mask != a_state->mask) {
		/* apply the bitmask */
		a_state->mask = acc_mask;
		_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_ALU + BPF_AND),
			   _BPF_JMP_NO, _BPF_JMP_NO,
			   _BPF_K(state->arch, acc_mask));
		blk = _blk_append(state, blk, &instr);
		if (blk == NULL)
			goto node_failure;
	}

	/* set the accumulator state at the end of the block */
	/* NOTE: the accumulator end state is very critical when we are
	 *       assembling the final state; we assume that however we leave
	 *       this instruction block the accumulator state is represented
	 *       by blk->acc_end, it must be kept correct */
	blk->acc_end = *a_state;

	/* check the accumulator against the datum */
	switch (node->op) {
	case SCMP_CMP_MASKED_EQ:
	case SCMP_CMP_EQ:
		_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_JMP + BPF_JEQ),
			   _BPF_JMP_NO, _BPF_JMP_NO,
			   _BPF_K(state->arch, node->datum));
		break;
	case SCMP_CMP_GT:
		_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_JMP + BPF_JGT),
			   _BPF_JMP_NO, _BPF_JMP_NO,
			   _BPF_K(state->arch, node->datum));
		break;
	case SCMP_CMP_GE:
		_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_JMP + BPF_JGE),
			   _BPF_JMP_NO, _BPF_JMP_NO,
			   _BPF_K(state->arch, node->datum));
		break;
	case SCMP_CMP_NE:
	case SCMP_CMP_LT:
	case SCMP_CMP_LE:
	default:
		/* fatal error, we should never get here */
		goto node_failure;
	}

	/* fixup the jump targets */
	if (node->nxt_t != NULL)
		instr.jt = _BPF_JMP_DB(node->nxt_t);
	else if (node->act_t_flg)
		instr.jt = _BPF_JMP_HSH(act_t_hash);
	else
		instr.jt = _BPF_JMP_NXT(0);
	if (node->nxt_f != NULL)
		instr.jf = _BPF_JMP_DB(node->nxt_f);
	else if (node->act_f_flg)
		instr.jf = _BPF_JMP_HSH(act_f_hash);
	else
		instr.jf = _BPF_JMP_NXT(0);
	blk = _blk_append(state, blk, &instr);
	if (blk == NULL)
		goto node_failure;

	blk->node = node;
	return blk;

node_failure:
	_blk_free(state, blk);
	return NULL;
}

/**
 * Resolve the jump targets in a BPF instruction block
 * @param state the BPF state
 * @param sys the syscall filter
 * @param blk the BPF instruction block
 * @param nxt_jump the jump to fallthrough to at the end of the level
 *
 * Resolve the jump targets in a BPF instruction block generated by the
 * _gen_bpf_chain() function and add the resulting block to the hash
 * table.  Returns a pointer to the new instruction block on success, NULL on
 * failure.
 *
 */
static struct bpf_blk *_gen_bpf_chain_lvl_res(struct bpf_state *state,
					      const struct db_sys_list *sys,
					      struct bpf_blk *blk,
					      const struct bpf_jump *nxt_jump)
{
	int rc;
	unsigned int iter;
	struct bpf_blk *b_new;
	struct bpf_instr *i_iter;
	struct db_arg_chain_tree *node;

	if (blk->flag_hash)
		return blk;

	/* convert TGT_PTR_DB to TGT_PTR_HSH references */
	for (iter = 0; iter < blk->blk_cnt; iter++) {
		i_iter = &blk->blks[iter];
		switch (i_iter->jt.type) {
		case TGT_NONE:
		case TGT_IMM:
		case TGT_PTR_HSH:
			/* ignore these jump types */
			break;
		case TGT_PTR_BLK:
			b_new = _gen_bpf_chain_lvl_res(state, sys,
						       i_iter->jt.tgt.blk,
						       nxt_jump);
			if (b_new == NULL)
				return NULL;
			i_iter->jt = _BPF_JMP_HSH(b_new->hash);
			break;
		case TGT_PTR_DB:
			node = (struct db_arg_chain_tree *)i_iter->jt.tgt.db;
			b_new = _gen_bpf_chain(state, sys, node,
					       nxt_jump, &blk->acc_end);
			if (b_new == NULL)
				return NULL;
			i_iter->jt = _BPF_JMP_HSH(b_new->hash);
			break;
		default:
			/* we should not be here */
			return NULL;
		}
		switch (i_iter->jf.type) {
		case TGT_NONE:
		case TGT_IMM:
		case TGT_PTR_HSH:
			/* ignore these jump types */
			break;
		case TGT_PTR_BLK:
			b_new = _gen_bpf_chain_lvl_res(state, sys,
						       i_iter->jf.tgt.blk,
						       nxt_jump);
			if (b_new == NULL)
				return NULL;
			i_iter->jf = _BPF_JMP_HSH(b_new->hash);
			break;
		case TGT_PTR_DB:
			node = (struct db_arg_chain_tree *)i_iter->jf.tgt.db;
			b_new = _gen_bpf_chain(state, sys, node,
					       nxt_jump, &blk->acc_end);
			if (b_new == NULL)
				return NULL;
			i_iter->jf = _BPF_JMP_HSH(b_new->hash);
			break;
		default:
			/* we should not be here */
			return NULL;
		}
		switch (i_iter->k.type) {
		case TGT_NONE:
		case TGT_K:
		case TGT_PTR_HSH:
			/* ignore these jump types */
			break;
		default:
			/* we should not be here */
			return NULL;
		}
	}

	/* insert the block into the hash table */
	rc = _hsh_add(state, &blk, 0);
	if (rc < 0)
		return NULL;

	return blk;
}

/**
 * Generate the BPF instruction blocks for a given filter chain
 * @param state the BPF state
 * @param sys the syscall filter
 * @param chain the filter chain
 * @param nxt_jump the jump to fallthrough to at the end of the level
 * @param a_state the accumulator state
 *
 * Generate the BPF instruction blocks for the given filter chain and return
 * a pointer to the first block on success; returns NULL on failure.
 *
 */
static struct bpf_blk *_gen_bpf_chain(struct bpf_state *state,
				      const struct db_sys_list *sys,
				      const struct db_arg_chain_tree *chain,
				      const struct bpf_jump *nxt_jump,
				      struct acc_state *a_state)
{
	struct bpf_blk *b_head = NULL, *b_tail = NULL;
	struct bpf_blk *b_prev, *b_next, *b_iter;
	struct bpf_instr *i_iter;
	const struct db_arg_chain_tree *c_iter;
	unsigned int iter;
	struct bpf_jump nxt_jump_tmp;
	struct acc_state acc = *a_state;

	if (chain == NULL) {
		b_head = _gen_bpf_action(state, NULL, sys->action);
		if (b_head == NULL)
			goto chain_failure;
		b_tail = b_head;
	} else {
		/* find the starting node of the level */
		c_iter = chain;
		while (c_iter->lvl_prv != NULL)
			c_iter = c_iter->lvl_prv;

		/* build all of the blocks for this level */
		do {
			b_iter = _gen_bpf_node(state, c_iter, &acc);
			if (b_iter == NULL)
				goto chain_failure;
			if (b_head != NULL) {
				b_iter->lvl_prv = b_tail;
				b_tail->lvl_nxt = b_iter;
				b_tail = b_iter;
			} else {
				b_head = b_iter;
				b_tail = b_iter;
			}
			c_iter = c_iter->lvl_nxt;
		} while (c_iter != NULL);

		/* resolve the TGT_NXT jumps */
		b_iter = b_head;
		do {
			b_next = b_iter->lvl_nxt;
			for (iter = 0; iter < b_iter->blk_cnt; iter++) {
				i_iter = &b_iter->blks[iter];
				if (i_iter->jt.type == TGT_NXT) {
					if (i_iter->jt.tgt.nxt != 0)
						goto chain_failure;
					if (b_next == NULL)
						i_iter->jt = *nxt_jump;
					else
						i_iter->jt =
							_BPF_JMP_BLK(b_next);
				}
				if (i_iter->jf.type == TGT_NXT) {
					if (i_iter->jf.tgt.nxt != 0)
						goto chain_failure;
					if (b_next == NULL)
						i_iter->jf = *nxt_jump;
					else
						i_iter->jf =
							_BPF_JMP_BLK(b_next);
				}
			}
			b_iter = b_next;
		} while (b_iter != NULL);
	}

	/* resolve all of the blocks */
	memset(&nxt_jump_tmp, 0, sizeof(nxt_jump_tmp));
	b_iter = b_tail;
	do {
		/* b_iter may change after resolving, so save the linkage */
		b_prev = b_iter->lvl_prv;
		b_next = b_iter->lvl_nxt;

		nxt_jump_tmp = _BPF_JMP_BLK(b_next);
		b_iter = _gen_bpf_chain_lvl_res(state, sys, b_iter,
						(b_next == NULL ?
						 nxt_jump :
						 &nxt_jump_tmp));
		if (b_iter == NULL)
			goto chain_failure;

		/* restore the block linkage on this level */
		if (b_prev != NULL)
			b_prev->lvl_nxt = b_iter;
		b_iter->lvl_prv = b_prev;
		b_iter->lvl_nxt = b_next;
		if (b_next != NULL)
			b_next->lvl_prv = b_iter;
		if (b_iter->lvl_prv == NULL)
			b_head = b_iter;

		b_iter = b_prev;
	} while (b_iter != NULL);

	return b_head;

chain_failure:
	while (b_head != NULL) {
		b_iter = b_head;
		b_head = b_iter->lvl_nxt;
		_blk_free(state, b_iter);
	}
	return NULL;
}

/**
 * Generate the BPF instruction blocks for a given syscall
 * @param state the BPF state
 * @param sys the syscall filter DB entry
 * @param nxt_hash the hash value of the next syscall filter DB entry
 * @param acc_reset accumulator reset flag
 *
 * Generate the BPF instruction blocks for the given syscall filter and return
 * a pointer to the first block on success; returns NULL on failure.  It is
 * important to note that the block returned has not been added to the hash
 * table, however, any linked/referenced blocks have been added to the hash
 * table.
 *
 */
static struct bpf_blk *_gen_bpf_syscall(struct bpf_state *state,
					const struct db_sys_list *sys,
					uint64_t nxt_hash,
					bool acc_reset)
{
	int rc;
	struct bpf_instr instr;
	struct bpf_blk *blk_c, *blk_s;
	struct bpf_jump def_jump;
	struct acc_state a_state;

	/* we do the memset before the assignment to keep valgrind happy */
	memset(&def_jump, 0, sizeof(def_jump));
	def_jump = _BPF_JMP_HSH(state->def_hsh);

	blk_s = _blk_alloc();
	if (blk_s == NULL)
		return NULL;

	/* setup the accumulator state */
	if (acc_reset) {
		_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_LD + BPF_ABS),
			   _BPF_JMP_NO, _BPF_JMP_NO,
			   _BPF_SYSCALL(state->arch));
		blk_s = _blk_append(state, blk_s, &instr);
		if (blk_s == NULL)
			return NULL;
		/* we've loaded the syscall ourselves */
		a_state = _ACC_STATE_OFFSET(_BPF_OFFSET_SYSCALL);
		blk_s->acc_start = _ACC_STATE_UNDEF;
		blk_s->acc_end = _ACC_STATE_OFFSET(_BPF_OFFSET_SYSCALL);
	} else {
		/* we rely on someone else to load the syscall */
		a_state = _ACC_STATE_UNDEF;
		blk_s->acc_start = _ACC_STATE_OFFSET(_BPF_OFFSET_SYSCALL);
		blk_s->acc_end = _ACC_STATE_OFFSET(_BPF_OFFSET_SYSCALL);
	}

	/* generate the argument chains */
	blk_c = _gen_bpf_chain(state, sys, sys->chains, &def_jump, &a_state);
	if (blk_c == NULL) {
		_blk_free(state, blk_s);
		return NULL;
	}

	/* syscall check */
	_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_JMP + BPF_JEQ),
		   _BPF_JMP_HSH(blk_c->hash), _BPF_JMP_HSH(nxt_hash),
		   _BPF_K(state->arch, sys->num));
	blk_s = _blk_append(state, blk_s, &instr);
	if (blk_s == NULL)
		return NULL;
	blk_s->priority = sys->priority;

	/* add to the hash table */
	rc = _hsh_add(state, &blk_s, 1);
	if (rc < 0) {
		_blk_free(state, blk_s);
		return NULL;
	}

	return blk_s;
}

/**
 * Generate the BPF instruction blocks for a given filter/architecture
 * @param state the BPF state
 * @param db the filter DB
 * @param db_secondary the secondary DB
 *
 * Generate the BPF instruction block for the given filter DB(s)/architecture(s)
 * and return a pointer to the block on success, NULL on failure.  The resulting
 * block assumes that the architecture token has already been loaded into the
 * BPF accumulator.
 *
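 * The block at the head of the returned list is, roughly, an architecture
 * check of the form (a sketch; the false branch falls through to the next
 * architecture check or the bad-arch action):
 *
 *	jeq <arch token>, <this arch's syscall filters>, <next>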
 */
static struct bpf_blk *_gen_bpf_arch(struct bpf_state *state,
				     const struct db_filter *db,
				     const struct db_filter *db_secondary)
{
	int rc;
	unsigned int blk_cnt = 0;
	bool acc_reset;
	struct bpf_instr instr;
	struct db_sys_list *s_head = NULL, *s_tail = NULL, *s_iter, *s_iter_b;
	struct bpf_blk *b_head = NULL, *b_tail = NULL, *b_iter, *b_new;

	state->arch = db->arch;

	/* sort the syscall list */
	db_list_foreach(s_iter, db->syscalls) {
		if (s_head != NULL) {
			s_iter_b = s_head;
			while ((s_iter_b->pri_nxt != NULL) &&
			       (s_iter->priority <= s_iter_b->priority))
				s_iter_b = s_iter_b->pri_nxt;

			if (s_iter->priority > s_iter_b->priority) {
				s_iter->pri_prv = s_iter_b->pri_prv;
				s_iter->pri_nxt = s_iter_b;
				if (s_iter_b == s_head) {
					s_head->pri_prv = s_iter;
					s_head = s_iter;
				} else {
					s_iter->pri_prv->pri_nxt = s_iter;
					s_iter->pri_nxt->pri_prv = s_iter;
				}
			} else {
				s_iter->pri_prv = s_tail;
				s_iter->pri_nxt = NULL;
				s_iter->pri_prv->pri_nxt = s_iter;
				s_tail = s_iter;
			}
		} else {
			s_head = s_iter;
			s_tail = s_iter;
			s_head->pri_prv = NULL;
			s_head->pri_nxt = NULL;
		}
	}
	if (db_secondary != NULL) {
		db_list_foreach(s_iter, db_secondary->syscalls) {
			if (s_head != NULL) {
				s_iter_b = s_head;
				while ((s_iter_b->pri_nxt != NULL) &&
				       (s_iter->priority <= s_iter_b->priority))
					s_iter_b = s_iter_b->pri_nxt;

				if (s_iter->priority > s_iter_b->priority) {
					s_iter->pri_prv = s_iter_b->pri_prv;
					s_iter->pri_nxt = s_iter_b;
					if (s_iter_b == s_head) {
						s_head->pri_prv = s_iter;
						s_head = s_iter;
					} else {
						s_iter->pri_prv->pri_nxt =
							s_iter;
						s_iter->pri_nxt->pri_prv =
							s_iter;
					}
				} else {
					s_iter->pri_prv = s_tail;
					s_iter->pri_nxt = NULL;
					s_iter->pri_prv->pri_nxt = s_iter;
					s_tail = s_iter;
				}
			} else {
				s_head = s_iter;
				s_tail = s_iter;
				s_head->pri_prv = NULL;
				s_head->pri_nxt = NULL;
			}
		}
	}

	if ((state->arch->token == SCMP_ARCH_X86_64 ||
	     state->arch->token == SCMP_ARCH_X32) && (db_secondary == NULL))
		acc_reset = false;
	else
		acc_reset = true;

	/* create the syscall filters and add them to block list group */
	for (s_iter = s_tail; s_iter != NULL; s_iter = s_iter->pri_prv) {
		if (!s_iter->valid)
			continue;

		/* build the syscall filter */
		b_new = _gen_bpf_syscall(state, s_iter,
					 (b_head == NULL ?
					  state->def_hsh : b_head->hash),
					 (s_iter == s_head ?
					  acc_reset : false));
		if (b_new == NULL)
			goto arch_failure;

		/* add the filter to the list head */
		b_new->prev = NULL;
		b_new->next = b_head;
		if (b_tail != NULL) {
			b_head->prev = b_new;
			b_head = b_new;
		} else {
			b_head = b_new;
			b_tail = b_head;
		}

		if (b_tail->next != NULL)
			b_tail = b_tail->next;
		blk_cnt++;
	}

	/* additional ABI filtering */
	if ((state->arch->token == SCMP_ARCH_X86_64 ||
	     state->arch->token == SCMP_ARCH_X32) && (db_secondary == NULL)) {
		_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_LD + BPF_ABS),
			   _BPF_JMP_NO, _BPF_JMP_NO, _BPF_SYSCALL(state->arch));
		b_new = _blk_append(state, NULL, &instr);
		if (b_new == NULL)
			goto arch_failure;
		b_new->acc_end = _ACC_STATE_OFFSET(_BPF_OFFSET_SYSCALL);
		if (state->arch->token == SCMP_ARCH_X86_64) {
			/* filter out x32 */
			_BPF_INSTR(instr,
				   _BPF_OP(state->arch, BPF_JMP + BPF_JGE),
				   _BPF_JMP_NO,
				   _BPF_JMP_NO,
				   _BPF_K(state->arch, X32_SYSCALL_BIT));
			if (b_head != NULL)
				instr.jf = _BPF_JMP_HSH(b_head->hash);
			else
				instr.jf = _BPF_JMP_HSH(state->def_hsh);
			b_new = _blk_append(state, b_new, &instr);
			if (b_new == NULL)
				goto arch_failure;
			/* NOTE: starting with Linux v4.8 the seccomp filters
			 *	 are processed both when the syscall is
			 *	 initially executed as well as after any
			 *	 tracing processes finish so we need to make
			 *	 sure we don't trap the -1 syscall which
			 *	 tracers can use to skip the syscall, see
			 *	 seccomp(2) for more information */
			_BPF_INSTR(instr,
				   _BPF_OP(state->arch, BPF_JMP + BPF_JEQ),
				   _BPF_JMP_NO,
				   _BPF_JMP_HSH(state->bad_arch_hsh),
				   _BPF_K(state->arch, -1));
			if (b_head != NULL)
				instr.jt = _BPF_JMP_HSH(b_head->hash);
			else
				instr.jt = _BPF_JMP_HSH(state->def_hsh);
			blk_cnt++;
		} else if (state->arch->token == SCMP_ARCH_X32) {
			/* filter out x86_64 */
			_BPF_INSTR(instr,
				   _BPF_OP(state->arch, BPF_JMP + BPF_JGE),
				   _BPF_JMP_NO,
				   _BPF_JMP_HSH(state->bad_arch_hsh),
				   _BPF_K(state->arch, X32_SYSCALL_BIT));
			if (b_head != NULL)
				instr.jt = _BPF_JMP_HSH(b_head->hash);
			else
				instr.jt = _BPF_JMP_HSH(state->def_hsh);
			blk_cnt++;
		} else
			/* we should never get here */
			goto arch_failure;
		b_new = _blk_append(state, b_new, &instr);
		if (b_new == NULL)
			goto arch_failure;
		b_new->next = b_head;
		if (b_head != NULL)
			b_head->prev = b_new;
		b_head = b_new;
		rc = _hsh_add(state, &b_head, 1);
		if (rc < 0)
			goto arch_failure;
	}

	/* do the ABI/architecture check */
	_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_JMP + BPF_JEQ),
		   _BPF_JMP_NO, _BPF_JMP_NXT(blk_cnt++),
		   _BPF_K(state->arch, state->arch->token_bpf));
	if (b_head != NULL)
		instr.jt = _BPF_JMP_HSH(b_head->hash);
	else
		instr.jt = _BPF_JMP_HSH(state->def_hsh);
	b_new = _blk_append(state, NULL, &instr);
	if (b_new == NULL)
		goto arch_failure;
	b_new->next = b_head;
	if (b_head != NULL)
		b_head->prev = b_new;
	b_head = b_new;
	rc = _hsh_add(state, &b_head, 1);
	if (rc < 0)
		goto arch_failure;

	state->arch = NULL;
	return b_head;

arch_failure:
	/* NOTE: we do the cleanup here rather than just returning an error since
	 * not all of the instruction blocks may have been added to the hash
	 * table when we hit an error */
	state->arch = NULL;
	b_iter = b_head;
	while (b_iter != NULL) {
		b_new = b_iter->next;
		_blk_free(state, b_iter);
		b_iter = b_new;
	}
	return NULL;
}

/**
 * Find the target block for the "next" jump
 * @param blk the instruction block
 * @param nxt the next offset
 *
 * Find the target block for the TGT_NXT jump using the given offset.  Returns
 * a pointer to the target block on success or NULL on failure.
 *
 */
static struct bpf_blk *_gen_bpf_find_nxt(const struct bpf_blk *blk,
					 unsigned int nxt)
{
	struct bpf_blk *iter = blk->next;

	for (; (iter != NULL) && (nxt > 0); nxt--)
		iter = iter->next;

	return iter;
}

/**
 * Manage jumps to return instructions
 * @param state the BPF state
 * @param blk the instruction block to check
 * @param offset the instruction offset into the instruction block
 * @param blk_ret the return instruction block
 *
 * Using the given block and instruction offset, calculate the jump distance
 * between the jumping instruction and return instruction block.  If the jump
 * distance is too great, duplicate the return instruction to reduce the
 * distance to the maximum value.  Returns 1 if a long jump was added, zero if
 * the existing jump is valid, and negative values on failure.
 *
 */
static int _gen_bpf_build_jmp_ret(struct bpf_state *state,
				  struct bpf_blk *blk, unsigned int offset,
				  struct bpf_blk *blk_ret)
{
	unsigned int j_len;
	uint64_t tgt_hash = blk_ret->hash;
	struct bpf_blk *b_jmp, *b_new;

	/* calculate the jump distance */
	j_len = blk->blk_cnt - (offset + 1);
	b_jmp = blk->next;
	while (b_jmp != NULL && b_jmp != blk_ret && j_len < _BPF_JMP_MAX_RET) {
		j_len += b_jmp->blk_cnt;
		b_jmp = b_jmp->next;
	}
	if (b_jmp == NULL)
		return -EFAULT;
	if (j_len <= _BPF_JMP_MAX_RET && b_jmp == blk_ret)
		return 0;

	/* we need a closer return instruction, see if one already exists */
	j_len = blk->blk_cnt - (offset + 1);
	b_jmp = blk->next;
	while (b_jmp != NULL && b_jmp->hash != tgt_hash &&
	       j_len < _BPF_JMP_MAX_RET) {
		j_len += b_jmp->blk_cnt;
		b_jmp = b_jmp->next;
	}
	if (b_jmp == NULL)
		return -EFAULT;
	if (j_len <= _BPF_JMP_MAX_RET && b_jmp->hash == tgt_hash)
		return 0;

	/* we need to insert a new return instruction - create one */
	b_new = _gen_bpf_action(state, NULL, blk_ret->blks[0].k.tgt.imm_k);
	if (b_new == NULL)
		return -EFAULT;

	/* NOTE - we need to be careful here, we're giving the block a hash
	 *	  value (this is a sneaky way to ensure we leverage the
	 *	  inserted long jumps as much as possible) but we never add the
	 *	  block to the hash table so it won't get cleaned up
	 *	  automatically */
	b_new->hash = tgt_hash;

	/* insert the jump after the current jumping block */
	b_new->prev = blk;
	b_new->next = blk->next;
	blk->next->prev = b_new;
	blk->next = b_new;

	return 1;
}

/**
 * Manage jump lengths by duplicating and adding jumps if needed
 * @param state the BPF state
 * @param tail the tail of the instruction block list
 * @param blk the instruction block to check
 * @param offset the instruction offset into the instruction block
 * @param tgt_hash the hash of the jump destination block
 *
 * Using the given block and instruction offset, calculate the jump distance
 * between the jumping instruction and the destination.  If the jump distance
 * is too great, add a long jump instruction to reduce the distance to a legal
 * value.  Returns 1 if a new instruction was added, zero if the existing jump
 * is valid, and negative values on failure.
 *
 */
static int _gen_bpf_build_jmp(struct bpf_state *state,
			      struct bpf_blk *tail,
			      struct bpf_blk *blk, unsigned int offset,
			      uint64_t tgt_hash)
{
	int rc;
	unsigned int jmp_len;
	struct bpf_instr instr;
	struct bpf_blk *b_new, *b_jmp, *b_tgt;

	/* find the jump target */
	b_tgt = tail;
	while (b_tgt != blk && b_tgt->hash != tgt_hash)
		b_tgt = b_tgt->prev;
	if (b_tgt == blk)
		return -EFAULT;

	if (b_tgt->blk_cnt == 1 &&
	    b_tgt->blks[0].op == _BPF_OP(state->arch, BPF_RET)) {
		rc = _gen_bpf_build_jmp_ret(state, blk, offset, b_tgt);
		if (rc == 1)
			return 1;
		else if (rc < 0)
			return rc;
	}

	/* calculate the jump distance */
	jmp_len = blk->blk_cnt - (offset + 1);
	b_jmp = blk->next;
	while (b_jmp != NULL && b_jmp != b_tgt && jmp_len < _BPF_JMP_MAX) {
		jmp_len += b_jmp->blk_cnt;
		b_jmp = b_jmp->next;
	}
	if (b_jmp == NULL)
		return -EFAULT;
	if (jmp_len <= _BPF_JMP_MAX && b_jmp == b_tgt)
		return 0;

	/* we need a long jump, see if one already exists */
	jmp_len = blk->blk_cnt - (offset + 1);
	b_jmp = blk->next;
	while (b_jmp != NULL && b_jmp->hash != tgt_hash &&
	       jmp_len < _BPF_JMP_MAX) {
		jmp_len += b_jmp->blk_cnt;
		b_jmp = b_jmp->next;
	}
	if (b_jmp == NULL)
		return -EFAULT;
	if (jmp_len <= _BPF_JMP_MAX && b_jmp->hash == tgt_hash)
		return 0;

	/* we need to insert a long jump - create one */
	_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_JMP + BPF_JA),
		   _BPF_JMP_NO, _BPF_JMP_NO, _BPF_JMP_HSH(tgt_hash));
	b_new = _blk_append(state, NULL, &instr);
	if (b_new == NULL)
		return -EFAULT;

	/* NOTE - we need to be careful here, we're giving the block a hash
	 *	  value (this is a sneaky way to ensure we leverage the
	 *	  inserted long jumps as much as possible) but we never add the
	 *	  block to the hash table so it won't get cleaned up
	 *	  automatically */
	b_new->hash = tgt_hash;

	/* insert the jump after the current jumping block */
	b_new->prev = blk;
	b_new->next = blk->next;
	blk->next->prev = b_new;
	blk->next = b_new;

	return 1;
}

/**
 * Generate the BPF program for the given filter collection
 * @param state the BPF state
 * @param col the filter collection
 *
 * Generate the BPF program for the given filter collection.  Returns zero on
 * success, negative values on failure.
 *
 */
static int _gen_bpf_build_bpf(struct bpf_state *state,
			      const struct db_filter_col *col)
{
	int rc;
	int iter;
	uint64_t h_val;
	unsigned int res_cnt;
	unsigned int jmp_len;
	int arch_x86_64 = -1, arch_x32 = -1;
	struct bpf_instr instr;
	struct bpf_instr *i_iter;
	struct bpf_blk *b_badarch, *b_default;
	struct bpf_blk *b_head = NULL, *b_tail = NULL, *b_iter, *b_new, *b_jmp;
	struct db_filter *db_secondary = NULL;
	struct arch_def pseudo_arch;

	if (col->filter_cnt == 0)
		return -EINVAL;

	/* create a fake architecture definition for use in the early stages */
	memset(&pseudo_arch, 0, sizeof(pseudo_arch));
	pseudo_arch.endian = col->endian;
	state->arch = &pseudo_arch;

	/* generate the badarch action */
	b_badarch = _gen_bpf_action(state, NULL, state->attr->act_badarch);
	if (b_badarch == NULL)
		return -ENOMEM;
	rc = _hsh_add(state, &b_badarch, 1);
	if (rc < 0)
		return rc;
	state->bad_arch_hsh = b_badarch->hash;

	/* generate the default action */
	b_default = _gen_bpf_action(state, NULL, state->attr->act_default);
	if (b_default == NULL)
		return -ENOMEM;
	rc = _hsh_add(state, &b_default, 0);
	if (rc < 0)
		return rc;
	state->def_hsh = b_default->hash;

	/* load the architecture token/number */
	_BPF_INSTR(instr, _BPF_OP(state->arch, BPF_LD + BPF_ABS),
		   _BPF_JMP_NO, _BPF_JMP_NO, _BPF_ARCH(state->arch));
	b_head = _blk_append(state, NULL, &instr);
	if (b_head == NULL)
		return -ENOMEM;
	b_head->acc_end = _ACC_STATE_OFFSET(_BPF_OFFSET_ARCH);
	rc = _hsh_add(state, &b_head, 1);
	if (rc < 0)
		return rc;
	b_tail = b_head;

	/* generate the per-architecture filters */
	for (iter = 0; iter < col->filter_cnt; iter++) {
		if (col->filters[iter]->arch->token == SCMP_ARCH_X86_64)
			arch_x86_64 = iter;
		if (col->filters[iter]->arch->token == SCMP_ARCH_X32)
			arch_x32 = iter;
	}
	for (iter = 0; iter < col->filter_cnt; iter++) {
		/* figure out the secondary arch filter mess */
		if (iter == arch_x86_64) {
			if (arch_x32 > iter)
				db_secondary = col->filters[arch_x32];
			else if (arch_x32 >= 0)
				continue;
		} else if (iter == arch_x32) {
			if (arch_x86_64 > iter)
				db_secondary = col->filters[arch_x86_64];
			else if (arch_x86_64 >= 0)
				continue;
		} else
			db_secondary = NULL;

		/* create the filter for the architecture(s) */
		b_new = _gen_bpf_arch(state, col->filters[iter], db_secondary);
		if (b_new == NULL)
			return -ENOMEM;
		b_new->prev = b_tail;
		b_tail->next = b_new;
		b_tail = b_new;
		while (b_tail->next != NULL)
			b_tail = b_tail->next;
	}

	/* add a badarch action to the end */
	b_badarch->prev = b_tail;
	b_badarch->next = NULL;
	b_tail->next = b_badarch;
	b_tail = b_badarch;

	/* reset the state to the pseudo_arch for the final resolution */
	state->arch = &pseudo_arch;

	/* resolve any TGT_NXT jumps at the top level */
	b_iter = b_head;
	do {
		for (iter = 0; iter < b_iter->blk_cnt; iter++) {
			i_iter = &b_iter->blks[iter];
			if (i_iter->jt.type == TGT_NXT) {
				b_jmp = _gen_bpf_find_nxt(b_iter,
							  i_iter->jt.tgt.nxt);
				if (b_jmp == NULL)
					return -EFAULT;
				i_iter->jt = _BPF_JMP_HSH(b_jmp->hash);
			}
			if (i_iter->jf.type == TGT_NXT) {
				b_jmp = _gen_bpf_find_nxt(b_iter,
							  i_iter->jf.tgt.nxt);
				if (b_jmp == NULL)
					return -EFAULT;
				i_iter->jf = _BPF_JMP_HSH(b_jmp->hash);
			}
			/* we shouldn't need to worry about a TGT_NXT in k */
		}
		b_iter = b_iter->next;
	} while (b_iter != NULL && b_iter->next != NULL);

	/* pull in all of the TGT_PTR_HSH jumps, one layer at a time */
	b_iter = b_tail;
	do {
		b_jmp = NULL;
		/* look for jumps - backwards (shorter jumps) */
		for (iter = b_iter->blk_cnt - 1;
		     (iter >= 0) && (b_jmp == NULL);
		     iter--) {
			i_iter = &b_iter->blks[iter];
			if (i_iter->jt.type == TGT_PTR_HSH)
				b_jmp = _hsh_find_once(state,
						       i_iter->jt.tgt.hash);
			if (b_jmp == NULL && i_iter->jf.type == TGT_PTR_HSH)
				b_jmp = _hsh_find_once(state,
						       i_iter->jf.tgt.hash);
			if (b_jmp == NULL && i_iter->k.type == TGT_PTR_HSH)
				b_jmp = _hsh_find_once(state,
						       i_iter->k.tgt.hash);
			if (b_jmp != NULL) {
				/* do we need to reload the accumulator? */
				if ((b_jmp->acc_start.offset != -1) &&
				    !_ACC_CMP_EQ(b_iter->acc_end,
						 b_jmp->acc_start)) {
					if (b_jmp->acc_start.mask != ARG_MASK_MAX) {
						_BPF_INSTR(instr,
							   _BPF_OP(state->arch,
								   BPF_ALU + BPF_AND),
							   _BPF_JMP_NO,
							   _BPF_JMP_NO,
							   _BPF_K(state->arch,
								  b_jmp->acc_start.mask));
						b_jmp = _blk_prepend(state,
								     b_jmp,
								     &instr);
						if (b_jmp == NULL)
							return -EFAULT;
					}
					_BPF_INSTR(instr,
						   _BPF_OP(state->arch,
							   BPF_LD + BPF_ABS),
						   _BPF_JMP_NO, _BPF_JMP_NO,
						   _BPF_K(state->arch,
							  b_jmp->acc_start.offset));
					b_jmp = _blk_prepend(state,
							     b_jmp, &instr);
					if (b_jmp == NULL)
						return -EFAULT;
					/* not reliant on the accumulator */
					b_jmp->acc_start = _ACC_STATE_UNDEF;
				}
				/* insert the new block after this block */
				b_jmp->prev = b_iter;
				b_jmp->next = b_iter->next;
				b_iter->next = b_jmp;
				if (b_jmp->next)
					b_jmp->next->prev = b_jmp;
			}
		}
		if (b_jmp != NULL) {
			while (b_tail->next != NULL)
				b_tail = b_tail->next;
			b_iter = b_tail;
		} else
			b_iter = b_iter->prev;
	} while (b_iter != NULL);


	/* NOTE - from here to the end of the function we need to fail via the
	 *	  build_bpf_free_blks label, not just return an error; see
	 *	  the _gen_bpf_build_jmp() function for details */

	/* check for long jumps and insert if necessary, we also verify that
	 * all our jump targets are valid at this point in the process */
	b_iter = b_tail;
	do {
		res_cnt = 0;
		for (iter = b_iter->blk_cnt - 1; iter >= 0; iter--) {
			i_iter = &b_iter->blks[iter];
			switch (i_iter->jt.type) {
			case TGT_NONE:
			case TGT_IMM:
				break;
			case TGT_PTR_HSH:
				h_val = i_iter->jt.tgt.hash;
				rc = _gen_bpf_build_jmp(state, b_tail,
							b_iter, iter,
							h_val);
				if (rc < 0)
					goto build_bpf_free_blks;
				res_cnt += rc;
				break;
			default:
				/* fatal error */
				goto build_bpf_free_blks;
			}
			switch (i_iter->jf.type) {
			case TGT_NONE:
			case TGT_IMM:
				break;
			case TGT_PTR_HSH:
				h_val = i_iter->jf.tgt.hash;
				rc = _gen_bpf_build_jmp(state, b_tail,
							b_iter, iter,
							h_val);
				if (rc < 0)
					goto build_bpf_free_blks;
				res_cnt += rc;
				break;
			default:
				/* fatal error */
				goto build_bpf_free_blks;
			}
		}
		if (res_cnt == 0)
			b_iter = b_iter->prev;
	} while (b_iter != NULL);

	/* build the bpf program */
	do {
		b_iter = b_head;
		/* resolve the TGT_PTR_HSH jumps */
		for (iter = 0; iter < b_iter->blk_cnt; iter++) {
			i_iter = &b_iter->blks[iter];
			if (i_iter->jt.type == TGT_PTR_HSH) {
				h_val = i_iter->jt.tgt.hash;
				jmp_len = b_iter->blk_cnt - (iter + 1);
				b_jmp = b_iter->next;
				while (b_jmp != NULL && b_jmp->hash != h_val) {
					jmp_len += b_jmp->blk_cnt;
					b_jmp = b_jmp->next;
				}
				if (b_jmp == NULL || jmp_len > _BPF_JMP_MAX)
					goto build_bpf_free_blks;
				i_iter->jt = _BPF_JMP_IMM(jmp_len);
			}
			if (i_iter->jf.type == TGT_PTR_HSH) {
				h_val = i_iter->jf.tgt.hash;
				jmp_len = b_iter->blk_cnt - (iter + 1);
				b_jmp = b_iter->next;
				while (b_jmp != NULL && b_jmp->hash != h_val) {
					jmp_len += b_jmp->blk_cnt;
					b_jmp = b_jmp->next;
				}
				if (b_jmp == NULL || jmp_len > _BPF_JMP_MAX)
					goto build_bpf_free_blks;
				i_iter->jf = _BPF_JMP_IMM(jmp_len);
			}
			if (i_iter->k.type == TGT_PTR_HSH) {
				h_val = i_iter->k.tgt.hash;
				jmp_len = b_iter->blk_cnt - (iter + 1);
				b_jmp = b_tail;
				while (b_jmp->hash != h_val)
					b_jmp = b_jmp->prev;
				b_jmp = b_jmp->prev;
				while (b_jmp != b_iter) {
					jmp_len += b_jmp->blk_cnt;
					b_jmp = b_jmp->prev;
				}
				if (b_jmp == NULL)
					goto build_bpf_free_blks;
				i_iter->k = _BPF_K(state->arch, jmp_len);
			}
		}

		/* build the bpf program */
		if (_bpf_append_blk(state->bpf, b_iter) < 0)
			goto build_bpf_free_blks;

		/* we're done with the block, free it */
		b_head = b_iter->next;
		_blk_free(state, b_iter);
	} while (b_head != NULL);

	return 0;

build_bpf_free_blks:
	b_iter = b_head;
	while (b_iter != NULL) {
		b_jmp = b_iter->next;
		_hsh_remove(state, b_iter->hash);
		__blk_free(state, b_iter);
		b_iter = b_jmp;
	}
	return -EFAULT;
}

/**
 * Generate a BPF representation of the filter DB
 * @param col the seccomp filter collection
 *
 * This function generates a BPF representation of the given filter collection.
 * Returns a pointer to a valid bpf_program on success, NULL on failure.
 *
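 * Typical caller usage (a sketch; how the resulting program is loaded into
 * the kernel is outside the scope of this file):
 *
 *	struct bpf_program *prgm = gen_bpf_generate(col);
 *	if (prgm == NULL)
 *		return -ENOMEM;
 *	...	use prgm->blks / prgm->blk_cnt ...
 *	gen_bpf_release(prgm);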
 */
struct bpf_program *gen_bpf_generate(const struct db_filter_col *col)
{
	int rc;
	struct bpf_state state;
	struct bpf_program *prgm;

	memset(&state, 0, sizeof(state));
	state.attr = &col->attr;

	prgm = zmalloc(sizeof(*(prgm)));
	if (prgm == NULL)
		return NULL;
	state.bpf = prgm;

	rc = _gen_bpf_build_bpf(&state, col);
	if (rc == 0)
		state.bpf = NULL;
	else
		/* _state_release() frees the failed program below, return NULL */
		prgm = NULL;
	_state_release(&state);

	return prgm;
}

/**
 * Free memory associated with a BPF representation
 * @param program the BPF representation
 *
 * Free the memory associated with a BPF representation generated by the
 * gen_bpf_generate() function.
 *
 */
void gen_bpf_release(struct bpf_program *program)
{
	_program_free(program);
}