/**
* Seccomp System Interfaces
*
* Copyright (c) 2014 Red Hat <pmoore@redhat.com>
* Author: Paul Moore <paul@paul-moore.com>
*/
/*
* This library is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License as
* published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, see <http://www.gnu.org/licenses>.
*/
#include <stdlib.h>
#include <errno.h>
#include <sys/prctl.h>
#define _GNU_SOURCE
#include <unistd.h>
#include "system.h"
#include <seccomp.h>
#include "arch.h"
#include "db.h"
#include "gen_bpf.h"
#include "helper.h"
/* NOTE: the seccomp syscall allowlist is currently disabled for testing
* purposes, but unless we can verify all of the supported ABIs before
* our next release we may have to enable the allowlist
*
* When this is non-zero, sys_chk_seccomp_syscall() reports the seccomp()
* syscall as unsupported on any native architecture that is not listed in
* its allowlist switch, without probing the kernel. */
#define SYSCALL_ALLOWLIST_ENABLE 0
/* task global state */
struct task_state {
/* seccomp(2) syscall */
int nr_seccomp;
/* userspace notification fd */
int notify_fd;
/* runtime support flags */
int sup_syscall;
int sup_flag_tsync;
int sup_flag_log;
int sup_action_log;
int sup_kill_process;
int sup_flag_spec_allow;
int sup_flag_new_listener;
int sup_user_notif;
int sup_flag_tsync_esrch;
};
static struct task_state state = {
.nr_seccomp = -1,
.notify_fd = -1,
.sup_syscall = -1,
.sup_flag_tsync = -1,
.sup_flag_log = -1,
.sup_action_log = -1,
.sup_kill_process = -1,
.sup_flag_spec_allow = -1,
.sup_flag_new_listener = -1,
.sup_user_notif = -1,
.sup_flag_tsync_esrch = -1,
};
/**
* Reset the task state
*
* This function fully resets the library's global "system task state".
*
*/
void sys_reset_state(void)
{
state.nr_seccomp = -1;
if (state.notify_fd > 0)
close(state.notify_fd);
state.notify_fd = -1;
state.sup_syscall = -1;
state.sup_flag_tsync = -1;
state.sup_flag_log = -1;
state.sup_action_log = -1;
state.sup_kill_process = -1;
state.sup_flag_spec_allow = -1;
state.sup_flag_new_listener = -1;
state.sup_user_notif = -1;
state.sup_flag_tsync_esrch = -1;
}
/**
 * Check to see if the seccomp() syscall is supported
 *
 * This function probes the running system for the seccomp() syscall and
 * caches the answer.  The probe is not fully reliable: it can fail for
 * several reasons, a previously loaded seccomp filter being one of them.
 * Return one if the syscall is supported, zero otherwise.
 *
 */
int sys_chk_seccomp_syscall(void)
{
	int rc;
	int nr;
	int supported = 0;

	/* NOTE: calling seccomp() is reasonably safe when the caller first
	 *       starts, but not necessarily later on, so the first answer
	 *       is cached and reused from then on */
	if (state.sup_syscall >= 0)
		return state.sup_syscall;

#if SYSCALL_ALLOWLIST_ENABLE
	/* architecture allowlist */
	switch (arch_def_native->token) {
	case SCMP_ARCH_X86_64:
	case SCMP_ARCH_ARM:
	case SCMP_ARCH_AARCH64:
	case SCMP_ARCH_PPC64:
	case SCMP_ARCH_PPC64LE:
	case SCMP_ARCH_S390:
	case SCMP_ARCH_S390X:
	case SCMP_ARCH_RISCV64:
		break;
	default:
		state.sup_syscall = 0;
		return 0;
	}
#endif

	nr = arch_syscall_resolve_name(arch_def_native, "seccomp");
	if (nr >= 0) {
		/* deliberately invalid call: the second argument must be
		 * zero, so a kernel that knows seccomp() answers EINVAL
		 * while one that does not answers something else (ENOSYS) */
		rc = syscall(nr, SECCOMP_SET_MODE_STRICT, 1, NULL);
		if (rc < 0 && errno == EINVAL) {
			state.nr_seccomp = nr;
			supported = 1;
		}
	}

	state.sup_syscall = supported;
	return supported;
}
/**
 * Force the seccomp() syscall support setting
 * @param enable the intended support state
 *
 * This function overwrites the cached seccomp() syscall support value with
 * the caller's choice; no kernel probing is done.  This is very much a
 * "use at your own risk" function.
 *
 */
void sys_set_seccomp_syscall(bool enable)
{
	if (enable)
		state.sup_syscall = 1;
	else
		state.sup_syscall = 0;
}
/**
 * Check to see if a seccomp action is supported
 * @param action the seccomp action
 *
 * This function checks to see if a seccomp action is supported by the system.
 * KILL_THREAD, TRAP, ERRNO (with a value below MAX_ERRNO), TRACE and ALLOW
 * are always reported as supported; KILL_PROCESS, LOG and NOTIFY are probed
 * through the seccomp() syscall once and the answer is cached.  Return one if
 * the action is supported, zero otherwise.
 *
 */
int sys_chk_seccomp_action(uint32_t action)
{
	const uint32_t data = action & 0x0000ffff;
	int avail;

	if (action == SCMP_ACT_KILL_PROCESS) {
		if (state.sup_kill_process >= 0)
			return state.sup_kill_process;
		avail = (sys_chk_seccomp_syscall() == 1 &&
			 syscall(state.nr_seccomp,
				 SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0);
		state.sup_kill_process = (avail ? 1 : 0);
		return state.sup_kill_process;
	}

	if (action == SCMP_ACT_KILL_THREAD)
		return 1;

	if (action == SCMP_ACT_TRAP)
		return 1;

	/* the errno payload lives in the low 16 bits and must be in range */
	if (action == SCMP_ACT_ERRNO(data) && data < MAX_ERRNO)
		return 1;

	if (action == SCMP_ACT_TRACE(data))
		return 1;

	if (action == SCMP_ACT_LOG) {
		if (state.sup_action_log >= 0)
			return state.sup_action_log;
		avail = (sys_chk_seccomp_syscall() == 1 &&
			 syscall(state.nr_seccomp,
				 SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0);
		state.sup_action_log = (avail ? 1 : 0);
		return state.sup_action_log;
	}

	if (action == SCMP_ACT_ALLOW)
		return 1;

	if (action == SCMP_ACT_NOTIFY) {
		struct seccomp_notif_sizes sizes;

		if (state.sup_user_notif >= 0)
			return state.sup_user_notif;
		/* a successful size query is taken as proof of support */
		avail = (sys_chk_seccomp_syscall() == 1 &&
			 syscall(state.nr_seccomp,
				 SECCOMP_GET_NOTIF_SIZES, 0, &sizes) == 0);
		state.sup_user_notif = (avail ? 1 : 0);
		return state.sup_user_notif;
	}

	/* anything else is not an action we know about */
	return 0;
}
/**
 * Force a seccomp action support setting
 * @param action the seccomp action
 * @param enable the intended support state
 *
 * This function overwrites the cached support value for the LOG,
 * KILL_PROCESS or NOTIFY action; any other action is silently ignored.
 * This is very much a "use at your own risk" function.
 */
void sys_set_seccomp_action(uint32_t action, bool enable)
{
	const int value = (enable ? 1 : 0);

	if (action == SCMP_ACT_LOG)
		state.sup_action_log = value;
	else if (action == SCMP_ACT_KILL_PROCESS)
		state.sup_kill_process = value;
	else if (action == SCMP_ACT_NOTIFY)
		state.sup_user_notif = value;
}
/**
 * Check to see if a seccomp() flag is supported by the kernel
 * @param flag the seccomp() flag
 *
 * This function asks the kernel directly whether it accepts a seccomp()
 * filter flag; nothing is cached here.  Return one if the flag is supported,
 * zero otherwise.
 *
 */
static int _sys_chk_flag_kernel(int flag)
{
	/* without the seccomp() syscall there is nothing to ask */
	if (sys_chk_seccomp_syscall() != 1)
		return 0;

	/* deliberately invalid seccomp(2) call: the filter pointer is NULL,
	 * so the call is expected to fail and only the errno is of interest */
	if (syscall(state.nr_seccomp,
		    SECCOMP_SET_MODE_FILTER, flag, NULL) != -1)
		return 0;

	/* EFAULT is taken to mean the flag itself was accepted */
	return (errno == EFAULT ? 1 : 0);
}
/**
 * Check to see if a seccomp() flag is supported
 * @param flag the seccomp() flag
 *
 * This function checks to see if a seccomp() flag is supported by the system,
 * probing the kernel on the first request for each flag and answering from
 * the cache afterwards.  Return one if the flag is supported, zero if
 * unsupported, and -EOPNOTSUPP if the flag is not one this function knows.
 *
 */
int sys_chk_seccomp_flag(int flag)
{
	int *cached;

	/* pick the cache slot that belongs to this flag */
	switch (flag) {
	case SECCOMP_FILTER_FLAG_TSYNC:
		cached = &state.sup_flag_tsync;
		break;
	case SECCOMP_FILTER_FLAG_LOG:
		cached = &state.sup_flag_log;
		break;
	case SECCOMP_FILTER_FLAG_SPEC_ALLOW:
		cached = &state.sup_flag_spec_allow;
		break;
	case SECCOMP_FILTER_FLAG_NEW_LISTENER:
		cached = &state.sup_flag_new_listener;
		break;
	case SECCOMP_FILTER_FLAG_TSYNC_ESRCH:
		cached = &state.sup_flag_tsync_esrch;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* a negative value means the kernel has not been asked yet */
	if (*cached < 0)
		*cached = _sys_chk_flag_kernel(flag);

	return *cached;
}
/**
 * Force a seccomp() syscall flag support setting
 * @param flag the seccomp() flag
 * @param enable the intended support state
 *
 * This function overwrites the cached support value for a given seccomp()
 * flag; flags without a cache slot are silently ignored.  This is very much
 * a "use at your own risk" function.
 *
 */
void sys_set_seccomp_flag(int flag, bool enable)
{
	int *slot;

	switch (flag) {
	case SECCOMP_FILTER_FLAG_TSYNC:
		slot = &state.sup_flag_tsync;
		break;
	case SECCOMP_FILTER_FLAG_LOG:
		slot = &state.sup_flag_log;
		break;
	case SECCOMP_FILTER_FLAG_SPEC_ALLOW:
		slot = &state.sup_flag_spec_allow;
		break;
	case SECCOMP_FILTER_FLAG_NEW_LISTENER:
		slot = &state.sup_flag_new_listener;
		break;
	case SECCOMP_FILTER_FLAG_TSYNC_ESRCH:
		slot = &state.sup_flag_tsync_esrch;
		break;
	default:
		/* unknown flag, nothing to record */
		return;
	}

	*slot = (enable ? 1 : 0);
}
/**
 * Loads the filter into the kernel
 * @param col the filter collection
 * @param rawrc pass the raw return code if true
 *
 * This function generates the BPF program for the given seccomp filter
 * collection and loads it into the kernel, using the seccomp() syscall when
 * it is available and prctl() otherwise.  If the filter was loaded correctly,
 * the kernel will be enforcing the filter when this function returns.  When
 * the kernel hands back a notification fd it is saved in the task state and
 * zero is returned.
 *
 * Returns zero on success and negative values on error: a failure from the
 * BPF generator is returned unchanged, -ESRCH is returned when the thread
 * sync failed, and any other failure is reported as the negated error code
 * of the failing call if @rawrc is true or as -ECANCELED if it is false.
 *
 */
int sys_filter_load(struct db_filter_col *col, bool rawrc)
{
	int rc;
	/* error code of the failing step; captured at the failure site so
	 * that neither the cleanup below nor a path that never touched errno
	 * can change what is reported to the caller */
	int err = 0;
	bool tsync_notify;
	bool listener_req;
	struct bpf_program *prgm = NULL;

	rc = gen_bpf_generate(col, &prgm);
	if (rc < 0)
		return rc;

	/* attempt to set NO_NEW_PRIVS */
	if (col->attr.nnp_enable) {
		rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
		if (rc < 0) {
			err = errno;
			goto filter_load_out;
		}
	}

	tsync_notify = state.sup_flag_tsync_esrch > 0 && state.notify_fd == -1;
	listener_req = state.sup_user_notif > 0 &&
		       col->notify_used && state.notify_fd == -1;

	/* load the filter into the kernel */
	if (sys_chk_seccomp_syscall() == 1) {
		int flgs = 0;

		if (tsync_notify) {
			/* TSYNC_ESRCH lets TSYNC and NEW_LISTENER coexist */
			if (col->attr.tsync_enable)
				flgs |= SECCOMP_FILTER_FLAG_TSYNC |
					SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
			if (listener_req)
				flgs |= SECCOMP_FILTER_FLAG_NEW_LISTENER;
		} else if (col->attr.tsync_enable) {
			if (listener_req) {
				/* NOTE: we _should_ catch this in db.c */
				rc = -EFAULT;
				/* no call failed here, so errno is stale;
				 * report the internal error explicitly */
				err = EFAULT;
				goto filter_load_out;
			}
			flgs |= SECCOMP_FILTER_FLAG_TSYNC;
		} else if (listener_req)
			flgs |= SECCOMP_FILTER_FLAG_NEW_LISTENER;
		if (col->attr.log_enable)
			flgs |= SECCOMP_FILTER_FLAG_LOG;
		if (col->attr.spec_allow)
			flgs |= SECCOMP_FILTER_FLAG_SPEC_ALLOW;

		rc = syscall(state.nr_seccomp,
			     SECCOMP_SET_MODE_FILTER, flgs, prgm);
		if (rc < 0) {
			err = errno;
		} else if (tsync_notify && rc > 0) {
			/* return 0 on NEW_LISTENER success, but save the fd */
			state.notify_fd = rc;
			rc = 0;
		} else if (rc > 0 && col->attr.tsync_enable) {
			/* always return -ESRCH if we fail to sync threads */
			errno = ESRCH;
			rc = -ESRCH;
		} else if (rc > 0 && state.sup_user_notif > 0) {
			/* return 0 on NEW_LISTENER success, but save the fd */
			state.notify_fd = rc;
			rc = 0;
		}
	} else {
		rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prgm);
		if (rc < 0)
			err = errno;
	}

filter_load_out:
	/* cleanup and return */
	gen_bpf_release(prgm);
	if (rc == -ESRCH)
		return -ESRCH;
	if (rc < 0)
		return (rawrc ? -err : -ECANCELED);
	return rc;
}
/**
 * Return the userspace notification fd
 *
 * This function hands back the userspace notification fd that was obtained
 * through SECCOMP_FILTER_FLAG_NEW_LISTENER and saved in the task state.  If
 * the notification fd has not yet been set, or an error has occurred, -1 is
 * returned.
 *
 */
int sys_notify_fd(void)
{
	const int fd = state.notify_fd;

	return fd;
}
/**
 * Allocate a pair of notification request/response structures
 * @param req the request location
 * @param resp the response location
 *
 * This function allocates a pair of request/response structures by computing
 * the correct sizes based on the currently running kernel; the sizes are
 * queried once and remembered for later calls.  Either location may be NULL,
 * in which case that structure is not allocated.  The caller owns the
 * returned buffers and releases them with free().
 *
 * Returns zero on success, and negative values on failure: -EOPNOTSUPP if
 * seccomp() syscall support has not been established, -ECANCELED if the size
 * query fails, -EFAULT if the kernel reports a zero size, and -ENOMEM if an
 * allocation fails.  On -ENOMEM no buffer is left allocated and any location
 * that was written to holds NULL.
 *
 */
int sys_notify_alloc(struct seccomp_notif **req,
		     struct seccomp_notif_resp **resp)
{
	int rc;
	static struct seccomp_notif_sizes sizes = { 0, 0, 0 };

	if (state.sup_syscall <= 0)
		return -EOPNOTSUPP;

	/* ask the kernel for the structure sizes only once */
	if (sizes.seccomp_notif == 0 && sizes.seccomp_notif_resp == 0) {
		rc = syscall(__NR_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes);
		if (rc < 0)
			return -ECANCELED;
	}
	if (sizes.seccomp_notif == 0 || sizes.seccomp_notif_resp == 0)
		return -EFAULT;

	if (req) {
		*req = zmalloc(sizes.seccomp_notif);
		if (!*req)
			return -ENOMEM;
	}

	if (resp) {
		*resp = zmalloc(sizes.seccomp_notif_resp);
		if (!*resp) {
			if (req) {
				free(*req);
				/* never hand a freed pointer back to the
				 * caller; it could be used or freed again */
				*req = NULL;
			}
			return -ENOMEM;
		}
	}

	return 0;
}
/**
 * Receive a notification from a seccomp notification fd
 * @param fd the notification fd
 * @param req the request buffer to save into
 *
 * Issues the SECCOMP_IOCTL_NOTIF_RECV ioctl on this fd, which blocks waiting
 * for a notification.  This function is thread safe (synchronization is
 * performed in the kernel).  Returns zero on success, -EOPNOTSUPP if
 * userspace notification support has not been established, and -ECANCELED
 * if the ioctl fails.
 *
 */
int sys_notify_receive(int fd, struct seccomp_notif *req)
{
	int rc;

	if (state.sup_user_notif <= 0)
		return -EOPNOTSUPP;

	rc = ioctl(fd, SECCOMP_IOCTL_NOTIF_RECV, req);
	return (rc < 0 ? -ECANCELED : 0);
}
/**
 * Send a notification response to a seccomp notification fd
 * @param fd the notification fd
 * @param resp the response buffer to use
 *
 * Issues the SECCOMP_IOCTL_NOTIF_SEND ioctl on this fd to deliver the
 * response.  This function is thread safe (synchronization is performed in
 * the kernel).  Returns zero on success, -EOPNOTSUPP if userspace
 * notification support has not been established, and -ECANCELED if the
 * ioctl fails.
 *
 */
int sys_notify_respond(int fd, struct seccomp_notif_resp *resp)
{
	int rc;

	if (state.sup_user_notif <= 0)
		return -EOPNOTSUPP;

	rc = ioctl(fd, SECCOMP_IOCTL_NOTIF_SEND, resp);
	if (rc >= 0)
		return 0;
	return -ECANCELED;
}
/**
 * Check if a notification id is still valid
 * @param fd the notification fd
 * @param id the id to test
 *
 * Issues the SECCOMP_IOCTL_NOTIF_ID_VALID ioctl on this fd for the given id.
 * Returns 0 if the id is still valid, -EOPNOTSUPP if userspace notification
 * support has not been established, and -ENOENT if the ioctl fails.
 *
 */
int sys_notify_id_valid(int fd, uint64_t id)
{
	int rc;

	if (state.sup_user_notif <= 0)
		return -EOPNOTSUPP;

	/* the ioctl takes the id by reference */
	rc = ioctl(fd, SECCOMP_IOCTL_NOTIF_ID_VALID, &id);
	return (rc < 0 ? -ENOENT : 0);
}