/* bubblewrap
 * Copyright (C) 2016 Alexander Larsson
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */
Packit Service fee338
Packit Service fee338
#include "config.h"

#include <poll.h>
#include <sched.h>
#include <pwd.h>
#include <grp.h>
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/fsuid.h>
#include <sys/signalfd.h>
#include <sys/capability.h>
#include <sys/prctl.h>
#include <linux/sched.h>
#include <linux/seccomp.h>
#include <linux/filter.h>

#include "utils.h"
#include "network.h"
#include "bind-mount.h"
Packit Service fee338
Packit Service fee338
/* Provide CLONE_NEWCGROUP when the included headers do not define it. */
#ifndef CLONE_NEWCGROUP
#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
#endif
Packit Service fee338
Packit Service fee338
/* Fallback for C libraries that do not supply TEMP_FAILURE_RETRY.
 * Evaluates `expression` repeatedly for as long as it yields -1 with
 * errno set to EINTR, and produces the final result as a long int.
 * Relies on the GNU statement-expression extension. */
#ifndef TEMP_FAILURE_RETRY
#define TEMP_FAILURE_RETRY(expression) \
  (__extension__                                                              \
    ({ long int __result;                                                     \
       do __result = (long int) (expression);                                 \
       while (__result == -1L && errno == EINTR);                             \
       __result; }))
#endif
Packit Service fee338
Packit Service fee338
/* Globals to avoid having to use getuid(), since the uid/gid changes during runtime */
static uid_t real_uid;
static gid_t real_gid;
static uid_t overflow_uid;
static gid_t overflow_gid;
static bool is_privileged; /* See acquire_privs() */
static const char *argv0;        /* Program name printed by usage() */
static const char *host_tty_dev;
static int proc_fd = -1;         /* Handed to fdwalk() by monitor_child(); -1 until set */
static const char *opt_exec_label = NULL;
static const char *opt_file_label = NULL;
static bool opt_as_pid_1;
Packit Service fee338
Packit Service fee338
const char *opt_chdir_path = NULL;
Packit Service fee338
bool opt_unshare_user = FALSE;
Packit Service fee338
bool opt_unshare_user_try = FALSE;
Packit Service fee338
bool opt_unshare_pid = FALSE;
Packit Service fee338
bool opt_unshare_ipc = FALSE;
Packit Service fee338
bool opt_unshare_net = FALSE;
Packit Service fee338
bool opt_unshare_uts = FALSE;
Packit Service fee338
bool opt_unshare_cgroup = FALSE;
Packit Service fee338
bool opt_unshare_cgroup_try = FALSE;
Packit Service fee338
bool opt_needs_devpts = FALSE;
Packit Service fee338
bool opt_new_session = FALSE;
Packit Service fee338
bool opt_die_with_parent = FALSE;
Packit Service fee338
uid_t opt_sandbox_uid = -1;
Packit Service fee338
gid_t opt_sandbox_gid = -1;
Packit Service fee338
int opt_sync_fd = -1;
Packit Service fee338
int opt_block_fd = -1;
Packit Service fee338
int opt_userns_block_fd = -1;
Packit Service fee338
int opt_info_fd = -1;
Packit Service fee338
int opt_json_status_fd = -1;
Packit Service fee338
int opt_seccomp_fd = -1;
Packit Service fee338
const char *opt_sandbox_hostname = NULL;
Packit Service fee338
char *opt_args_data = NULL;  /* owned */
Packit Service fee338
int opt_userns_fd = -1;
Packit Service fee338
int opt_userns2_fd = -1;
Packit Service fee338
int opt_pidns_fd = -1;
Packit Service fee338
Packit Service fee338
/* Bit masks for a capability number within one 32-bit capability word:
 * CAP_TO_MASK_0(x) is bit (x & 31); CAP_TO_MASK_1(x) is the same for
 * x - 32, i.e. for capabilities stored in the second word. */
#define CAP_TO_MASK_0(x) (1L << ((x) & 31))
#define CAP_TO_MASK_1(x) CAP_TO_MASK_0(x - 32)
Packit Service fee338
Packit Service fee338
typedef struct _NsInfo NsInfo;

/* One row of the ns_infos table. */
struct _NsInfo {
  const char *name;        /* Namespace name, e.g. "net" */
  bool       *do_unshare;  /* Option flag for this namespace, or NULL if there is none */
  ino_t       id;          /* Starts as 0 in ns_infos; presumably filled in later with
                            * the namespace's inode number -- confirm against the code
                            * that writes it (outside this view) */
};
Packit Service fee338
Packit Service fee338
static NsInfo ns_infos[] = {
Packit Service fee338
  {"cgroup", &opt_unshare_cgroup, 0},
Packit Service fee338
  {"ipc",    &opt_unshare_ipc,    0},
Packit Service fee338
  {"mnt",    NULL,                0},
Packit Service fee338
  {"net",    &opt_unshare_net,    0},
Packit Service fee338
  {"pid",    &opt_unshare_pid,    0},
Packit Service fee338
  /* user namespace info omitted because it
Packit Service fee338
   * is not (yet) valid when we obtain the
Packit Service fee338
   * namespace info (get un-shared later) */
Packit Service fee338
  {"uts",    &opt_unshare_uts,    0},
Packit Service fee338
  {NULL,     NULL,                0}
Packit Service fee338
};
Packit Service fee338
Packit Service fee338
/* Kind of a queued setup operation (see SetupOp).  The names parallel
 * the filesystem/hostname options listed in usage(); the code that
 * carries out each kind is outside this view. */
typedef enum {
  SETUP_BIND_MOUNT,
  SETUP_RO_BIND_MOUNT,
  SETUP_DEV_BIND_MOUNT,
  SETUP_MOUNT_PROC,
  SETUP_MOUNT_DEV,
  SETUP_MOUNT_TMPFS,
  SETUP_MOUNT_MQUEUE,
  SETUP_MAKE_DIR,
  SETUP_MAKE_FILE,
  SETUP_MAKE_BIND_FILE,
  SETUP_MAKE_RO_BIND_FILE,
  SETUP_MAKE_SYMLINK,
  SETUP_REMOUNT_RO_NO_RECURSIVE,
  SETUP_SET_HOSTNAME,
} SetupOpType;
Packit Service fee338
Packit Service fee338
/* Bit flags stored in SetupOp.flags; each flag occupies its own bit. */
typedef enum {
  NO_CREATE_DEST = (1 << 0),
  ALLOW_NOTEXIST = (1 << 1),
} SetupOpFlag;
Packit Service fee338
Packit Service fee338
typedef struct _SetupOp SetupOp;
Packit Service fee338
Packit Service fee338
struct _SetupOp
Packit Service fee338
{
Packit Service fee338
  SetupOpType type;
Packit Service fee338
  const char *source;
Packit Service fee338
  const char *dest;
Packit Service fee338
  int         fd;
Packit Service fee338
  SetupOpFlag flags;
Packit Service fee338
  SetupOp    *next;
Packit Service fee338
};
Packit Service fee338
Packit Service fee338
typedef struct _LockFile LockFile;

/* One requested lock file; nodes form a singly linked list headed by
 * the global `lock_files`. */
struct _LockFile
{
  const char *path;  /* Path that do_init() opens and locks */
  int         fd;    /* Descriptor holding the lock once do_init() has taken it */
  LockFile   *next;  /* Next lock file, or NULL at the tail */
};
Packit Service fee338
Packit Service fee338
static SetupOp *ops = NULL;
Packit Service fee338
static SetupOp *last_op = NULL;
Packit Service fee338
static LockFile *lock_files = NULL;
Packit Service fee338
static LockFile *last_lock_file = NULL;
Packit Service fee338
Packit Service fee338
/* Operation codes carried in PrivSepOp.op.  The code that sends and
 * interprets them is outside this view. */
enum {
  PRIV_SEP_OP_DONE,
  PRIV_SEP_OP_BIND_MOUNT,
  PRIV_SEP_OP_PROC_MOUNT,
  PRIV_SEP_OP_TMPFS_MOUNT,
  PRIV_SEP_OP_DEVPTS_MOUNT,
  PRIV_SEP_OP_MQUEUE_MOUNT,
  PRIV_SEP_OP_REMOUNT_RO_NO_RECURSIVE,
  PRIV_SEP_OP_SET_HOSTNAME,
};
Packit Service fee338
Packit Service fee338
/* Fixed-size record describing one privilege-separated operation.
 * NOTE(review): arg1_offset/arg2_offset look like offsets of the
 * operation's arguments inside an accompanying buffer -- confirm
 * against the code that builds these records (outside this view). */
typedef struct
{
  uint32_t op;           /* One of the PRIV_SEP_OP_* codes */
  uint32_t flags;
  uint32_t arg1_offset;
  uint32_t arg2_offset;
} PrivSepOp;
Packit Service fee338
Packit Service fee338
static SetupOp *
Packit Service fee338
setup_op_new (SetupOpType type)
Packit Service fee338
{
Packit Service fee338
  SetupOp *op = xcalloc (sizeof (SetupOp));
Packit Service fee338
Packit Service fee338
  op->type = type;
Packit Service fee338
  op->fd = -1;
Packit Service fee338
  op->flags = 0;
Packit Service fee338
  if (last_op != NULL)
Packit Service fee338
    last_op->next = op;
Packit Service fee338
  else
Packit Service fee338
    ops = op;
Packit Service fee338
Packit Service fee338
  last_op = op;
Packit Service fee338
  return op;
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static LockFile *
Packit Service fee338
lock_file_new (const char *path)
Packit Service fee338
{
Packit Service fee338
  LockFile *lock = xcalloc (sizeof (LockFile));
Packit Service fee338
Packit Service fee338
  lock->path = path;
Packit Service fee338
  if (last_lock_file != NULL)
Packit Service fee338
    last_lock_file->next = lock;
Packit Service fee338
  else
Packit Service fee338
    lock_files = lock;
Packit Service fee338
Packit Service fee338
  last_lock_file = lock;
Packit Service fee338
  return lock;
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
usage (int ecode, FILE *out)
Packit Service fee338
{
Packit Service fee338
  fprintf (out, "usage: %s [OPTIONS...] [--] COMMAND [ARGS...]\n\n", argv0);
Packit Service fee338
Packit Service fee338
  fprintf (out,
Packit Service fee338
           "    --help                       Print this help\n"
Packit Service fee338
           "    --version                    Print version\n"
Packit Service fee338
           "    --args FD                    Parse NUL-separated args from FD\n"
Packit Service fee338
           "    --unshare-all                Unshare every namespace we support by default\n"
Packit Service fee338
           "    --share-net                  Retain the network namespace (can only combine with --unshare-all)\n"
Packit Service fee338
           "    --unshare-user               Create new user namespace (may be automatically implied if not setuid)\n"
Packit Service fee338
           "    --unshare-user-try           Create new user namespace if possible else continue by skipping it\n"
Packit Service fee338
           "    --unshare-ipc                Create new ipc namespace\n"
Packit Service fee338
           "    --unshare-pid                Create new pid namespace\n"
Packit Service fee338
           "    --unshare-net                Create new network namespace\n"
Packit Service fee338
           "    --unshare-uts                Create new uts namespace\n"
Packit Service fee338
           "    --unshare-cgroup             Create new cgroup namespace\n"
Packit Service fee338
           "    --unshare-cgroup-try         Create new cgroup namespace if possible else continue by skipping it\n"
Packit Service fee338
           "    --userns FD                  Use this user namespace (cannot combine with --unshare-user)\n"
Packit Service fee338
           "    --userns2 FD                 After setup switch to this user namspace, only useful with --userns\n"
Packit Service fee338
           "    --pidns FD                   Use this user namespace (as parent namespace if using --unshare-pid)\n"
Packit Service fee338
           "    --uid UID                    Custom uid in the sandbox (requires --unshare-user or --userns)\n"
Packit Service fee338
           "    --gid GID                    Custom gid in the sandbox (requires --unshare-user or --userns)\n"
Packit Service fee338
           "    --hostname NAME              Custom hostname in the sandbox (requires --unshare-uts)\n"
Packit Service fee338
           "    --chdir DIR                  Change directory to DIR\n"
Packit Service fee338
           "    --setenv VAR VALUE           Set an environment variable\n"
Packit Service fee338
           "    --unsetenv VAR               Unset an environment variable\n"
Packit Service fee338
           "    --lock-file DEST             Take a lock on DEST while sandbox is running\n"
Packit Service fee338
           "    --sync-fd FD                 Keep this fd open while sandbox is running\n"
Packit Service fee338
           "    --bind SRC DEST              Bind mount the host path SRC on DEST\n"
Packit Service fee338
           "    --bind-try SRC DEST          Equal to --bind but ignores non-existent SRC\n"
Packit Service fee338
           "    --dev-bind SRC DEST          Bind mount the host path SRC on DEST, allowing device access\n"
Packit Service fee338
           "    --dev-bind-try SRC DEST      Equal to --dev-bind but ignores non-existent SRC\n"
Packit Service fee338
           "    --ro-bind SRC DEST           Bind mount the host path SRC readonly on DEST\n"
Packit Service fee338
           "    --ro-bind-try SRC DEST       Equal to --ro-bind but ignores non-existent SRC\n"
Packit Service fee338
           "    --remount-ro DEST            Remount DEST as readonly; does not recursively remount\n"
Packit Service fee338
           "    --exec-label LABEL           Exec label for the sandbox\n"
Packit Service fee338
           "    --file-label LABEL           File label for temporary sandbox content\n"
Packit Service fee338
           "    --proc DEST                  Mount new procfs on DEST\n"
Packit Service fee338
           "    --dev DEST                   Mount new dev on DEST\n"
Packit Service fee338
           "    --tmpfs DEST                 Mount new tmpfs on DEST\n"
Packit Service fee338
           "    --mqueue DEST                Mount new mqueue on DEST\n"
Packit Service fee338
           "    --dir DEST                   Create dir at DEST\n"
Packit Service fee338
           "    --file FD DEST               Copy from FD to destination DEST\n"
Packit Service fee338
           "    --bind-data FD DEST          Copy from FD to file which is bind-mounted on DEST\n"
Packit Service fee338
           "    --ro-bind-data FD DEST       Copy from FD to file which is readonly bind-mounted on DEST\n"
Packit Service fee338
           "    --symlink SRC DEST           Create symlink at DEST with target SRC\n"
Packit Service fee338
           "    --seccomp FD                 Load and use seccomp rules from FD\n"
Packit Service fee338
           "    --block-fd FD                Block on FD until some data to read is available\n"
Packit Service fee338
           "    --userns-block-fd FD         Block on FD until the user namespace is ready\n"
Packit Service fee338
           "    --info-fd FD                 Write information about the running container to FD\n"
Packit Service fee338
           "    --json-status-fd FD          Write container status to FD as multiple JSON documents\n"
Packit Service fee338
           "    --new-session                Create a new terminal session\n"
Packit Service fee338
           "    --die-with-parent            Kills with SIGKILL child process (COMMAND) when bwrap or bwrap's parent dies.\n"
Packit Service fee338
           "    --as-pid-1                   Do not install a reaper process with PID=1\n"
Packit Service fee338
           "    --cap-add CAP                Add cap CAP when running as privileged user\n"
Packit Service fee338
           "    --cap-drop CAP               Drop cap CAP when running as privileged user\n"
Packit Service fee338
          );
Packit Service fee338
  exit (ecode);
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* If --die-with-parent was specified, use PDEATHSIG to ensure SIGKILL
Packit Service fee338
 * is sent to the current process when our parent dies.
Packit Service fee338
 */
Packit Service fee338
static void
Packit Service fee338
handle_die_with_parent (void)
Packit Service fee338
{
Packit Service fee338
  if (opt_die_with_parent && prctl (PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) != 0)
Packit Service fee338
    die_with_error ("prctl");
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
block_sigchild (void)
Packit Service fee338
{
Packit Service fee338
  sigset_t mask;
Packit Service fee338
  int status;
Packit Service fee338
Packit Service fee338
  sigemptyset (&mask);
Packit Service fee338
  sigaddset (&mask, SIGCHLD);
Packit Service fee338
Packit Service fee338
  if (sigprocmask (SIG_BLOCK, &mask, NULL) == -1)
Packit Service fee338
    die_with_error ("sigprocmask");
Packit Service fee338
Packit Service fee338
  /* Reap any outstanding zombies that we may have inherited */
Packit Service fee338
  while (waitpid (-1, &status, WNOHANG) > 0)
Packit Service fee338
    ;
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
unblock_sigchild (void)
Packit Service fee338
{
Packit Service fee338
  sigset_t mask;
Packit Service fee338
Packit Service fee338
  sigemptyset (&mask);
Packit Service fee338
  sigaddset (&mask, SIGCHLD);
Packit Service fee338
Packit Service fee338
  if (sigprocmask (SIG_UNBLOCK, &mask, NULL) == -1)
Packit Service fee338
    die_with_error ("sigprocmask");
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* Closes all fd:s except 0,1,2 and the passed in array of extra fds.
 *
 * Per-descriptor callback (monitor_child() hands it to fdwalk()).
 *   data: int array of descriptors to keep open, terminated by -1
 *   fd:   the descriptor under consideration
 * Always returns 0; the result of close() is deliberately ignored. */
static int
close_extra_fds (void *data, int fd)
{
  const int *extra_fds = (const int *) data;
  int i;

  /* Never touch stdin, stdout or stderr */
  if (fd <= 2)
    return 0;

  for (i = 0; extra_fds[i] != -1; i++)
    {
      if (fd == extra_fds[i])
        return 0;
    }

  close (fd);
  return 0;
}
Packit Service fee338
Packit Service fee338
/* Convert a wait()-style status word into an exit code for our caller.
 *
 * Returns the child's own exit code if it exited normally, 128 + the
 * signal number if it was killed by a signal, and 255 otherwise. */
static int
propagate_exit_status (int status)
{
  if (WIFEXITED (status))
    return WEXITSTATUS (status);

  /* The process died of a signal, we can't really report that, but we
   * can at least be bash-compatible. The bash manpage says:
   *   The return value of a simple command is its
   *   exit status, or 128+n if the command is
   *   terminated by signal n.
   */
  if (WIFSIGNALED (status))
    return 128 + WTERMSIG (status);

  /* Weird? */
  return 255;
}
Packit Service fee338
Packit Service fee338
/* Write the NUL-terminated string `output` (without its terminator)
 * to `fd` via write_to_fd().  A non-zero result from write_to_fd() is
 * treated as failure: fatal if exit_on_error is set, silently ignored
 * otherwise. */
static void
dump_info (int fd, const char *output, bool exit_on_error)
{
  size_t len = strlen (output);

  if (write_to_fd (fd, output, len) != 0 && exit_on_error)
    die_with_error ("Write to info_fd");
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
report_child_exit_status (int exitc, int setup_finished_fd)
Packit Service fee338
{
Packit Service fee338
  ssize_t s;
Packit Service fee338
  char data[2];
Packit Service fee338
  cleanup_free char *output = NULL;
Packit Service fee338
  if (opt_json_status_fd == -1 || setup_finished_fd == -1)
Packit Service fee338
    return;
Packit Service fee338
Packit Service fee338
  s = TEMP_FAILURE_RETRY (read (setup_finished_fd, data, sizeof data));
Packit Service fee338
  if (s == -1 && errno != EAGAIN)
Packit Service fee338
    die_with_error ("read eventfd");
Packit Service fee338
  if (s != 1) // Is 0 if pipe closed before exec, is 2 if closed after exec.
Packit Service fee338
    return;
Packit Service fee338
Packit Service fee338
  output = xasprintf ("{ \"exit-code\": %i }\n", exitc);
Packit Service fee338
  dump_info (opt_json_status_fd, output, FALSE);
Packit Service fee338
  close (opt_json_status_fd);
Packit Service fee338
  opt_json_status_fd = -1;
Packit Service fee338
  close (setup_finished_fd);
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* This stays around for as long as the initial process in the app does
Packit Service fee338
 * and when that exits it exits, propagating the exit status. We do this
Packit Service fee338
 * by having pid 1 in the sandbox detect this exit and tell the monitor
Packit Service fee338
 * the exit status via a eventfd. We also track the exit of the sandbox
Packit Service fee338
 * pid 1 via a signalfd for SIGCHLD, and exit with an error in this case.
Packit Service fee338
 * This is to catch e.g. problems during setup. */
Packit Service fee338
static int
Packit Service fee338
monitor_child (int event_fd, pid_t child_pid, int setup_finished_fd)
Packit Service fee338
{
Packit Service fee338
  int res;
Packit Service fee338
  uint64_t val;
Packit Service fee338
  ssize_t s;
Packit Service fee338
  int signal_fd;
Packit Service fee338
  sigset_t mask;
Packit Service fee338
  struct pollfd fds[2];
Packit Service fee338
  int num_fds;
Packit Service fee338
  struct signalfd_siginfo fdsi;
Packit Service fee338
  int dont_close[] = {-1, -1, -1, -1};
Packit Service fee338
  int j = 0;
Packit Service fee338
  int exitc;
Packit Service fee338
  pid_t died_pid;
Packit Service fee338
  int died_status;
Packit Service fee338
Packit Service fee338
  /* Close all extra fds in the monitoring process.
Packit Service fee338
     Any passed in fds have been passed on to the child anyway. */
Packit Service fee338
  if (event_fd != -1)
Packit Service fee338
    dont_close[j++] = event_fd;
Packit Service fee338
  if (opt_json_status_fd != -1)
Packit Service fee338
    dont_close[j++] = opt_json_status_fd;
Packit Service fee338
  if (setup_finished_fd != -1)
Packit Service fee338
    dont_close[j++] = setup_finished_fd;
Packit Service fee338
  assert (j < sizeof(dont_close)/sizeof(*dont_close));
Packit Service fee338
  fdwalk (proc_fd, close_extra_fds, dont_close);
Packit Service fee338
Packit Service fee338
  sigemptyset (&mask);
Packit Service fee338
  sigaddset (&mask, SIGCHLD);
Packit Service fee338
Packit Service fee338
  signal_fd = signalfd (-1, &mask, SFD_CLOEXEC | SFD_NONBLOCK);
Packit Service fee338
  if (signal_fd == -1)
Packit Service fee338
    die_with_error ("Can't create signalfd");
Packit Service fee338
Packit Service fee338
  num_fds = 1;
Packit Service fee338
  fds[0].fd = signal_fd;
Packit Service fee338
  fds[0].events = POLLIN;
Packit Service fee338
  if (event_fd != -1)
Packit Service fee338
    {
Packit Service fee338
      fds[1].fd = event_fd;
Packit Service fee338
      fds[1].events = POLLIN;
Packit Service fee338
      num_fds++;
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  while (1)
Packit Service fee338
    {
Packit Service fee338
      fds[0].revents = fds[1].revents = 0;
Packit Service fee338
      res = poll (fds, num_fds, -1);
Packit Service fee338
      if (res == -1 && errno != EINTR)
Packit Service fee338
        die_with_error ("poll");
Packit Service fee338
Packit Service fee338
      /* Always read from the eventfd first, if pid 2 died then pid 1 often
Packit Service fee338
       * dies too, and we could race, reporting that first and we'd lose
Packit Service fee338
       * the real exit status. */
Packit Service fee338
      if (event_fd != -1)
Packit Service fee338
        {
Packit Service fee338
          s = read (event_fd, &val, 8);
Packit Service fee338
          if (s == -1 && errno != EINTR && errno != EAGAIN)
Packit Service fee338
            die_with_error ("read eventfd");
Packit Service fee338
          else if (s == 8)
Packit Service fee338
            {
Packit Service fee338
              exitc = (int) val - 1;
Packit Service fee338
              report_child_exit_status (exitc, setup_finished_fd);
Packit Service fee338
              return exitc;
Packit Service fee338
            }
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      /* We need to read the signal_fd, or it will keep polling as read,
Packit Service fee338
       * however we ignore the details as we get them from waitpid
Packit Service fee338
       * below anyway */
Packit Service fee338
      s = read (signal_fd, &fdsi, sizeof (struct signalfd_siginfo));
Packit Service fee338
      if (s == -1 && errno != EINTR && errno != EAGAIN)
Packit Service fee338
        die_with_error ("read signalfd");
Packit Service fee338
Packit Service fee338
      /* We may actually get several sigchld compressed into one
Packit Service fee338
         SIGCHLD, so we have to handle all of them. */
Packit Service fee338
      while ((died_pid = waitpid (-1, &died_status, WNOHANG)) > 0)
Packit Service fee338
        {
Packit Service fee338
          /* We may be getting sigchild from other children too. For instance if
Packit Service fee338
             someone created a child process, and then exec:ed bubblewrap. Ignore them */
Packit Service fee338
          if (died_pid == child_pid)
Packit Service fee338
            {
Packit Service fee338
              exitc = propagate_exit_status (died_status);
Packit Service fee338
              report_child_exit_status (exitc, setup_finished_fd);
Packit Service fee338
              return exitc;
Packit Service fee338
            }
Packit Service fee338
        }
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  die ("Should not be reached");
Packit Service fee338
Packit Service fee338
  return 0;
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* This is pid 1 in the app sandbox. It is needed because we're using
Packit Service fee338
 * pid namespaces, and someone has to reap zombies in it. We also detect
Packit Service fee338
 * when the initial process (pid 2) dies and report its exit status to
Packit Service fee338
 * the monitor so that it can return it to the original spawner.
Packit Service fee338
 *
Packit Service fee338
 * When there are no other processes in the sandbox the wait will return
Packit Service fee338
 * ECHILD, and we then exit pid 1 to clean up the sandbox. */
Packit Service fee338
static int
Packit Service fee338
do_init (int event_fd, pid_t initial_pid, struct sock_fprog *seccomp_prog)
Packit Service fee338
{
Packit Service fee338
  int initial_exit_status = 1;
Packit Service fee338
  LockFile *lock;
Packit Service fee338
Packit Service fee338
  for (lock = lock_files; lock != NULL; lock = lock->next)
Packit Service fee338
    {
Packit Service fee338
      int fd = open (lock->path, O_RDONLY | O_CLOEXEC);
Packit Service fee338
      if (fd == -1)
Packit Service fee338
        die_with_error ("Unable to open lock file %s", lock->path);
Packit Service fee338
Packit Service fee338
      struct flock l = {
Packit Service fee338
        .l_type = F_RDLCK,
Packit Service fee338
        .l_whence = SEEK_SET,
Packit Service fee338
        .l_start = 0,
Packit Service fee338
        .l_len = 0
Packit Service fee338
      };
Packit Service fee338
Packit Service fee338
      if (fcntl (fd, F_SETLK, &l) < 0)
Packit Service fee338
        die_with_error ("Unable to lock file %s", lock->path);
Packit Service fee338
Packit Service fee338
      /* Keep fd open to hang on to lock */
Packit Service fee338
      lock->fd = fd;
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /* Optionally bind our lifecycle to that of the caller */
Packit Service fee338
  handle_die_with_parent ();
Packit Service fee338
Packit Service fee338
  if (seccomp_prog != NULL &&
Packit Service fee338
      prctl (PR_SET_SECCOMP, SECCOMP_MODE_FILTER, seccomp_prog) != 0)
Packit Service fee338
    die_with_error ("prctl(PR_SET_SECCOMP)");
Packit Service fee338
Packit Service fee338
  while (TRUE)
Packit Service fee338
    {
Packit Service fee338
      pid_t child;
Packit Service fee338
      int status;
Packit Service fee338
Packit Service fee338
      child = wait (&status);
Packit Service fee338
      if (child == initial_pid && event_fd != -1)
Packit Service fee338
        {
Packit Service fee338
          uint64_t val;
Packit Service fee338
          int res UNUSED;
Packit Service fee338
Packit Service fee338
          initial_exit_status = propagate_exit_status (status);
Packit Service fee338
Packit Service fee338
          val = initial_exit_status + 1;
Packit Service fee338
          res = write (event_fd, &val, 8);
Packit Service fee338
          /* Ignore res, if e.g. the parent died and closed event_fd
Packit Service fee338
             we don't want to error out here */
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      if (child == -1 && errno != EINTR)
Packit Service fee338
        {
Packit Service fee338
          if (errno != ECHILD)
Packit Service fee338
            die_with_error ("init wait()");
Packit Service fee338
          break;
Packit Service fee338
        }
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /* Close FDs. */
Packit Service fee338
  for (lock = lock_files; lock != NULL; lock = lock->next)
Packit Service fee338
    {
Packit Service fee338
      if (lock->fd >= 0)
Packit Service fee338
        {
Packit Service fee338
          close (lock->fd);
Packit Service fee338
          lock->fd = -1;
Packit Service fee338
        }
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  return initial_exit_status;
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* CAP_TO_MASK_0 () and CAP_TO_MASK_1 () are defined near the top of
 * this file; the identical second definition that used to sit here
 * was redundant. */

/* Set if --cap-add or --cap-drop were used */
static bool opt_cap_add_or_drop_used;
/* The capability set we'll target, used if above is true */
static uint32_t requested_caps[2] = {0, 0};

/* low 32bit caps needed */
/* CAP_SYS_PTRACE is needed to dereference the symlinks in /proc/<pid>/ns/, see namespaces(7) */
#define REQUIRED_CAPS_0 (CAP_TO_MASK_0 (CAP_SYS_ADMIN) | CAP_TO_MASK_0 (CAP_SYS_CHROOT) | CAP_TO_MASK_0 (CAP_NET_ADMIN) | CAP_TO_MASK_0 (CAP_SETUID) | CAP_TO_MASK_0 (CAP_SETGID) | CAP_TO_MASK_0 (CAP_SYS_PTRACE))
/* high 32bit caps needed */
#define REQUIRED_CAPS_1 0
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
set_required_caps (void)
Packit Service fee338
{
Packit Service fee338
  struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
Packit Service fee338
  struct __user_cap_data_struct data[2] = { { 0 } };
Packit Service fee338
Packit Service fee338
  /* Drop all non-require capabilities */
Packit Service fee338
  data[0].effective = REQUIRED_CAPS_0;
Packit Service fee338
  data[0].permitted = REQUIRED_CAPS_0;
Packit Service fee338
  data[0].inheritable = 0;
Packit Service fee338
  data[1].effective = REQUIRED_CAPS_1;
Packit Service fee338
  data[1].permitted = REQUIRED_CAPS_1;
Packit Service fee338
  data[1].inheritable = 0;
Packit Service fee338
  if (capset (&hdr, data) < 0)
Packit Service fee338
    die_with_error ("capset failed");
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
drop_all_caps (bool keep_requested_caps)
Packit Service fee338
{
Packit Service fee338
  struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
Packit Service fee338
  struct __user_cap_data_struct data[2] = { { 0 } };
Packit Service fee338
Packit Service fee338
  if (keep_requested_caps)
Packit Service fee338
    {
Packit Service fee338
      /* Avoid calling capset() unless we need to; currently
Packit Service fee338
       * systemd-nspawn at least is known to install a seccomp
Packit Service fee338
       * policy denying capset() for dubious reasons.
Packit Service fee338
       * <https://github.com/projectatomic/bubblewrap/pull/122>
Packit Service fee338
       */
Packit Service fee338
      if (!opt_cap_add_or_drop_used && real_uid == 0)
Packit Service fee338
        {
Packit Service fee338
          assert (!is_privileged);
Packit Service fee338
          return;
Packit Service fee338
        }
Packit Service fee338
      data[0].effective = requested_caps[0];
Packit Service fee338
      data[0].permitted = requested_caps[0];
Packit Service fee338
      data[0].inheritable = requested_caps[0];
Packit Service fee338
      data[1].effective = requested_caps[1];
Packit Service fee338
      data[1].permitted = requested_caps[1];
Packit Service fee338
      data[1].inheritable = requested_caps[1];
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  if (capset (&hdr, data) < 0)
Packit Service fee338
    {
Packit Service fee338
      /* While the above logic ensures we don't call capset() for the primary
Packit Service fee338
       * process unless configured to do so, we still try to drop privileges for
Packit Service fee338
       * the init process unconditionally. Since due to the systemd seccomp
Packit Service fee338
       * filter that will fail, let's just ignore it.
Packit Service fee338
       */
Packit Service fee338
      if (errno == EPERM && real_uid == 0 && !is_privileged)
Packit Service fee338
        return;
Packit Service fee338
      else
Packit Service fee338
        die_with_error ("capset failed");
Packit Service fee338
    }
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* Report whether the permitted capability set of the calling process
 * contains at least one capability.  Dies if capget() fails. */
static bool
has_caps (void)
{
  struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
  struct __user_cap_data_struct data[2] = { { 0 } };
  uint32_t permitted_bits;

  if (capget (&hdr, data) < 0)
    die_with_error ("capget failed");

  /* Any bit set in either 32-bit word means we hold a capability */
  permitted_bits = data[0].permitted | data[1].permitted;
  return permitted_bits != 0;
}
Packit Service fee338
Packit Service fee338
/* Most of the code here is used both to add caps to the ambient capabilities
Packit Service fee338
 * and drop caps from the bounding set.  Handle both cases here and add
Packit Service fee338
 * drop_cap_bounding_set/set_ambient_capabilities wrappers to facilitate its usage.
Packit Service fee338
 */
Packit Service fee338
static void
Packit Service fee338
prctl_caps (uint32_t *caps, bool do_cap_bounding, bool do_set_ambient)
Packit Service fee338
{
Packit Service fee338
  unsigned long cap;
Packit Service fee338
Packit Service fee338
  /* We ignore both EINVAL and EPERM, as we are actually relying
Packit Service fee338
   * on PR_SET_NO_NEW_PRIVS to ensure the right capabilities are
Packit Service fee338
   * available.  EPERM in particular can happen with old, buggy
Packit Service fee338
   * kernels.  See:
Packit Service fee338
   *  https://github.com/projectatomic/bubblewrap/pull/175#issuecomment-278051373
Packit Service fee338
   *  https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/security/commoncap.c?id=160da84dbb39443fdade7151bc63a88f8e953077
Packit Service fee338
   */
Packit Service fee338
  for (cap = 0; cap <= CAP_LAST_CAP; cap++)
Packit Service fee338
    {
Packit Service fee338
      bool keep = FALSE;
Packit Service fee338
      if (cap < 32)
Packit Service fee338
        {
Packit Service fee338
          if (CAP_TO_MASK_0 (cap) & caps[0])
Packit Service fee338
            keep = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else
Packit Service fee338
        {
Packit Service fee338
          if (CAP_TO_MASK_1 (cap) & caps[1])
Packit Service fee338
            keep = TRUE;
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      if (keep && do_set_ambient)
Packit Service fee338
        {
Packit Service fee338
#ifdef PR_CAP_AMBIENT
Packit Service fee338
          int res = prctl (PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0);
Packit Service fee338
          if (res == -1 && !(errno == EINVAL || errno == EPERM))
Packit Service fee338
            die_with_error ("Adding ambient capability %ld", cap);
Packit Service fee338
#else
Packit Service fee338
          /* We ignore the EINVAL that results from not having PR_CAP_AMBIENT
Packit Service fee338
           * in the current kernel at runtime, so also ignore not having it
Packit Service fee338
           * in the current kernel headers at compile-time */
Packit Service fee338
#endif
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      if (!keep && do_cap_bounding)
Packit Service fee338
        {
Packit Service fee338
          int res = prctl (PR_CAPBSET_DROP, cap, 0, 0, 0);
Packit Service fee338
          if (res == -1 && !(errno == EINVAL || errno == EPERM))
Packit Service fee338
            die_with_error ("Dropping capability %ld from bounds", cap);
Packit Service fee338
        }
Packit Service fee338
    }
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
drop_cap_bounding_set (bool drop_all)
Packit Service fee338
{
Packit Service fee338
  if (!drop_all)
Packit Service fee338
    prctl_caps (requested_caps, TRUE, FALSE);
Packit Service fee338
  else
Packit Service fee338
    {
Packit Service fee338
      uint32_t no_caps[2] = {0, 0};
Packit Service fee338
      prctl_caps (no_caps, TRUE, FALSE);
Packit Service fee338
    }
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
set_ambient_capabilities (void)
Packit Service fee338
{
Packit Service fee338
  if (is_privileged)
Packit Service fee338
    return;
Packit Service fee338
  prctl_caps (requested_caps, FALSE, TRUE);
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* This acquires the privileges that the bwrap will need it to work.
Packit Service fee338
 * If bwrap is not setuid, then this does nothing, and it relies on
Packit Service fee338
 * unprivileged user namespaces to be used. This case is
Packit Service fee338
 * "is_privileged = FALSE".
Packit Service fee338
 *
Packit Service fee338
 * If bwrap is setuid, then we do things in phases.
Packit Service fee338
 * The first part is run as euid 0, but with fsuid as the real user.
Packit Service fee338
 * The second part, inside the child, is run as the real user but with
Packit Service fee338
 * capabilities.
Packit Service fee338
 * And finally we drop all capabilities.
Packit Service fee338
 * The reason for the above dance is to avoid having the setup phase
Packit Service fee338
 * being able to read files the user can't, while at the same time
Packit Service fee338
 * working around various kernel issues. See below for details.
Packit Service fee338
 */
Packit Service fee338
static void
Packit Service fee338
acquire_privs (void)
Packit Service fee338
{
Packit Service fee338
  uid_t euid, new_fsuid;
Packit Service fee338
Packit Service fee338
  euid = geteuid ();
Packit Service fee338
Packit Service fee338
  /* Are we setuid ? */
Packit Service fee338
  if (real_uid != euid)
Packit Service fee338
    {
Packit Service fee338
      if (euid != 0)
Packit Service fee338
        die ("Unexpected setuid user %d, should be 0", euid);
Packit Service fee338
Packit Service fee338
      is_privileged = TRUE;
Packit Service fee338
      /* We want to keep running as euid=0 until at the clone()
Packit Service fee338
       * operation because doing so will make the user namespace be
Packit Service fee338
       * owned by root, which makes it not ptrace:able by the user as
Packit Service fee338
       * it otherwise would be. After that we will run fully as the
Packit Service fee338
       * user, which is necessary e.g. to be able to read from a fuse
Packit Service fee338
       * mount from the user.
Packit Service fee338
       *
Packit Service fee338
       * However, we don't want to accidentally mis-use euid=0 for
Packit Service fee338
       * escalated filesystem access before the clone(), so we set
Packit Service fee338
       * fsuid to the uid.
Packit Service fee338
       */
Packit Service fee338
      if (setfsuid (real_uid) < 0)
Packit Service fee338
        die_with_error ("Unable to set fsuid");
Packit Service fee338
Packit Service fee338
      /* setfsuid can't properly report errors, check that it worked (as per manpage) */
Packit Service fee338
      new_fsuid = setfsuid (-1);
Packit Service fee338
      if (new_fsuid != real_uid)
Packit Service fee338
        die ("Unable to set fsuid (was %d)", (int)new_fsuid);
Packit Service fee338
Packit Service fee338
      /* We never need capabilities after execve(), so lets drop everything from the bounding set */
Packit Service fee338
      drop_cap_bounding_set (TRUE);
Packit Service fee338
Packit Service fee338
      /* Keep only the required capabilities for setup */
Packit Service fee338
      set_required_caps ();
Packit Service fee338
    }
Packit Service fee338
  else if (real_uid != 0 && has_caps ())
Packit Service fee338
    {
Packit Service fee338
      /* We have some capabilities in the non-setuid case, which should not happen.
Packit Service fee338
         Probably caused by the binary being setcap instead of setuid which we
Packit Service fee338
         don't support anymore */
Packit Service fee338
      die ("Unexpected capabilities but not setuid, old file caps config?");
Packit Service fee338
    }
Packit Service fee338
  else if (real_uid == 0)
Packit Service fee338
    {
Packit Service fee338
      /* If our uid is 0, default to inheriting all caps; the caller
Packit Service fee338
       * can drop them via --cap-drop.  This is used by at least rpm-ostree.
Packit Service fee338
       * Note this needs to happen before the argument parsing of --cap-drop.
Packit Service fee338
       */
Packit Service fee338
      struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
Packit Service fee338
      struct __user_cap_data_struct data[2] = { { 0 } };
Packit Service fee338
Packit Service fee338
      if (capget (&hdr, data) < 0)
Packit Service fee338
        die_with_error ("capget (for uid == 0) failed");
Packit Service fee338
Packit Service fee338
      requested_caps[0] = data[0].effective;
Packit Service fee338
      requested_caps[1] = data[1].effective;
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /* Else, we try unprivileged user namespaces */
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* This is called once we're inside the namespace */
Packit Service fee338
static void
Packit Service fee338
switch_to_user_with_privs (void)
Packit Service fee338
{
Packit Service fee338
  /* If we're in a new user namespace, we got back the bounding set, clear it again */
Packit Service fee338
  if (opt_unshare_user || opt_userns_fd != -1)
Packit Service fee338
    drop_cap_bounding_set (FALSE);
Packit Service fee338
Packit Service fee338
  /* If we switched to a new user namespace it may allow other uids/gids, so switch to the target one */
Packit Service fee338
  if (opt_userns_fd != -1)
Packit Service fee338
    {
Packit Service fee338
      if (opt_sandbox_uid != real_uid && setuid (opt_sandbox_uid) < 0)
Packit Service fee338
        die_with_error ("unable to switch to uid %d", opt_sandbox_uid);
Packit Service fee338
Packit Service fee338
      if (opt_sandbox_gid != real_gid && setgid (opt_sandbox_gid) < 0)
Packit Service fee338
        die_with_error ("unable to switch to gid %d", opt_sandbox_gid);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  if (!is_privileged)
Packit Service fee338
    return;
Packit Service fee338
Packit Service fee338
  /* Tell kernel not clear capabilities when later dropping root uid */
Packit Service fee338
  if (prctl (PR_SET_KEEPCAPS, 1, 0, 0, 0) < 0)
Packit Service fee338
    die_with_error ("prctl(PR_SET_KEEPCAPS) failed");
Packit Service fee338
Packit Service fee338
  if (setuid (opt_sandbox_uid) < 0)
Packit Service fee338
    die_with_error ("unable to drop root uid");
Packit Service fee338
Packit Service fee338
  /* Regain effective required capabilities from permitted */
Packit Service fee338
  set_required_caps ();
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* Call setuid() and use capset() to adjust capabilities */
Packit Service fee338
static void
Packit Service fee338
drop_privs (bool keep_requested_caps)
Packit Service fee338
{
Packit Service fee338
  assert (!keep_requested_caps || !is_privileged);
Packit Service fee338
  /* Drop root uid */
Packit Service fee338
  if (geteuid () == 0 && setuid (opt_sandbox_uid) < 0)
Packit Service fee338
    die_with_error ("unable to drop root uid");
Packit Service fee338
Packit Service fee338
  drop_all_caps (keep_requested_caps);
Packit Service fee338
Packit Service fee338
  /* We don't have any privs now, so mark us dumpable which makes /proc/self be owned by the user instead of root */
Packit Service fee338
  if (prctl (PR_SET_DUMPABLE, 1, 0, 0, 0) != 0)
Packit Service fee338
    die_with_error ("can't set dumpable");
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* Build the location of `path` below /newroot/: all leading slashes are
 * stripped and the remainder is appended to "/newroot/".
 * Returns whatever strconcat() returns (presumably a heap string owned by
 * the caller -- confirm against strconcat's definition). */
static char *
get_newroot_path (const char *path)
{
  const char *relative = path;

  while (relative[0] == '/')
    relative++;

  return strconcat ("/newroot/", relative);
}
Packit Service fee338
Packit Service fee338
/* Build the location of `path` below /oldroot/: all leading slashes are
 * stripped and the remainder is appended to "/oldroot/".
 * Returns whatever strconcat() returns (presumably a heap string owned by
 * the caller -- confirm against strconcat's definition). */
static char *
get_oldroot_path (const char *path)
{
  const char *relative = path;

  while (relative[0] == '/')
    relative++;

  return strconcat ("/oldroot/", relative);
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
write_uid_gid_map (uid_t sandbox_uid,
Packit Service fee338
                   uid_t parent_uid,
Packit Service fee338
                   uid_t sandbox_gid,
Packit Service fee338
                   uid_t parent_gid,
Packit Service fee338
                   pid_t pid,
Packit Service fee338
                   bool  deny_groups,
Packit Service fee338
                   bool  map_root)
Packit Service fee338
{
Packit Service fee338
  cleanup_free char *uid_map = NULL;
Packit Service fee338
  cleanup_free char *gid_map = NULL;
Packit Service fee338
  cleanup_free char *dir = NULL;
Packit Service fee338
  cleanup_fd int dir_fd = -1;
Packit Service fee338
  uid_t old_fsuid = -1;
Packit Service fee338
Packit Service fee338
  if (pid == -1)
Packit Service fee338
    dir = xstrdup ("self");
Packit Service fee338
  else
Packit Service fee338
    dir = xasprintf ("%d", pid);
Packit Service fee338
Packit Service fee338
  dir_fd = openat (proc_fd, dir, O_PATH);
Packit Service fee338
  if (dir_fd < 0)
Packit Service fee338
    die_with_error ("open /proc/%s failed", dir);
Packit Service fee338
Packit Service fee338
  if (map_root && parent_uid != 0 && sandbox_uid != 0)
Packit Service fee338
    uid_map = xasprintf ("0 %d 1\n"
Packit Service fee338
                         "%d %d 1\n", overflow_uid, sandbox_uid, parent_uid);
Packit Service fee338
  else
Packit Service fee338
    uid_map = xasprintf ("%d %d 1\n", sandbox_uid, parent_uid);
Packit Service fee338
Packit Service fee338
  if (map_root && parent_gid != 0 && sandbox_gid != 0)
Packit Service fee338
    gid_map = xasprintf ("0 %d 1\n"
Packit Service fee338
                         "%d %d 1\n", overflow_gid, sandbox_gid, parent_gid);
Packit Service fee338
  else
Packit Service fee338
    gid_map = xasprintf ("%d %d 1\n", sandbox_gid, parent_gid);
Packit Service fee338
Packit Service fee338
  /* We have to be root to be allowed to write to the uid map
Packit Service fee338
   * for setuid apps, so temporary set fsuid to 0 */
Packit Service fee338
  if (is_privileged)
Packit Service fee338
    old_fsuid = setfsuid (0);
Packit Service fee338
Packit Service fee338
  if (write_file_at (dir_fd, "uid_map", uid_map) != 0)
Packit Service fee338
    die_with_error ("setting up uid map");
Packit Service fee338
Packit Service fee338
  if (deny_groups &&
Packit Service fee338
      write_file_at (dir_fd, "setgroups", "deny\n") != 0)
Packit Service fee338
    {
Packit Service fee338
      /* If /proc/[pid]/setgroups does not exist, assume we are
Packit Service fee338
       * running a linux kernel < 3.19, i.e. we live with the
Packit Service fee338
       * vulnerability known as CVE-2014-8989 in older kernels
Packit Service fee338
       * where setgroups does not exist.
Packit Service fee338
       */
Packit Service fee338
      if (errno != ENOENT)
Packit Service fee338
        die_with_error ("error writing to setgroups");
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  if (write_file_at (dir_fd, "gid_map", gid_map) != 0)
Packit Service fee338
    die_with_error ("setting up gid map");
Packit Service fee338
Packit Service fee338
  if (is_privileged)
Packit Service fee338
    {
Packit Service fee338
      setfsuid (old_fsuid);
Packit Service fee338
      if (setfsuid (-1) != real_uid)
Packit Service fee338
        die ("Unable to re-set fsuid");
Packit Service fee338
    }
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
privileged_op (int         privileged_op_socket,
Packit Service fee338
               uint32_t    op,
Packit Service fee338
               uint32_t    flags,
Packit Service fee338
               const char *arg1,
Packit Service fee338
               const char *arg2)
Packit Service fee338
{
Packit Service fee338
  if (privileged_op_socket != -1)
Packit Service fee338
    {
Packit Service fee338
      uint32_t buffer[2048];  /* 8k, but is int32 to guarantee nice alignment */
Packit Service fee338
      PrivSepOp *op_buffer = (PrivSepOp *) buffer;
Packit Service fee338
      size_t buffer_size = sizeof (PrivSepOp);
Packit Service fee338
      uint32_t arg1_offset = 0, arg2_offset = 0;
Packit Service fee338
Packit Service fee338
      /* We're unprivileged, send this request to the privileged part */
Packit Service fee338
Packit Service fee338
      if (arg1 != NULL)
Packit Service fee338
        {
Packit Service fee338
          arg1_offset = buffer_size;
Packit Service fee338
          buffer_size += strlen (arg1) + 1;
Packit Service fee338
        }
Packit Service fee338
      if (arg2 != NULL)
Packit Service fee338
        {
Packit Service fee338
          arg2_offset = buffer_size;
Packit Service fee338
          buffer_size += strlen (arg2) + 1;
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      if (buffer_size >= sizeof (buffer))
Packit Service fee338
        die ("privilege separation operation to large");
Packit Service fee338
Packit Service fee338
      op_buffer->op = op;
Packit Service fee338
      op_buffer->flags = flags;
Packit Service fee338
      op_buffer->arg1_offset = arg1_offset;
Packit Service fee338
      op_buffer->arg2_offset = arg2_offset;
Packit Service fee338
      if (arg1 != NULL)
Packit Service fee338
        strcpy ((char *) buffer + arg1_offset, arg1);
Packit Service fee338
      if (arg2 != NULL)
Packit Service fee338
        strcpy ((char *) buffer + arg2_offset, arg2);
Packit Service fee338
Packit Service fee338
      if (write (privileged_op_socket, buffer, buffer_size) != buffer_size)
Packit Service fee338
        die ("Can't write to privileged_op_socket");
Packit Service fee338
Packit Service fee338
      if (read (privileged_op_socket, buffer, 1) != 1)
Packit Service fee338
        die ("Can't read from privileged_op_socket");
Packit Service fee338
Packit Service fee338
      return;
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /*
Packit Service fee338
   * This runs a privileged request for the unprivileged setup
Packit Service fee338
   * code. Note that since the setup code is unprivileged it is not as
Packit Service fee338
   * trusted, so we need to verify that all requests only affect the
Packit Service fee338
   * child namespace as set up by the privileged parts of the setup,
Packit Service fee338
   * and that all the code is very careful about handling input.
Packit Service fee338
   *
Packit Service fee338
   * This means:
Packit Service fee338
   *  * Bind mounts are safe, since we always use filesystem namespace. They
Packit Service fee338
   *     must be recursive though, as otherwise you can use a non-recursive bind
Packit Service fee338
   *     mount to access an otherwise over-mounted mountpoint.
Packit Service fee338
   *  * Mounting proc, tmpfs, mqueue, devpts in the child namespace is assumed to
Packit Service fee338
   *    be safe.
Packit Service fee338
   *  * Remounting RO (even non-recursive) is safe because it decreases privileges.
Packit Service fee338
   *  * sethostname() is safe only if we set up a UTS namespace
Packit Service fee338
   */
Packit Service fee338
  switch (op)
Packit Service fee338
    {
Packit Service fee338
    case PRIV_SEP_OP_DONE:
Packit Service fee338
      break;
Packit Service fee338
Packit Service fee338
    case PRIV_SEP_OP_REMOUNT_RO_NO_RECURSIVE:
Packit Service fee338
      if (bind_mount (proc_fd, NULL, arg2, BIND_READONLY) != 0)
Packit Service fee338
        die_with_error ("Can't remount readonly on %s", arg2);
Packit Service fee338
      break;
Packit Service fee338
Packit Service fee338
    case PRIV_SEP_OP_BIND_MOUNT:
Packit Service fee338
      /* We always bind directories recursively, otherwise this would let us
Packit Service fee338
         access files that are otherwise covered on the host */
Packit Service fee338
      if (bind_mount (proc_fd, arg1, arg2, BIND_RECURSIVE | flags) != 0)
Packit Service fee338
        die_with_error ("Can't bind mount %s on %s", arg1, arg2);
Packit Service fee338
      break;
Packit Service fee338
Packit Service fee338
    case PRIV_SEP_OP_PROC_MOUNT:
Packit Service fee338
      if (mount ("proc", arg1, "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL) != 0)
Packit Service fee338
        die_with_error ("Can't mount proc on %s", arg1);
Packit Service fee338
      break;
Packit Service fee338
Packit Service fee338
    case PRIV_SEP_OP_TMPFS_MOUNT:
Packit Service fee338
      {
Packit Service fee338
        cleanup_free char *opt = label_mount ("mode=0755", opt_file_label);
Packit Service fee338
        if (mount ("tmpfs", arg1, "tmpfs", MS_NOSUID | MS_NODEV, opt) != 0)
Packit Service fee338
          die_with_error ("Can't mount tmpfs on %s", arg1);
Packit Service fee338
        break;
Packit Service fee338
      }
Packit Service fee338
Packit Service fee338
    case PRIV_SEP_OP_DEVPTS_MOUNT:
Packit Service fee338
      if (mount ("devpts", arg1, "devpts", MS_NOSUID | MS_NOEXEC,
Packit Service fee338
                 "newinstance,ptmxmode=0666,mode=620") != 0)
Packit Service fee338
        die_with_error ("Can't mount devpts on %s", arg1);
Packit Service fee338
      break;
Packit Service fee338
Packit Service fee338
    case PRIV_SEP_OP_MQUEUE_MOUNT:
Packit Service fee338
      if (mount ("mqueue", arg1, "mqueue", 0, NULL) != 0)
Packit Service fee338
        die_with_error ("Can't mount mqueue on %s", arg1);
Packit Service fee338
      break;
Packit Service fee338
Packit Service fee338
    case PRIV_SEP_OP_SET_HOSTNAME:
Packit Service fee338
      /* This is checked at the start, but lets verify it here in case
Packit Service fee338
         something manages to send hacked priv-sep operation requests. */
Packit Service fee338
      if (!opt_unshare_uts)
Packit Service fee338
        die ("Refusing to set hostname in original namespace");
Packit Service fee338
      if (sethostname (arg1, strlen(arg1)) != 0)
Packit Service fee338
        die_with_error ("Can't set hostname to %s", arg1);
Packit Service fee338
      break;
Packit Service fee338
Packit Service fee338
    default:
Packit Service fee338
      die ("Unexpected privileged op %d", op);
Packit Service fee338
    }
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* This is run unprivileged in the child namespace but can request
Packit Service fee338
 * some privileged operations (also in the child namespace) via the
Packit Service fee338
 * privileged_op_socket.
Packit Service fee338
 */
Packit Service fee338
static void
Packit Service fee338
setup_newroot (bool unshare_pid,
Packit Service fee338
               int  privileged_op_socket)
Packit Service fee338
{
Packit Service fee338
  SetupOp *op;
Packit Service fee338
Packit Service fee338
  for (op = ops; op != NULL; op = op->next)
Packit Service fee338
    {
Packit Service fee338
      cleanup_free char *source = NULL;
Packit Service fee338
      cleanup_free char *dest = NULL;
Packit Service fee338
      int source_mode = 0;
Packit Service fee338
      int i;
Packit Service fee338
Packit Service fee338
      if (op->source &&
Packit Service fee338
          op->type != SETUP_MAKE_SYMLINK)
Packit Service fee338
        {
Packit Service fee338
          source = get_oldroot_path (op->source);
Packit Service fee338
          source_mode = get_file_mode (source);
Packit Service fee338
          if (source_mode < 0)
Packit Service fee338
            {
Packit Service fee338
              if (op->flags & ALLOW_NOTEXIST && errno == ENOENT)
Packit Service fee338
                continue; /* Ignore and move on */
Packit Service fee338
              die_with_error("Can't get type of source %s", op->source);
Packit Service fee338
            }
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      if (op->dest &&
Packit Service fee338
          (op->flags & NO_CREATE_DEST) == 0)
Packit Service fee338
        {
Packit Service fee338
          dest = get_newroot_path (op->dest);
Packit Service fee338
          if (mkdir_with_parents (dest, 0755, FALSE) != 0)
Packit Service fee338
            die_with_error ("Can't mkdir parents for %s", op->dest);
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      switch (op->type)
Packit Service fee338
        {
Packit Service fee338
        case SETUP_RO_BIND_MOUNT:
Packit Service fee338
        case SETUP_DEV_BIND_MOUNT:
Packit Service fee338
        case SETUP_BIND_MOUNT:
Packit Service fee338
          if (source_mode == S_IFDIR)
Packit Service fee338
            {
Packit Service fee338
              if (ensure_dir (dest, 0755) != 0)
Packit Service fee338
                die_with_error ("Can't mkdir %s", op->dest);
Packit Service fee338
            }
Packit Service fee338
          else if (ensure_file (dest, 0666) != 0)
Packit Service fee338
            die_with_error ("Can't create file at %s", op->dest);
Packit Service fee338
Packit Service fee338
          privileged_op (privileged_op_socket,
Packit Service fee338
                         PRIV_SEP_OP_BIND_MOUNT,
Packit Service fee338
                         (op->type == SETUP_RO_BIND_MOUNT ? BIND_READONLY : 0) |
Packit Service fee338
                         (op->type == SETUP_DEV_BIND_MOUNT ? BIND_DEVICES : 0),
Packit Service fee338
                         source, dest);
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_REMOUNT_RO_NO_RECURSIVE:
Packit Service fee338
          privileged_op (privileged_op_socket,
Packit Service fee338
                         PRIV_SEP_OP_REMOUNT_RO_NO_RECURSIVE, 0, NULL, dest);
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_MOUNT_PROC:
Packit Service fee338
          if (ensure_dir (dest, 0755) != 0)
Packit Service fee338
            die_with_error ("Can't mkdir %s", op->dest);
Packit Service fee338
Packit Service fee338
          if (unshare_pid || opt_pidns_fd != -1)
Packit Service fee338
            {
Packit Service fee338
              /* Our own procfs */
Packit Service fee338
              privileged_op (privileged_op_socket,
Packit Service fee338
                             PRIV_SEP_OP_PROC_MOUNT, 0,
Packit Service fee338
                             dest, NULL);
Packit Service fee338
            }
Packit Service fee338
          else
Packit Service fee338
            {
Packit Service fee338
              /* Use system procfs, as we share pid namespace anyway */
Packit Service fee338
              privileged_op (privileged_op_socket,
Packit Service fee338
                             PRIV_SEP_OP_BIND_MOUNT, 0,
Packit Service fee338
                             "oldroot/proc", dest);
Packit Service fee338
            }
Packit Service fee338
Packit Service fee338
          /* There are a bunch of weird old subdirs of /proc that could potentially be
Packit Service fee338
             problematic (for instance /proc/sysrq-trigger lets you shut down the machine
Packit Service fee338
             if you have write access). We should not have access to these as a non-privileged
Packit Service fee338
             user, but lets cover them anyway just to make sure */
Packit Service fee338
          const char *cover_proc_dirs[] = { "sys", "sysrq-trigger", "irq", "bus" };
Packit Service fee338
          for (i = 0; i < N_ELEMENTS (cover_proc_dirs); i++)
Packit Service fee338
            {
Packit Service fee338
              cleanup_free char *subdir = strconcat3 (dest, "/", cover_proc_dirs[i]);
Packit Service fee338
              if (access (subdir, W_OK) < 0)
Packit Service fee338
                {
Packit Service fee338
                  /* The file is already read-only or doesn't exist.  */
Packit Service fee338
                  if (errno == EACCES || errno == ENOENT)
Packit Service fee338
                    continue;
Packit Service fee338
Packit Service fee338
                  die_with_error ("Can't access %s", subdir);
Packit Service fee338
                }
Packit Service fee338
Packit Service fee338
              privileged_op (privileged_op_socket,
Packit Service fee338
                             PRIV_SEP_OP_BIND_MOUNT, BIND_READONLY,
Packit Service fee338
                             subdir, subdir);
Packit Service fee338
            }
Packit Service fee338
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_MOUNT_DEV:
Packit Service fee338
          if (ensure_dir (dest, 0755) != 0)
Packit Service fee338
            die_with_error ("Can't mkdir %s", op->dest);
Packit Service fee338
Packit Service fee338
          privileged_op (privileged_op_socket,
Packit Service fee338
                         PRIV_SEP_OP_TMPFS_MOUNT, 0,
Packit Service fee338
                         dest, NULL);
Packit Service fee338
Packit Service fee338
          static const char *const devnodes[] = { "null", "zero", "full", "random", "urandom", "tty" };
Packit Service fee338
          for (i = 0; i < N_ELEMENTS (devnodes); i++)
Packit Service fee338
            {
Packit Service fee338
              cleanup_free char *node_dest = strconcat3 (dest, "/", devnodes[i]);
Packit Service fee338
              cleanup_free char *node_src = strconcat ("/oldroot/dev/", devnodes[i]);
Packit Service fee338
              if (create_file (node_dest, 0666, NULL) != 0)
Packit Service fee338
                die_with_error ("Can't create file %s/%s", op->dest, devnodes[i]);
Packit Service fee338
              privileged_op (privileged_op_socket,
Packit Service fee338
                             PRIV_SEP_OP_BIND_MOUNT, BIND_DEVICES,
Packit Service fee338
                             node_src, node_dest);
Packit Service fee338
            }
Packit Service fee338
Packit Service fee338
          static const char *const stdionodes[] = { "stdin", "stdout", "stderr" };
Packit Service fee338
          for (i = 0; i < N_ELEMENTS (stdionodes); i++)
Packit Service fee338
            {
Packit Service fee338
              cleanup_free char *target = xasprintf ("/proc/self/fd/%d", i);
Packit Service fee338
              cleanup_free char *node_dest = strconcat3 (dest, "/", stdionodes[i]);
Packit Service fee338
              if (symlink (target, node_dest) < 0)
Packit Service fee338
                die_with_error ("Can't create symlink %s/%s", op->dest, stdionodes[i]);
Packit Service fee338
            }
Packit Service fee338
Packit Service fee338
          /* /dev/fd and /dev/core - legacy, but both nspawn and docker do these */
Packit Service fee338
          { cleanup_free char *dev_fd = strconcat (dest, "/fd");
Packit Service fee338
            if (symlink ("/proc/self/fd", dev_fd) < 0)
Packit Service fee338
              die_with_error ("Can't create symlink %s", dev_fd);
Packit Service fee338
          }
Packit Service fee338
          { cleanup_free char *dev_core = strconcat (dest, "/core");
Packit Service fee338
            if (symlink ("/proc/kcore", dev_core) < 0)
Packit Service fee338
              die_with_error ("Can't create symlink %s", dev_core);
Packit Service fee338
          }
Packit Service fee338
Packit Service fee338
          {
Packit Service fee338
            cleanup_free char *pts = strconcat (dest, "/pts");
Packit Service fee338
            cleanup_free char *ptmx = strconcat (dest, "/ptmx");
Packit Service fee338
            cleanup_free char *shm = strconcat (dest, "/shm");
Packit Service fee338
Packit Service fee338
            if (mkdir (shm, 0755) == -1)
Packit Service fee338
              die_with_error ("Can't create %s/shm", op->dest);
Packit Service fee338
Packit Service fee338
            if (mkdir (pts, 0755) == -1)
Packit Service fee338
              die_with_error ("Can't create %s/devpts", op->dest);
Packit Service fee338
            privileged_op (privileged_op_socket,
Packit Service fee338
                           PRIV_SEP_OP_DEVPTS_MOUNT, 0, pts, NULL);
Packit Service fee338
Packit Service fee338
            if (symlink ("pts/ptmx", ptmx) != 0)
Packit Service fee338
              die_with_error ("Can't make symlink at %s/ptmx", op->dest);
Packit Service fee338
          }
Packit Service fee338
Packit Service fee338
          /* If stdout is a tty, that means the sandbox can write to the
Packit Service fee338
             outside-sandbox tty. In that case we also create a /dev/console
Packit Service fee338
             that points to this tty device. This should not cause any more
Packit Service fee338
             access than we already have, and it makes ttyname() work in the
Packit Service fee338
             sandbox. */
Packit Service fee338
          if (host_tty_dev != NULL && *host_tty_dev != 0)
Packit Service fee338
            {
Packit Service fee338
              cleanup_free char *src_tty_dev = strconcat ("/oldroot", host_tty_dev);
Packit Service fee338
              cleanup_free char *dest_console = strconcat (dest, "/console");
Packit Service fee338
Packit Service fee338
              if (create_file (dest_console, 0666, NULL) != 0)
Packit Service fee338
                die_with_error ("creating %s/console", op->dest);
Packit Service fee338
Packit Service fee338
              privileged_op (privileged_op_socket,
Packit Service fee338
                             PRIV_SEP_OP_BIND_MOUNT, BIND_DEVICES,
Packit Service fee338
                             src_tty_dev, dest_console);
Packit Service fee338
            }
Packit Service fee338
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_MOUNT_TMPFS:
Packit Service fee338
          if (ensure_dir (dest, 0755) != 0)
Packit Service fee338
            die_with_error ("Can't mkdir %s", op->dest);
Packit Service fee338
Packit Service fee338
          privileged_op (privileged_op_socket,
Packit Service fee338
                         PRIV_SEP_OP_TMPFS_MOUNT, 0,
Packit Service fee338
                         dest, NULL);
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_MOUNT_MQUEUE:
Packit Service fee338
          if (ensure_dir (dest, 0755) != 0)
Packit Service fee338
            die_with_error ("Can't mkdir %s", op->dest);
Packit Service fee338
Packit Service fee338
          privileged_op (privileged_op_socket,
Packit Service fee338
                         PRIV_SEP_OP_MQUEUE_MOUNT, 0,
Packit Service fee338
                         dest, NULL);
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_MAKE_DIR:
Packit Service fee338
          if (ensure_dir (dest, 0755) != 0)
Packit Service fee338
            die_with_error ("Can't mkdir %s", op->dest);
Packit Service fee338
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_MAKE_FILE:
Packit Service fee338
          {
Packit Service fee338
            cleanup_fd int dest_fd = -1;
Packit Service fee338
Packit Service fee338
            dest_fd = creat (dest, 0666);
Packit Service fee338
            if (dest_fd == -1)
Packit Service fee338
              die_with_error ("Can't create file %s", op->dest);
Packit Service fee338
Packit Service fee338
            if (copy_file_data (op->fd, dest_fd) != 0)
Packit Service fee338
              die_with_error ("Can't write data to file %s", op->dest);
Packit Service fee338
Packit Service fee338
            close (op->fd);
Packit Service fee338
            op->fd = -1;
Packit Service fee338
          }
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_MAKE_BIND_FILE:
Packit Service fee338
        case SETUP_MAKE_RO_BIND_FILE:
Packit Service fee338
          {
Packit Service fee338
            cleanup_fd int dest_fd = -1;
Packit Service fee338
            char tempfile[] = "/bindfileXXXXXX";
Packit Service fee338
Packit Service fee338
            dest_fd = mkstemp (tempfile);
Packit Service fee338
            if (dest_fd == -1)
Packit Service fee338
              die_with_error ("Can't create tmpfile for %s", op->dest);
Packit Service fee338
Packit Service fee338
            if (copy_file_data (op->fd, dest_fd) != 0)
Packit Service fee338
              die_with_error ("Can't write data to file %s", op->dest);
Packit Service fee338
Packit Service fee338
            close (op->fd);
Packit Service fee338
            op->fd = -1;
Packit Service fee338
Packit Service fee338
            assert (dest != NULL);
Packit Service fee338
Packit Service fee338
            if (ensure_file (dest, 0666) != 0)
Packit Service fee338
              die_with_error ("Can't create file at %s", op->dest);
Packit Service fee338
Packit Service fee338
            privileged_op (privileged_op_socket,
Packit Service fee338
                           PRIV_SEP_OP_BIND_MOUNT,
Packit Service fee338
                           (op->type == SETUP_MAKE_RO_BIND_FILE ? BIND_READONLY : 0),
Packit Service fee338
                           tempfile, dest);
Packit Service fee338
Packit Service fee338
            /* Remove the file so we're sure the app can't get to it in any other way.
Packit Service fee338
               Its outside the container chroot, so it shouldn't be possible, but lets
Packit Service fee338
               make it really sure. */
Packit Service fee338
            unlink (tempfile);
Packit Service fee338
          }
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_MAKE_SYMLINK:
Packit Service fee338
          assert (op->source != NULL);  /* guaranteed by the constructor */
Packit Service fee338
          if (symlink (op->source, dest) != 0)
Packit Service fee338
            die_with_error ("Can't make symlink at %s", op->dest);
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        case SETUP_SET_HOSTNAME:
Packit Service fee338
          assert (op->dest != NULL);  /* guaranteed by the constructor */
Packit Service fee338
          privileged_op (privileged_op_socket,
Packit Service fee338
                         PRIV_SEP_OP_SET_HOSTNAME, 0,
Packit Service fee338
                         op->dest, NULL);
Packit Service fee338
          break;
Packit Service fee338
Packit Service fee338
        default:
Packit Service fee338
          die ("Unexpected type %d", op->type);
Packit Service fee338
        }
Packit Service fee338
    }
Packit Service fee338
  privileged_op (privileged_op_socket,
Packit Service fee338
                 PRIV_SEP_OP_DONE, 0, NULL, NULL);
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* Do not leak file descriptors already used by setup_newroot () */
Packit Service fee338
static void
Packit Service fee338
close_ops_fd (void)
Packit Service fee338
{
Packit Service fee338
  SetupOp *op;
Packit Service fee338
Packit Service fee338
  for (op = ops; op != NULL; op = op->next)
Packit Service fee338
    {
Packit Service fee338
      if (op->fd != -1)
Packit Service fee338
        {
Packit Service fee338
          (void) close (op->fd);
Packit Service fee338
          op->fd = -1;
Packit Service fee338
        }
Packit Service fee338
    }
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
/* We need to resolve relative symlinks in the sandbox before we
Packit Service fee338
   chroot so that absolute symlinks are handled correctly. We also
Packit Service fee338
   need to do this after we've switched to the real uid so that
Packit Service fee338
   e.g. paths on fuse mounts work */
Packit Service fee338
static void
Packit Service fee338
resolve_symlinks_in_ops (void)
Packit Service fee338
{
Packit Service fee338
  SetupOp *op;
Packit Service fee338
Packit Service fee338
  for (op = ops; op != NULL; op = op->next)
Packit Service fee338
    {
Packit Service fee338
      const char *old_source;
Packit Service fee338
Packit Service fee338
      switch (op->type)
Packit Service fee338
        {
Packit Service fee338
        case SETUP_RO_BIND_MOUNT:
Packit Service fee338
        case SETUP_DEV_BIND_MOUNT:
Packit Service fee338
        case SETUP_BIND_MOUNT:
Packit Service fee338
          old_source = op->source;
Packit Service fee338
          op->source = realpath (old_source, NULL);
Packit Service fee338
          if (op->source == NULL)
Packit Service fee338
            {
Packit Service fee338
              if (op->flags & ALLOW_NOTEXIST && errno == ENOENT)
Packit Service fee338
                op->source = old_source;
Packit Service fee338
              else
Packit Service fee338
                die_with_error("Can't find source path %s", old_source);
Packit Service fee338
            }
Packit Service fee338
          break;
Packit Service fee338
        default:
Packit Service fee338
          break;
Packit Service fee338
        }
Packit Service fee338
    }
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
Packit Service fee338
/* Turn a string offset found in a privilege-separation request into a
 * pointer into the request buffer.
 *
 * buffer:      start of the request data.
 * buffer_size: number of bytes of request data in buffer.
 * offset:      byte offset of the string from the start of buffer;
 *              0 means that no string was supplied.
 *
 * Returns NULL when offset is 0, otherwise buffer + offset.  Calls
 * die () when offset is larger than buffer_size.  An offset equal to
 * buffer_size is accepted; read_priv_sec_op () stores a NUL byte at
 * that position before calling this function, so such an offset
 * resolves to an empty string.
 */
static const char *
resolve_string_offset (void    *buffer,
                       size_t   buffer_size,
                       uint32_t offset)
{
  if (offset == 0)
    return NULL;

  /* Both values are unsigned, so print them with unsigned conversions;
     signed ones would show large bogus offsets as negative numbers. */
  if (offset > buffer_size)
    die ("Invalid string offset %u (buffer size %zu)",
         (unsigned int) offset, buffer_size);

  return (const char *) buffer + offset;
}
Packit Service fee338
Packit Service fee338
static uint32_t
Packit Service fee338
read_priv_sec_op (int          read_socket,
Packit Service fee338
                  void        *buffer,
Packit Service fee338
                  size_t       buffer_size,
Packit Service fee338
                  uint32_t    *flags,
Packit Service fee338
                  const char **arg1,
Packit Service fee338
                  const char **arg2)
Packit Service fee338
{
Packit Service fee338
  const PrivSepOp *op = (const PrivSepOp *) buffer;
Packit Service fee338
  ssize_t rec_len;
Packit Service fee338
Packit Service fee338
  do
Packit Service fee338
    rec_len = read (read_socket, buffer, buffer_size - 1);
Packit Service fee338
  while (rec_len == -1 && errno == EINTR);
Packit Service fee338
Packit Service fee338
  if (rec_len < 0)
Packit Service fee338
    die_with_error ("Can't read from unprivileged helper");
Packit Service fee338
Packit Service fee338
  if (rec_len == 0)
Packit Service fee338
    exit (1); /* Privileged helper died and printed error, so exit silently */
Packit Service fee338
Packit Service fee338
  if (rec_len < sizeof (PrivSepOp))
Packit Service fee338
    die ("Invalid size %zd from unprivileged helper", rec_len);
Packit Service fee338
Packit Service fee338
  /* Guarantee zero termination of any strings */
Packit Service fee338
  ((char *) buffer)[rec_len] = 0;
Packit Service fee338
Packit Service fee338
  *flags = op->flags;
Packit Service fee338
  *arg1 = resolve_string_offset (buffer, rec_len, op->arg1_offset);
Packit Service fee338
  *arg2 = resolve_string_offset (buffer, rec_len, op->arg2_offset);
Packit Service fee338
Packit Service fee338
  return op->op;
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void __attribute__ ((noreturn))
Packit Service fee338
print_version_and_exit (void)
Packit Service fee338
{
Packit Service fee338
  printf ("%s\n", PACKAGE_STRING);
Packit Service fee338
  exit (0);
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
parse_args_recurse (int          *argcp,
Packit Service fee338
                    const char ***argvp,
Packit Service fee338
                    bool          in_file,
Packit Service fee338
                    int          *total_parsed_argc_p)
Packit Service fee338
{
Packit Service fee338
  SetupOp *op;
Packit Service fee338
  int argc = *argcp;
Packit Service fee338
  const char **argv = *argvp;
Packit Service fee338
  /* I can't imagine a case where someone wants more than this.
Packit Service fee338
   * If you do...you should be able to pass multiple files
Packit Service fee338
   * via a single tmpfs and linking them there, etc.
Packit Service fee338
   *
Packit Service fee338
   * We're adding this hardening due to precedent from
Packit Service fee338
   * http://googleprojectzero.blogspot.com/2014/08/the-poisoned-nul-byte-2014-edition.html
Packit Service fee338
   *
Packit Service fee338
   * I picked 9000 because the Internet told me to and it was hard to
Packit Service fee338
   * resist.
Packit Service fee338
   */
Packit Service fee338
  static const uint32_t MAX_ARGS = 9000;
Packit Service fee338
Packit Service fee338
  if (*total_parsed_argc_p > MAX_ARGS)
Packit Service fee338
    die ("Exceeded maximum number of arguments %u", MAX_ARGS);
Packit Service fee338
Packit Service fee338
  while (argc > 0)
Packit Service fee338
    {
Packit Service fee338
      const char *arg = argv[0];
Packit Service fee338
Packit Service fee338
      if (strcmp (arg, "--help") == 0)
Packit Service fee338
        {
Packit Service fee338
          usage (EXIT_SUCCESS, stdout);
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--version") == 0)
Packit Service fee338
        {
Packit Service fee338
          print_version_and_exit ();
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--args") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
          const char *p, *data_end;
Packit Service fee338
          size_t data_len;
Packit Service fee338
          cleanup_free const char **data_argv = NULL;
Packit Service fee338
          const char **data_argv_copy;
Packit Service fee338
          int data_argc;
Packit Service fee338
          int i;
Packit Service fee338
Packit Service fee338
          if (in_file)
Packit Service fee338
            die ("--args not supported in arguments file");
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--args takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          /* opt_args_data is essentially a recursive argv array, which we must
Packit Service fee338
           * keep allocated until exit time, since its argv entries get used
Packit Service fee338
           * by the other cases in parse_args_recurse() when we recurse. */
Packit Service fee338
          opt_args_data = load_file_data (the_fd, &data_len);
Packit Service fee338
          if (opt_args_data == NULL)
Packit Service fee338
            die_with_error ("Can't read --args data");
Packit Service fee338
          (void) close (the_fd);
Packit Service fee338
Packit Service fee338
          data_end = opt_args_data + data_len;
Packit Service fee338
          data_argc = 0;
Packit Service fee338
Packit Service fee338
          p = opt_args_data;
Packit Service fee338
          while (p != NULL && p < data_end)
Packit Service fee338
            {
Packit Service fee338
              data_argc++;
Packit Service fee338
              (*total_parsed_argc_p)++;
Packit Service fee338
              if (*total_parsed_argc_p > MAX_ARGS)
Packit Service fee338
                die ("Exceeded maximum number of arguments %u", MAX_ARGS);
Packit Service fee338
              p = memchr (p, 0, data_end - p);
Packit Service fee338
              if (p != NULL)
Packit Service fee338
                p++;
Packit Service fee338
            }
Packit Service fee338
Packit Service fee338
          data_argv = xcalloc (sizeof (char *) * (data_argc + 1));
Packit Service fee338
Packit Service fee338
          i = 0;
Packit Service fee338
          p = opt_args_data;
Packit Service fee338
          while (p != NULL && p < data_end)
Packit Service fee338
            {
Packit Service fee338
              /* Note: load_file_data always adds a nul terminator, so this is safe
Packit Service fee338
               * even for the last string. */
Packit Service fee338
              data_argv[i++] = p;
Packit Service fee338
              p = memchr (p, 0, data_end - p);
Packit Service fee338
              if (p != NULL)
Packit Service fee338
                p++;
Packit Service fee338
            }
Packit Service fee338
Packit Service fee338
          data_argv_copy = data_argv; /* Don't change data_argv, we need to free it */
Packit Service fee338
          parse_args_recurse (&data_argc, &data_argv_copy, TRUE, total_parsed_argc_p);
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--unshare-all") == 0)
Packit Service fee338
        {
Packit Service fee338
          /* Keep this in order with the older (legacy) --unshare arguments,
Packit Service fee338
           * we use the --try variants of user and cgroup, since we want
Packit Service fee338
           * to support systems/kernels without support for those.
Packit Service fee338
           */
Packit Service fee338
          opt_unshare_user_try = opt_unshare_ipc = opt_unshare_pid =
Packit Service fee338
            opt_unshare_uts = opt_unshare_cgroup_try =
Packit Service fee338
            opt_unshare_net = TRUE;
Packit Service fee338
        }
Packit Service fee338
      /* Begin here the older individual --unshare variants */
Packit Service fee338
      else if (strcmp (arg, "--unshare-user") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_unshare_user = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--unshare-user-try") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_unshare_user_try = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--unshare-ipc") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_unshare_ipc = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--unshare-pid") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_unshare_pid = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--unshare-net") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_unshare_net = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--unshare-uts") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_unshare_uts = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--unshare-cgroup") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_unshare_cgroup = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--unshare-cgroup-try") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_unshare_cgroup_try = TRUE;
Packit Service fee338
        }
Packit Service fee338
      /* Begin here the newer --share variants */
Packit Service fee338
      else if (strcmp (arg, "--share-net") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_unshare_net = FALSE;
Packit Service fee338
        }
Packit Service fee338
      /* End --share variants, other arguments begin */
Packit Service fee338
      else if (strcmp (arg, "--chdir") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--chdir takes one argument");
Packit Service fee338
Packit Service fee338
          opt_chdir_path = argv[1];
Packit Service fee338
          argv++;
Packit Service fee338
          argc--;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--remount-ro") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--remount-ro takes one argument");
Packit Service fee338
Packit Service fee338
          SetupOp *op = setup_op_new (SETUP_REMOUNT_RO_NO_RECURSIVE);
Packit Service fee338
          op->dest = argv[1];
Packit Service fee338
Packit Service fee338
          argv++;
Packit Service fee338
          argc--;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp(arg, "--bind") == 0 ||
Packit Service fee338
               strcmp(arg, "--bind-try") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 3)
Packit Service fee338
            die ("%s takes two arguments", arg);
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_BIND_MOUNT);
Packit Service fee338
          op->source = argv[1];
Packit Service fee338
          op->dest = argv[2];
Packit Service fee338
          if (strcmp(arg, "--bind-try") == 0)
Packit Service fee338
            op->flags = ALLOW_NOTEXIST;
Packit Service fee338
Packit Service fee338
          argv += 2;
Packit Service fee338
          argc -= 2;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp(arg, "--ro-bind") == 0 ||
Packit Service fee338
               strcmp(arg, "--ro-bind-try") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 3)
Packit Service fee338
            die ("%s takes two arguments", arg);
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_RO_BIND_MOUNT);
Packit Service fee338
          op->source = argv[1];
Packit Service fee338
          op->dest = argv[2];
Packit Service fee338
          if (strcmp(arg, "--ro-bind-try") == 0)
Packit Service fee338
            op->flags = ALLOW_NOTEXIST;
Packit Service fee338
Packit Service fee338
          argv += 2;
Packit Service fee338
          argc -= 2;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--dev-bind") == 0 ||
Packit Service fee338
               strcmp (arg, "--dev-bind-try") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 3)
Packit Service fee338
            die ("%s takes two arguments", arg);
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_DEV_BIND_MOUNT);
Packit Service fee338
          op->source = argv[1];
Packit Service fee338
          op->dest = argv[2];
Packit Service fee338
          if (strcmp(arg, "--dev-bind-try") == 0)
Packit Service fee338
            op->flags = ALLOW_NOTEXIST;
Packit Service fee338
Packit Service fee338
          argv += 2;
Packit Service fee338
          argc -= 2;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--proc") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--proc takes an argument");
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_MOUNT_PROC);
Packit Service fee338
          op->dest = argv[1];
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--exec-label") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--exec-label takes an argument");
Packit Service fee338
          opt_exec_label = argv[1];
Packit Service fee338
          die_unless_label_valid (opt_exec_label);
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--file-label") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--file-label takes an argument");
Packit Service fee338
          opt_file_label = argv[1];
Packit Service fee338
          die_unless_label_valid (opt_file_label);
Packit Service fee338
          if (label_create_file (opt_file_label))
Packit Service fee338
            die_with_error ("--file-label setup failed");
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--dev") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--dev takes an argument");
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_MOUNT_DEV);
Packit Service fee338
          op->dest = argv[1];
Packit Service fee338
          opt_needs_devpts = TRUE;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--tmpfs") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--tmpfs takes an argument");
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_MOUNT_TMPFS);
Packit Service fee338
          op->dest = argv[1];
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--mqueue") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--mqueue takes an argument");
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_MOUNT_MQUEUE);
Packit Service fee338
          op->dest = argv[1];
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--dir") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--dir takes an argument");
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_MAKE_DIR);
Packit Service fee338
          op->dest = argv[1];
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--file") == 0)
Packit Service fee338
        {
Packit Service fee338
          int file_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 3)
Packit Service fee338
            die ("--file takes two arguments");
Packit Service fee338
Packit Service fee338
          file_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || file_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_MAKE_FILE);
Packit Service fee338
          op->fd = file_fd;
Packit Service fee338
          op->dest = argv[2];
Packit Service fee338
Packit Service fee338
          argv += 2;
Packit Service fee338
          argc -= 2;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--bind-data") == 0)
Packit Service fee338
        {
Packit Service fee338
          int file_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 3)
Packit Service fee338
            die ("--bind-data takes two arguments");
Packit Service fee338
Packit Service fee338
          file_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || file_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_MAKE_BIND_FILE);
Packit Service fee338
          op->fd = file_fd;
Packit Service fee338
          op->dest = argv[2];
Packit Service fee338
Packit Service fee338
          argv += 2;
Packit Service fee338
          argc -= 2;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--ro-bind-data") == 0)
Packit Service fee338
        {
Packit Service fee338
          int file_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 3)
Packit Service fee338
            die ("--ro-bind-data takes two arguments");
Packit Service fee338
Packit Service fee338
          file_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || file_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_MAKE_RO_BIND_FILE);
Packit Service fee338
          op->fd = file_fd;
Packit Service fee338
          op->dest = argv[2];
Packit Service fee338
Packit Service fee338
          argv += 2;
Packit Service fee338
          argc -= 2;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--symlink") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 3)
Packit Service fee338
            die ("--symlink takes two arguments");
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_MAKE_SYMLINK);
Packit Service fee338
          op->source = argv[1];
Packit Service fee338
          op->dest = argv[2];
Packit Service fee338
Packit Service fee338
          argv += 2;
Packit Service fee338
          argc -= 2;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--lock-file") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--lock-file takes an argument");
Packit Service fee338
Packit Service fee338
          (void) lock_file_new (argv[1]);
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--sync-fd") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--sync-fd takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_sync_fd = the_fd;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--block-fd") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--block-fd takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_block_fd = the_fd;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--userns-block-fd") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--userns-block-fd takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_userns_block_fd = the_fd;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--info-fd") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--info-fd takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_info_fd = the_fd;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--json-status-fd") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--json-status-fd takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_json_status_fd = the_fd;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--seccomp") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--seccomp takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_seccomp_fd = the_fd;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--userns") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--userns takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_userns_fd = the_fd;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--userns2") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--userns2 takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_userns2_fd = the_fd;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--pidns") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_fd;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--pidns takes an argument");
Packit Service fee338
Packit Service fee338
          the_fd = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_fd < 0)
Packit Service fee338
            die ("Invalid fd: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_pidns_fd = the_fd;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--setenv") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 3)
Packit Service fee338
            die ("--setenv takes two arguments");
Packit Service fee338
Packit Service fee338
          xsetenv (argv[1], argv[2], 1);
Packit Service fee338
Packit Service fee338
          argv += 2;
Packit Service fee338
          argc -= 2;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--unsetenv") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--unsetenv takes an argument");
Packit Service fee338
Packit Service fee338
          xunsetenv (argv[1]);
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--uid") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_uid;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--uid takes an argument");
Packit Service fee338
Packit Service fee338
          the_uid = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_uid < 0)
Packit Service fee338
            die ("Invalid uid: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_sandbox_uid = the_uid;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--gid") == 0)
Packit Service fee338
        {
Packit Service fee338
          int the_gid;
Packit Service fee338
          char *endptr;
Packit Service fee338
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--gid takes an argument");
Packit Service fee338
Packit Service fee338
          the_gid = strtol (argv[1], &endptr, 10);
Packit Service fee338
          if (argv[1][0] == 0 || endptr[0] != 0 || the_gid < 0)
Packit Service fee338
            die ("Invalid gid: %s", argv[1]);
Packit Service fee338
Packit Service fee338
          opt_sandbox_gid = the_gid;
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--hostname") == 0)
Packit Service fee338
        {
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--hostname takes an argument");
Packit Service fee338
Packit Service fee338
          op = setup_op_new (SETUP_SET_HOSTNAME);
Packit Service fee338
          op->dest = argv[1];
Packit Service fee338
          op->flags = NO_CREATE_DEST;
Packit Service fee338
Packit Service fee338
          opt_sandbox_hostname = argv[1];
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--new-session") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_new_session = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--die-with-parent") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_die_with_parent = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--as-pid-1") == 0)
Packit Service fee338
        {
Packit Service fee338
          opt_as_pid_1 = TRUE;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--cap-add") == 0)
Packit Service fee338
        {
Packit Service fee338
          cap_value_t cap;
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--cap-add takes an argument");
Packit Service fee338
Packit Service fee338
          opt_cap_add_or_drop_used = TRUE;
Packit Service fee338
Packit Service fee338
          if (strcasecmp (argv[1], "ALL") == 0)
Packit Service fee338
            {
Packit Service fee338
              requested_caps[0] = requested_caps[1] = 0xFFFFFFFF;
Packit Service fee338
            }
Packit Service fee338
          else
Packit Service fee338
            {
Packit Service fee338
              if (cap_from_name (argv[1], &cap) < 0)
Packit Service fee338
                die ("unknown cap: %s", argv[1]);
Packit Service fee338
Packit Service fee338
              if (cap < 32)
Packit Service fee338
                requested_caps[0] |= CAP_TO_MASK_0 (cap);
Packit Service fee338
              else
Packit Service fee338
                requested_caps[1] |= CAP_TO_MASK_1 (cap - 32);
Packit Service fee338
            }
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--cap-drop") == 0)
Packit Service fee338
        {
Packit Service fee338
          cap_value_t cap;
Packit Service fee338
          if (argc < 2)
Packit Service fee338
            die ("--cap-drop takes an argument");
Packit Service fee338
Packit Service fee338
          opt_cap_add_or_drop_used = TRUE;
Packit Service fee338
Packit Service fee338
          if (strcasecmp (argv[1], "ALL") == 0)
Packit Service fee338
            {
Packit Service fee338
              requested_caps[0] = requested_caps[1] = 0;
Packit Service fee338
            }
Packit Service fee338
          else
Packit Service fee338
            {
Packit Service fee338
              if (cap_from_name (argv[1], &cap) < 0)
Packit Service fee338
                die ("unknown cap: %s", argv[1]);
Packit Service fee338
Packit Service fee338
              if (cap < 32)
Packit Service fee338
                requested_caps[0] &= ~CAP_TO_MASK_0 (cap);
Packit Service fee338
              else
Packit Service fee338
                requested_caps[1] &= ~CAP_TO_MASK_1 (cap - 32);
Packit Service fee338
            }
Packit Service fee338
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
        }
Packit Service fee338
      else if (strcmp (arg, "--") == 0)
Packit Service fee338
        {
Packit Service fee338
          argv += 1;
Packit Service fee338
          argc -= 1;
Packit Service fee338
          break;
Packit Service fee338
        }
Packit Service fee338
      else if (*arg == '-')
Packit Service fee338
        {
Packit Service fee338
          die ("Unknown option %s", arg);
Packit Service fee338
        }
Packit Service fee338
      else
Packit Service fee338
        {
Packit Service fee338
          break;
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      argv++;
Packit Service fee338
      argc--;
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  *argcp = argc;
Packit Service fee338
  *argvp = argv;
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
parse_args (int          *argcp,
Packit Service fee338
            const char ***argvp)
Packit Service fee338
{
Packit Service fee338
  int total_parsed_argc = *argcp;
Packit Service fee338
Packit Service fee338
  parse_args_recurse (argcp, argvp, FALSE, &total_parsed_argc);
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
read_overflowids (void)
Packit Service fee338
{
Packit Service fee338
  cleanup_free char *uid_data = NULL;
Packit Service fee338
  cleanup_free char *gid_data = NULL;
Packit Service fee338
Packit Service fee338
  uid_data = load_file_at (AT_FDCWD, "/proc/sys/kernel/overflowuid");
Packit Service fee338
  if (uid_data == NULL)
Packit Service fee338
    die_with_error ("Can't read /proc/sys/kernel/overflowuid");
Packit Service fee338
Packit Service fee338
  overflow_uid = strtol (uid_data, NULL, 10);
Packit Service fee338
  if (overflow_uid == 0)
Packit Service fee338
    die ("Can't parse /proc/sys/kernel/overflowuid");
Packit Service fee338
Packit Service fee338
  gid_data = load_file_at (AT_FDCWD, "/proc/sys/kernel/overflowgid");
Packit Service fee338
  if (gid_data == NULL)
Packit Service fee338
    die_with_error ("Can't read /proc/sys/kernel/overflowgid");
Packit Service fee338
Packit Service fee338
  overflow_gid = strtol (gid_data, NULL, 10);
Packit Service fee338
  if (overflow_gid == 0)
Packit Service fee338
    die ("Can't parse /proc/sys/kernel/overflowgid");
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
namespace_ids_read (pid_t  pid)
Packit Service fee338
{
Packit Service fee338
  cleanup_free char *dir = NULL;
Packit Service fee338
  cleanup_fd int ns_fd = -1;
Packit Service fee338
  NsInfo *info;
Packit Service fee338
Packit Service fee338
  dir = xasprintf ("%d/ns", pid);
Packit Service fee338
  ns_fd = openat (proc_fd, dir, O_PATH);
Packit Service fee338
Packit Service fee338
  if (ns_fd < 0)
Packit Service fee338
    die_with_error ("open /proc/%s/ns failed", dir);
Packit Service fee338
Packit Service fee338
  for (info = ns_infos; info->name; info++)
Packit Service fee338
    {
Packit Service fee338
      bool *do_unshare = info->do_unshare;
Packit Service fee338
      struct stat st;
Packit Service fee338
      int r;
Packit Service fee338
Packit Service fee338
      /* if we don't unshare this ns, ignore it */
Packit Service fee338
      if (do_unshare && *do_unshare == FALSE)
Packit Service fee338
        continue;
Packit Service fee338
Packit Service fee338
      r = fstatat (ns_fd, info->name, &st, 0);
Packit Service fee338
Packit Service fee338
      /* if we can't get the information, ignore it */
Packit Service fee338
      if (r != 0)
Packit Service fee338
        continue;
Packit Service fee338
Packit Service fee338
      info->id = st.st_ino;
Packit Service fee338
    }
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
static void
Packit Service fee338
namespace_ids_write (int    fd,
Packit Service fee338
                     bool   in_json)
Packit Service fee338
{
Packit Service fee338
  NsInfo *info;
Packit Service fee338
Packit Service fee338
  for (info = ns_infos; info->name; info++)
Packit Service fee338
    {
Packit Service fee338
      cleanup_free char *output = NULL;
Packit Service fee338
      const char *indent;
Packit Service fee338
      uintmax_t nsid;
Packit Service fee338
Packit Service fee338
      nsid = (uintmax_t) info->id;
Packit Service fee338
Packit Service fee338
      /* if we don't have the information, we don't write it */
Packit Service fee338
      if (nsid == 0)
Packit Service fee338
        continue;
Packit Service fee338
Packit Service fee338
      indent = in_json ? " " : "\n    ";
Packit Service fee338
      output = xasprintf (",%s\"%s-namespace\": %ju",
Packit Service fee338
                          indent, info->name, nsid);
Packit Service fee338
Packit Service fee338
      dump_info (fd, output, TRUE);
Packit Service fee338
    }
Packit Service fee338
}
Packit Service fee338
Packit Service fee338
int
Packit Service fee338
main (int    argc,
Packit Service fee338
      char **argv)
Packit Service fee338
{
Packit Service fee338
  mode_t old_umask;
Packit Service fee338
  const char *base_path = NULL;
Packit Service fee338
  int clone_flags;
Packit Service fee338
  char *old_cwd = NULL;
Packit Service fee338
  pid_t pid;
Packit Service fee338
  int event_fd = -1;
Packit Service fee338
  int child_wait_fd = -1;
Packit Service fee338
  int setup_finished_pipe[] = {-1, -1};
Packit Service fee338
  const char *new_cwd;
Packit Service fee338
  uid_t ns_uid;
Packit Service fee338
  gid_t ns_gid;
Packit Service fee338
  struct stat sbuf;
Packit Service fee338
  uint64_t val;
Packit Service fee338
  int res UNUSED;
Packit Service fee338
  cleanup_free char *seccomp_data = NULL;
Packit Service fee338
  size_t seccomp_len;
Packit Service fee338
  struct sock_fprog seccomp_prog;
Packit Service fee338
  cleanup_free char *args_data = NULL;
Packit Service fee338
  int intermediate_pids_sockets[2] = {-1, -1};
Packit Service fee338
Packit Service fee338
  /* Handle --version early on before we try to acquire/drop
Packit Service fee338
   * any capabilities so it works in a build environment;
Packit Service fee338
   * right now flatpak's build runs bubblewrap --version.
Packit Service fee338
   * https://github.com/projectatomic/bubblewrap/issues/185
Packit Service fee338
   */
Packit Service fee338
  if (argc == 2 && (strcmp (argv[1], "--version") == 0))
Packit Service fee338
    print_version_and_exit ();
Packit Service fee338
Packit Service fee338
  real_uid = getuid ();
Packit Service fee338
  real_gid = getgid ();
Packit Service fee338
Packit Service fee338
  /* Get the (optional) privileges we need */
Packit Service fee338
  acquire_privs ();
Packit Service fee338
Packit Service fee338
  /* Never gain any more privs during exec */
Packit Service fee338
  if (prctl (PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
Packit Service fee338
    die_with_error ("prctl(PR_SET_NO_NEW_CAPS) failed");
Packit Service fee338
Packit Service fee338
  /* The initial code is run with high permissions
Packit Service fee338
     (i.e. CAP_SYS_ADMIN), so take lots of care. */
Packit Service fee338
Packit Service fee338
  read_overflowids ();
Packit Service fee338
Packit Service fee338
  argv0 = argv[0];
Packit Service fee338
Packit Service fee338
  if (isatty (1))
Packit Service fee338
    host_tty_dev = ttyname (1);
Packit Service fee338
Packit Service fee338
  argv++;
Packit Service fee338
  argc--;
Packit Service fee338
Packit Service fee338
  if (argc == 0)
Packit Service fee338
    usage (EXIT_FAILURE, stderr);
Packit Service fee338
Packit Service fee338
  parse_args (&argc, (const char ***) &argv);
Packit Service fee338
Packit Service fee338
  /* suck the args into a cleanup_free variable to control their lifecycle */
Packit Service fee338
  args_data = opt_args_data;
Packit Service fee338
  opt_args_data = NULL;
Packit Service fee338
Packit Service fee338
  if ((requested_caps[0] || requested_caps[1]) && is_privileged)
Packit Service fee338
    die ("--cap-add in setuid mode can be used only by root");
Packit Service fee338
Packit Service fee338
  if (opt_userns_block_fd != -1 && !opt_unshare_user)
Packit Service fee338
    die ("--userns-block-fd requires --unshare-user");
Packit Service fee338
Packit Service fee338
  if (opt_userns_block_fd != -1 && opt_info_fd == -1)
Packit Service fee338
    die ("--userns-block-fd requires --info-fd");
Packit Service fee338
Packit Service fee338
  if (opt_userns_fd != -1 && opt_unshare_user)
Packit Service fee338
    die ("--userns not compatible --unshare-user");
Packit Service fee338
Packit Service fee338
  if (opt_userns_fd != -1 && opt_unshare_user_try)
Packit Service fee338
    die ("--userns not compatible --unshare-user-try");
Packit Service fee338
Packit Service fee338
  /* Technically using setns() is probably safe even in the privileged
Packit Service fee338
   * case, because we got passed in a file descriptor to the
Packit Service fee338
   * namespace, and that can only be gotten if you have ptrace
Packit Service fee338
   * permissions against the target, and then you could do whatever to
Packit Service fee338
   * the namespace anyway.
Packit Service fee338
   *
Packit Service fee338
   * However, for practical reasons this isn't possible to use,
Packit Service fee338
   * because (as described in acquire_privs()) setuid bwrap causes
Packit Service fee338
   * root to own the namespaces that it creates, so you will not be
Packit Service fee338
   * able to access these namespaces anyway. So, best just not support
Packit Service fee338
   * it anyway.
Packit Service fee338
   */
Packit Service fee338
  if (opt_userns_fd != -1 && is_privileged)
Packit Service fee338
    die ("--userns doesn't work in setuid mode");
Packit Service fee338
Packit Service fee338
  /* We have to do this if we weren't installed setuid (and we're not
Packit Service fee338
   * root), so let's just DWIM */
Packit Service fee338
  if (!is_privileged && getuid () != 0 && opt_userns_fd == -1)
Packit Service fee338
    opt_unshare_user = TRUE;
Packit Service fee338
Packit Service fee338
#ifdef ENABLE_REQUIRE_USERNS
Packit Service fee338
  /* In this build option, we require userns. */
Packit Service fee338
  if (is_privileged && getuid () != 0 && opt_userns_fd == -1)
Packit Service fee338
    opt_unshare_user = TRUE;
Packit Service fee338
#endif
Packit Service fee338
Packit Service fee338
  if (opt_unshare_user_try &&
Packit Service fee338
      stat ("/proc/self/ns/user", &sbuf) == 0)
Packit Service fee338
    {
Packit Service fee338
      bool disabled = FALSE;
Packit Service fee338
Packit Service fee338
      /* RHEL7 has a kernel module parameter that lets you enable user namespaces */
Packit Service fee338
      if (stat ("/sys/module/user_namespace/parameters/enable", &sbuf) == 0)
Packit Service fee338
        {
Packit Service fee338
          cleanup_free char *enable = NULL;
Packit Service fee338
          enable = load_file_at (AT_FDCWD, "/sys/module/user_namespace/parameters/enable");
Packit Service fee338
          if (enable != NULL && enable[0] == 'N')
Packit Service fee338
            disabled = TRUE;
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      /* Check for max_user_namespaces */
Packit Service fee338
      if (stat ("/proc/sys/user/max_user_namespaces", &sbuf) == 0)
Packit Service fee338
        {
Packit Service fee338
          cleanup_free char *max_user_ns = NULL;
Packit Service fee338
          max_user_ns = load_file_at (AT_FDCWD, "/proc/sys/user/max_user_namespaces");
Packit Service fee338
          if (max_user_ns != NULL && strcmp(max_user_ns, "0\n") == 0)
Packit Service fee338
            disabled = TRUE;
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      /* Debian lets you disable *unprivileged* user namespaces. However this is not
Packit Service fee338
         a problem if we're privileged, and if we're not opt_unshare_user is TRUE
Packit Service fee338
         already, and there is not much we can do, it's just a non-working setup. */
Packit Service fee338
Packit Service fee338
      if (!disabled)
Packit Service fee338
        opt_unshare_user = TRUE;
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  if (argc == 0)
Packit Service fee338
    usage (EXIT_FAILURE, stderr);
Packit Service fee338
Packit Service fee338
  __debug__ (("Creating root mount point\n"));
Packit Service fee338
Packit Service fee338
  if (opt_sandbox_uid == -1)
Packit Service fee338
    opt_sandbox_uid = real_uid;
Packit Service fee338
  if (opt_sandbox_gid == -1)
Packit Service fee338
    opt_sandbox_gid = real_gid;
Packit Service fee338
Packit Service fee338
  if (!opt_unshare_user && opt_userns_fd == -1 && opt_sandbox_uid != real_uid)
Packit Service fee338
    die ("Specifying --uid requires --unshare-user or --userns");
Packit Service fee338
Packit Service fee338
  if (!opt_unshare_user && opt_userns_fd == -1 && opt_sandbox_gid != real_gid)
Packit Service fee338
    die ("Specifying --gid requires --unshare-user or --userns");
Packit Service fee338
Packit Service fee338
  if (!opt_unshare_uts && opt_sandbox_hostname != NULL)
Packit Service fee338
    die ("Specifying --hostname requires --unshare-uts");
Packit Service fee338
Packit Service fee338
  if (opt_as_pid_1 && !opt_unshare_pid)
Packit Service fee338
    die ("Specifying --as-pid-1 requires --unshare-pid");
Packit Service fee338
Packit Service fee338
  if (opt_as_pid_1 && lock_files != NULL)
Packit Service fee338
    die ("Specifying --as-pid-1 and --lock-file is not permitted");
Packit Service fee338
Packit Service fee338
  /* We need to read stuff from proc during the pivot_root dance, etc.
Packit Service fee338
     Let's keep an fd to it open */
Packit Service fee338
  proc_fd = open ("/proc", O_PATH);
Packit Service fee338
  if (proc_fd == -1)
Packit Service fee338
    die_with_error ("Can't open /proc");
Packit Service fee338
Packit Service fee338
  /* We need *some* mountpoint where we can mount the root tmpfs.
Packit Service fee338
   * Because we use pivot_root, it won't appear to be mounted from
Packit Service fee338
   * the perspective of the sandboxed process, so we can use anywhere
Packit Service fee338
   * that is sure to exist, that is sure to not be a symlink controlled
Packit Service fee338
   * by someone malicious, and that we won't immediately need to
Packit Service fee338
   * access ourselves. */
Packit Service fee338
  base_path = "/tmp";
Packit Service fee338
Packit Service fee338
  __debug__ (("creating new namespace\n"));
Packit Service fee338
Packit Service fee338
  if (opt_unshare_pid && !opt_as_pid_1)
Packit Service fee338
    {
Packit Service fee338
      event_fd = eventfd (0, EFD_CLOEXEC | EFD_NONBLOCK);
Packit Service fee338
      if (event_fd == -1)
Packit Service fee338
        die_with_error ("eventfd()");
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /* We block sigchild here so that we can use signalfd in the monitor. */
Packit Service fee338
  block_sigchild ();
Packit Service fee338
Packit Service fee338
  clone_flags = SIGCHLD | CLONE_NEWNS;
Packit Service fee338
  if (opt_unshare_user)
Packit Service fee338
    clone_flags |= CLONE_NEWUSER;
Packit Service fee338
  if (opt_unshare_pid && opt_pidns_fd == -1)
Packit Service fee338
    clone_flags |= CLONE_NEWPID;
Packit Service fee338
  if (opt_unshare_net)
Packit Service fee338
    clone_flags |= CLONE_NEWNET;
Packit Service fee338
  if (opt_unshare_ipc)
Packit Service fee338
    clone_flags |= CLONE_NEWIPC;
Packit Service fee338
  if (opt_unshare_uts)
Packit Service fee338
    clone_flags |= CLONE_NEWUTS;
Packit Service fee338
  if (opt_unshare_cgroup)
Packit Service fee338
    {
Packit Service fee338
      if (stat ("/proc/self/ns/cgroup", &sbuf))
Packit Service fee338
        {
Packit Service fee338
          if (errno == ENOENT)
Packit Service fee338
            die ("Cannot create new cgroup namespace because the kernel does not support it");
Packit Service fee338
          else
Packit Service fee338
            die_with_error ("stat on /proc/self/ns/cgroup failed");
Packit Service fee338
        }
Packit Service fee338
      clone_flags |= CLONE_NEWCGROUP;
Packit Service fee338
    }
Packit Service fee338
  if (opt_unshare_cgroup_try)
Packit Service fee338
    {
Packit Service fee338
      opt_unshare_cgroup = !stat ("/proc/self/ns/cgroup", &sbuf);
Packit Service fee338
      if (opt_unshare_cgroup)
Packit Service fee338
        clone_flags |= CLONE_NEWCGROUP;
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  child_wait_fd = eventfd (0, EFD_CLOEXEC);
Packit Service fee338
  if (child_wait_fd == -1)
Packit Service fee338
    die_with_error ("eventfd()");
Packit Service fee338
Packit Service fee338
  /* Track whether pre-exec setup finished if we're reporting process exit */
Packit Service fee338
  if (opt_json_status_fd != -1)
Packit Service fee338
    {
Packit Service fee338
      int ret;
Packit Service fee338
      ret = pipe2 (setup_finished_pipe, O_CLOEXEC);
Packit Service fee338
      if (ret == -1)
Packit Service fee338
        die_with_error ("pipe2()");
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /* Switch to the custom user ns before the clone, gets us privs in that ns (assuming it's a child of the current one and thus allowed) */
Packit Service fee338
  if (opt_userns_fd > 0 && setns (opt_userns_fd, CLONE_NEWUSER) != 0)
Packit Service fee338
    {
Packit Service fee338
      if (errno == EINVAL)
Packit Service fee338
        die ("Joining the specified user namespace failed, it might not be a descendant of the current user namespace.");
Packit Service fee338
      die_with_error ("Joining specified user namespace failed");
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /* Sometimes we have uninteresting intermediate pids during the setup, set up code to pass the real pid down */
Packit Service fee338
  if (opt_pidns_fd != -1)
Packit Service fee338
    {
Packit Service fee338
      /* Mark us as a subreaper, this way we can get exit status from grandchildren */
Packit Service fee338
      prctl (PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0);
Packit Service fee338
      create_pid_socketpair (intermediate_pids_sockets);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  pid = raw_clone (clone_flags, NULL);
Packit Service fee338
  if (pid == -1)
Packit Service fee338
    {
Packit Service fee338
      if (opt_unshare_user)
Packit Service fee338
        {
Packit Service fee338
          if (errno == EINVAL)
Packit Service fee338
            die ("Creating new namespace failed, likely because the kernel does not support user namespaces.  bwrap must be installed setuid on such systems.");
Packit Service fee338
          else if (errno == EPERM && !is_privileged)
Packit Service fee338
            die ("No permissions to creating new namespace, likely because the kernel does not allow non-privileged user namespaces. On e.g. debian this can be enabled with 'sysctl kernel.unprivileged_userns_clone=1'.");
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      die_with_error ("Creating new namespace failed");
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  ns_uid = opt_sandbox_uid;
Packit Service fee338
  ns_gid = opt_sandbox_gid;
Packit Service fee338
Packit Service fee338
  if (pid != 0)
Packit Service fee338
    {
Packit Service fee338
      /* Parent, outside sandbox, privileged (initially) */
Packit Service fee338
Packit Service fee338
      if (intermediate_pids_sockets[0] != -1)
Packit Service fee338
        {
Packit Service fee338
          close (intermediate_pids_sockets[1]);
Packit Service fee338
          pid = read_pid_from_socket (intermediate_pids_sockets[0]);
Packit Service fee338
          close (intermediate_pids_sockets[0]);
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      /* Discover namespace ids before we drop privileges */
Packit Service fee338
      namespace_ids_read (pid);
Packit Service fee338
Packit Service fee338
      if (is_privileged && opt_unshare_user && opt_userns_block_fd == -1)
Packit Service fee338
        {
Packit Service fee338
          /* We're running as euid 0, but the uid we want to map is
Packit Service fee338
           * not 0. This means we're not allowed to write this from
Packit Service fee338
           * the child user namespace, so we do it from the parent.
Packit Service fee338
           *
Packit Service fee338
           * Also, we map uid/gid 0 in the namespace (to overflowuid)
Packit Service fee338
           * if opt_needs_devpts is true, because otherwise the mount
Packit Service fee338
           * of devpts fails due to root not being mapped.
Packit Service fee338
           */
Packit Service fee338
          write_uid_gid_map (ns_uid, real_uid,
Packit Service fee338
                             ns_gid, real_gid,
Packit Service fee338
                             pid, TRUE, opt_needs_devpts);
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      /* Initial launched process, wait for pid 1 or exec'ed command to exit */
Packit Service fee338
Packit Service fee338
      if (opt_userns2_fd > 0 && setns (opt_userns2_fd, CLONE_NEWUSER) != 0)
Packit Service fee338
        die_with_error ("Setting userns2 failed");
Packit Service fee338
Packit Service fee338
      /* We don't need any privileges in the launcher, drop them immediately. */
Packit Service fee338
      drop_privs (FALSE);
Packit Service fee338
Packit Service fee338
      /* Optionally bind our lifecycle to that of the parent */
Packit Service fee338
      handle_die_with_parent ();
Packit Service fee338
Packit Service fee338
      if (opt_info_fd != -1)
Packit Service fee338
        {
Packit Service fee338
          cleanup_free char *output = xasprintf ("{\n    \"child-pid\": %i", pid);
Packit Service fee338
          dump_info (opt_info_fd, output, TRUE);
Packit Service fee338
          namespace_ids_write (opt_info_fd, FALSE);
Packit Service fee338
          dump_info (opt_info_fd, "\n}\n", TRUE);
Packit Service fee338
          close (opt_info_fd);
Packit Service fee338
        }
Packit Service fee338
      if (opt_json_status_fd != -1)
Packit Service fee338
        {
Packit Service fee338
          cleanup_free char *output = xasprintf ("{ \"child-pid\": %i", pid);
Packit Service fee338
          dump_info (opt_json_status_fd, output, TRUE);
Packit Service fee338
          namespace_ids_write (opt_json_status_fd, TRUE);
Packit Service fee338
          dump_info (opt_json_status_fd, " }\n", TRUE);
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      if (opt_userns_block_fd != -1)
Packit Service fee338
        {
Packit Service fee338
          char b[1];
Packit Service fee338
          (void) TEMP_FAILURE_RETRY (read (opt_userns_block_fd, b, 1));
Packit Service fee338
          close (opt_userns_block_fd);
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      /* Let child run now that the uid maps are set up */
Packit Service fee338
      val = 1;
Packit Service fee338
      res = write (child_wait_fd, &val, 8);
Packit Service fee338
      /* Ignore res, if e.g. the child died and closed child_wait_fd we don't want to error out here */
Packit Service fee338
      close (child_wait_fd);
Packit Service fee338
Packit Service fee338
      return monitor_child (event_fd, pid, setup_finished_pipe[0]);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  if (opt_pidns_fd > 0)
Packit Service fee338
    {
Packit Service fee338
      if (setns (opt_pidns_fd, CLONE_NEWPID) != 0)
Packit Service fee338
        die_with_error ("Setting pidns failed");
Packit Service fee338
Packit Service fee338
      /* fork to get the passed in pid ns */
Packit Service fee338
      fork_intermediate_child ();
Packit Service fee338
Packit Service fee338
      /* We might have specified both --pidns *and* --unshare-pid, so set up a new child pid namespace under the specified one */
Packit Service fee338
      if (opt_unshare_pid)
Packit Service fee338
        {
Packit Service fee338
          if (unshare (CLONE_NEWPID))
Packit Service fee338
            die_with_error ("unshare pid ns");
Packit Service fee338
Packit Service fee338
          /* fork to get the new pid ns */
Packit Service fee338
          fork_intermediate_child ();
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      /* We're back, either in a child or grandchild, so message the actual pid to the monitor */
Packit Service fee338
Packit Service fee338
      close (intermediate_pids_sockets[0]);
Packit Service fee338
      send_pid_on_socket (intermediate_pids_sockets[1]);
Packit Service fee338
      close (intermediate_pids_sockets[1]);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /* Child, in sandbox, privileged in the parent or in the user namespace (if --unshare-user).
Packit Service fee338
   *
Packit Service fee338
   * Note that for user namespaces we run as euid 0 during clone(), so
Packit Service fee338
   * the child user namespace is owned by euid 0. This means that the
Packit Service fee338
   * regular user namespace parent (with uid != 0) doesn't have any
Packit Service fee338
   * capabilities in it, which is nice as we can't exploit those. In
Packit Service fee338
   * particular the parent user namespace doesn't have CAP_SYS_PTRACE
Packit Service fee338
   * which would otherwise allow the parent to hijack the child
Packit Service fee338
   * after this point.
Packit Service fee338
   *
Packit Service fee338
   * Unfortunately this also means you can't ptrace the final
Packit Service fee338
   * sandboxed process from outside the sandbox either.
Packit Service fee338
   */
Packit Service fee338
Packit Service fee338
  if (opt_info_fd != -1)
Packit Service fee338
    close (opt_info_fd);
Packit Service fee338
Packit Service fee338
  if (opt_json_status_fd != -1)
Packit Service fee338
    close (opt_json_status_fd);
Packit Service fee338
Packit Service fee338
  /* Wait for the parent to init uid/gid maps and drop caps */
Packit Service fee338
  res = read (child_wait_fd, &val, 8);
Packit Service fee338
  close (child_wait_fd);
Packit Service fee338
Packit Service fee338
  /* At this point we can completely drop root uid, but retain the
Packit Service fee338
   * required permitted caps. This allows us to do full setup as
Packit Service fee338
   * the user uid, which makes e.g. fuse access work.
Packit Service fee338
   */
Packit Service fee338
  switch_to_user_with_privs ();
Packit Service fee338
Packit Service fee338
  if (opt_unshare_net)
Packit Service fee338
    loopback_setup (); /* Will exit if unsuccessful */
Packit Service fee338
Packit Service fee338
  ns_uid = opt_sandbox_uid;
Packit Service fee338
  ns_gid = opt_sandbox_gid;
Packit Service fee338
  if (!is_privileged && opt_unshare_user && opt_userns_block_fd == -1)
Packit Service fee338
    {
Packit Service fee338
      /* In the unprivileged case we have to write the uid/gid maps in
Packit Service fee338
       * the child, because we have no caps in the parent */
Packit Service fee338
Packit Service fee338
      if (opt_needs_devpts)
Packit Service fee338
        {
Packit Service fee338
          /* This is a bit hacky, but we need to first map the real uid/gid to
Packit Service fee338
             0, otherwise we can't mount the devpts filesystem because root is
Packit Service fee338
             not mapped. Later we will create another child user namespace and
Packit Service fee338
             map back to the real uid */
Packit Service fee338
          ns_uid = 0;
Packit Service fee338
          ns_gid = 0;
Packit Service fee338
        }
Packit Service fee338
Packit Service fee338
      write_uid_gid_map (ns_uid, real_uid,
Packit Service fee338
                         ns_gid, real_gid,
Packit Service fee338
                         -1, TRUE, FALSE);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  old_umask = umask (0);
Packit Service fee338
Packit Service fee338
  /* Need to do this before the chroot, but after we're the real uid */
Packit Service fee338
  resolve_symlinks_in_ops ();
Packit Service fee338
Packit Service fee338
  /* Mark everything as slave, so that we still
Packit Service fee338
   * receive mounts from the real root, but don't
Packit Service fee338
   * propagate mounts to the real root. */
Packit Service fee338
  if (mount (NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
Packit Service fee338
    die_with_error ("Failed to make / slave");
Packit Service fee338
Packit Service fee338
  /* Create a tmpfs which we will use as / in the namespace */
Packit Service fee338
  if (mount ("tmpfs", base_path, "tmpfs", MS_NODEV | MS_NOSUID, NULL) != 0)
Packit Service fee338
    die_with_error ("Failed to mount tmpfs");
Packit Service fee338
Packit Service fee338
  old_cwd = get_current_dir_name ();
Packit Service fee338
Packit Service fee338
  /* Chdir to the new root tmpfs mount. This will be the CWD during
Packit Service fee338
     the entire setup. Access old or new root via "oldroot" and "newroot". */
Packit Service fee338
  if (chdir (base_path) != 0)
Packit Service fee338
    die_with_error ("chdir base_path");
Packit Service fee338
Packit Service fee338
  /* We create a subdir "$base_path/newroot" for the new root, that
Packit Service fee338
   * way we can pivot_root to base_path, and put the old root at
Packit Service fee338
   * "$base_path/oldroot". This avoids problems accessing the oldroot
Packit Service fee338
   * dir if the user requested to bind mount something over / (or
Packit Service fee338
   * over /tmp, now that we use that for base_path). */
Packit Service fee338
Packit Service fee338
  if (mkdir ("newroot", 0755))
Packit Service fee338
    die_with_error ("Creating newroot failed");
Packit Service fee338
Packit Service fee338
  if (mount ("newroot", "newroot", NULL, MS_MGC_VAL | MS_BIND | MS_REC, NULL) < 0)
Packit Service fee338
    die_with_error ("setting up newroot bind");
Packit Service fee338
Packit Service fee338
  if (mkdir ("oldroot", 0755))
Packit Service fee338
    die_with_error ("Creating oldroot failed");
Packit Service fee338
Packit Service fee338
  if (pivot_root (base_path, "oldroot"))
Packit Service fee338
    die_with_error ("pivot_root");
Packit Service fee338
Packit Service fee338
  if (chdir ("/") != 0)
Packit Service fee338
    die_with_error ("chdir / (base path)");
Packit Service fee338
Packit Service fee338
  if (is_privileged)
Packit Service fee338
    {
Packit Service fee338
      pid_t child;
Packit Service fee338
      int privsep_sockets[2];
Packit Service fee338
Packit Service fee338
      if (socketpair (AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, privsep_sockets) != 0)
Packit Service fee338
        die_with_error ("Can't create privsep socket");
Packit Service fee338
Packit Service fee338
      child = fork ();
Packit Service fee338
      if (child == -1)
Packit Service fee338
        die_with_error ("Can't fork unprivileged helper");
Packit Service fee338
Packit Service fee338
      if (child == 0)
Packit Service fee338
        {
Packit Service fee338
          /* Unprivileged setup process */
Packit Service fee338
          drop_privs (FALSE);
Packit Service fee338
          close (privsep_sockets[0]);
Packit Service fee338
          setup_newroot (opt_unshare_pid, privsep_sockets[1]);
Packit Service fee338
          exit (0);
Packit Service fee338
        }
Packit Service fee338
      else
Packit Service fee338
        {
Packit Service fee338
          int status;
Packit Service fee338
          uint32_t buffer[2048];  /* 8k, but is int32 to guarantee nice alignment */
Packit Service fee338
          uint32_t op, flags;
Packit Service fee338
          const char *arg1, *arg2;
Packit Service fee338
          cleanup_fd int unpriv_socket = -1;
Packit Service fee338
Packit Service fee338
          unpriv_socket = privsep_sockets[0];
Packit Service fee338
          close (privsep_sockets[1]);
Packit Service fee338
Packit Service fee338
          do
Packit Service fee338
            {
Packit Service fee338
              op = read_priv_sec_op (unpriv_socket, buffer, sizeof (buffer),
Packit Service fee338
                                     &flags, &arg1, &arg2);
Packit Service fee338
              privileged_op (-1, op, flags, arg1, arg2);
Packit Service fee338
              if (write (unpriv_socket, buffer, 1) != 1)
Packit Service fee338
                die ("Can't write to op_socket");
Packit Service fee338
            }
Packit Service fee338
          while (op != PRIV_SEP_OP_DONE);
Packit Service fee338
Packit Service fee338
          waitpid (child, &status, 0);
Packit Service fee338
          /* Continue post setup */
Packit Service fee338
        }
Packit Service fee338
    }
Packit Service fee338
  else
Packit Service fee338
    {
Packit Service fee338
      setup_newroot (opt_unshare_pid, -1);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  close_ops_fd ();
Packit Service fee338
Packit Service fee338
  /* The old root better be rprivate or we will send unmount events to the parent namespace */
Packit Service fee338
  if (mount ("oldroot", "oldroot", NULL, MS_REC | MS_PRIVATE, NULL) != 0)
Packit Service fee338
    die_with_error ("Failed to make old root rprivate");
Packit Service fee338
Packit Service fee338
  if (umount2 ("oldroot", MNT_DETACH))
Packit Service fee338
    die_with_error ("unmount old root");
Packit Service fee338
Packit Service fee338
  /* This is our second pivot. It's like we're a Silicon Valley startup flush
Packit Service fee338
   * with cash but short on ideas!
Packit Service fee338
   *
Packit Service fee338
   * We're aiming to make /newroot the real root, and get rid of /oldroot. To do
Packit Service fee338
   * that we need a temporary place to store it before we can unmount it.
Packit Service fee338
   */
Packit Service fee338
  { cleanup_fd int oldrootfd = open ("/", O_DIRECTORY | O_RDONLY);
Packit Service fee338
    if (oldrootfd < 0)
Packit Service fee338
      die_with_error ("can't open /");
Packit Service fee338
    if (chdir ("/newroot") != 0)
Packit Service fee338
      die_with_error ("chdir /newroot");
Packit Service fee338
    /* While the documentation claims that put_old must be underneath
Packit Service fee338
     * new_root, it is perfectly fine to use the same directory as the
Packit Service fee338
     * kernel checks only if old_root is accessible from new_root.
Packit Service fee338
     *
Packit Service fee338
     * Both runc and LXC are using this "alternative" method for
Packit Service fee338
     * setting up the root of the container:
Packit Service fee338
     *
Packit Service fee338
     * https://github.com/opencontainers/runc/blob/master/libcontainer/rootfs_linux.go#L671
Packit Service fee338
     * https://github.com/lxc/lxc/blob/master/src/lxc/conf.c#L1121
Packit Service fee338
     */
Packit Service fee338
    if (pivot_root (".", ".") != 0)
Packit Service fee338
      die_with_error ("pivot_root(/newroot)");
Packit Service fee338
    if (fchdir (oldrootfd) < 0)
Packit Service fee338
      die_with_error ("fchdir to oldroot");
Packit Service fee338
    if (umount2 (".", MNT_DETACH) < 0)
Packit Service fee338
      die_with_error ("umount old root");
Packit Service fee338
    if (chdir ("/") != 0)
Packit Service fee338
      die_with_error ("chdir /");
Packit Service fee338
  }
Packit Service fee338
Packit Service fee338
  if (opt_userns2_fd > 0 && setns (opt_userns2_fd, CLONE_NEWUSER) != 0)
Packit Service fee338
    die_with_error ("Setting userns2 failed");
Packit Service fee338
Packit Service fee338
  if (opt_unshare_user &&
Packit Service fee338
      (ns_uid != opt_sandbox_uid || ns_gid != opt_sandbox_gid) &&
Packit Service fee338
      opt_userns_block_fd == -1)
Packit Service fee338
    {
Packit Service fee338
      /* Now that devpts is mounted and we've no need for mount
Packit Service fee338
         permissions we can create a new user namespace and map our uid
Packit Service fee338
         1:1 */
Packit Service fee338
Packit Service fee338
      if (unshare (CLONE_NEWUSER))
Packit Service fee338
        die_with_error ("unshare user ns");
Packit Service fee338
Packit Service fee338
      write_uid_gid_map (opt_sandbox_uid, ns_uid,
Packit Service fee338
                         opt_sandbox_gid, ns_gid,
Packit Service fee338
                         -1, FALSE, FALSE);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /* All privileged ops are done now, so drop caps we don't need */
Packit Service fee338
  drop_privs (!is_privileged);
Packit Service fee338
Packit Service fee338
  if (opt_block_fd != -1)
Packit Service fee338
    {
Packit Service fee338
      char b[1];
Packit Service fee338
      (void) TEMP_FAILURE_RETRY (read (opt_block_fd, b, 1));
Packit Service fee338
      close (opt_block_fd);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  if (opt_seccomp_fd != -1)
Packit Service fee338
    {
Packit Service fee338
      seccomp_data = load_file_data (opt_seccomp_fd, &seccomp_len);
Packit Service fee338
      if (seccomp_data == NULL)
Packit Service fee338
        die_with_error ("Can't read seccomp data");
Packit Service fee338
Packit Service fee338
      if (seccomp_len % 8 != 0)
Packit Service fee338
        die ("Invalid seccomp data, must be multiple of 8");
Packit Service fee338
Packit Service fee338
      seccomp_prog.len = seccomp_len / 8;
Packit Service fee338
      seccomp_prog.filter = (struct sock_filter *) seccomp_data;
Packit Service fee338
Packit Service fee338
      close (opt_seccomp_fd);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  umask (old_umask);
Packit Service fee338
Packit Service fee338
  new_cwd = "/";
Packit Service fee338
  if (opt_chdir_path)
Packit Service fee338
    {
Packit Service fee338
      if (chdir (opt_chdir_path))
Packit Service fee338
        die_with_error ("Can't chdir to %s", opt_chdir_path);
Packit Service fee338
      new_cwd = opt_chdir_path;
Packit Service fee338
    }
Packit Service fee338
  else if (chdir (old_cwd) == 0)
Packit Service fee338
    {
Packit Service fee338
      /* If the old cwd is mapped in the sandbox, go there */
Packit Service fee338
      new_cwd = old_cwd;
Packit Service fee338
    }
Packit Service fee338
  else
Packit Service fee338
    {
Packit Service fee338
      /* If the old cwd is not mapped, go to home */
Packit Service fee338
      const char *home = getenv ("HOME");
Packit Service fee338
      if (home != NULL &&
Packit Service fee338
          chdir (home) == 0)
Packit Service fee338
        new_cwd = home;
Packit Service fee338
    }
Packit Service fee338
  xsetenv ("PWD", new_cwd, 1);
Packit Service fee338
  free (old_cwd);
Packit Service fee338
Packit Service fee338
  if (opt_new_session &&
Packit Service fee338
      setsid () == (pid_t) -1)
Packit Service fee338
    die_with_error ("setsid");
Packit Service fee338
Packit Service fee338
  if (label_exec (opt_exec_label) == -1)
Packit Service fee338
    die_with_error ("label_exec %s", argv[0]);
Packit Service fee338
Packit Service fee338
  __debug__ (("forking for child\n"));
Packit Service fee338
Packit Service fee338
  if (!opt_as_pid_1 && (opt_unshare_pid || lock_files != NULL || opt_sync_fd != -1))
Packit Service fee338
    {
Packit Service fee338
      /* We have to have a pid 1 in the pid namespace, because
Packit Service fee338
       * otherwise we'll get a bunch of zombies as nothing reaps
Packit Service fee338
       * them. Alternatively if we're using sync_fd or lock_files we
Packit Service fee338
       * need some process to own these.
Packit Service fee338
       */
Packit Service fee338
Packit Service fee338
      pid = fork ();
Packit Service fee338
      if (pid == -1)
Packit Service fee338
        die_with_error ("Can't fork for pid 1");
Packit Service fee338
Packit Service fee338
      if (pid != 0)
Packit Service fee338
        {
Packit Service fee338
          drop_all_caps (FALSE);
Packit Service fee338
Packit Service fee338
          /* Close fds in pid 1, except stdio and optionally event_fd
Packit Service fee338
             (for syncing pid 2 lifetime with monitor_child) and
Packit Service fee338
             opt_sync_fd (for syncing sandbox lifetime with outside
Packit Service fee338
             process).
Packit Service fee338
             Any other fds will be passed on to the child though. */
Packit Service fee338
          {
Packit Service fee338
            int dont_close[3];
Packit Service fee338
            int j = 0;
Packit Service fee338
            if (event_fd != -1)
Packit Service fee338
              dont_close[j++] = event_fd;
Packit Service fee338
            if (opt_sync_fd != -1)
Packit Service fee338
              dont_close[j++] = opt_sync_fd;
Packit Service fee338
            dont_close[j++] = -1;
Packit Service fee338
            fdwalk (proc_fd, close_extra_fds, dont_close);
Packit Service fee338
          }
Packit Service fee338
Packit Service fee338
          return do_init (event_fd, pid, seccomp_data != NULL ? &seccomp_prog : NULL);
Packit Service fee338
        }
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  __debug__ (("launch executable %s\n", argv[0]));
Packit Service fee338
Packit Service fee338
  if (proc_fd != -1)
Packit Service fee338
    close (proc_fd);
Packit Service fee338
Packit Service fee338
  /* If we are using --as-pid-1 leak the sync fd into the sandbox.
Packit Service fee338
     --sync-fd will still work as long as the container process doesn't close this file.  */
Packit Service fee338
  if (!opt_as_pid_1)
Packit Service fee338
    {
Packit Service fee338
      if (opt_sync_fd != -1)
Packit Service fee338
        close (opt_sync_fd);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  /* We want sigchild in the child */
Packit Service fee338
  unblock_sigchild ();
Packit Service fee338
Packit Service fee338
  /* Optionally bind our lifecycle */
Packit Service fee338
  handle_die_with_parent ();
Packit Service fee338
Packit Service fee338
  if (!is_privileged)
Packit Service fee338
    set_ambient_capabilities ();
Packit Service fee338
Packit Service fee338
  /* Should be the last thing before execve() so that filters don't
Packit Service fee338
   * need to handle anything above */
Packit Service fee338
  if (seccomp_data != NULL &&
Packit Service fee338
      prctl (PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &seccomp_prog) != 0)
Packit Service fee338
    die_with_error ("prctl(PR_SET_SECCOMP)");
Packit Service fee338
Packit Service fee338
  if (setup_finished_pipe[1] != -1)
Packit Service fee338
    {
Packit Service fee338
      char data = 0;
Packit Service fee338
      res = write_to_fd (setup_finished_pipe[1], &data, 1);
Packit Service fee338
      /* Ignore res, if e.g. the parent died and closed setup_finished_pipe[0]
Packit Service fee338
         we don't want to error out here */
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  if (execvp (argv[0], argv) == -1)
Packit Service fee338
    {
Packit Service fee338
      if (setup_finished_pipe[1] != -1)
Packit Service fee338
        {
Packit Service fee338
          int saved_errno = errno;
Packit Service fee338
          char data = 0;
Packit Service fee338
          res = write_to_fd (setup_finished_pipe[1], &data, 1);
Packit Service fee338
          errno = saved_errno;
Packit Service fee338
          /* Ignore res, if e.g. the parent died and closed setup_finished_pipe[0]
Packit Service fee338
             we don't want to error out here */
Packit Service fee338
        }
Packit Service fee338
      die_with_error ("execvp %s", argv[0]);
Packit Service fee338
    }
Packit Service fee338
Packit Service fee338
  return 0;
Packit Service fee338
}