diff --git a/src/core/main.c b/src/core/main.c index c83249a..b8c1e56 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -136,7 +136,8 @@ static EmergencyAction arg_cad_burst_action; static CPUSet arg_cpu_affinity; static NUMAPolicy arg_numa_policy; -static int parse_configuration(void); +static int parse_configuration(const struct rlimit *saved_rlimit_nofile, + const struct rlimit *saved_rlimit_memlock); _noreturn_ static void freeze_or_reboot(void) { @@ -1149,25 +1150,6 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { int r, nr; - assert(saved_rlimit); - - /* Save the original RLIMIT_NOFILE so that we can reset it - * later when transitioning from the initrd to the main - * systemd or suchlike. */ - if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0) - return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m"); - - /* Make sure forked processes get the default kernel setting */ - if (!arg_default_rlimit[RLIMIT_NOFILE]) { - struct rlimit *rl; - - rl = newdup(struct rlimit, saved_rlimit, 1); - if (!rl) - return log_oom(); - - arg_default_rlimit[RLIMIT_NOFILE] = rl; - } - /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows */ nr = read_nr_open(); r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(nr)); @@ -1180,16 +1162,12 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { static int bump_rlimit_memlock(struct rlimit *saved_rlimit) { int r; - assert(saved_rlimit); assert(getuid() == 0); /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's * bump the value high enough for the root user. 
*/ - if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0) - return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m"); - r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL)); if (r < 0) return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m"); @@ -1651,6 +1629,8 @@ static void do_reexecute( static int invoke_main_loop( Manager *m, + const struct rlimit *saved_rlimit_nofile, + const struct rlimit *saved_rlimit_memlock, bool *ret_reexecute, int *ret_retval, /* Return parameters relevant for shutting down */ const char **ret_shutdown_verb, /* … */ @@ -1662,6 +1642,8 @@ static int invoke_main_loop( int r; assert(m); + assert(saved_rlimit_nofile); + assert(saved_rlimit_memlock); assert(ret_reexecute); assert(ret_retval); assert(ret_shutdown_verb); @@ -1691,7 +1673,7 @@ static int invoke_main_loop( saved_log_level = m->log_level_overridden ? log_get_max_level() : -1; saved_log_target = m->log_target_overridden ? log_get_target() : _LOG_TARGET_INVALID; - (void) parse_configuration(); + (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock); set_manager_defaults(m); @@ -1983,6 +1965,80 @@ static int do_queue_default_job( return 0; } +static void save_rlimits(struct rlimit *saved_rlimit_nofile, + struct rlimit *saved_rlimit_memlock) { + + assert(saved_rlimit_nofile); + assert(saved_rlimit_memlock); + + if (getrlimit(RLIMIT_NOFILE, saved_rlimit_nofile) < 0) + log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m"); + + if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit_memlock) < 0) + log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m"); +} + +static void fallback_rlimit_nofile(const struct rlimit *saved_rlimit_nofile) { + struct rlimit *rl; + + if (arg_default_rlimit[RLIMIT_NOFILE]) + return; + + /* Make sure forked processes get limits based on the original kernel setting */ + + rl = newdup(struct rlimit, saved_rlimit_nofile, 1); + if (!rl) { + log_oom(); + return; + } + 
+ /* Bump the hard limit for system services to a substantially higher value. The default + * hard limit current kernels set is pretty low (4K), mostly for historical + * reasons. According to kernel developers, the fd handling in recent kernels has been + * optimized substantially enough, so that we can bump the limit now, without paying too + * high a price in memory or performance. Note however that we only bump the hard limit, + * not the soft limit. That's because select() works the way it works, and chokes on fds + * >= 1024. If we'd bump the soft limit globally, it might accidentally happen to + * unsuspecting programs that they get fds higher than what they can process using + * select(). By only bumping the hard limit but leaving the soft limit as it is we avoid + * this pitfall: programs that are written by folks with the select() problem in mind + * (and thus use poll()/epoll instead of select(), the way everybody should) can + * explicitly opt into high fds by bumping their soft limit beyond 1024, to the hard limit + * we pass. 
*/ + if (arg_system) { + int nr; + + /* Get the underlying absolute limit the kernel enforces */ + nr = read_nr_open(); + + rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE)); + } + + /* If for some reason we were invoked with a soft limit above 1024 (which should never + * happen!, but who knows what we get passed in from pam_limit when invoked as --user + * instance), then lower what we pass on to not confuse our children */ + rl->rlim_cur = MIN(rl->rlim_cur, (rlim_t) FD_SETSIZE); + + arg_default_rlimit[RLIMIT_NOFILE] = rl; +} + +static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) { + struct rlimit *rl; + + /* Pass the original value down to invoked processes */ + + if (arg_default_rlimit[RLIMIT_MEMLOCK]) + return; + + rl = newdup(struct rlimit, saved_rlimit_memlock, 1); + if (!rl) { + log_oom(); + return; + } + + arg_default_rlimit[RLIMIT_MEMLOCK] = rl; +} + static void reset_arguments(void) { /* Frees/resets arg_* variables, with a few exceptions commented below. */ @@ -2040,9 +2096,13 @@ static void reset_arguments(void) { numa_policy_reset(&arg_numa_policy); } -static int parse_configuration(void) { +static int parse_configuration(const struct rlimit *saved_rlimit_nofile, + const struct rlimit *saved_rlimit_memlock) { int r; + assert(saved_rlimit_nofile); + assert(saved_rlimit_memlock); + arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U); /* Assign configuration defaults */ @@ -2058,18 +2118,29 @@ static int parse_configuration(void) { log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m"); } + /* Initialize some default rlimits for services if they haven't been configured */ + fallback_rlimit_nofile(saved_rlimit_nofile); + fallback_rlimit_memlock(saved_rlimit_memlock); + /* Note that this also parses bits from the kernel command line, including "debug". 
*/ log_parse_environment(); return 0; } -static int load_configuration(int argc, char **argv, const char **ret_error_message) { +static int load_configuration( + int argc, + char **argv, + const struct rlimit *saved_rlimit_nofile, + const struct rlimit *saved_rlimit_memlock, + const char **ret_error_message) { int r; + assert(saved_rlimit_nofile); + assert(saved_rlimit_memlock); assert(ret_error_message); - (void) parse_configuration(); + (void) parse_configuration(saved_rlimit_nofile, saved_rlimit_memlock); r = parse_argv(argc, argv); if (r < 0) { @@ -2403,11 +2474,15 @@ int main(int argc, char *argv[]) { } } + /* Save the original RLIMIT_NOFILE/RLIMIT_MEMLOCK so that we can reset it later when + * transitioning from the initrd to the main systemd or suchlike. */ + save_rlimits(&saved_rlimit_nofile, &saved_rlimit_memlock); + /* Reset all signal handlers. */ (void) reset_all_signal_handlers(); (void) ignore_signals(SIGNALS_IGNORE, -1); - r = load_configuration(argc, argv, &error_message); + r = load_configuration(argc, argv, &saved_rlimit_nofile, &saved_rlimit_memlock, &error_message); if (r < 0) goto finish; @@ -2522,6 +2597,8 @@ int main(int argc, char *argv[]) { } (void) invoke_main_loop(m, + &saved_rlimit_nofile, + &saved_rlimit_memlock, &reexecute, &retval, &shutdown_verb,