summaryrefslogtreecommitdiff
path: root/src/core/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/main.c')
-rw-r--r--src/core/main.c464
1 files changed, 308 insertions, 156 deletions
diff --git a/src/core/main.c b/src/core/main.c
index 44dd8348be..839dc062ff 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -28,6 +28,7 @@
#include "bus-error.h"
#include "bus-util.h"
#include "capability-util.h"
+#include "cgroup-util.h"
#include "clock-util.h"
#include "conf-parser.h"
#include "cpu-set-util.h"
@@ -57,6 +58,7 @@
#include "pager.h"
#include "parse-util.h"
#include "path-util.h"
+#include "pretty-print.h"
#include "proc-cmdline.h"
#include "process-util.h"
#include "raw-clone.h"
@@ -73,6 +75,7 @@
#include "stdio-util.h"
#include "strv.h"
#include "switch-root.h"
+#include "sysctl-util.h"
#include "terminal-util.h"
#include "umask-util.h"
#include "user-util.h"
@@ -95,11 +98,10 @@ static int arg_crash_chvt = -1;
static bool arg_crash_shell = false;
static bool arg_crash_reboot = false;
static char *arg_confirm_spawn = NULL;
-static ShowStatus arg_show_status = _SHOW_STATUS_UNSET;
+static ShowStatus arg_show_status = _SHOW_STATUS_INVALID;
static bool arg_switched_root = false;
-static bool arg_no_pager = false;
+static PagerFlags arg_pager_flags = 0;
static bool arg_service_watchdogs = true;
-static char ***arg_join_controllers = NULL;
static ExecOutput arg_default_std_output = EXEC_OUTPUT_JOURNAL;
static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
static usec_t arg_default_restart_usec = DEFAULT_RESTART_USEC;
@@ -109,6 +111,7 @@ static usec_t arg_default_start_limit_interval = DEFAULT_START_LIMIT_INTERVAL;
static unsigned arg_default_start_limit_burst = DEFAULT_START_LIMIT_BURST;
static usec_t arg_runtime_watchdog = 0;
static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
+static char *arg_early_core_pattern = NULL;
static char *arg_watchdog_device = NULL;
static char **arg_default_environment = NULL;
static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {};
@@ -118,7 +121,7 @@ static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
static Set* arg_syscall_archs = NULL;
static FILE* arg_serialization = NULL;
-static bool arg_default_cpu_accounting = false;
+static int arg_default_cpu_accounting = -1;
static bool arg_default_io_accounting = false;
static bool arg_default_ip_accounting = false;
static bool arg_default_blockio_accounting = false;
@@ -128,7 +131,14 @@ static uint64_t arg_default_tasks_max = UINT64_MAX;
static sd_id128_t arg_machine_id = {};
static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
-_noreturn_ static void freeze_or_reboot(void) {
+_noreturn_ static void freeze_or_exit_or_reboot(void) {
+
+ /* If we are running in a contianer, let's prefer exiting, after all we can propagate an exit code to the
+ * container manager, and thus inform it that something went wrong. */
+ if (detect_container() > 0) {
+ log_emergency("Exiting PID 1...");
+ exit(EXIT_EXCEPTION);
+ }
if (arg_crash_reboot) {
log_notice("Rebooting in 10s...");
@@ -183,7 +193,7 @@ _noreturn_ static void crash(int sig) {
(void) kill(pid, sig); /* raise() would kill the parent */
assert_not_reached("We shouldn't be here...");
- _exit(EXIT_FAILURE);
+ _exit(EXIT_EXCEPTION);
} else {
siginfo_t status;
int r;
@@ -226,17 +236,18 @@ _noreturn_ static void crash(int sig) {
else if (pid == 0) {
(void) setsid();
(void) make_console_stdio();
+ (void) rlimit_nofile_safe();
(void) execle("/bin/sh", "/bin/sh", NULL, environ);
log_emergency_errno(errno, "execle() failed: %m");
- _exit(EXIT_FAILURE);
+ _exit(EXIT_EXCEPTION);
} else {
log_info("Spawned crash shell as PID "PID_FMT".", pid);
(void) wait_for_terminate(pid, NULL);
}
}
- freeze_or_reboot();
+ freeze_or_exit_or_reboot();
}
static void install_crash_handler(void) {
@@ -347,22 +358,35 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = value ? parse_boolean(value) : true;
if (r < 0)
- log_warning("Failed to parse dump core switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse dump core switch %s, ignoring: %m", value);
else
arg_dump_core = r;
+ } else if (proc_cmdline_key_streq(key, "systemd.early_core_pattern")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (path_is_absolute(value))
+ (void) parse_path_argument_and_warn(value, false, &arg_early_core_pattern);
+ else
+ log_warning("Specified core pattern '%s' is not an absolute path, ignoring.", value);
+
} else if (proc_cmdline_key_streq(key, "systemd.crash_chvt")) {
if (!value)
arg_crash_chvt = 0; /* turn on */
- else if (parse_crash_chvt(value) < 0)
- log_warning("Failed to parse crash chvt switch %s. Ignoring.", value);
+ else {
+ r = parse_crash_chvt(value);
+ if (r < 0)
+ log_warning_errno(r, "Failed to parse crash chvt switch %s, ignoring: %m", value);
+ }
} else if (proc_cmdline_key_streq(key, "systemd.crash_shell")) {
r = value ? parse_boolean(value) : true;
if (r < 0)
- log_warning("Failed to parse crash shell switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse crash shell switch %s, ignoring: %m", value);
else
arg_crash_shell = r;
@@ -370,7 +394,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = value ? parse_boolean(value) : true;
if (r < 0)
- log_warning("Failed to parse crash reboot switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
else
arg_crash_reboot = r;
@@ -379,17 +403,15 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = parse_confirm_spawn(value, &s);
if (r < 0)
- log_warning_errno(r, "Failed to parse confirm_spawn switch %s. Ignoring.", value);
- else {
- free(arg_confirm_spawn);
- arg_confirm_spawn = s;
- }
+ log_warning_errno(r, "Failed to parse confirm_spawn switch %s, ignoring: %m", value);
+ else
+ free_and_replace(arg_confirm_spawn, s);
} else if (proc_cmdline_key_streq(key, "systemd.service_watchdogs")) {
r = value ? parse_boolean(value) : true;
if (r < 0)
- log_warning("Failed to parse service watchdog switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse service watchdog switch %s, ignoring: %m", value);
else
arg_service_watchdogs = r;
@@ -398,7 +420,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
if (value) {
r = parse_show_status(value, &arg_show_status);
if (r < 0)
- log_warning("Failed to parse show status switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse show status switch %s, ignoring: %m", value);
} else
arg_show_status = SHOW_STATUS_YES;
@@ -409,7 +431,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = exec_output_from_string(value);
if (r < 0)
- log_warning("Failed to parse default standard output switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse default standard output switch %s, ignoring: %m", value);
else
arg_default_std_output = r;
@@ -420,7 +442,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = exec_output_from_string(value);
if (r < 0)
- log_warning("Failed to parse default standard error switch %s. Ignoring.", value);
+ log_warning_errno(r, "Failed to parse default standard error switch %s, ignoring: %m", value);
else
arg_default_std_error = r;
@@ -447,7 +469,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = set_machine_id(value);
if (r < 0)
- log_warning("MachineID '%s' is not valid. Ignoring.", value);
+ log_warning_errno(r, "MachineID '%s' is not valid, ignoring: %m", value);
} else if (proc_cmdline_key_streq(key, "systemd.default_timeout_start_sec")) {
@@ -456,7 +478,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
r = parse_sec(value, &arg_default_timeout_start_usec);
if (r < 0)
- log_warning_errno(r, "Failed to parse default start timeout: %s, ignoring.", value);
+ log_warning_errno(r, "Failed to parse default start timeout '%s', ignoring: %m", value);
if (arg_default_timeout_start_usec <= 0)
arg_default_timeout_start_usec = USEC_INFINITY;
@@ -466,11 +488,11 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
if (proc_cmdline_value_missing(key, value))
return 0;
- parse_path_argument_and_warn(value, false, &arg_watchdog_device);
+ (void) parse_path_argument_and_warn(value, false, &arg_watchdog_device);
} else if (streq(key, "quiet") && !value) {
- if (arg_show_status == _SHOW_STATUS_UNSET)
+ if (arg_show_status == _SHOW_STATUS_INVALID)
arg_show_status = SHOW_STATUS_AUTO;
} else if (streq(key, "debug") && !value) {
@@ -604,8 +626,8 @@ static int config_parse_output_restricted(
return 0;
}
- if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE)) {
- log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file: are not supported as defaults, ignoring: %s", rvalue);
+ if (IN_SET(t, EXEC_OUTPUT_SOCKET, EXEC_OUTPUT_NAMED_FD, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND)) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Standard output types socket, fd:, file:, append: are not supported as defaults, ignoring: %s", rvalue);
return 0;
}
@@ -654,7 +676,7 @@ static int parse_config_file(void) {
{ "Manager", "CrashReboot", config_parse_bool, 0, &arg_crash_reboot },
{ "Manager", "ShowStatus", config_parse_show_status, 0, &arg_show_status },
{ "Manager", "CPUAffinity", config_parse_cpu_affinity2, 0, NULL },
- { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
+ { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
{ "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
{ "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_shutdown_watchdog },
{ "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
@@ -690,7 +712,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
{ "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
{ "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
- { "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
+ { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
{ "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
{ "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
{ "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
@@ -739,7 +761,14 @@ static void set_manager_defaults(Manager *m) {
m->default_restart_usec = arg_default_restart_usec;
m->default_start_limit_interval = arg_default_start_limit_interval;
m->default_start_limit_burst = arg_default_start_limit_burst;
- m->default_cpu_accounting = arg_default_cpu_accounting;
+
+ /* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
+ * controller to be enabled, so the default is to enable it unless we got told otherwise. */
+ if (arg_default_cpu_accounting >= 0)
+ m->default_cpu_accounting = arg_default_cpu_accounting;
+ else
+ m->default_cpu_accounting = cpu_accounting_is_cheap();
+
m->default_io_accounting = arg_default_io_accounting;
m->default_ip_accounting = arg_default_ip_accounting;
m->default_blockio_accounting = arg_default_blockio_accounting;
@@ -747,8 +776,10 @@ static void set_manager_defaults(Manager *m) {
m->default_tasks_accounting = arg_default_tasks_accounting;
m->default_tasks_max = arg_default_tasks_max;
- manager_set_default_rlimits(m, arg_default_rlimit);
- manager_environment_add(m, NULL, arg_default_environment);
+ (void) manager_set_default_rlimits(m, arg_default_rlimit);
+
+ (void) manager_default_environment(m);
+ (void) manager_transient_environment_add(m, arg_default_environment);
}
static void set_manager_settings(Manager *m) {
@@ -838,19 +869,15 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_LOG_LEVEL:
r = log_set_max_level_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse log level %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log level \"%s\": %m", optarg);
break;
case ARG_LOG_TARGET:
r = log_set_target_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse log target %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log target \"%s\": %m", optarg);
break;
@@ -858,10 +885,9 @@ static int parse_argv(int argc, char *argv[]) {
if (optarg) {
r = log_show_color_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse log color setting %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log color setting \"%s\": %m",
+ optarg);
} else
log_show_color(true);
@@ -870,10 +896,9 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_LOG_LOCATION:
if (optarg) {
r = log_show_location_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse log location setting %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse log location setting \"%s\": %m",
+ optarg);
} else
log_show_location(true);
@@ -881,26 +906,24 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_DEFAULT_STD_OUTPUT:
r = exec_output_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse default standard output setting %s.", optarg);
- return r;
- } else
- arg_default_std_output = r;
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse default standard output setting \"%s\": %m",
+ optarg);
+ arg_default_std_output = r;
break;
case ARG_DEFAULT_STD_ERROR:
r = exec_output_from_string(optarg);
- if (r < 0) {
- log_error("Failed to parse default standard error output setting %s.", optarg);
- return r;
- } else
- arg_default_std_error = r;
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse default standard error output setting \"%s\": %m",
+ optarg);
+ arg_default_std_error = r;
break;
case ARG_UNIT:
r = free_and_strdup(&arg_default_unit, optarg);
if (r < 0)
- return log_error_errno(r, "Failed to set default unit %s: %m", optarg);
+ return log_error_errno(r, "Failed to set default unit \"%s\": %m", optarg);
break;
@@ -917,7 +940,7 @@ static int parse_argv(int argc, char *argv[]) {
break;
case ARG_NO_PAGER:
- arg_no_pager = true;
+ arg_pager_flags |= PAGER_DISABLE;
break;
case ARG_VERSION:
@@ -938,7 +961,8 @@ static int parse_argv(int argc, char *argv[]) {
else {
r = parse_boolean(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse dump core boolean: %s", optarg);
+ return log_error_errno(r, "Failed to parse dump core boolean: \"%s\": %m",
+ optarg);
arg_dump_core = r;
}
break;
@@ -946,7 +970,8 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_CRASH_CHVT:
r = parse_crash_chvt(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse crash virtual terminal index: %s", optarg);
+ return log_error_errno(r, "Failed to parse crash virtual terminal index: \"%s\": %m",
+ optarg);
break;
case ARG_CRASH_SHELL:
@@ -955,7 +980,8 @@ static int parse_argv(int argc, char *argv[]) {
else {
r = parse_boolean(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
+ return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
+ optarg);
arg_crash_shell = r;
}
break;
@@ -966,7 +992,8 @@ static int parse_argv(int argc, char *argv[]) {
else {
r = parse_boolean(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse crash shell boolean: %s", optarg);
+ return log_error_errno(r, "Failed to parse crash shell boolean: \"%s\": %m",
+ optarg);
arg_crash_reboot = r;
}
break;
@@ -976,23 +1003,24 @@ static int parse_argv(int argc, char *argv[]) {
r = parse_confirm_spawn(optarg, &arg_confirm_spawn);
if (r < 0)
- return log_error_errno(r, "Failed to parse confirm spawn option: %m");
+ return log_error_errno(r, "Failed to parse confirm spawn option: \"%s\": %m",
+ optarg);
break;
case ARG_SERVICE_WATCHDOGS:
r = parse_boolean(optarg);
if (r < 0)
- return log_error_errno(r, "Failed to parse service watchdogs boolean: %s", optarg);
+ return log_error_errno(r, "Failed to parse service watchdogs boolean: \"%s\": %m",
+ optarg);
arg_service_watchdogs = r;
break;
case ARG_SHOW_STATUS:
if (optarg) {
r = parse_show_status(optarg, &arg_show_status);
- if (r < 0) {
- log_error("Failed to parse show status boolean %s.", optarg);
- return r;
- }
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse show status boolean: \"%s\": %m",
+ optarg);
} else
arg_show_status = SHOW_STATUS_YES;
break;
@@ -1002,16 +1030,18 @@ static int parse_argv(int argc, char *argv[]) {
FILE *f;
r = safe_atoi(optarg, &fd);
- if (r < 0 || fd < 0) {
- log_error("Failed to parse deserialize option %s.", optarg);
- return -EINVAL;
- }
+ if (r < 0)
+ log_error_errno(r, "Failed to parse deserialize option \"%s\": %m", optarg);
+ if (fd < 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Invalid deserialize fd: %d",
+ fd);
(void) fd_cloexec(fd, true);
f = fdopen(fd, "r");
if (!f)
- return log_error_errno(errno, "Failed to open serialization fd: %m");
+ return log_error_errno(errno, "Failed to open serialization fd %d: %m", fd);
safe_fclose(arg_serialization);
arg_serialization = f;
@@ -1026,7 +1056,7 @@ static int parse_argv(int argc, char *argv[]) {
case ARG_MACHINE_ID:
r = set_machine_id(optarg);
if (r < 0)
- return log_error_errno(r, "MachineID '%s' is not valid.", optarg);
+ return log_error_errno(r, "MachineID '%s' is not valid: %m", optarg);
break;
case 'h':
@@ -1059,14 +1089,20 @@ static int parse_argv(int argc, char *argv[]) {
/* Hmm, when we aren't run as init system
* let's complain about excess arguments */
- log_error("Excess arguments.");
- return -EINVAL;
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "Excess arguments.");
}
return 0;
}
static int help(void) {
+ _cleanup_free_ char *link = NULL;
+ int r;
+
+ r = terminal_urlify_man("systemd", "1", &link);
+ if (r < 0)
+ return log_oom();
printf("%s [OPTIONS...]\n\n"
"Starts up and maintains the system or user services.\n\n"
@@ -1090,20 +1126,28 @@ static int help(void) {
" --log-color[=BOOL] Highlight important log messages\n"
" --log-location[=BOOL] Include code location in log messages\n"
" --default-standard-output= Set default standard output for services\n"
- " --default-standard-error= Set default standard error output for services\n",
- program_invocation_short_name);
+ " --default-standard-error= Set default standard error output for services\n"
+ "\nSee the %s for details.\n"
+ , program_invocation_short_name
+ , link
+ );
return 0;
}
-static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching_root) {
+static int prepare_reexecute(
+ Manager *m,
+ FILE **ret_f,
+ FDSet **ret_fds,
+ bool switching_root) {
+
_cleanup_fdset_free_ FDSet *fds = NULL;
_cleanup_fclose_ FILE *f = NULL;
int r;
assert(m);
- assert(_f);
- assert(_fds);
+ assert(ret_f);
+ assert(ret_fds);
r = manager_open_serialization(m, &f);
if (r < 0)
@@ -1119,7 +1163,7 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching
r = manager_serialize(m, f, fds, switching_root);
if (r < 0)
- return log_error_errno(r, "Failed to serialize state: %m");
+ return r;
if (fseeko(f, 0, SEEK_SET) == (off_t) -1)
return log_error_errno(errno, "Failed to rewind serialization fd: %m");
@@ -1132,24 +1176,108 @@ static int prepare_reexecute(Manager *m, FILE **_f, FDSet **_fds, bool switching
if (r < 0)
return log_error_errno(r, "Failed to disable O_CLOEXEC for serialization fds: %m");
- *_f = TAKE_PTR(f);
- *_fds = TAKE_PTR(fds);
+ *ret_f = TAKE_PTR(f);
+ *ret_fds = TAKE_PTR(fds);
return 0;
}
+static void bump_file_max_and_nr_open(void) {
+
+ /* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large numbers of file
+ * descriptors are no longer a performance problem and their memory is properly tracked by memcg, thus counting
+ * them and limiting them in another two layers of limits is unnecessary and just complicates things. This
+ * function hence turns off 2 of the 4 levels of limits on file descriptors, and makes RLIMIT_NOLIMIT (soft +
+ * hard) the only ones that really matter. */
+
+#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
+ _cleanup_free_ char *t = NULL;
+ int r;
+#endif
+
+#if BUMP_PROC_SYS_FS_FILE_MAX
+ /* I so wanted to use STRINGIFY(ULONG_MAX) here, but alas we can't as glibc/gcc define that as
+ * "(0x7fffffffffffffffL * 2UL + 1UL)". Seriously. 😢 */
+ if (asprintf(&t, "%lu\n", ULONG_MAX) < 0) {
+ log_oom();
+ return;
+ }
+
+ r = sysctl_write("fs/file-max", t);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
+#endif
+
+#if BUMP_PROC_SYS_FS_FILE_MAX && BUMP_PROC_SYS_FS_NR_OPEN
+ t = mfree(t);
+#endif
+
+#if BUMP_PROC_SYS_FS_NR_OPEN
+ int v = INT_MAX;
+
+ /* Arg! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know what they
+ * are. The expression by which the maximum is determined is dependent on the architecture, and is something we
+ * don't really want to copy to userspace, as it is dependent on implementation details of the kernel. Since
+ * the kernel doesn't expose the maximum value to us, we can only try and hope. Hence, let's start with
+ * INT_MAX, and then keep halving the value until we find one that works. Ugly? Yes, absolutely, but kernel
+ * APIs are kernel APIs, so what do can we do... 🤯 */
+
+ for (;;) {
+ int k;
+
+ v &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
+ if (v < 1024) {
+ log_warning("Can't bump fs.nr_open, value too small.");
+ break;
+ }
+
+ k = read_nr_open();
+ if (k < 0) {
+ log_error_errno(k, "Failed to read fs.nr_open: %m");
+ break;
+ }
+ if (k >= v) { /* Already larger */
+ log_debug("Skipping bump, value is already larger.");
+ break;
+ }
+
+ if (asprintf(&t, "%i\n", v) < 0) {
+ log_oom();
+ return;
+ }
+
+ r = sysctl_write("fs/nr_open", t);
+ t = mfree(t);
+ if (r == -EINVAL) {
+ log_debug("Couldn't write fs.nr_open as %i, halving it.", v);
+ v /= 2;
+ continue;
+ }
+ if (r < 0) {
+ log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
+ break;
+ }
+
+ log_debug("Successfully bumped fs.nr_open to %i", v);
+ break;
+ }
+#endif
+}
+
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
int r, nr;
assert(saved_rlimit);
- /* Save the original RLIMIT_NOFILE so that we can reset it
- * later when transitioning from the initrd to the main
+ /* Save the original RLIMIT_NOFILE so that we can reset it later when transitioning from the initrd to the main
* systemd or suchlike. */
if (getrlimit(RLIMIT_NOFILE, saved_rlimit) < 0)
return log_warning_errno(errno, "Reading RLIMIT_NOFILE failed, ignoring: %m");
- /* Make sure forked processes get the default kernel setting */
+ /* Get the underlying absolute limit the kernel enforces */
+ nr = read_nr_open();
+
+ /* Make sure forked processes get limits based on the original kernel setting */
if (!arg_default_rlimit[RLIMIT_NOFILE]) {
struct rlimit *rl;
@@ -1157,11 +1285,25 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
if (!rl)
return log_oom();
+ /* Bump the hard limit for system services to a substantially higher value. The default hard limit
+ * current kernels set is pretty low (4K), mostly for historical reasons. According to kernel
+ * developers, the fd handling in recent kernels has been optimized substantially enough, so that we
+ * can bump the limit now, without paying too high a price in memory or performance. Note however that
+ * we only bump the hard limit, not the soft limit. That's because select() works the way it works, and
+ * chokes on fds >= 1024. If we'd bump the soft limit globally, it might accidentally happen to
+ * unexpecting programs that they get fds higher than what they can process using select(). By only
+ * bumping the hard limit but leaving the low limit as it is we avoid this pitfall: programs that are
+ * written by folks aware of the select() problem in mind (and thus use poll()/epoll instead of
+ * select(), the way everybody should) can explicitly opt into high fds by bumping their soft limit
+ * beyond 1024, to the hard limit we pass. */
+ if (arg_system)
+ rl->rlim_max = MIN((rlim_t) nr, MAX(rl->rlim_max, (rlim_t) HIGH_RLIMIT_NOFILE));
+
arg_default_rlimit[RLIMIT_NOFILE] = rl;
}
- /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows */
- nr = read_nr_open();
+ /* Bump up the resource limit for ourselves substantially, all the way to the maximum the kernel allows, for
+ * both hard and soft. */
r = setrlimit_closest(RLIMIT_NOFILE, &RLIMIT_MAKE_CONST(nr));
if (r < 0)
return log_warning_errno(r, "Setting RLIMIT_NOFILE failed, ignoring: %m");
@@ -1173,16 +1315,15 @@ static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
int r;
assert(saved_rlimit);
- assert(getuid() == 0);
- /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
- * should normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's
- * bump the value high enough for the root user. */
+ /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even if we have CAP_IPC_LOCK which should
+ * normally disable such checks. We need them to implement IPAccessAllow= and IPAccessDeny=, hence let's bump
+ * the value high enough for our user. */
if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
- r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(1024ULL*1024ULL*16ULL));
+ r = setrlimit_closest(RLIMIT_MEMLOCK, &RLIMIT_MAKE_CONST(HIGH_RLIMIT_MEMLOCK));
if (r < 0)
return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
@@ -1219,7 +1360,7 @@ static int status_welcome(void) {
_cleanup_free_ char *pretty_name = NULL, *ansi_color = NULL;
int r;
- if (arg_show_status <= 0)
+ if (IN_SET(arg_show_status, SHOW_STATUS_NO, SHOW_STATUS_AUTO))
return 0;
r = parse_os_release(NULL,
@@ -1231,12 +1372,12 @@ static int status_welcome(void) {
"Failed to read os-release file, ignoring: %m");
if (log_get_show_color())
- return status_printf(NULL, false, false,
+ return status_printf(NULL, 0,
"\nWelcome to \x1B[%sm%s\x1B[0m!\n",
isempty(ansi_color) ? "1" : ansi_color,
isempty(pretty_name) ? "Linux" : pretty_name);
else
- return status_printf(NULL, false, false,
+ return status_printf(NULL, 0,
"\nWelcome to %s!\n",
isempty(pretty_name) ? "Linux" : pretty_name);
}
@@ -1268,7 +1409,7 @@ static int bump_unix_max_dgram_qlen(void) {
r = read_one_line_file("/proc/sys/net/unix/max_dgram_qlen", &qlen);
if (r < 0)
- return log_warning_errno(r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
+ return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r, "Failed to read AF_UNIX datagram queue length, ignoring: %m");
r = safe_atolu(qlen, &v);
if (r < 0)
@@ -1277,7 +1418,7 @@ static int bump_unix_max_dgram_qlen(void) {
if (v >= DEFAULT_UNIX_MAX_DGRAM_QLEN)
return 0;
- r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", 0, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
+ r = write_string_filef("/proc/sys/net/unix/max_dgram_qlen", WRITE_STRING_FILE_DISABLE_BUFFER, "%lu", DEFAULT_UNIX_MAX_DGRAM_QLEN);
if (r < 0)
return log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to bump AF_UNIX datagram queue length, ignoring: %m");
@@ -1474,13 +1615,29 @@ static void initialize_coredump(bool skip_setup) {
if (setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(RLIM_INFINITY)) < 0)
log_warning_errno(errno, "Failed to set RLIMIT_CORE: %m");
- /* But at the same time, turn off the core_pattern logic by default, so that no coredumps are stored
- * until the systemd-coredump tool is enabled via sysctl. */
+ /* But at the same time, turn off the core_pattern logic by default, so that no
+ * coredumps are stored until the systemd-coredump tool is enabled via
+ * sysctl. However it can be changed via the kernel command line later so core
+ * dumps can still be generated during early startup and in initramfs. */
if (!skip_setup)
disable_coredumps();
#endif
}
+static void initialize_core_pattern(bool skip_setup) {
+ int r;
+
+ if (skip_setup || !arg_early_core_pattern)
+ return;
+
+ if (getpid_cached() != 1)
+ return;
+
+ r = write_string_file("/proc/sys/kernel/core_pattern", arg_early_core_pattern, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0)
+ log_warning_errno(r, "Failed to write '%s' to /proc/sys/kernel/core_pattern, ignoring: %m", arg_early_core_pattern);
+}
+
static void do_reexecute(
int argc,
char *argv[],
@@ -1577,6 +1734,7 @@ static void do_reexecute(
/* Reenable any blocked signals, especially important if we switch from initial ramdisk to init=... */
(void) reset_all_signal_handlers();
(void) reset_signal_mask();
+ (void) rlimit_nofile_safe();
if (switch_root_init) {
args[0] = switch_root_init;
@@ -1633,7 +1791,7 @@ static int invoke_main_loop(
return log_emergency_errno(r, "Failed to run main loop: %m");
}
- switch (m->exit_code) {
+ switch ((ManagerObjective) r) {
case MANAGER_RELOAD: {
LogTarget saved_log_target;
@@ -1660,7 +1818,8 @@ static int invoke_main_loop(
r = manager_reload(m);
if (r < 0)
- log_warning_errno(r, "Failed to reload, ignoring: %m");
+ /* Reloading failed before the point of no return. Let's continue running as if nothing happened. */
+ m->objective = MANAGER_OK;
break;
}
@@ -1724,19 +1883,19 @@ static int invoke_main_loop(
case MANAGER_POWEROFF:
case MANAGER_HALT:
case MANAGER_KEXEC: {
- static const char * const table[_MANAGER_EXIT_CODE_MAX] = {
- [MANAGER_EXIT] = "exit",
- [MANAGER_REBOOT] = "reboot",
+ static const char * const table[_MANAGER_OBJECTIVE_MAX] = {
+ [MANAGER_EXIT] = "exit",
+ [MANAGER_REBOOT] = "reboot",
[MANAGER_POWEROFF] = "poweroff",
- [MANAGER_HALT] = "halt",
- [MANAGER_KEXEC] = "kexec"
+ [MANAGER_HALT] = "halt",
+ [MANAGER_KEXEC] = "kexec",
};
log_notice("Shutting down.");
*ret_reexecute = false;
*ret_retval = m->return_value;
- assert_se(*ret_shutdown_verb = table[m->exit_code]);
+ assert_se(*ret_shutdown_verb = table[m->objective]);
*ret_fds = NULL;
*ret_switch_root_dir = *ret_switch_root_init = NULL;
@@ -1744,7 +1903,7 @@ static int invoke_main_loop(
}
default:
- assert_not_reached("Unknown exit code.");
+ assert_not_reached("Unknown or unexpected manager objective.");
}
}
}
@@ -1816,7 +1975,7 @@ static int initialize_runtime(
install_crash_handler();
if (!skip_setup) {
- r = mount_cgroup_controllers(arg_join_controllers);
+ r = mount_cgroup_controllers();
if (r < 0) {
*ret_error_message = "Failed to mount cgroup hierarchies";
return r;
@@ -1827,6 +1986,7 @@ static int initialize_runtime(
machine_id_setup(NULL, arg_machine_id, NULL);
loopback_setup();
bump_unix_max_dgram_qlen();
+ bump_file_max_and_nr_open();
test_usr();
write_container_id();
}
@@ -1879,11 +2039,9 @@ static int initialize_runtime(
if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
log_warning_errno(errno, "Failed to make us a subreaper: %m");
- if (arg_system) {
- /* Bump up RLIMIT_NOFILE for systemd itself */
- (void) bump_rlimit_nofile(saved_rlimit_nofile);
- (void) bump_rlimit_memlock(saved_rlimit_memlock);
- }
+ /* Bump up RLIMIT_NOFILE for systemd itself */
+ (void) bump_rlimit_nofile(saved_rlimit_nofile);
+ (void) bump_rlimit_memlock(saved_rlimit_memlock);
return 0;
}
@@ -1942,7 +2100,6 @@ static void free_arguments(void) {
arg_default_unit = mfree(arg_default_unit);
arg_confirm_spawn = mfree(arg_confirm_spawn);
- arg_join_controllers = strv_free_free(arg_join_controllers);
arg_default_environment = strv_free(arg_default_environment);
arg_syscall_archs = set_free(arg_syscall_archs);
}
@@ -1985,7 +2142,7 @@ static int load_configuration(int argc, char **argv, const char **ret_error_mess
}
/* Initialize the show status setting if it hasn't been set explicitly yet */
- if (arg_show_status == _SHOW_STATUS_UNSET)
+ if (arg_show_status == _SHOW_STATUS_INVALID)
arg_show_status = SHOW_STATUS_YES;
return 0;
@@ -1994,50 +2151,43 @@ static int load_configuration(int argc, char **argv, const char **ret_error_mess
static int safety_checks(void) {
if (getpid_cached() == 1 &&
- arg_action != ACTION_RUN) {
- log_error("Unsupported execution mode while PID 1.");
- return -EPERM;
- }
+ arg_action != ACTION_RUN)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Unsupported execution mode while PID 1.");
if (getpid_cached() == 1 &&
- !arg_system) {
- log_error("Can't run --user mode as PID 1.");
- return -EPERM;
- }
+ !arg_system)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Can't run --user mode as PID 1.");
if (arg_action == ACTION_RUN &&
arg_system &&
- getpid_cached() != 1) {
- log_error("Can't run system mode unless PID 1.");
- return -EPERM;
- }
+ getpid_cached() != 1)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Can't run system mode unless PID 1.");
if (arg_action == ACTION_TEST &&
- geteuid() == 0) {
- log_error("Don't run test mode as root.");
- return -EPERM;
- }
+ geteuid() == 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EPERM),
+ "Don't run test mode as root.");
if (!arg_system &&
arg_action == ACTION_RUN &&
- sd_booted() <= 0) {
- log_error("Trying to run as user instance, but the system has not been booted with systemd.");
- return -EOPNOTSUPP;
- }
+ sd_booted() <= 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Trying to run as user instance, but the system has not been booted with systemd.");
if (!arg_system &&
arg_action == ACTION_RUN &&
- !getenv("XDG_RUNTIME_DIR")) {
- log_error("Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
- return -EUNATCH;
- }
+ !getenv("XDG_RUNTIME_DIR"))
+ return log_error_errno(SYNTHETIC_ERRNO(EUNATCH),
+ "Trying to run as user instance, but $XDG_RUNTIME_DIR is not set.");
if (arg_system &&
arg_action == ACTION_RUN &&
- running_in_chroot() > 0) {
- log_error("Cannot be run in a chroot() environment.");
- return -EOPNOTSUPP;
- }
+ running_in_chroot() > 0)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Cannot be run in a chroot() environment.");
return 0;
}
@@ -2309,13 +2459,13 @@ int main(int argc, char *argv[]) {
goto finish;
if (IN_SET(arg_action, ACTION_TEST, ACTION_HELP, ACTION_DUMP_CONFIGURATION_ITEMS, ACTION_DUMP_BUS_PROPERTIES))
- (void) pager_open(arg_no_pager, false);
+ (void) pager_open(arg_pager_flags);
if (arg_action != ACTION_RUN)
skip_setup = true;
if (arg_action == ACTION_HELP) {
- retval = help();
+ retval = help() < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
goto finish;
} else if (arg_action == ACTION_VERSION) {
retval = version();
@@ -2337,6 +2487,9 @@ int main(int argc, char *argv[]) {
if (arg_action == ACTION_RUN) {
+ /* A core pattern might have been specified via the cmdline. */
+ initialize_core_pattern(skip_setup);
+
/* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
log_close();
@@ -2373,8 +2526,8 @@ int main(int argc, char *argv[]) {
m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp;
m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp;
m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp;
- m->timestamps[MANAGER_TIMESTAMP_SECURITY_START] = security_start_timestamp;
- m->timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH] = security_finish_timestamp;
+ m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp;
+ m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp;
set_manager_defaults(m);
set_manager_settings(m);
@@ -2387,7 +2540,6 @@ int main(int argc, char *argv[]) {
r = manager_startup(m, arg_serialization, fds);
if (r < 0) {
- log_error_errno(r, "Failed to fully start up daemon: %m");
error_message = "Failed to start up manager";
goto finish;
}
@@ -2473,8 +2625,8 @@ finish:
if (error_message)
manager_status_printf(NULL, STATUS_TYPE_EMERGENCY,
ANSI_HIGHLIGHT_RED "!!!!!!" ANSI_NORMAL,
- "%s, freezing.", error_message);
- freeze_or_reboot();
+ "%s.", error_message);
+ freeze_or_exit_or_reboot();
}
return retval;