summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2012-05-24 04:00:56 +0200
committerLennart Poettering <lennart@poettering.net>2012-05-24 04:00:56 +0200
commitec8927ca5940e809f0b72f530582c76f1db4f065 (patch)
treeb230d2458088a82b879afc39a2752d5fc674974e
parente056b01d8acea7fc06d52ef91d227d744faf5259 (diff)
downloadsystemd-ec8927ca5940e809f0b72f530582c76f1db4f065.tar.gz
main: add configuration option to alter capability bounding set for PID 1
This also ensures that caps dropped from the bounding set are also dropped from the inheritable set, to be extra-secure. Usually that should change very little though as the inheritable set is empty for all our uses anyway.
-rw-r--r--TODO18
-rw-r--r--man/systemd.conf.xml45
-rw-r--r--man/systemd.exec.xml16
-rw-r--r--src/core/execute.c64
-rw-r--r--src/core/load-fragment-gperf.gperf.m42
-rw-r--r--src/core/load-fragment.c13
-rw-r--r--src/core/load-fragment.h2
-rw-r--r--src/core/main.c11
-rw-r--r--src/core/system.conf17
-rw-r--r--src/nspawn/nspawn.c70
-rw-r--r--src/shared/capability.c89
-rw-r--r--src/shared/capability.h5
12 files changed, 211 insertions, 141 deletions
diff --git a/TODO b/TODO
index 6553cb0bc4..6d5eb73262 100644
--- a/TODO
+++ b/TODO
@@ -23,7 +23,9 @@ Bugfixes:
Features:
-* For Type=idle don't get confused by ExecStartPre= getting the effect of the idle hup but delaying jobs going away
+* make use of /sys/power/wake_lock in inhibitors
+
+* introduce "systemctl help" which invokes man for the man pages listed in Documentation=
* drop accountsservice's StandardOutput=syslog and Type=dbus fields
@@ -140,8 +142,6 @@ Features:
* There's currently no way to cancel fsck (used to be possible via C-c or c on the console)
-* keep an eye on https://bugzilla.gnome.org/show_bug.cgi?id=670100
-
* journal: sanely deal with entries which are larger than the individual file size, but where the components would fit
* add command to systemctl to plot dependency graph as tree (see rhbz 795365)
@@ -152,9 +152,6 @@ Features:
* default unix qlen is too small (10). bump sysctl? add sockopt?
-* support units generated by a generator and placed in /run/systemd/system/; the directory is
- currently ignored because it is empty before the generatores are executed
-
* Possibly, detect whether SysV init scripts can do reloading by looking for "echo Usage:" lines
* figure out whether we should leave dbus around during shutdown
@@ -213,8 +210,6 @@ Features:
* when an instanced service exits, remove its parent cgroup too if possible.
-* as Tom Gundersen pointed out there's a always a dep loop if people use crypto file systems with random keys
-
* automatically escape unit names passed on the service (i.e. think "systemctl start serial-getty.service@serial/by-path/jshdfjsdfhkjh" being automatically escaped as necessary.
* if we can not get user quota for tmpfs, mount a separate tmpfs instance
@@ -361,10 +356,6 @@ External:
- allow disabling of UID passing for AUTH EXTERNAL
- always pass cred data along each message
-* systemd --user
- PR_SET_CHILD_REAPER patch: https://lkml.org/lkml/2011/7/28/426
- (patch in linux-next, on the way to the next kernel)
-
* fix alsa mixer restore to not print error when no config is stored
* gnome-shell python script/glxinfo/is-accelerated must die
@@ -379,9 +370,6 @@ External:
we are in 11-minutes-mode. When we trust the system time to NTP we also want
the RTC to sync up.
-* patch kernel for cpu feature modalias for autoloading aes/kvm/...
- (patches in linux-next, on the way to the next kernel)
-
* kernel: add device_type = "fb", "fbcon" to class "graphics"
Regularly:
diff --git a/man/systemd.conf.xml b/man/systemd.conf.xml
index 7dfaa18c18..2659f9ab7b 100644
--- a/man/systemd.conf.xml
+++ b/man/systemd.conf.xml
@@ -184,6 +184,38 @@
</varlistentry>
<varlistentry>
+ <term><varname>CapabilityBoundingSet=</varname></term>
+
+ <listitem><para>Controls which
+ capabilities to include in the
+ capability bounding set for PID 1 and
+ its children. See
+ <citerefentry><refentrytitle>capabilities</refentrytitle><manvolnum>7</manvolnum></citerefentry>
+ for details. Takes a whitespace
+ separated list of capability names as
+ read by
+ <citerefentry><refentrytitle>cap_from_name</refentrytitle><manvolnum>3</manvolnum></citerefentry>.
+ Capabilities listed will be included
+ in the bounding set, all others are
+ removed. If the list of capabilities
+ is prefixed with ~ all but the listed
+ capabilities will be included, the
+ effect of the assignment
+ inverted. Note that this option also
+ affects the respective capabilities in
+ the effective, permitted and
+ inheritable capability sets. The
+ capability bounding set may also be
+ individually configured for units
+ using the
+ <varname>CapabilityBoundingSet=</varname>
+ directive for units, but note that
+ capabilities dropped for PID 1 cannot
+ be regained in individual units, they
+ are lost for good.</para></listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><varname>DefaultLimitCPU=</varname></term>
<term><varname>DefaultLimitFSIZE=</varname></term>
<term><varname>DefaultLimitDATA=</varname></term>
@@ -200,14 +232,21 @@
<term><varname>DefaultLimitNICE=</varname></term>
<term><varname>DefaultLimitRTPRIO=</varname></term>
<term><varname>DefaultLimitRTTIME=</varname></term>
+
<listitem><para>These settings control
- various default resource limits for units. See
+ various default resource limits for
+ units. See
<citerefentry><refentrytitle>setrlimit</refentrytitle><manvolnum>2</manvolnum></citerefentry>
for details. Use the string
<varname>infinity</varname> to
configure no limit on a specific
- resource. They can be overriden in units files
- using corresponding LimitXXXX parameter.</para></listitem>
+ resource. These settings may be
+ overridden in individual units
+ using the corresponding LimitXXX=
+ directives. Note that these resource
+ limits are only defaults for units,
+ they are not applied to PID 1
+ itself.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 219733be37..0dc2ed48b5 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -678,17 +678,17 @@
is prefixed with ~ all but the listed
capabilities will be included, the
effect of the assignment
- inverted. Note that this option does
- not actually set or unset any
- capabilities in the effective,
- permitted or inherited capability
- sets. That's what
- <varname>Capabilities=</varname> is
- for. If this option is not used the
+ inverted. Note that this option also
+ affects the respective capabilities in
+ the effective, permitted and
+ inheritable capability sets, on top of
+ what <varname>Capabilities=</varname>
+ does. If this option is not used the
capability bounding set is not
modified on process execution, hence
no limits on the capabilities of the
- process are enforced.</para></listitem>
+ process are
+ enforced.</para></listitem>
</varlistentry>
<varlistentry>
diff --git a/src/core/execute.c b/src/core/execute.c
index bb841b7fcf..9c2006ebc7 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -870,68 +870,6 @@ fail:
}
#endif
-static int do_capability_bounding_set_drop(uint64_t drop) {
- unsigned long i;
- cap_t old_cap = NULL, new_cap = NULL;
- cap_flag_value_t fv;
- int r;
-
- /* If we are run as PID 1 we will lack CAP_SETPCAP by default
- * in the effective set (yes, the kernel drops that when
- * executing init!), so get it back temporarily so that we can
- * call PR_CAPBSET_DROP. */
-
- old_cap = cap_get_proc();
- if (!old_cap)
- return -errno;
-
- if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
- r = -errno;
- goto finish;
- }
-
- if (fv != CAP_SET) {
- static const cap_value_t v = CAP_SETPCAP;
-
- new_cap = cap_dup(old_cap);
- if (!new_cap) {
- r = -errno;
- goto finish;
- }
-
- if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
- r = -errno;
- goto finish;
- }
-
- if (cap_set_proc(new_cap) < 0) {
- r = -errno;
- goto finish;
- }
- }
-
- for (i = 0; i <= cap_last_cap(); i++)
- if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
- if (prctl(PR_CAPBSET_DROP, i) < 0) {
- r = -errno;
- goto finish;
- }
- }
-
- r = 0;
-
-finish:
- if (new_cap)
- cap_free(new_cap);
-
- if (old_cap) {
- cap_set_proc(old_cap);
- cap_free(old_cap);
- }
-
- return r;
-}
-
static void rename_process_from_path(const char *path) {
char process_name[11];
const char *p;
@@ -1398,7 +1336,7 @@ int exec_spawn(ExecCommand *command,
}
if (context->capability_bounding_set_drop) {
- err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
+ err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
if (err < 0) {
r = EXIT_CAPABILITIES;
goto fail_child;
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index ea502bd288..6f2a0d63ef 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -46,7 +46,7 @@ $1.SyslogLevel, config_parse_level, 0,
$1.SyslogLevelPrefix, config_parse_bool, 0, offsetof($1, exec_context.syslog_level_prefix)
$1.Capabilities, config_parse_exec_capabilities, 0, offsetof($1, exec_context)
$1.SecureBits, config_parse_exec_secure_bits, 0, offsetof($1, exec_context)
-$1.CapabilityBoundingSet, config_parse_exec_bounding_set, 0, offsetof($1, exec_context)
+$1.CapabilityBoundingSet, config_parse_bounding_set, 0, offsetof($1, exec_context.capability_bounding_set_drop)
$1.TimerSlackNSec, config_parse_exec_timer_slack_nsec, 0, offsetof($1, exec_context)
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index d2267722dd..ff6e13e599 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -931,7 +931,7 @@ int config_parse_exec_secure_bits(
return 0;
}
-int config_parse_exec_bounding_set(
+int config_parse_bounding_set(
const char *filename,
unsigned line,
const char *section,
@@ -941,7 +941,7 @@ int config_parse_exec_bounding_set(
void *data,
void *userdata) {
- ExecContext *c = data;
+ uint64_t *capability_bounding_set_drop = data;
char *w;
size_t l;
char *state;
@@ -968,7 +968,8 @@ int config_parse_exec_bounding_set(
int r;
cap_value_t cap;
- if (!(t = strndup(w, l)))
+ t = strndup(w, l);
+ if (!t)
return -ENOMEM;
r = cap_from_name(t, &cap);
@@ -983,9 +984,9 @@ int config_parse_exec_bounding_set(
}
if (invert)
- c->capability_bounding_set_drop |= sum;
+ *capability_bounding_set_drop |= sum;
else
- c->capability_bounding_set_drop |= ~sum;
+ *capability_bounding_set_drop |= ~sum;
return 0;
}
@@ -2447,7 +2448,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_level, "LEVEL" },
{ config_parse_exec_capabilities, "CAPABILITIES" },
{ config_parse_exec_secure_bits, "SECUREBITS" },
- { config_parse_exec_bounding_set, "BOUNDINGSET" },
+ { config_parse_bounding_set, "BOUNDINGSET" },
{ config_parse_exec_timer_slack_nsec, "TIMERSLACK" },
{ config_parse_limit, "LIMIT" },
{ config_parse_unit_cgroup, "CGROUP [...]" },
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 3b2ed096b5..6b382494aa 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -56,7 +56,7 @@ int config_parse_exec_cpu_sched_prio(const char *filename, unsigned line, const
int config_parse_exec_cpu_affinity(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_capabilities(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_secure_bits(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
-int config_parse_exec_bounding_set(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_bounding_set(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_exec_timer_slack_nsec(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_limit(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_unit_cgroup(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
diff --git a/src/core/main.c b/src/core/main.c
index 4d9a2d453e..9248c388a4 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -50,6 +50,7 @@
#include "watchdog.h"
#include "path-util.h"
#include "switch-root.h"
+#include "capability.h"
#include "mount-setup.h"
#include "loopback-setup.h"
@@ -88,6 +89,7 @@ static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
static usec_t arg_runtime_watchdog = 0;
static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
+static uint64_t arg_capability_bounding_set_drop = 0;
static FILE* serialization = NULL;
@@ -678,6 +680,7 @@ static int parse_config_file(void) {
{ "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers },
{ "Manager", "RuntimeWatchdogSec", config_parse_usec, 0, &arg_runtime_watchdog },
{ "Manager", "ShutdownWatchdogSec", config_parse_usec, 0, &arg_shutdown_watchdog },
+ { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
{ "Manager", "DefaultLimitCPU", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_CPU]},
{ "Manager", "DefaultLimitFSIZE", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_FSIZE]},
{ "Manager", "DefaultLimitDATA", config_parse_limit, 0, &arg_default_rlimit[RLIMIT_DATA]},
@@ -1484,6 +1487,14 @@ int main(int argc, char *argv[]) {
if (arg_running_as == MANAGER_SYSTEM && arg_runtime_watchdog > 0)
watchdog_set_timeout(&arg_runtime_watchdog);
+ if (arg_capability_bounding_set_drop) {
+ r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
+ if (r < 0) {
+ log_error("Failed to drop capability bounding set: %s", strerror(-r));
+ goto finish;
+ }
+ }
+
r = manager_new(arg_running_as, &m);
if (r < 0) {
log_error("Failed to allocate manager object: %s", strerror(-r));
diff --git a/src/core/system.conf b/src/core/system.conf
index 2b14d3e31e..7b9171b803 100644
--- a/src/core/system.conf
+++ b/src/core/system.conf
@@ -24,3 +24,20 @@
#JoinControllers=cpu,cpuacct
#RuntimeWatchdogSec=0
#ShutdownWatchdogSec=10min
+#CapabilityBoundingSet=
+#DefaultLimitCPU=
+#DefaultLimitFSIZE=
+#DefaultLimitDATA=
+#DefaultLimitSTACK=
+#DefaultLimitCORE=
+#DefaultLimitRSS=
+#DefaultLimitNOFILE=
+#DefaultLimitAS=
+#DefaultLimitNPROC=
+#DefaultLimitMEMLOCK=
+#DefaultLimitLOCKS=
+#DefaultLimitSIGPENDING=
+#DefaultLimitMSGQUEUE=
+#DefaultLimitNICE=
+#DefaultLimitRTPRIO=
+#DefaultLimitRTTIME=
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 31e8b015df..8a5eb34c7f 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -544,49 +544,31 @@ static int setup_hostname(void) {
}
static int drop_capabilities(void) {
- static const unsigned long retain[] = {
- CAP_CHOWN,
- CAP_DAC_OVERRIDE,
- CAP_DAC_READ_SEARCH,
- CAP_FOWNER,
- CAP_FSETID,
- CAP_IPC_OWNER,
- CAP_KILL,
- CAP_LEASE,
- CAP_LINUX_IMMUTABLE,
- CAP_NET_BIND_SERVICE,
- CAP_NET_BROADCAST,
- CAP_NET_RAW,
- CAP_SETGID,
- CAP_SETFCAP,
- CAP_SETPCAP,
- CAP_SETUID,
- CAP_SYS_ADMIN,
- CAP_SYS_CHROOT,
- CAP_SYS_NICE,
- CAP_SYS_PTRACE,
- CAP_SYS_TTY_CONFIG
- };
-
- unsigned long l;
-
- for (l = 0; l <= cap_last_cap(); l++) {
- unsigned i;
-
- for (i = 0; i < ELEMENTSOF(retain); i++)
- if (retain[i] == l)
- break;
-
- if (i < ELEMENTSOF(retain))
- continue;
- if (prctl(PR_CAPBSET_DROP, l) < 0) {
- log_error("PR_CAPBSET_DROP failed: %m");
- return -errno;
- }
- }
-
- return 0;
+ static const uint64_t retain =
+ (1ULL << CAP_CHOWN) |
+ (1ULL << CAP_DAC_OVERRIDE) |
+ (1ULL << CAP_DAC_READ_SEARCH) |
+ (1ULL << CAP_FOWNER) |
+ (1ULL << CAP_FSETID) |
+ (1ULL << CAP_IPC_OWNER) |
+ (1ULL << CAP_KILL) |
+ (1ULL << CAP_LEASE) |
+ (1ULL << CAP_LINUX_IMMUTABLE) |
+ (1ULL << CAP_NET_BIND_SERVICE) |
+ (1ULL << CAP_NET_BROADCAST) |
+ (1ULL << CAP_NET_RAW) |
+ (1ULL << CAP_SETGID) |
+ (1ULL << CAP_SETFCAP) |
+ (1ULL << CAP_SETPCAP) |
+ (1ULL << CAP_SETUID) |
+ (1ULL << CAP_SYS_ADMIN) |
+ (1ULL << CAP_SYS_CHROOT) |
+ (1ULL << CAP_SYS_NICE) |
+ (1ULL << CAP_SYS_PTRACE) |
+ (1ULL << CAP_SYS_TTY_CONFIG);
+
+ return capability_bounding_set_drop(~retain, false);
}
static int is_os_tree(const char *path) {
@@ -1041,8 +1023,10 @@ int main(int argc, char *argv[]) {
loopback_setup();
- if (drop_capabilities() < 0)
+ if (drop_capabilities() < 0) {
+ log_error("drop_capabilities() failed: %m");
goto child_fail;
+ }
if (arg_user) {
diff --git a/src/shared/capability.c b/src/shared/capability.c
index b2cd9ed75e..08b7a209da 100644
--- a/src/shared/capability.c
+++ b/src/shared/capability.c
@@ -40,7 +40,8 @@ int have_effective_cap(int value) {
cap_flag_value_t fv;
int r;
- if (!(cap = cap_get_proc()))
+ cap = cap_get_proc();
+ if (!cap)
return -errno;
if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
@@ -84,3 +85,89 @@ unsigned long cap_last_cap(void) {
return p;
}
+
+/* Drops every capability whose bit is set in 'drop' from the capability
+ * bounding set (via PR_CAPBSET_DROP) and clears it from the inheritable set
+ * of the calling process. If 'right_now' is true the same capabilities are
+ * additionally cleared from the permitted and effective sets, so that the
+ * caller itself loses them and not only what it exec()s later.
+ *
+ * Bit N of 'drop' corresponds to capability number N.
+ *
+ * Returns 0 on success, or a negative errno-style value on failure. */
+int capability_bounding_set_drop(uint64_t drop, bool right_now) {
+        unsigned long i;
+        cap_t after_cap = NULL, temp_cap = NULL;
+        cap_flag_value_t fv;
+        int r;
+
+        /* If we are run as PID 1 we will lack CAP_SETPCAP by default
+         * in the effective set (yes, the kernel drops that when
+         * executing init!), so get it back temporarily so that we can
+         * call PR_CAPBSET_DROP. */
+
+        after_cap = cap_get_proc();
+        if (!after_cap)
+                return -errno;
+
+        if (cap_get_flag(after_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
+                /* Read errno before cap_free() gets a chance to
+                 * overwrite it. */
+                r = -errno;
+                cap_free(after_cap);
+                return r;
+        }
+
+        if (fv != CAP_SET) {
+                static const cap_value_t v = CAP_SETPCAP;
+
+                /* Work on a copy, so that after_cap keeps describing
+                 * the state we want to end up with. */
+                temp_cap = cap_dup(after_cap);
+                if (!temp_cap) {
+                        r = -errno;
+                        goto finish;
+                }
+
+                if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
+                        r = -errno;
+                        goto finish;
+                }
+
+                if (cap_set_proc(temp_cap) < 0) {
+                        r = -errno;
+                        goto finish;
+                }
+        }
+
+        for (i = 0; i <= cap_last_cap(); i++) {
+
+                if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
+                        cap_value_t v;
+
+                        /* Drop it from the bounding set */
+                        if (prctl(PR_CAPBSET_DROP, i) < 0) {
+                                r = -errno;
+                                goto finish;
+                        }
+                        v = i;
+
+                        /* Also drop it from the inheritable set, so
+                         * that anything we exec() loses the
+                         * capability for good. */
+                        if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
+                                r = -errno;
+                                goto finish;
+                        }
+
+                        /* If we shall apply this right now drop it
+                         * also from our own capability sets. */
+                        if (right_now) {
+                                if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
+                                    cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
+                                        r = -errno;
+                                        goto finish;
+                                }
+                        }
+                }
+        }
+
+        r = 0;
+
+finish:
+        if (temp_cap)
+                cap_free(temp_cap);
+
+        if (after_cap) {
+                /* This is the call that actually installs the reduced
+                 * inheritable/permitted/effective sets (and undoes the
+                 * temporary CAP_SETPCAP raise), so a failure here must
+                 * not be reported as success. An earlier error, if
+                 * any, takes precedence. */
+                if (cap_set_proc(after_cap) < 0 && r >= 0)
+                        r = -errno;
+
+                cap_free(after_cap);
+        }
+
+        return r;
+}
diff --git a/src/shared/capability.h b/src/shared/capability.h
index 9f9c49cf5b..0cc5dd08aa 100644
--- a/src/shared/capability.h
+++ b/src/shared/capability.h
@@ -22,6 +22,11 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
+#include <inttypes.h>
+#include <stdbool.h>
+
unsigned long cap_last_cap(void);
int have_effective_cap(int value);
+int capability_bounding_set_drop(uint64_t caps, bool right_now);
+
#endif